Total coverage: 317479 (17%) of 1898595
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ioctl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/mount.h>
#include <linux/fscrypt.h>
#include <linux/fileattr.h>

#include "internal.h"

#include <asm/ioctls.h>

/* So
that the fiemap access checks can't overflow on 32 bit machines. */ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on * @cmd: ioctl command to execute * @arg: command-specific argument for ioctl * * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise * returns -ENOTTY. * * Returns 0 on success, -errno on error. */ static int vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; if (!filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; out: return error; } static int ioctl_fibmap(struct file *filp, int __user *p) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int error, ur_block; sector_t block; if (!capable(CAP_SYS_RAWIO)) return -EPERM; error = get_user(ur_block, p); if (error) return error; if (ur_block < 0) return -EINVAL; block = ur_block; error = bmap(inode, &block); if (block > INT_MAX) { error = -ERANGE; pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n", current->comm, task_pid_nr(current), sb->s_id, filp); } if (error) ur_block = 0; else ur_block = block; if (put_user(ur_block, p)) error = -EFAULT; return error; } /** * fiemap_fill_next_extent - Fiemap helper function * @fieinfo: Fiemap context passed into ->fiemap * @logical: Extent logical start offset, in bytes * @phys: Extent physical start offset, in bytes * @len: Extent length, in bytes * @flags: FIEMAP_EXTENT flags that describe this extent * * Called from file system ->fiemap callback. Will populate extent * info as passed in via arguments and copy to user memory. On * success, extent count on fieinfo is incremented. * * Returns 0 on success, -errno on error, 1 if this was the last * extent that will fit in user array. */ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { struct fiemap_extent extent; struct fiemap_extent __user *dest = fieinfo->fi_extents_start; /* only count the extents */ if (fieinfo->fi_extents_max == 0) { fieinfo->fi_extents_mapped++; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) return 1; #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) if (flags & SET_UNKNOWN_FLAGS) flags |= FIEMAP_EXTENT_UNKNOWN; if (flags & SET_NO_UNMOUNTED_IO_FLAGS) flags |= FIEMAP_EXTENT_ENCODED; if (flags & SET_NOT_ALIGNED_FLAGS) flags |= FIEMAP_EXTENT_NOT_ALIGNED; memset(&extent, 0, sizeof(extent)); extent.fe_logical = logical; extent.fe_physical = phys; extent.fe_length = len; extent.fe_flags = flags; dest += fieinfo->fi_extents_mapped; if (copy_to_user(dest, &extent, sizeof(extent))) return -EFAULT; fieinfo->fi_extents_mapped++; if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) return 1; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } EXPORT_SYMBOL(fiemap_fill_next_extent); /** * fiemap_prep - check validity of requested flags for fiemap * @inode: Inode to operate on * @fieinfo: Fiemap context passed into ->fiemap * @start: Start of the mapped range * @len: Length of the mapped range, can be truncated by this function. 
* @supported_flags: Set of fiemap flags that the file system understands * * This function must be called from each ->fiemap instance to validate the * fiemap request against the file system parameters. * * Returns 0 on success, or a negative error on failure. */ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 *len, u32 supported_flags) { u64 maxbytes = inode->i_sb->s_maxbytes; u32 incompat_flags; int ret = 0; if (*len == 0) return -EINVAL; if (start >= maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ if (*len > maxbytes || (maxbytes - *len) < start) *len = maxbytes - start; supported_flags |= FIEMAP_FLAG_SYNC; supported_flags &= FIEMAP_FLAGS_COMPAT; incompat_flags = fieinfo->fi_flags & ~supported_flags; if (incompat_flags) { fieinfo->fi_flags = incompat_flags; return -EBADR; } if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) ret = filemap_write_and_wait(inode->i_mapping); return ret; } EXPORT_SYMBOL(fiemap_prep); static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) { struct fiemap fiemap; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); int error; if (!inode->i_op->fiemap) return -EOPNOTSUPP; if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; } static int ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { CLASS(fd, src_file)(srcfd); loff_t cloned; int ret; if (fd_empty(src_file)) return -EBADF; cloned = vfs_clone_file_range(fd_file(src_file), off, dst_file, destoff, olen, 0); if (cloned < 0) ret = cloned; else if (olen && cloned != olen) ret = -EINVAL; else ret = 0; return ret; } static int ioctl_file_clone_range(struct file *file, struct file_clone_range __user *argp) { struct file_clone_range args; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; return ioctl_file_clone(file, args.src_fd, args.src_offset, args.src_length, args.dest_offset); } /* * This provides compatibility with legacy XFS pre-allocation ioctls * which predate the fallocate syscall. * * Only the l_start, l_len and l_whence fields of the 'struct space_resv' * are used here, rest are ignored. 
*/ static int ioctl_preallocate(struct file *filp, int mode, void __user *argp) { struct inode *inode = file_inode(filp); struct space_resv sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += filp->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } /* on ia32 l_start is on a 32-bit boundary */ #if defined CONFIG_COMPAT && defined(CONFIG_X86_64) /* just account for different alignment */ static int compat_ioctl_preallocate(struct file *file, int mode, struct space_resv_32 __user *argp) { struct inode *inode = file_inode(file); struct space_resv_32 sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += file->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } #endif static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p) { switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); case FS_IOC_RESVSP: case FS_IOC_RESVSP64: return ioctl_preallocate(filp, 0, p); case FS_IOC_UNRESVSP: case FS_IOC_UNRESVSP64: return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p); case FS_IOC_ZERO_RANGE: return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p); } return -ENOIOCTLCMD; } static int ioctl_fionbio(struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = O_NONBLOCK; #ifdef __sparc__ /* SunOS compatibility item. */ if (O_NONBLOCK != O_NDELAY) flag |= O_NDELAY; #endif spin_lock(&filp->f_lock); if (on) filp->f_flags |= flag; else filp->f_flags &= ~flag; spin_unlock(&filp->f_lock); return error; } static int ioctl_fioasync(unsigned int fd, struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = on ? FASYNC : 0; /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } return error < 0 ? error : 0; } static int ioctl_fsfreeze(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* If filesystem doesn't support freeze feature, return. 
*/ if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL) return -EOPNOTSUPP; /* Freeze */ if (sb->s_op->freeze_super) return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); return freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); } static int ioctl_fsthaw(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Thaw */ if (sb->s_op->thaw_super) return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL); return thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL); } static int ioctl_file_dedupe_range(struct file *file, struct file_dedupe_range __user *argp) { struct file_dedupe_range *same = NULL; int ret; unsigned long size; u16 count; if (get_user(count, &argp->dest_count)) { ret = -EFAULT; goto out; } size = struct_size(same, info, count); if (size > PAGE_SIZE) { ret = -ENOMEM; goto out; } same = memdup_user(argp, size); if (IS_ERR(same)) { ret = PTR_ERR(same); same = NULL; goto out; } same->dest_count = count; ret = vfs_dedupe_file_range(file, same); if (ret) goto out; ret = copy_to_user(argp, same, size); if (ret) ret = -EFAULT; out: kfree(same); return ret; } static int ioctl_getfsuuid(struct file *file, void __user *argp) { struct super_block *sb = file_inode(file)->i_sb; struct fsuuid2 u = { .len = sb->s_uuid_len, }; if (!sb->s_uuid_len) return -ENOTTY; memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len); return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0; } static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp) { struct super_block *sb = file_inode(file)->i_sb; if (!strlen(sb->s_sysfs_name)) return -ENOTTY; struct fs_sysfs_path u = {}; u.len = scnprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name); return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0; } /* * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. * * When you add any new common ioctls to the switches above and below, * please ensure they have compatible arguments in compat mode. * * The LSM mailing list should also be notified of any command additions or * changes, as specific LSMs may be affected. */ static int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct inode *inode = file_inode(filp); switch (cmd) { case FIOCLEX: set_close_on_exec(fd, 1); return 0; case FIONCLEX: set_close_on_exec(fd, 0); return 0; case FIONBIO: return ioctl_fionbio(filp, argp); case FIOASYNC: return ioctl_fioasync(fd, filp, argp); case FIOQSIZE: if (S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) || S_ISLNK(inode->i_mode)) { loff_t res = inode_get_bytes(inode); return copy_to_user(argp, &res, sizeof(res)) ? 
-EFAULT : 0; } return -ENOTTY; case FIFREEZE: return ioctl_fsfreeze(filp); case FITHAW: return ioctl_fsthaw(filp); case FS_IOC_FIEMAP: return ioctl_fiemap(filp, argp); case FIGETBSZ: /* anon_bdev filesystems may not have a block size */ if (!inode->i_sb->s_blocksize) return -EINVAL; return put_user(inode->i_sb->s_blocksize, (int __user *)argp); case FICLONE: return ioctl_file_clone(filp, arg, 0, 0, 0); case FICLONERANGE: return ioctl_file_clone_range(filp, argp); case FIDEDUPERANGE: return ioctl_file_dedupe_range(filp, argp); case FIONREAD: if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode)) return vfs_ioctl(filp, cmd, arg); return put_user(i_size_read(inode) - filp->f_pos, (int __user *)argp); case FS_IOC_GETFLAGS: return ioctl_getflags(filp, argp); case FS_IOC_SETFLAGS: return ioctl_setflags(filp, argp); case FS_IOC_FSGETXATTR: return ioctl_fsgetxattr(filp, argp); case FS_IOC_FSSETXATTR: return ioctl_fssetxattr(filp, argp); case FS_IOC_GETFSUUID: return ioctl_getfsuuid(filp, argp); case FS_IOC_GETFSSYSFSPATH: return ioctl_get_fs_sysfs_path(filp, argp); default: if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) return file_ioctl(filp, cmd, argp); break; } return -ENOIOCTLCMD; } SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { CLASS(fd, f)(fd); int error; if (fd_empty(f)) return -EBADF; error = security_file_ioctl(fd_file(f), cmd, arg); if (error) return error; error = do_vfs_ioctl(fd_file(f), fd, cmd, arg); if (error == -ENOIOCTLCMD) error = vfs_ioctl(fd_file(f), cmd, arg); return error; } #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation * @file: The file to operate on. * @cmd: The ioctl command number. * @arg: The argument to the ioctl. * * This is not normally called as a function, but instead set in struct * file_operations as * * .compat_ioctl = compat_ptr_ioctl, * * On most architectures, the compat_ptr_ioctl() just passes all arguments * to the corresponding ->ioctl handler. The exception is arch/s390, where * compat_ptr() clears the top bit of a 32-bit pointer value, so user space * pointers to the second 2GB alias the first 2GB, as is the case for * native 32-bit s390 user space. * * The compat_ptr_ioctl() function must therefore be used only with ioctl * functions that either ignore the argument or pass a pointer to a * compatible data type. * * If any ioctl command handled by fops->unlocked_ioctl passes a plain * integer instead of a pointer, or any of the passed data types * is incompatible between 32-bit and 64-bit architectures, a proper * handler is required instead of compat_ptr_ioctl. 
*/ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { if (!file->f_op->unlocked_ioctl) return -ENOIOCTLCMD; return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } EXPORT_SYMBOL(compat_ptr_ioctl); COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { CLASS(fd, f)(fd); int error; if (fd_empty(f)) return -EBADF; error = security_file_ioctl_compat(fd_file(f), cmd, arg); if (error) return error; switch (cmd) { /* FICLONE takes an int argument, so don't use compat_ptr() */ case FICLONE: error = ioctl_file_clone(fd_file(f), arg, 0, 0, 0); break; #if defined(CONFIG_X86_64) /* these get messy on amd64 due to alignment differences */ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: error = compat_ioctl_preallocate(fd_file(f), 0, compat_ptr(arg)); break; case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: error = compat_ioctl_preallocate(fd_file(f), FALLOC_FL_PUNCH_HOLE, compat_ptr(arg)); break; case FS_IOC_ZERO_RANGE_32: error = compat_ioctl_preallocate(fd_file(f), FALLOC_FL_ZERO_RANGE, compat_ptr(arg)); break; #endif /* * These access 32-bit values anyway so no further handling is * necessary. */ case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: cmd = (cmd == FS_IOC32_GETFLAGS) ? FS_IOC_GETFLAGS : FS_IOC_SETFLAGS; fallthrough; /* * everything else in do_vfs_ioctl() takes either a compatible * pointer argument or no argument -- call it with a modified * argument. */ default: error = do_vfs_ioctl(fd_file(f), fd, cmd, (unsigned long)compat_ptr(arg)); if (error != -ENOIOCTLCMD) break; if (fd_file(f)->f_op->compat_ioctl) error = fd_file(f)->f_op->compat_ioctl(fd_file(f), cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; break; } return error; } #endif
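That final #endif closes fs/ioctl.c. A minimal userspace sketch of exercising the FS_IOC_FIEMAP path above, assuming a regular file on a filesystem that implements ->fiemap; the extent count (32) and the fallback file name are arbitrary illustration:

/* Illustrative userspace sketch, not part of the kernel source. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY);
	struct fiemap *fm;
	unsigned int i, n = 32;	/* room for 32 extents, arbitrary */

	if (fd < 0)
		return 1;
	fm = calloc(1, sizeof(*fm) + n * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush dirty data first */
	fm->fm_extent_count = n;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("extent %u: logical %llu physical %llu len %llu flags %#x\n",
		       i,
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length,
		       fm->fm_extents[i].fe_flags);
	free(fm);
	close(fd);
	return 0;
}

Passing fm_extent_count == 0 instead makes the kernel only count extents, the fi_extents_max == 0 branch in fiemap_fill_next_extent() above.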
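A companion sketch for the FICLONERANGE path handled by ioctl_file_clone_range() above, assuming both files live on the same reflink-capable filesystem (e.g. XFS or Btrfs); the file names are placeholders:

/* Illustrative userspace sketch, not part of the kernel source. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	int src = open("src.img", O_RDONLY);	/* placeholder names */
	int dst = open("dst.img", O_WRONLY | O_CREAT, 0644);
	struct file_clone_range args = {
		.src_fd = src,
		.src_offset = 0,
		.src_length = 0,	/* 0 clones through to EOF */
		.dest_offset = 0,
	};

	if (src < 0 || dst < 0)
		return 1;
	/* on success dst shares src's blocks; a short clone yields -EINVAL
	 * in ioctl_file_clone() above when a nonzero length was requested */
	if (ioctl(dst, FICLONERANGE, &args) < 0) {
		perror("FICLONERANGE");
		return 1;
	}
	close(src);
	close(dst);
	return 0;
}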
// SPDX-License-Identifier: GPL-2.0+
/*
 *  LEGO USB Tower driver
 *
 *  Copyright (C) 2003 David Glance <davidgsf@sourceforge.net>
 *               2001-2004 Juergen Stuber <starblue@users.sourceforge.net>
 *
 *  derived from USB Skeleton driver - 0.5
 *  Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
 *
 *  History:
 *
 *  2001-10-13 - 0.1 js
 *    - first version
 *  2001-11-03 - 0.2 js
 *    - simplified buffering, one-shot URBs for writing
 *  2001-11-10 - 0.3 js
 *    - removed IOCTL (setting power/mode is more complicated, postponed)
 *  2001-11-28 - 0.4 js
 *    - added vendor commands for mode of operation and power level in open
 *  2001-12-04 - 0.5 js
 *    - set IR mode by default (by oversight 0.4 set VLL mode)
 *  2002-01-11 - 0.5? pcchan
 *    - make read buffer reusable and work around bytes_to_write issue between
 *      uhci and legusbtower
 *  2002-09-23 - 0.52 david (david@csse.uwa.edu.au)
 *    - imported into lejos project
 *    - changed wake_up to wake_up_interruptible
 *    - changed to use lego0 rather than tower0
 *    - changed dbg() to use __func__ rather than deprecated __FUNCTION__
 *  2003-01-12 - 0.53 david (david@csse.uwa.edu.au)
 *    - changed read and write to write everything or
 *      timeout (from a patch by Chris Riesen and Brett Thaeler driver)
 *    - added ioctl functionality to set timeouts
 *  2003-07-18 - 0.54 davidgsf (david@csse.uwa.edu.au)
 *    - initial import into LegoUSB project
 *    - merge of existing LegoUSB.c driver
 *  2003-07-18 - 0.56 davidgsf (david@csse.uwa.edu.au)
 *    - port to 2.6 style driver
 *  2004-02-29 - 0.6 Juergen Stuber <starblue@users.sourceforge.net>
 *    - fix locking
 *    - unlink read URBs which are no longer needed
 *    - allow increased buffer size, eliminates need for timeout on write
 *    - have read URB running continuously
 *    - added poll
 *    - forbid seeking
 *    - added nonblocking I/O
 *    - changed back __FUNCTION__ to __func__
 *    - read and log tower firmware version
 *    - reset tower on probe, avoids failure of first write
 *  2004-03-09 - 0.7 Juergen Stuber <starblue@users.sourceforge.net>
 *    - timeout read now only after inactivity, shorten default accordingly
 *  2004-03-11 - 0.8 Juergen Stuber <starblue@users.sourceforge.net>
 *    - log major, minor instead of possibly confusing device filename
 *    - whitespace cleanup
 *  2004-03-12 - 0.9 Juergen Stuber <starblue@users.sourceforge.net>
 *    - normalize whitespace in debug messages
 *    - take care about endianness in control message responses
 *  2004-03-13 - 0.91 Juergen Stuber <starblue@users.sourceforge.net>
 *    - make default intervals longer to accommodate current EHCI driver
 *  2004-03-19 - 0.92 Juergen Stuber <starblue@users.sourceforge.net>
 *    - replaced atomic_t by memory barriers
 *  2004-04-21 - 0.93 Juergen Stuber <starblue@users.sourceforge.net>
 *    - wait for completion of write urb in release (needed for remotecontrol)
 *    - corrected poll for write direction (missing negation)
 *  2004-04-22 - 0.94 Juergen Stuber <starblue@users.sourceforge.net>
 *    - make device locking interruptible
 *  2004-04-30 - 0.95 Juergen Stuber <starblue@users.sourceforge.net>
 *    - check for valid udev on resubmitting and unlinking urbs
 *  2004-08-03 - 0.96 Juergen Stuber <starblue@users.sourceforge.net>
 *    - move reset into open to clean out spurious data
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
#include <linux/poll.h>

#define DRIVER_AUTHOR "Juergen Stuber <starblue@sourceforge.net>"
#define DRIVER_DESC "LEGO USB Tower Driver"

/* The defaults are chosen to work with the latest versions of leJOS and NQC.
*/ /* Some legacy software likes to receive packets in one piece. * In this case read_buffer_size should exceed the maximal packet length * (417 for datalog uploads), and packet_timeout should be set. */ static int read_buffer_size = 480; module_param(read_buffer_size, int, 0); MODULE_PARM_DESC(read_buffer_size, "Read buffer size"); /* Some legacy software likes to send packets in one piece. * In this case write_buffer_size should exceed the maximal packet length * (417 for firmware and program downloads). * A problem with long writes is that the following read may time out * if the software is not prepared to wait long enough. */ static int write_buffer_size = 480; module_param(write_buffer_size, int, 0); MODULE_PARM_DESC(write_buffer_size, "Write buffer size"); /* Some legacy software expects reads to contain whole LASM packets. * To achieve this, characters which arrive before a packet timeout * occurs will be returned in a single read operation. * A problem with long reads is that the software may time out * if it is not prepared to wait long enough. * The packet timeout should be greater than the time between the * reception of subsequent characters, which should arrive about * every 5ms for the standard 2400 baud. * Set it to 0 to disable. */ static int packet_timeout = 50; module_param(packet_timeout, int, 0); MODULE_PARM_DESC(packet_timeout, "Packet timeout in ms"); /* Some legacy software expects blocking reads to time out. * Timeout occurs after the specified time of read and write inactivity. * Set it to 0 to disable. */ static int read_timeout = 200; module_param(read_timeout, int, 0); MODULE_PARM_DESC(read_timeout, "Read timeout in ms"); /* As of kernel version 2.6.4 ehci-hcd uses an * "only one interrupt transfer per frame" shortcut * to simplify the scheduling of periodic transfers. * This conflicts with our standard 1ms intervals for in and out URBs. * We use default intervals of 2ms for in and 8ms for out transfers, * which is fast enough for 2400 baud and allows a small additional load. * Increase the interval to allow more devices that do interrupt transfers, * or set to 0 to use the standard interval from the endpoint descriptors. 
*/ static int interrupt_in_interval = 2; module_param(interrupt_in_interval, int, 0); MODULE_PARM_DESC(interrupt_in_interval, "Interrupt in interval in ms"); static int interrupt_out_interval = 8; module_param(interrupt_out_interval, int, 0); MODULE_PARM_DESC(interrupt_out_interval, "Interrupt out interval in ms"); /* Define these values to match your device */ #define LEGO_USB_TOWER_VENDOR_ID 0x0694 #define LEGO_USB_TOWER_PRODUCT_ID 0x0001 /* Vendor requests */ #define LEGO_USB_TOWER_REQUEST_RESET 0x04 #define LEGO_USB_TOWER_REQUEST_GET_VERSION 0xFD struct tower_reset_reply { __le16 size; __u8 err_code; __u8 spare; }; struct tower_get_version_reply { __le16 size; __u8 err_code; __u8 spare; __u8 major; __u8 minor; __le16 build_no; }; /* table of devices that work with this driver */ static const struct usb_device_id tower_table[] = { { USB_DEVICE(LEGO_USB_TOWER_VENDOR_ID, LEGO_USB_TOWER_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, tower_table); #define LEGO_USB_TOWER_MINOR_BASE 160 /* Structure to hold all of our device specific stuff */ struct lego_usb_tower { struct mutex lock; /* locks this structure */ struct usb_device *udev; /* save off the usb device pointer */ unsigned char minor; /* the starting minor number for this device */ int open_count; /* number of times this port has been opened */ unsigned long disconnected:1; char *read_buffer; size_t read_buffer_length; /* this much came in */ size_t read_packet_length; /* this much will be returned on read */ spinlock_t read_buffer_lock; int packet_timeout_jiffies; unsigned long read_last_arrival; wait_queue_head_t read_wait; wait_queue_head_t write_wait; char *interrupt_in_buffer; struct usb_endpoint_descriptor *interrupt_in_endpoint; struct urb *interrupt_in_urb; int interrupt_in_interval; int interrupt_in_done; char *interrupt_out_buffer; struct usb_endpoint_descriptor *interrupt_out_endpoint; struct urb *interrupt_out_urb; int interrupt_out_interval; int interrupt_out_busy; }; /* local function prototypes */ static ssize_t tower_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos); static ssize_t tower_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos); static inline void tower_delete(struct lego_usb_tower *dev); static int tower_open(struct inode *inode, struct file *file); static int tower_release(struct inode *inode, struct file *file); static __poll_t tower_poll(struct file *file, poll_table *wait); static loff_t tower_llseek(struct file *file, loff_t off, int whence); static void tower_check_for_read_packet(struct lego_usb_tower *dev); static void tower_interrupt_in_callback(struct urb *urb); static void tower_interrupt_out_callback(struct urb *urb); static int tower_probe(struct usb_interface *interface, const struct usb_device_id *id); static void tower_disconnect(struct usb_interface *interface); /* file operations needed when we register this driver */ static const struct file_operations tower_fops = { .owner = THIS_MODULE, .read = tower_read, .write = tower_write, .open = tower_open, .release = tower_release, .poll = tower_poll, .llseek = tower_llseek, }; static char *legousbtower_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver tower_class = { .name = "legousbtower%d", .devnode = legousbtower_devnode, .fops = &tower_fops, 
.minor_base = LEGO_USB_TOWER_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver tower_driver = { .name = "legousbtower", .probe = tower_probe, .disconnect = tower_disconnect, .id_table = tower_table, }; /* * lego_usb_tower_debug_data */ static inline void lego_usb_tower_debug_data(struct device *dev, const char *function, int size, const unsigned char *data) { dev_dbg(dev, "%s - length = %d, data = %*ph\n", function, size, size, data); } /* * tower_delete */ static inline void tower_delete(struct lego_usb_tower *dev) { /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree(dev->read_buffer); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); usb_put_dev(dev->udev); kfree(dev); } /* * tower_open */ static int tower_open(struct inode *inode, struct file *file) { struct lego_usb_tower *dev = NULL; int subminor; int retval = 0; struct usb_interface *interface; struct tower_reset_reply reset_reply; int result; nonseekable_open(inode, file); subminor = iminor(inode); interface = usb_find_interface(&tower_driver, subminor); if (!interface) { pr_err("error, can't find device for minor %d\n", subminor); retval = -ENODEV; goto exit; } dev = usb_get_intfdata(interface); if (!dev) { retval = -ENODEV; goto exit; } /* lock this device */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* allow opening only once */ if (dev->open_count) { retval = -EBUSY; goto unlock_exit; } /* reset the tower */ result = usb_control_msg_recv(dev->udev, 0, LEGO_USB_TOWER_REQUEST_RESET, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &reset_reply, sizeof(reset_reply), 1000, GFP_KERNEL); if (result < 0) { dev_err(&dev->udev->dev, "LEGO USB Tower reset control request failed\n"); retval = result; goto unlock_exit; } /* initialize in direction */ dev->read_buffer_length = 0; dev->read_packet_length = 0; usb_fill_int_urb(dev->interrupt_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, usb_endpoint_maxp(dev->interrupt_in_endpoint), tower_interrupt_in_callback, dev, dev->interrupt_in_interval); dev->interrupt_in_done = 0; mb(); retval = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (retval) { dev_err(&dev->udev->dev, "Couldn't submit interrupt_in_urb %d\n", retval); goto unlock_exit; } /* save device in the file's private structure */ file->private_data = dev; dev->open_count = 1; unlock_exit: mutex_unlock(&dev->lock); exit: return retval; } /* * tower_release */ static int tower_release(struct inode *inode, struct file *file) { struct lego_usb_tower *dev; int retval = 0; dev = file->private_data; if (dev == NULL) { retval = -ENODEV; goto exit; } mutex_lock(&dev->lock); if (dev->disconnected) { /* the device was unplugged before the file was released */ /* unlock here as tower_delete frees dev */ mutex_unlock(&dev->lock); tower_delete(dev); goto exit; } /* wait until write transfer is finished */ if (dev->interrupt_out_busy) { wait_event_interruptible_timeout(dev->write_wait, !dev->interrupt_out_busy, 2 * HZ); } /* shutdown transfers */ usb_kill_urb(dev->interrupt_in_urb); usb_kill_urb(dev->interrupt_out_urb); dev->open_count = 0; mutex_unlock(&dev->lock); exit: return retval; } /* * tower_check_for_read_packet * * To get correct semantics for signals and non-blocking I/O * with packetizing we pretend not to see any data in the read buffer * until it has been there unchanged for 
at least * dev->packet_timeout_jiffies, or until the buffer is full. */ static void tower_check_for_read_packet(struct lego_usb_tower *dev) { spin_lock_irq(&dev->read_buffer_lock); if (!packet_timeout || time_after(jiffies, dev->read_last_arrival + dev->packet_timeout_jiffies) || dev->read_buffer_length == read_buffer_size) { dev->read_packet_length = dev->read_buffer_length; } dev->interrupt_in_done = 0; spin_unlock_irq(&dev->read_buffer_lock); } /* * tower_poll */ static __poll_t tower_poll(struct file *file, poll_table *wait) { struct lego_usb_tower *dev; __poll_t mask = 0; dev = file->private_data; if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); tower_check_for_read_packet(dev); if (dev->read_packet_length > 0) mask |= EPOLLIN | EPOLLRDNORM; if (!dev->interrupt_out_busy) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * tower_llseek */ static loff_t tower_llseek(struct file *file, loff_t off, int whence) { return -ESPIPE; /* unseekable */ } /* * tower_read */ static ssize_t tower_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct lego_usb_tower *dev; size_t bytes_to_read; int i; int retval = 0; unsigned long timeout = 0; dev = file->private_data; /* lock this object */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; goto unlock_exit; } /* verify that we actually have some data to read */ if (count == 0) { dev_dbg(&dev->udev->dev, "read request of 0 bytes\n"); goto unlock_exit; } if (read_timeout) timeout = jiffies + msecs_to_jiffies(read_timeout); /* wait for data */ tower_check_for_read_packet(dev); while (dev->read_packet_length == 0) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible_timeout(dev->read_wait, dev->interrupt_in_done, dev->packet_timeout_jiffies); if (retval < 0) goto unlock_exit; /* reset read timeout during read or write activity */ if (read_timeout && (dev->read_buffer_length || dev->interrupt_out_busy)) { timeout = jiffies + msecs_to_jiffies(read_timeout); } /* check for read timeout */ if (read_timeout && time_after(jiffies, timeout)) { retval = -ETIMEDOUT; goto unlock_exit; } tower_check_for_read_packet(dev); } /* copy the data from read_buffer into userspace */ bytes_to_read = min(count, dev->read_packet_length); if (copy_to_user(buffer, dev->read_buffer, bytes_to_read)) { retval = -EFAULT; goto unlock_exit; } spin_lock_irq(&dev->read_buffer_lock); dev->read_buffer_length -= bytes_to_read; dev->read_packet_length -= bytes_to_read; for (i = 0; i < dev->read_buffer_length; i++) dev->read_buffer[i] = dev->read_buffer[i+bytes_to_read]; spin_unlock_irq(&dev->read_buffer_lock); retval = bytes_to_read; unlock_exit: /* unlock the device */ mutex_unlock(&dev->lock); exit: return retval; } /* * tower_write */ static ssize_t tower_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct lego_usb_tower *dev; size_t bytes_to_write; int retval = 0; dev = file->private_data; /* lock this object */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; goto unlock_exit; } /* verify that we actually have some data to write */ if (count == 0) { dev_dbg(&dev->udev->dev, "write request of 0 bytes\n"); goto unlock_exit; } /* wait until previous 
transfer is finished */ while (dev->interrupt_out_busy) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->write_wait, !dev->interrupt_out_busy); if (retval) goto unlock_exit; } /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min_t(int, count, write_buffer_size); dev_dbg(&dev->udev->dev, "%s: count = %zd, bytes_to_write = %zd\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { retval = -EFAULT; goto unlock_exit; } /* send off the urb */ usb_fill_int_urb(dev->interrupt_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->interrupt_out_endpoint->bEndpointAddress), dev->interrupt_out_buffer, bytes_to_write, tower_interrupt_out_callback, dev, dev->interrupt_out_interval); dev->interrupt_out_busy = 1; wmb(); retval = usb_submit_urb(dev->interrupt_out_urb, GFP_KERNEL); if (retval) { dev->interrupt_out_busy = 0; dev_err(&dev->udev->dev, "Couldn't submit interrupt_out_urb %d\n", retval); goto unlock_exit; } retval = bytes_to_write; unlock_exit: /* unlock the device */ mutex_unlock(&dev->lock); exit: return retval; } /* * tower_interrupt_in_callback */ static void tower_interrupt_in_callback(struct urb *urb) { struct lego_usb_tower *dev = urb->context; int status = urb->status; int retval; unsigned long flags; lego_usb_tower_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); if (status) { if (status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN) { goto exit; } else { dev_dbg(&dev->udev->dev, "%s: nonzero status received: %d\n", __func__, status); goto resubmit; /* maybe we can recover */ } } if (urb->actual_length > 0) { spin_lock_irqsave(&dev->read_buffer_lock, flags); if (dev->read_buffer_length + urb->actual_length < read_buffer_size) { memcpy(dev->read_buffer + dev->read_buffer_length, dev->interrupt_in_buffer, urb->actual_length); dev->read_buffer_length += urb->actual_length; dev->read_last_arrival = jiffies; dev_dbg(&dev->udev->dev, "%s: received %d bytes\n", __func__, urb->actual_length); } else { pr_warn("read_buffer overflow, %d bytes dropped\n", urb->actual_length); } spin_unlock_irqrestore(&dev->read_buffer_lock, flags); } resubmit: retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->udev->dev, "%s: usb_submit_urb failed (%d)\n", __func__, retval); } exit: dev->interrupt_in_done = 1; wake_up_interruptible(&dev->read_wait); } /* * tower_interrupt_out_callback */ static void tower_interrupt_out_callback(struct urb *urb) { struct lego_usb_tower *dev = urb->context; int status = urb->status; lego_usb_tower_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) { dev_dbg(&dev->udev->dev, "%s: nonzero write bulk status received: %d\n", __func__, status); } dev->interrupt_out_busy = 0; wake_up_interruptible(&dev->write_wait); } /* * tower_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. 
*/ static int tower_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *idev = &interface->dev; struct usb_device *udev = interface_to_usbdev(interface); struct lego_usb_tower *dev; struct tower_get_version_reply get_version_reply; int retval = -ENOMEM; int result; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto exit; mutex_init(&dev->lock); dev->udev = usb_get_dev(udev); spin_lock_init(&dev->read_buffer_lock); dev->packet_timeout_jiffies = msecs_to_jiffies(packet_timeout); dev->read_last_arrival = jiffies; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); result = usb_find_common_endpoints_reverse(interface->cur_altsetting, NULL, NULL, &dev->interrupt_in_endpoint, &dev->interrupt_out_endpoint); if (result) { dev_err(idev, "interrupt endpoints not found\n"); retval = result; goto error; } dev->read_buffer = kmalloc(read_buffer_size, GFP_KERNEL); if (!dev->read_buffer) goto error; dev->interrupt_in_buffer = kmalloc(usb_endpoint_maxp(dev->interrupt_in_endpoint), GFP_KERNEL); if (!dev->interrupt_in_buffer) goto error; dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_in_urb) goto error; dev->interrupt_out_buffer = kmalloc(write_buffer_size, GFP_KERNEL); if (!dev->interrupt_out_buffer) goto error; dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_out_urb) goto error; dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval; dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval; /* get the firmware version and log it */ result = usb_control_msg_recv(udev, 0, LEGO_USB_TOWER_REQUEST_GET_VERSION, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &get_version_reply, sizeof(get_version_reply), 1000, GFP_KERNEL); if (result) { dev_err(idev, "get version request failed: %d\n", result); retval = result; goto error; } dev_info(&interface->dev, "LEGO USB Tower firmware version is %d.%d build %d\n", get_version_reply.major, get_version_reply.minor, le16_to_cpu(get_version_reply.build_no)); /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &tower_class); if (retval) { /* something prevented us from registering this driver */ dev_err(idev, "Not able to get a minor for this device.\n"); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major " "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE), USB_MAJOR, dev->minor); exit: return retval; error: tower_delete(dev); return retval; } /* * tower_disconnect * * Called by the usb core when the device is removed from the system. 
*/ static void tower_disconnect(struct usb_interface *interface) { struct lego_usb_tower *dev; int minor; dev = usb_get_intfdata(interface); minor = dev->minor; /* give back our minor and prevent further open() */ usb_deregister_dev(interface, &tower_class); /* stop I/O */ usb_poison_urb(dev->interrupt_in_urb); usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&dev->lock); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) { mutex_unlock(&dev->lock); tower_delete(dev); } else { dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); mutex_unlock(&dev->lock); } dev_info(&interface->dev, "LEGO USB Tower #%d now disconnected\n", (minor - LEGO_USB_TOWER_MINOR_BASE)); } module_usb_driver(tower_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
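That ends the tower driver. A minimal userspace sketch of talking to the device node it registers; per legousbtower_devnode() and the "legousbtower%d" class name above, the first tower appears as /dev/usb/legousbtower0, and the one-byte payload here is a placeholder, not a real LASM packet:

/* Illustrative userspace sketch, not part of the kernel source. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/usb/legousbtower0", O_RDWR);
	unsigned char cmd[] = { 0x10 };	/* placeholder payload */
	unsigned char buf[64];
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, cmd, sizeof(cmd)) < 0)	/* one interrupt-out URB per write */
		perror("write");
	n = read(fd, buf, sizeof(buf));	/* returns a packet_timeout-delimited packet */
	if (n > 0)
		printf("received %zd bytes\n", n);
	close(fd);
	return 0;
}

The buffering and timeout behaviour is tunable at load time, e.g. modprobe legousbtower read_timeout=500 packet_timeout=0 (values in ms, per the module_param descriptions above).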
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/err.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/vmalloc.h>

#include <linux/ceph/messenger.h>
#include <linux/ceph/msgpool.h>

static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
{
	struct ceph_msgpool *pool = arg;
	struct ceph_msg *msg;

	msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items,
			    gfp_mask, true);
	if (!msg) {
		dout("msgpool_alloc %s failed\n", pool->name);
	} else {
		dout("msgpool_alloc %s %p\n", pool->name, msg);
		msg->pool = pool;
	}
	return msg;
}

static void msgpool_free(void *element, void *arg)
{
	struct ceph_msgpool *pool = arg;
	struct ceph_msg *msg = element;

	dout("msgpool_release %s %p\n", pool->name, msg);
	msg->pool = NULL;
	ceph_msg_put(msg);
}

int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
		      int front_len, int max_data_items, int size,
		      const char *name)
{
	dout("msgpool %s init\n", name);

	pool->type = type;
	pool->front_len = front_len;
	pool->max_data_items = max_data_items;
	pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
	if (!pool->pool)
		return -ENOMEM;
	pool->name = name;
	return 0;
}

void ceph_msgpool_destroy(struct ceph_msgpool *pool)
{
	dout("msgpool %s destroy\n", pool->name);
	mempool_destroy(pool->pool);
}

struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
				  int max_data_items)
{
	struct ceph_msg *msg;

	if (front_len > pool->front_len ||
	    max_data_items > pool->max_data_items) {
		pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n",
				    __func__, front_len, max_data_items,
				    pool->name, pool->front_len,
				    pool->max_data_items);
		WARN_ON_ONCE(1);

		/* try to alloc a fresh message */
		return ceph_msg_new2(pool->type, front_len, max_data_items,
				     GFP_NOFS, false);
	}

	msg = mempool_alloc(pool->pool, GFP_NOFS);
	dout("msgpool_get %s %p\n", pool->name, msg);
	return msg;
}

void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
{
	dout("msgpool_put %s %p\n", pool->name, msg);

	/* reset msg front_len; user may have changed it */
	msg->front.iov_len = pool->front_len;
	msg->hdr.front_len = cpu_to_le32(pool->front_len);

	msg->data_length = 0;
	msg->num_data_items = 0;

	kref_init(&msg->kref);	/* retake single ref */
	mempool_free(msg, pool->pool);
}
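A sketch of the intended call pattern for this pool, loosely modeled on how libceph's mon/osd clients use it; the message type, sizes, and function names prefixed "example_" are illustrative, not copied from a real caller:

/* Illustrative kernel-style sketch, not part of the kernel source. */
#include <linux/ceph/msgpool.h>
#include <linux/ceph/messenger.h>

static struct ceph_msgpool reply_pool;	/* hypothetical pool */

static int example_init(void)
{
	/* pre-allocate 4 reply messages with 512-byte fronts (arbitrary) */
	return ceph_msgpool_init(&reply_pool, CEPH_MSG_OSD_OPREPLY,
				 512, 0, 4, "example-replies");
}

static void example_use(void)
{
	/* guaranteed to succeed (though it may sleep) for requests
	 * within the pool's front_len/max_data_items limits */
	struct ceph_msg *msg = ceph_msgpool_get(&reply_pool, 512, 0);

	/* ... use msg; since msg->pool is set, the final ceph_msg_put()
	 * routes it back to the pool rather than freeing it ... */
	ceph_msg_put(msg);
}

static void example_exit(void)
{
	ceph_msgpool_destroy(&reply_pool);
}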
// SPDX-License-Identifier: GPL-2.0-only
/*
 * LED support for the input layer
 *
 * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/leds.h>
#include <linux/input.h>

#if IS_ENABLED(CONFIG_VT)
#define VT_TRIGGER(_name)	.trigger = _name
#else
#define VT_TRIGGER(_name)	.trigger = NULL
#endif

#if IS_ENABLED(CONFIG_SND_CTL_LED)
#define AUDIO_TRIGGER(_name)	.trigger = _name
#else
#define AUDIO_TRIGGER(_name)	.trigger = NULL
#endif

static const struct {
	const char *name;
	const char *trigger;
} input_led_info[LED_CNT] = {
	[LED_NUML]	= { "numlock", VT_TRIGGER("kbd-numlock") },
	[LED_CAPSL]	= { "capslock", VT_TRIGGER("kbd-capslock") },
	[LED_SCROLLL]	= { "scrolllock", VT_TRIGGER("kbd-scrolllock") },
	[LED_COMPOSE]	= { "compose" },
	[LED_KANA]	= { "kana", VT_TRIGGER("kbd-kanalock") },
	[LED_SLEEP]	= { "sleep" },
	[LED_SUSPEND]	= { "suspend" },
	[LED_MUTE]	= { "mute", AUDIO_TRIGGER("audio-mute") },
	[LED_MISC]	= { "misc" },
	[LED_MAIL]	= { "mail" },
	[LED_CHARGING]	= { "charging" },
};

struct input_led {
	struct led_classdev cdev;
	struct input_handle *handle;
	unsigned int code; /* One of LED_* constants */
};

struct input_leds {
	struct input_handle handle;
	unsigned int num_leds;
	struct input_led leds[] __counted_by(num_leds);
};

static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev)
{
	struct input_led *led = container_of(cdev, struct input_led, cdev);
	struct input_dev *input = led->handle->dev;

	return test_bit(led->code, input->led) ?
cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2");
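That ends input-leds.c. The LEDs this handler registers appear under /sys/class/leds as "<input-device>::<led-name>" (the kasprintf format in input_leds_connect() above); a sketch of driving one from userspace, where the device name "input3" is an assumption for illustration:

/* Illustrative userspace sketch, not part of the kernel source. */
#include <stdio.h>

int main(void)
{
	/* path assumes an input device named "input3" with a capslock LED */
	const char *path = "/sys/class/leds/input3::capslock/brightness";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* max_brightness is 1 for these LEDs (set in input_leds_connect) */
	fprintf(f, "1\n");
	fclose(f);
	return 0;
}

Writing the brightness lands in input_leds_brightness_set(), which injects an EV_LED event back into the input device.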
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/net/sunrpc/stats.c
 *
 * procfs-based user access to generic RPC statistics. The stats files
 * reside in /proc/net/rpc.
 *
 * The read routines assume that the buffer passed in is just big enough.
 * If you implement an RPC service that has its own stats routine which
 * appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
 * limit.
 *
 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>

#include <trace/events/sunrpc.h>

#include "netns.h"

#define RPCDBG_FACILITY	RPCDBG_MISC

/*
 * Get RPC client stats
 */
static int rpc_proc_show(struct seq_file *seq, void *v)
{
	const struct rpc_stat *statp = seq->private;
	const struct rpc_program *prog = statp->program;
	unsigned int i, j;

	seq_printf(seq,
		"net %u %u %u %u\n",
			statp->netcnt,
			statp->netudpcnt,
			statp->nettcpcnt,
			statp->nettcpconn);
	seq_printf(seq,
		"rpc %u %u %u\n",
			statp->rpccnt,
			statp->rpcretrans,
			statp->rpcauthrefresh);

	for (i = 0; i < prog->nrvers; i++) {
		const struct rpc_version *vers = prog->version[i];

		if (!vers)
			continue;
		seq_printf(seq, "proc%u %u",
					vers->number, vers->nrprocs);
		for (j = 0; j < vers->nrprocs; j++)
			seq_printf(seq, " %u", vers->counts[j]);
		seq_putc(seq, '\n');
	}
	return 0;
}

static int rpc_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rpc_proc_show, pde_data(inode));
}

static const struct proc_ops rpc_proc_ops = {
	.proc_open	= rpc_proc_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

/*
 * Get RPC server stats
 */
void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
{
	const struct svc_program *prog = statp->program;
	const struct svc_version *vers;
	unsigned int i, j, k;
	unsigned long count;

	seq_printf(seq,
		"net %u %u %u %u\n",
			statp->netcnt,
			statp->netudpcnt,
			statp->nettcpcnt,
			statp->nettcpconn);
	seq_printf(seq,
		"rpc %u %u %u %u %u\n",
			statp->rpccnt,
			statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
statp->rpcbadfmt, statp->rpcbadauth, statp->rpcbadclnt); for (i = 0; i < prog->pg_nvers; i++) { vers = prog->pg_vers[i]; if (!vers) continue; seq_printf(seq, "proc%d %u", i, vers->vs_nproc); for (j = 0; j < vers->vs_nproc; j++) { count = 0; for_each_possible_cpu(k) count += per_cpu(vers->vs_count[j], k); seq_printf(seq, " %lu", count); } seq_putc(seq, '\n'); } } EXPORT_SYMBOL_GPL(svc_seq_show); /** * rpc_alloc_iostats - allocate an rpc_iostats structure * @clnt: RPC program, version, and xprt * */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { struct rpc_iostats *stats; int i; stats = kcalloc(clnt->cl_maxproc, sizeof(*stats), GFP_KERNEL); if (stats) { for (i = 0; i < clnt->cl_maxproc; i++) spin_lock_init(&stats[i].om_lock); } return stats; } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); /** * rpc_free_iostats - release an rpc_iostats structure * @stats: doomed rpc_iostats structure * */ void rpc_free_iostats(struct rpc_iostats *stats) { kfree(stats); } EXPORT_SYMBOL_GPL(rpc_free_iostats); /** * rpc_count_iostats_metrics - tally up per-task stats * @task: completed rpc_task * @op_metrics: stat structure for OP that will accumulate stats from @task */ void rpc_count_iostats_metrics(const struct rpc_task *task, struct rpc_iostats *op_metrics) { struct rpc_rqst *req = task->tk_rqstp; ktime_t backlog, execute, now; if (!op_metrics || !req) return; now = ktime_get(); spin_lock(&op_metrics->om_lock); op_metrics->om_ops++; /* kernel API: om_ops must never become larger than om_ntrans */ op_metrics->om_ntrans += max(req->rq_ntrans, 1); op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; backlog = 0; if (ktime_to_ns(req->rq_xtime)) { backlog = ktime_sub(req->rq_xtime, task->tk_start); op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog); } op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); execute = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); if (task->tk_status < 0) op_metrics->om_error_status++; spin_unlock(&op_metrics->om_lock); trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute); } EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); /** * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task * @stats: array of stat structures * * Uses the statidx from @task */ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { rpc_count_iostats_metrics(task, &stats[task->tk_msg.rpc_proc->p_statidx]); } EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, const struct rpc_procinfo *procs) { if (procs[op].p_name) seq_printf(seq, "\t%12s: ", procs[op].p_name); else if (op == 0) seq_printf(seq, "\t NULL: "); else seq_printf(seq, "\t%12u: ", op); } static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) { a->om_ops += b->om_ops; a->om_ntrans += b->om_ntrans; a->om_timeouts += b->om_timeouts; a->om_bytes_sent += b->om_bytes_sent; a->om_bytes_recv += b->om_bytes_recv; a->om_queue = ktime_add(a->om_queue, b->om_queue); a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); a->om_execute = ktime_add(a->om_execute, b->om_execute); a->om_error_status += b->om_error_status; } static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, int op, const struct rpc_procinfo *procs) { _print_name(seq, op, procs); seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", stats->om_ops, stats->om_ntrans, 
stats->om_timeouts, stats->om_bytes_sent, stats->om_bytes_recv, ktime_to_ms(stats->om_queue), ktime_to_ms(stats->om_rtt), ktime_to_ms(stats->om_execute), stats->om_error_status); } static int do_print_stats(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *seqv) { struct seq_file *seq = seqv; xprt->ops->print_stats(xprt, seq); return 0; } void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) { unsigned int op, maxproc = clnt->cl_maxproc; if (!clnt->cl_metrics) return; seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS); seq_printf(seq, "p/v: %u/%u (%s)\n", clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name); rpc_clnt_iterate_for_each_xprt(clnt, do_print_stats, seq); seq_printf(seq, "\tper-op statistics\n"); for (op = 0; op < maxproc; op++) { struct rpc_iostats stats = {}; struct rpc_clnt *next = clnt; do { _add_rpc_iostats(&stats, &next->cl_metrics[op]); if (next == next->cl_parent) break; next = next->cl_parent; } while (next); _print_rpc_iostats(seq, &stats, op, clnt->cl_procinfo); } } EXPORT_SYMBOL_GPL(rpc_clnt_show_stats); /* * Register/unregister RPC proc files */ static inline struct proc_dir_entry * do_register(struct net *net, const char *name, void *data, const struct proc_ops *proc_ops) { struct sunrpc_net *sn; dprintk("RPC: registering /proc/net/rpc/%s\n", name); sn = net_generic(net, sunrpc_net_id); return proc_create_data(name, 0, sn->proc_net_rpc, proc_ops, data); } struct proc_dir_entry * rpc_proc_register(struct net *net, struct rpc_stat *statp) { return do_register(net, statp->program->name, statp, &rpc_proc_ops); } EXPORT_SYMBOL_GPL(rpc_proc_register); void rpc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops) { return do_register(net, statp->program->pg_name, net, proc_ops); } EXPORT_SYMBOL_GPL(svc_proc_register); void svc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); int rpc_proc_init(struct net *net) { struct sunrpc_net *sn; dprintk("RPC: registering /proc/net/rpc\n"); sn = net_generic(net, sunrpc_net_id); sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net); if (sn->proc_net_rpc == NULL) return -ENOMEM; return 0; } void rpc_proc_exit(struct net *net) { dprintk("RPC: unregistering /proc/net/rpc\n"); remove_proc_entry("rpc", net->proc_net); }
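rpc_proc_register() above exposes each RPC program's counters as a file under /proc/net/rpc, in the format emitted by rpc_proc_show(). A hedged userspace sketch of dumping one such file follows; "nfs" is an assumption, valid only when a program by that name has registered its stats.

#include <stdio.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/net/rpc/nfs", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* expect a "net ..." line, an "rpc ..." line, and one
         * "procN ..." line per registered program version */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}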
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) /* Copyright (C) 2016-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * SipHash: a fast short-input PRF * https://131002.net/siphash/ * * This implementation is specifically for SipHash2-4 for a secure PRF * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for * hashtables.
*/ #include <linux/siphash.h> #include <linux/unaligned.h> #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 #include <linux/dcache.h> #include <asm/word-at-a-time.h> #endif #define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PREAMBLE(len) \ u64 v0 = SIPHASH_CONST_0; \ u64 v1 = SIPHASH_CONST_1; \ u64 v2 = SIPHASH_CONST_2; \ u64 v3 = SIPHASH_CONST_3; \ u64 b = ((u64)(len)) << 56; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ v1 ^= key->key[1]; \ v0 ^= key->key[0]; #define POSTAMBLE \ v3 ^= b; \ SIPROUND; \ SIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ SIPROUND; \ SIPROUND; \ SIPROUND; \ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; PREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = le64_to_cpup(data); v3 ^= m; SIPROUND; SIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; fallthrough; case 6: b |= ((u64)end[5]) << 40; fallthrough; case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= le32_to_cpup(data); break; case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } #endif POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); #endif u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; PREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = get_unaligned_le64(data); v3 ^= m; SIPROUND; SIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; fallthrough; case 6: b |= ((u64)end[5]) << 40; fallthrough; case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= get_unaligned_le32(end); break; case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } #endif POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 * @first: first u64 * @key: the siphash key */ u64 siphash_1u64(const u64 first, const siphash_key_t *key) { PREAMBLE(8) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; POSTAMBLE } EXPORT_SYMBOL(siphash_1u64); /** * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64 * @first: first u64 * @second: second u64 * @key: the siphash key */ u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) { PREAMBLE(16) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; POSTAMBLE } EXPORT_SYMBOL(siphash_2u64); /** * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64 * @first: first u64 * @second: second u64 * @third: third u64 * @key: the siphash key */ u64 siphash_3u64(const u64 first, const u64 second, const u64 third, const siphash_key_t *key) { PREAMBLE(24) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; v3 ^= third; SIPROUND; SIPROUND; v0 ^= third; POSTAMBLE } EXPORT_SYMBOL(siphash_3u64); /** * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64 * @first: first u64 * @second: 
second u64 * @third: third u64 * @forth: forth u64 * @key: the siphash key */ u64 siphash_4u64(const u64 first, const u64 second, const u64 third, const u64 forth, const siphash_key_t *key) { PREAMBLE(32) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; v3 ^= third; SIPROUND; SIPROUND; v0 ^= third; v3 ^= forth; SIPROUND; SIPROUND; v0 ^= forth; POSTAMBLE } EXPORT_SYMBOL(siphash_4u64); u64 siphash_1u32(const u32 first, const siphash_key_t *key) { PREAMBLE(4) b |= first; POSTAMBLE } EXPORT_SYMBOL(siphash_1u32); u64 siphash_3u32(const u32 first, const u32 second, const u32 third, const siphash_key_t *key) { u64 combined = (u64)second << 32 | first; PREAMBLE(12) v3 ^= combined; SIPROUND; SIPROUND; v0 ^= combined; b |= third; POSTAMBLE } EXPORT_SYMBOL(siphash_3u32); #if BITS_PER_LONG == 64 /* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3. */ #define HSIPROUND SIPROUND #define HPREAMBLE(len) PREAMBLE(len) #define HPOSTAMBLE \ v3 ^= b; \ HSIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ HSIPROUND; \ HSIPROUND; \ HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; HPREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = le64_to_cpup(data); v3 ^= m; HSIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; fallthrough; case 6: b |= ((u64)end[5]) << 40; fallthrough; case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= le32_to_cpup(data); break; case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } #endif HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); #endif u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; HPREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = get_unaligned_le64(data); v3 ^= m; HSIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; fallthrough; case 6: b |= ((u64)end[5]) << 40; fallthrough; case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= get_unaligned_le32(end); break; case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } #endif HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 * @first: first u32 * @key: the hsiphash key */ u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) { HPREAMBLE(4) b |= first; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_1u32); /** * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 * @first: first u32 * @second: second u32 * @key: the hsiphash key */ u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(8) v3 ^= combined; HSIPROUND; v0 ^= combined; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_2u32); /** * hsiphash_3u32 - 
compute 32-bit hsiphash PRF value of 3 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @key: the hsiphash key */ u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, const hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(12) v3 ^= combined; HSIPROUND; v0 ^= combined; b |= third; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_3u32); /** * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @forth: forth u32 * @key: the hsiphash key */ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, const u32 forth, const hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(16) v3 ^= combined; HSIPROUND; v0 ^= combined; combined = (u64)forth << 32 | third; v3 ^= combined; HSIPROUND; v0 ^= combined; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_4u32); #else #define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define HPREAMBLE(len) \ u32 v0 = HSIPHASH_CONST_0; \ u32 v1 = HSIPHASH_CONST_1; \ u32 v2 = HSIPHASH_CONST_2; \ u32 v3 = HSIPHASH_CONST_3; \ u32 b = ((u32)(len)) << 24; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ v1 ^= key->key[1]; \ v0 ^= key->key[0]; #define HPOSTAMBLE \ v3 ^= b; \ HSIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ HSIPROUND; \ HSIPROUND; \ HSIPROUND; \ return v1 ^ v3; #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); const u8 left = len & (sizeof(u32) - 1); u32 m; HPREAMBLE(len) for (; data != end; data += sizeof(u32)) { m = le32_to_cpup(data); v3 ^= m; HSIPROUND; v0 ^= m; } switch (left) { case 3: b |= ((u32)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); #endif u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); const u8 left = len & (sizeof(u32) - 1); u32 m; HPREAMBLE(len) for (; data != end; data += sizeof(u32)) { m = get_unaligned_le32(data); v3 ^= m; HSIPROUND; v0 ^= m; } switch (left) { case 3: b |= ((u32)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 * @first: first u32 * @key: the hsiphash key */ u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) { HPREAMBLE(4) v3 ^= first; HSIPROUND; v0 ^= first; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_1u32); /** * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 * @first: first u32 * @second: second u32 * @key: the hsiphash key */ u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) { HPREAMBLE(8) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_2u32); /** * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @key: the hsiphash key */ u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, const hsiphash_key_t *key) { HPREAMBLE(12) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; v3 ^= third; HSIPROUND; v0 ^= third; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_3u32); /** * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @forth: forth u32 * @key: the hsiphash 
key */ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, const u32 forth, const hsiphash_key_t *key) { HPREAMBLE(16) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; v3 ^= third; HSIPROUND; v0 ^= third; v3 ^= forth; HSIPROUND; v0 ^= forth; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_4u32); #endif
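The file above exports a small fixed-width API alongside the generic __siphash_* entry points. A hedged in-kernel sketch of typical usage for hashtable bucketing follows; example_init_key() and example_bucket() are illustrative names, not part of the file.

#include <linux/cache.h>
#include <linux/random.h>
#include <linux/siphash.h>

static siphash_key_t hash_key __read_mostly;

static void example_init_key(void)
{
        /* derive the key once, at init time; the PRF is only as
         * unpredictable as this key */
        get_random_bytes(&hash_key, sizeof(hash_key));
}

static u32 example_bucket(u64 cookie, unsigned int nbuckets)
{
        /* siphash_1u64() is the fast path for a single 64-bit input */
        return (u32)siphash_1u64(cookie, &hash_key) % nbuckets;
}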
// SPDX-License-Identifier: GPL-2.0 /* * This module exports the functions: * * 'int set_selection_user(struct tiocl_selection __user *, * struct tty_struct *)' * 'int set_selection_kernel(struct tiocl_selection *, struct tty_struct *)' * 'void clear_selection(void)' * 'int paste_selection(struct tty_struct *)' * 'int sel_loadlut(u32 __user *)' * * Now that /dev/vcs exists, most of this can disappear again. */ #include <linux/module.h> #include <linux/tty.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/kbd_kern.h> #include <linux/vt_kern.h> #include <linux/consolemap.h> #include <linux/selection.h> #include <linux/tiocl.h> #include <linux/console.h> #include <linux/tty_flip.h> #include <linux/sched/signal.h> /* Don't take this from <ctype.h>: 011-015 on the screen aren't spaces */ #define is_space_on_vt(c) ((c) == ' ') /* FIXME: all this needs locking */ static struct vc_selection { struct mutex lock; struct vc_data *cons; /* must not be deallocated */ char *buffer; unsigned int buf_len; volatile int start; /* cleared by clear_selection */ int end; } vc_sel = { .lock = __MUTEX_INITIALIZER(vc_sel.lock), .start = -1, }; /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ /* set reverse video on characters s-e of console with selection.
*/ static inline void highlight(const int s, const int e) { invert_screen(vc_sel.cons, s, e-s+2, true); } /* use complementary color to show the pointer */ static inline void highlight_pointer(const int where) { complement_pos(vc_sel.cons, where); } static u32 sel_pos(int n, bool unicode) { if (unicode) return screen_glyph_unicode(vc_sel.cons, n / 2); return inverse_translate(vc_sel.cons, screen_glyph(vc_sel.cons, n), false); } /** * clear_selection - remove current selection * * Remove the current selection highlight, if any, from the console holding the * selection. * * Locking: The caller must hold the console lock. */ void clear_selection(void) { highlight_pointer(-1); /* hide the pointer */ if (vc_sel.start != -1) { highlight(vc_sel.start, vc_sel.end); vc_sel.start = -1; } } EXPORT_SYMBOL_GPL(clear_selection); bool vc_is_sel(const struct vc_data *vc) { return vc == vc_sel.cons; } /* * User settable table: what characters are to be considered alphabetic? * 128 bits. Locked by the console lock. */ static u32 inwordLut[]={ 0x00000000, /* control chars */ 0x03FFE000, /* digits and "-./" */ 0x87FFFFFE, /* uppercase and '_' */ 0x07FFFFFE, /* lowercase */ }; static inline int inword(const u32 c) { return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1); } /** * sel_loadlut() - load the LUT table * @lut: user table * * Load the LUT table from user space. Make a temporary copy so a partial * update doesn't make a mess. * * Locking: The console lock is acquired. */ int sel_loadlut(u32 __user *lut) { u32 tmplut[ARRAY_SIZE(inwordLut)]; if (copy_from_user(tmplut, lut, sizeof(inwordLut))) return -EFAULT; guard(console_lock)(); memcpy(inwordLut, tmplut, sizeof(inwordLut)); return 0; } /* does screen address p correspond to character at LH/RH edge of screen? */ static inline int atedge(const int p, int size_row) { return (!(p % size_row) || !((p + 2) % size_row)); } /* stores the char in UTF8 and returns the number of bytes used (1-4) */ static int store_utf8(u32 c, char *p) { if (c < 0x80) { /* 0******* */ p[0] = c; return 1; } else if (c < 0x800) { /* 110***** 10****** */ p[0] = 0xc0 | (c >> 6); p[1] = 0x80 | (c & 0x3f); return 2; } else if (c < 0x10000) { /* 1110**** 10****** 10****** */ p[0] = 0xe0 | (c >> 12); p[1] = 0x80 | ((c >> 6) & 0x3f); p[2] = 0x80 | (c & 0x3f); return 3; } else if (c < 0x110000) { /* 11110*** 10****** 10****** 10****** */ p[0] = 0xf0 | (c >> 18); p[1] = 0x80 | ((c >> 12) & 0x3f); p[2] = 0x80 | ((c >> 6) & 0x3f); p[3] = 0x80 | (c & 0x3f); return 4; } else { /* outside Unicode, replace with U+FFFD */ p[0] = 0xef; p[1] = 0xbf; p[2] = 0xbd; return 3; } } /** * set_selection_user - set the current selection. * @sel: user selection info * @tty: the console tty * * Invoked by the ioctl handler for the vt layer. * * Locking: The entire selection process is managed under the console_lock. * It's a lot under the lock but it's hardly a performance path. */ int set_selection_user(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct tiocl_selection v; if (copy_from_user(&v, sel, sizeof(*sel))) return -EFAULT; /* * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without * CAP_SYS_ADMIN as they do not modify the selection. */ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: break; default: if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return set_selection_kernel(&v, tty); } static int vc_selection_store_chars(struct vc_data *vc, bool unicode) { char *bp, *obp; unsigned int i; /* Allocate a new buffer before freeing the old one ...
*/ /* chars can take up to 4 bytes with unicode */ bp = kmalloc_array((vc_sel.end - vc_sel.start) / 2 + 1, unicode ? 4 : 1, GFP_KERNEL | __GFP_NOWARN); if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); return -ENOMEM; } kfree(vc_sel.buffer); vc_sel.buffer = bp; obp = bp; for (i = vc_sel.start; i <= vc_sel.end; i += 2) { u32 c = sel_pos(i, unicode); if (unicode) bp += store_utf8(c, bp); else *bp++ = c; if (!is_space_on_vt(c)) obp = bp; if (!((i + 2) % vc->vc_size_row)) { /* strip trailing blanks from line and add newline, unless non-space at end of line. */ if (obp != bp) { bp = obp; *bp++ = '\r'; } obp = bp; } } vc_sel.buf_len = bp - vc_sel.buffer; return 0; } static int vc_do_selection(struct vc_data *vc, unsigned short mode, int ps, int pe) { int new_sel_start, new_sel_end, spc; bool unicode = vt_do_kdgkbmode(fg_console) == K_UNICODE; switch (mode) { case TIOCL_SELCHAR: /* character-by-character selection */ new_sel_start = ps; new_sel_end = pe; break; case TIOCL_SELWORD: /* word-by-word selection */ spc = is_space_on_vt(sel_pos(ps, unicode)); for (new_sel_start = ps; ; ps -= 2) { if ((spc && !is_space_on_vt(sel_pos(ps, unicode))) || (!spc && !inword(sel_pos(ps, unicode)))) break; new_sel_start = ps; if (!(ps % vc->vc_size_row)) break; } spc = is_space_on_vt(sel_pos(pe, unicode)); for (new_sel_end = pe; ; pe += 2) { if ((spc && !is_space_on_vt(sel_pos(pe, unicode))) || (!spc && !inword(sel_pos(pe, unicode)))) break; new_sel_end = pe; if (!((pe + 2) % vc->vc_size_row)) break; } break; case TIOCL_SELLINE: /* line-by-line selection */ new_sel_start = rounddown(ps, vc->vc_size_row); new_sel_end = rounddown(pe, vc->vc_size_row) + vc->vc_size_row - 2; break; case TIOCL_SELPOINTER: highlight_pointer(pe); return 0; default: return -EINVAL; } /* remove the pointer */ highlight_pointer(-1); /* select to end of line if on trailing space */ if (new_sel_end > new_sel_start && !atedge(new_sel_end, vc->vc_size_row) && is_space_on_vt(sel_pos(new_sel_end, unicode))) { for (pe = new_sel_end + 2; ; pe += 2) if (!is_space_on_vt(sel_pos(pe, unicode)) || atedge(pe, vc->vc_size_row)) break; if (is_space_on_vt(sel_pos(pe, unicode))) new_sel_end = pe; } if (vc_sel.start == -1) /* no current selection */ highlight(new_sel_start, new_sel_end); else if (new_sel_start == vc_sel.start) { if (new_sel_end == vc_sel.end) /* no action required */ return 0; else if (new_sel_end > vc_sel.end) /* extend to right */ highlight(vc_sel.end + 2, new_sel_end); else /* contract from right */ highlight(new_sel_end + 2, vc_sel.end); } else if (new_sel_end == vc_sel.end) { if (new_sel_start < vc_sel.start) /* extend to left */ highlight(new_sel_start, vc_sel.start - 2); else /* contract from left */ highlight(vc_sel.start, new_sel_start - 2); } else /* some other case; start selection from scratch */ { clear_selection(); highlight(new_sel_start, new_sel_end); } vc_sel.start = new_sel_start; vc_sel.end = new_sel_end; return vc_selection_store_chars(vc, unicode); } static int vc_selection(struct vc_data *vc, struct tiocl_selection *v, struct tty_struct *tty) { int ps, pe; poke_blanked_console(); if (v->sel_mode == TIOCL_SELCLEAR) { /* useful for screendump without selection highlights */ clear_selection(); return 0; } /* Historically 0 => max value */ v->xs = umin(v->xs - 1, vc->vc_cols - 1); v->ys = umin(v->ys - 1, vc->vc_rows - 1); v->xe = umin(v->xe - 1, vc->vc_cols - 1); v->ye = umin(v->ye - 1, vc->vc_rows - 1); if (mouse_reporting() && (v->sel_mode & TIOCL_SELMOUSEREPORT)) { mouse_report(tty, 
v->sel_mode & TIOCL_SELBUTTONMASK, v->xs, v->ys); return 0; } ps = v->ys * vc->vc_size_row + (v->xs << 1); pe = v->ye * vc->vc_size_row + (v->xe << 1); if (ps > pe) /* make vc_sel.start <= vc_sel.end */ swap(ps, pe); if (vc_sel.cons != vc) { clear_selection(); vc_sel.cons = vc; } return vc_do_selection(vc, v->sel_mode, ps, pe); } int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) { guard(mutex)(&vc_sel.lock); guard(console_lock)(); return vc_selection(vc_cons[fg_console].d, v, tty); } EXPORT_SYMBOL_GPL(set_selection_kernel); /* Insert the contents of the selection buffer into the * queue of the tty associated with the current console. * Invoked by ioctl(). * * Locking: called without locks. Calls the ldisc wrongly with * unsafe methods, */ int paste_selection(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; int pasted = 0; size_t count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); int ret = 0; bool bp = vc->vc_bracketed_paste; static const char bracketed_paste_start[] = "\033[200~"; static const char bracketed_paste_end[] = "\033[201~"; const char *bps = bp ? bracketed_paste_start : NULL; const char *bpe = bp ? bracketed_paste_end : NULL; scoped_guard(console_lock) poke_blanked_console(); ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; /* ldisc was hung up */ tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); mutex_lock(&vc_sel.lock); while (vc_sel.buffer && (vc_sel.buf_len > pasted || bpe)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { ret = -EINTR; break; } if (tty_throttled(tty)) { mutex_unlock(&vc_sel.lock); schedule(); mutex_lock(&vc_sel.lock); continue; } __set_current_state(TASK_RUNNING); if (bps) { bps += tty_ldisc_receive_buf(ld, bps, NULL, strlen(bps)); if (*bps != '\0') continue; bps = NULL; } count = vc_sel.buf_len - pasted; if (count) { pasted += tty_ldisc_receive_buf(ld, vc_sel.buffer + pasted, NULL, count); if (vc_sel.buf_len > pasted) continue; } if (bpe) { bpe += tty_ldisc_receive_buf(ld, bpe, NULL, strlen(bpe)); if (*bpe == '\0') bpe = NULL; } } mutex_unlock(&vc_sel.lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); return ret; } EXPORT_SYMBOL_GPL(paste_selection);
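set_selection_user() above is reached via the TIOCLINUX ioctl, whose argument begins with a subfunction byte followed immediately by the payload struct. A hedged userspace sketch follows; running it requires CAP_SYS_ADMIN and a virtual console on stdin, both assumptions about the environment.

#include <linux/tiocl.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
        struct {
                char subcode;                   /* TIOCLINUX subfunction */
                struct tiocl_selection sel;     /* must follow the byte */
        } __attribute__((packed)) req;

        req.subcode = TIOCL_SETSEL;
        req.sel = (struct tiocl_selection){
                .xs = 1, .ys = 1, .xe = 10, .ye = 1,    /* 1-based cells */
                .sel_mode = TIOCL_SELCHAR,              /* char-by-char */
        };
        /* stdin must be a VT, e.g. run this on /dev/tty1 */
        if (ioctl(0, TIOCLINUX, &req) < 0)
                perror("TIOCLINUX");
        return 0;
}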
// SPDX-License-Identifier: GPL-2.0-only /* * ebtable_broute * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * April, 2002 * * This table lets you choose between routing and bridging for frames * entering on a bridge enslaved nic. This table is traversed before any * other ebtables table. See net/bridge/br_input.c. */ #include <linux/netfilter_bridge/ebtables.h> #include <linux/module.h> #include <linux/if_bridge.h> #include "../br_private.h" /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed */ static struct ebt_entries initial_chain = { .name = "BROUTING", .policy = EBT_ACCEPT, }; static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, .entries_size = sizeof(struct ebt_entries), .hook_entry = { [NF_BR_BROUTING] = &initial_chain, }, .entries = (char *)&initial_chain, }; static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, .me = THIS_MODULE, }; static unsigned int ebt_broute(void *priv, struct sk_buff *skb, const struct nf_hook_state *s) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; unsigned char *dest; int ret; if (!p || p->state != BR_STATE_FORWARDING) return NF_ACCEPT; nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL); ret = ebt_do_table(priv, skb, &state); if (ret != NF_DROP) return ret; /* DROP in ebtables -t broute means that the * skb should be routed, not bridged. * This is awkward, but can't be changed for compatibility * reasons. * * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. */ BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; /* undo PACKET_HOST mangling done in br_input in case the dst * address matches the logical bridge but not the port.
*/ dest = eth_hdr(skb)->h_dest; if (skb->pkt_type == PACKET_HOST && !ether_addr_equal(skb->dev->dev_addr, dest) && ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_OTHERHOST; return NF_ACCEPT; } static const struct nf_hook_ops ebt_ops_broute = { .hook = ebt_broute, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_FIRST, }; static int broute_table_init(struct net *net) { return ebt_register_table(net, &broute_table, &ebt_ops_broute); } static void __net_exit broute_net_pre_exit(struct net *net) { ebt_unregister_table_pre_exit(net, "broute"); } static void __net_exit broute_net_exit(struct net *net) { ebt_unregister_table(net, "broute"); } static struct pernet_operations broute_net_ops = { .exit = broute_net_exit, .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) { int ret = ebt_register_template(&broute_table, broute_table_init); if (ret) return ret; ret = register_pernet_subsys(&broute_net_ops); if (ret) { ebt_unregister_template(&broute_table); return ret; } return 0; } static void __exit ebtable_broute_fini(void) { unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); } module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Force packets to be routed instead of bridged");
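ebt_broute() above communicates its decision through BR_INPUT_SKB_CB(skb)->br_netfilter_broute. A hedged sketch follows — not verbatim kernel code — of how the bridge receive path can consume that flag to divert a marked frame onto the routed path instead of bridging it.

#include <linux/skbuff.h>
#include "../br_private.h"

static inline bool example_frame_should_be_routed(struct sk_buff *skb)
{
        /* set when an ebtables -t broute rule returned DROP for this
         * frame; the receive path then passes it up the stack rather
         * than forwarding it across the bridge */
        return BR_INPUT_SKB_CB(skb)->br_netfilter_broute;
}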
// SPDX-License-Identifier: GPL-2.0+ /* * NILFS module and super block management. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi. */ /* * linux/fs/ext2/super.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Big-endian to little-endian byte-swapping/bitmaps by * David S.
Miller (davem@caip.rutgers.edu), 1995 */ #include <linux/module.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" #include "alloc.h" #include "btree.h" #include "btnode.h" #include "page.h" #include "cpfile.h" #include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" #include "segbuf.h" MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; int level; va_start(args, fmt); level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (sb) printk("%c%cNILFS (%s): %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf); else printk("%c%cNILFS: %pV\n", KERN_SOH_ASCII, level, &vaf); va_end(args); } static void nilfs_set_error(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); if (sbp[1]) sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } } up_write(&nilfs->ns_sem); } /** * __nilfs_error() - report failure condition on a filesystem * @sb: super block instance * @function: name of calling function * @fmt: format string for message to be output * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when * NILFS detects incoherences or defects of meta data on disk. * * This implements the body of nilfs_error() macro. Normally, * nilfs_error() should be used. As for sustainable errors such as a * single-shot I/O error, nilfs_err() should be used instead. * * Callers should not add a trailing newline since this will do it. */ void __nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...) 
{ struct the_nilfs *nilfs = sb->s_fs_info; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", sb->s_id, function, &vaf); va_end(args); if (!sb_rdonly(sb)) { nilfs_set_error(sb); if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } } if (nilfs_test_opt(nilfs, ERRORS_PANIC)) panic("NILFS (device %s): panic forced after error\n", sb->s_id); } struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS); if (!ii) return NULL; ii->i_bh = NULL; ii->i_state = 0; ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } static void nilfs_free_inode(struct inode *inode) { if (nilfs_is_metadata_file_inode(inode)) nilfs_mdt_destroy(inode); kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } static int nilfs_sync_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; int err; retry: set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } if (unlikely(err)) { nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], * so copy sbp[0] to sbp[1] to take over sbp[0]. */ memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], nilfs->ns_sbsize); nilfs_fall_back_super_block(nilfs); goto retry; } } else { struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; nilfs->ns_sbwcount++; /* * The latest segment becomes trailable from the position * written in superblock. */ clear_nilfs_discontinued(nilfs); /* update GC protection for recent segments */ if (nilfs->ns_sbh[1]) { if (flag == NILFS_SB_COMMIT_ALL) { set_buffer_dirty(nilfs->ns_sbh[1]); if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) goto out; } if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) sbp = nilfs->ns_sbp[1]; } spin_lock(&nilfs->ns_last_segment_lock); nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); spin_unlock(&nilfs->ns_last_segment_lock); } out: return err; } void nilfs_set_log_cursor(struct nilfs_super_block *sbp, struct the_nilfs *nilfs) { sector_t nfreeblocks; /* nilfs->ns_sem must be locked by the caller. */ nilfs_count_free_blocks(nilfs, &nfreeblocks); sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); spin_lock(&nilfs->ns_last_segment_lock); sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); spin_unlock(&nilfs->ns_last_segment_lock); } struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, int flip) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; /* nilfs->ns_sem must be locked by the caller. 
*/ if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } if (flip && sbp[1]) nilfs_swap_super_block(nilfs); return sbp; } int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; time64_t t; /* nilfs->ns_sem must be locked by the caller. */ t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[0], nilfs->ns_sbsize)); if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { sbp[1]->s_wtime = sbp[0]->s_wtime; sbp[1]->s_sum = 0; sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[1], nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); nilfs->ns_flushed_device = 1; /* make sure store to ns_flushed_device cannot be reordered */ smp_wmb(); return nilfs_sync_super(sb, flag); } /** * nilfs_cleanup_super() - write filesystem state for cleanup * @sb: super block instance to be unmounted or degraded to read-only * * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. * * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. */ int nilfs_cleanup_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int flag = NILFS_SB_COMMIT; int ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (sbp) { sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); nilfs_set_log_cursor(sbp[0], nilfs); if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { /* * make the "clean" flag also to the opposite * super block if both super blocks point to * the same checkpoint. */ sbp[1]->s_state = sbp[0]->s_state; flag = NILFS_SB_COMMIT_ALL; } ret = nilfs_commit_super(sb, flag); } return ret; } /** * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) * * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *nsbh; struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; int sb2i; /* array index of the secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. 
*/ if (nilfs->ns_sbh[1] && nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 1; blocknr = nilfs->ns_sbh[1]->b_blocknr; } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; } else { sb2i = -1; blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ /* Get new super block buffer */ newblocknr = sb2off >> nilfs->ns_blocksize_bits; offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { nilfs_warn(sb, "unable to move secondary superblock to block %llu", (unsigned long long)newblocknr); ret = -EIO; goto out; } nsbp = (void *)nsbh->b_data + offset; lock_buffer(nsbh); if (sb2i >= 0) { /* * The position of the second superblock only changes by 4KiB, * which is larger than the maximum superblock data size * (= 1KiB), so there is no need to use memmove() to allow * overlap between source and destination. */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); /* * Zero fill after copy to avoid overwriting in case of move * within the same block. */ memset(nsbh->b_data, 0, offset); memset((void *)nsbp + nilfs->ns_sbsize, 0, nsbh->b_size - offset - nilfs->ns_sbsize); } else { memset(nsbh->b_data, 0, nsbh->b_size); } set_buffer_uptodate(nsbh); unlock_buffer(nsbh); if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { /* secondary super block will be restored to index 1 */ nilfs->ns_sbh[1] = nsbh; nilfs->ns_sbp[1] = nsbp; } else { brelse(nsbh); } out: return ret; } /** * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) * * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; __u64 devsize, newnsegs; loff_t sb2off; int ret; ret = -ERANGE; devsize = bdev_nr_bytes(sb->s_bdev); if (newsize > devsize) goto out; /* * Prevent underflow in second superblock position calculation. * The exact minimum size check is done in nilfs_sufile_resize(). */ if (newsize < 4096) { ret = -ENOSPC; goto out; } /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. */ down_write(&nilfs->ns_segctor_sem); sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); if (ret < 0) goto out; ret = nilfs_construct_segment(sb); if (ret < 0) goto out; down_write(&nilfs->ns_sem); nilfs_move_2nd_super(sb, sb2off); ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); /* * Drop NILFS_RESIZE_FS flag for compatibility with * mount-time resize which may be implemented in a * future release. */ sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_RESIZE_FS); sbp[0]->s_dev_size = cpu_to_le64(newsize); sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } up_write(&nilfs->ns_sem); /* * Reset the range of allocatable segments last. 
This order * is important in the case of expansion because the secondary * superblock must be protected from log write until migration * completes. */ if (!ret) nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); out: return ret; } static void nilfs_put_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; nilfs_detach_log_writer(sb); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); destroy_nilfs(nilfs); sb->s_fs_info = NULL; } static int nilfs_sync_fs(struct super_block *sb, int wait) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int err = 0; /* This function is called when super block should be written back */ if (wait) err = nilfs_construct_segment(sb); down_write(&nilfs->ns_sem); if (nilfs_sb_dirty(nilfs)) { sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs)); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); nilfs_commit_super(sb, NILFS_SB_COMMIT); } } up_write(&nilfs->ns_sem); if (!err) err = nilfs_flush_device(nilfs); return err; } int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **rootp) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; int err = -ENOMEM; root = nilfs_find_or_create_root( nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); if (!root) return err; if (root->ifile) goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) goto failed; reuse: *rootp = root; return 0; failed: if (err == -EINVAL) nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", (unsigned long long)cno); nilfs_put_root(root); return err; } static int nilfs_freeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; int err; if (sb_rdonly(sb)) return 0; /* Mark super block clean */ down_write(&nilfs->ns_sem); err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); return err; } static int nilfs_unfreeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; if (sb_rdonly(sb)) return 0; down_write(&nilfs->ns_sem); nilfs_setup_super(sb, false); up_write(&nilfs->ns_sem); return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; u64 nmaxinodes, nfreeinodes; int err; /* * Compute all of the segment blocks * * The blocks before first segment and after last segment * are excluded. */ blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments - nilfs->ns_first_data_block; nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; /* * Compute the overhead * * When distributing meta data blocks outside segment structure, * We must count them as the overhead. 
*/ overhead = 0; err = nilfs_count_free_blocks(nilfs, &nfreeblocks); if (unlikely(err)) return err; err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns * -ERANGE error code then we simply treat * curent inodes count as maximum possible and * zero as free inodes value. */ nmaxinodes = atomic64_read(&root->inodes_count); nfreeinodes = 0; err = 0; } else return err; } buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; buf->f_files = nmaxinodes; buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid = u64_to_fsid(id); return 0; } static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; if (!nilfs_test_opt(nilfs, BARRIER)) seq_puts(seq, ",nobarrier"); if (root->cno != NILFS_CPTREE_CURRENT_CNO) seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); if (nilfs_test_opt(nilfs, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (nilfs_test_opt(nilfs, ERRORS_CONT)) seq_puts(seq, ",errors=continue"); if (nilfs_test_opt(nilfs, STRICT_ORDER)) seq_puts(seq, ",order=strict"); if (nilfs_test_opt(nilfs, NORECOVERY)) seq_puts(seq, ",norecovery"); if (nilfs_test_opt(nilfs, DISCARD)) seq_puts(seq, ",discard"); return 0; } static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .free_inode = nilfs_free_inode, .dirty_inode = nilfs_dirty_inode, .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, .show_options = nilfs_show_options }; enum { Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, Opt_discard, }; static const struct constant_table nilfs_param_err[] = { {"continue", NILFS_MOUNT_ERRORS_CONT}, {"panic", NILFS_MOUNT_ERRORS_PANIC}, {"remount-ro", NILFS_MOUNT_ERRORS_RO}, {} }; static const struct fs_parameter_spec nilfs_param_spec[] = { fsparam_enum ("errors", Opt_err, nilfs_param_err), fsparam_flag_no ("barrier", Opt_barrier), fsparam_u64 ("cp", Opt_snapshot), fsparam_string ("order", Opt_order), fsparam_flag ("norecovery", Opt_norecovery), fsparam_flag_no ("discard", Opt_discard), {} }; struct nilfs_fs_context { unsigned long ns_mount_opt; __u64 cno; }; static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct nilfs_fs_context *nilfs = fc->fs_private; int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; struct fs_parse_result result; int opt; opt = fs_parse(fc, nilfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_barrier: if (result.negated) nilfs_clear_opt(nilfs, BARRIER); else nilfs_set_opt(nilfs, BARRIER); break; case Opt_order: if (strcmp(param->string, "relaxed") == 0) /* Ordered data semantics */ nilfs_clear_opt(nilfs, STRICT_ORDER); else if (strcmp(param->string, "strict") == 0) /* Strict in-order semantics */ nilfs_set_opt(nilfs, STRICT_ORDER); else return -EINVAL; break; case Opt_err: nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; nilfs->ns_mount_opt |= result.uint_32; break; case Opt_snapshot: if (is_remount) { struct super_block *sb = fc->root->d_sb; nilfs_err(sb, 
"\"%s\" option is invalid for remount", param->key); return -EINVAL; } if (result.uint_64 == 0) { nilfs_err(NULL, "invalid option \"cp=0\": invalid checkpoint number 0"); return -EINVAL; } nilfs->cno = result.uint_64; break; case Opt_norecovery: nilfs_set_opt(nilfs, NORECOVERY); break; case Opt_discard: if (result.negated) nilfs_clear_opt(nilfs, DISCARD); else nilfs_set_opt(nilfs, DISCARD); break; default: return -EINVAL; } return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int max_mnt_count; int mnt_count; /* nilfs->ns_sem must be locked by the caller. */ sbp = nilfs_prepare_super(sb, 0); if (!sbp) return -EIO; if (!is_mount) goto skip_mount_setup; max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds()); skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, u64 pos, int blocksize, struct buffer_head **pbh) { unsigned long long sb_index = pos; unsigned long offset; offset = do_div(sb_index, blocksize); *pbh = sb_bread(sb, sb_index); if (!*pbh) return NULL; return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } int nilfs_store_magic(struct super_block *sb, struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; sb->s_magic = le16_to_cpu(sbp->s_magic); /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE sb->s_flags |= SB_NOATIME; #endif nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, struct nilfs_super_block *sbp) { __u64 features; features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { nilfs_err(sb, "couldn't mount because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { nilfs_err(sb, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } return 0; } static int nilfs_get_root_dentry(struct super_block *sb, struct nilfs_root *root, struct dentry **root_dentry) { struct inode *inode; struct dentry *dentry; int ret = 0; inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } if (root->cno == NILFS_CPTREE_CURRENT_CNO) { dentry = d_find_alias(inode); if (!dentry) { dentry = d_make_root(inode); if (!dentry) { ret = -ENOMEM; goto 
failed_dentry; } } else { iput(inode); } } else { dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; } } *root_dentry = dentry; out: return ret; failed_dentry: nilfs_err(sb, "error %d getting root dentry", ret); goto out; } static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct dentry **root_dentry) { struct the_nilfs *nilfs = s->s_fs_info; struct nilfs_root *root; int ret; mutex_lock(&nilfs->ns_snapshot_mount_mutex); down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); if (ret < 0) { ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { nilfs_err(s, "The specified checkpoint is not a snapshot (checkpoint number=%llu)", (unsigned long long)cno); ret = -EINVAL; goto out; } ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { nilfs_err(s, "error %d while loading snapshot (checkpoint number=%llu)", ret, (unsigned long long)cno); goto out; } ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } /** * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * * Return: true if the tree was in-use, false otherwise. */ static bool nilfs_tree_is_busy(struct dentry *root_dentry) { shrink_dcache_parent(root_dentry); return d_count(root_dentry) > 1; } int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; struct inode *inode; struct dentry *dentry; int ret; if (cno > nilfs->ns_cno) return false; if (cno >= nilfs_last_cno(nilfs)) return true; /* protect recent checkpoints */ ret = false; root = nilfs_lookup_root(nilfs, cno); if (root) { inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); if (inode) { dentry = d_find_alias(inode); if (dentry) { ret = nilfs_tree_is_busy(dentry); dput(dentry); } iput(inode); } nilfs_put_root(root); } return ret; } /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. * * Return: 0 on success, or a negative error code on failure. 
*/ static int nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; nilfs = alloc_nilfs(sb); if (!nilfs) return -ENOMEM; sb->s_fs_info = nilfs; err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; /* Copy in parsed mount options */ nilfs->ns_mount_opt = ctx->ns_mount_opt; sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi); err = load_nilfs(nilfs, sb); if (err) goto failed_nilfs; super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, sizeof(nilfs->ns_sbp[0]->s_uuid)); super_set_sysfs_name_bdev(sb); cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { nilfs_err(sb, "error %d while loading last checkpoint (checkpoint number=%llu)", err, (unsigned long long)cno); goto failed_unload; } if (!sb_rdonly(sb)) { err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; } err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); if (err) goto failed_segctor; nilfs_put_root(fsroot); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } return 0; failed_segctor: nilfs_detach_log_writer(sb); failed_checkpoint: nilfs_put_root(fsroot); failed_unload: nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); failed_nilfs: destroy_nilfs(nilfs); return err; } static int nilfs_reconfigure(struct fs_context *fc) { struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; int err; sync_filesystem(sb); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); goto ignore_opts; } if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } else { __u64 features; struct nilfs_root *root; /* * Mounting a RDONLY partition read-write, so reread and * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) 
*/ down_read(&nilfs->ns_sem); features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; up_read(&nilfs->ns_sem); if (features) { nilfs_warn(sb, "couldn't remount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; goto ignore_opts; } sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); if (err) { sb->s_flags |= SB_RDONLY; goto ignore_opts; } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: sb->s_flags = (sb->s_flags & ~SB_POSIXACL); /* Copy over parsed remount options */ nilfs->ns_mount_opt = ctx->ns_mount_opt; return 0; ignore_opts: return err; } static int nilfs_get_tree(struct fs_context *fc) { struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *s; dev_t dev; int err; if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { nilfs_err(NULL, "invalid option \"cp=%llu\": read-only option is not specified", ctx->cno); return -EINVAL; } err = lookup_bdev(fc->source, &dev); if (err) return err; s = sget_dev(fc, dev); if (IS_ERR(s)) return PTR_ERR(s); if (!s->s_root) { err = setup_bdev_super(s, fc->sb_flags, fc); if (!err) err = nilfs_fill_super(s, fc); if (err) goto failed_super; s->s_flags |= SB_ACTIVE; } else if (!ctx->cno) { if (nilfs_tree_is_busy(s->s_root)) { if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; goto failed_super; } } else { /* * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. * * Since nilfs_reconfigure() requires fc->root to be * set, set it first and release it on failure. */ fc->root = dget(s->s_root); err = nilfs_reconfigure(fc); if (err) { dput(fc->root); fc->root = NULL; /* prevent double release */ goto failed_super; } return 0; } } if (ctx->cno) { struct dentry *root_dentry; err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; fc->root = root_dentry; return 0; } fc->root = dget(s->s_root); return 0; failed_super: deactivate_locked_super(s); return err; } static void nilfs_free_fc(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations nilfs_context_ops = { .parse_param = nilfs_parse_param, .get_tree = nilfs_get_tree, .reconfigure = nilfs_reconfigure, .free = nilfs_free_fc, }; static int nilfs_init_fs_context(struct fs_context *fc) { struct nilfs_fs_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; fc->fs_private = ctx; fc->ops = &nilfs_context_ops; return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = nilfs_init_fs_context, .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { struct nilfs_inode_info *ii = obj; INIT_LIST_HEAD(&ii->i_dirty); #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif inode_init_once(&ii->vfs_inode); } static void nilfs_segbuf_init_once(void *obj) { memset(obj, 0, sizeof(struct nilfs_segment_buffer)); } static void nilfs_destroy_cachep(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
*/ rcu_barrier(); kmem_cache_destroy(nilfs_inode_cachep); kmem_cache_destroy(nilfs_transaction_cachep); kmem_cache_destroy(nilfs_segbuf_cachep); kmem_cache_destroy(nilfs_btree_path_cache); } static int __init nilfs_init_cachep(void) { nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", sizeof(struct nilfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, nilfs_inode_init_once); if (!nilfs_inode_cachep) goto fail; nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache", sizeof(struct nilfs_transaction_info), 0, SLAB_RECLAIM_ACCOUNT, NULL); if (!nilfs_transaction_cachep) goto fail; nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache", sizeof(struct nilfs_segment_buffer), 0, SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once); if (!nilfs_segbuf_cachep) goto fail; nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache", sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); if (!nilfs_btree_path_cache) goto fail; return 0; fail: nilfs_destroy_cachep(); return -ENOMEM; } static int __init init_nilfs_fs(void) { int err; err = nilfs_init_cachep(); if (err) goto fail; err = nilfs_sysfs_init(); if (err) goto free_cachep; err = register_filesystem(&nilfs_fs_type); if (err) goto deinit_sysfs_entry; printk(KERN_INFO "NILFS version 2 loaded\n"); return 0; deinit_sysfs_entry: nilfs_sysfs_exit(); free_cachep: nilfs_destroy_cachep(); fail: return err; } static void __exit exit_nilfs_fs(void) { nilfs_destroy_cachep(); nilfs_sysfs_exit(); unregister_filesystem(&nilfs_fs_type); } module_init(init_nilfs_fs) module_exit(exit_nilfs_fs)
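A note on the resize arithmetic in nilfs_resize_fs() above: the new secondary superblock offset comes from NILFS_SB2_OFFSET_BYTES(), which points at the last whole 4 KiB of the resized device, and the new segment count is capped by the block index of that offset. Below is a minimal standalone sketch of the same calculation, assuming the NILFS_SB2_OFFSET_BYTES() definition from the nilfs2 UAPI header; blocksize_bits and blocks_per_segment are illustrative values, not read from a real device.

/* sketch.c - illustrative only, not kernel code */
#include <stdio.h>
#include <stdint.h>

/* From include/uapi/linux/nilfs2_ondisk.h: the secondary superblock
 * sits in the last whole 4 KiB block of the device.
 */
#define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12)

int main(void)
{
	uint64_t newsize = 8ULL << 30;		/* resize target: 8 GiB (example) */
	unsigned int blocksize_bits = 12;	/* 4 KiB blocks (example) */
	uint64_t blocks_per_segment = 2048;	/* 8 MiB segments (example) */

	/* New location of the secondary superblock */
	uint64_t sb2off = NILFS_SB2_OFFSET_BYTES(newsize);

	/* As in nilfs_resize_fs(): segments may only use blocks below the
	 * secondary superblock, so the new segment count is the block
	 * index of sb2off divided by the blocks per segment.
	 */
	uint64_t newnsegs = (sb2off >> blocksize_bits) / blocks_per_segment;

	printf("sb2off=%llu newnsegs=%llu\n",
	       (unsigned long long)sb2off, (unsigned long long)newnsegs);
	return 0;
}

This also makes the ordering constraint visible: the allocatable range is adjusted to the new newnsegs only after the superblock copy at sb2off has been committed, which is exactly the ordering the comment in nilfs_resize_fs() insists on for the expansion case.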
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256
 * Copyright 2015, Qualcomm Atheros, Inc.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/err.h>
#include <crypto/aead.h>
#include <crypto/aes.h>
#include <net/mac80211.h>
#include "key.h"
#include "aes_gmac.h"

int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
		       const u8 *data, size_t data_len, u8 *mic)
{
	struct scatterlist sg[5];
	u8 *zero, *__aad, iv[AES_BLOCK_SIZE];
	struct aead_request *aead_req;
	int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
	const __le16 *fc;
	int ret;

	if (data_len < IEEE80211_GMAC_MIC_LEN)
		return -EINVAL;

	aead_req = kzalloc(reqsize + IEEE80211_GMAC_MIC_LEN + GMAC_AAD_LEN,
			   GFP_ATOMIC);
	if (!aead_req)
		return -ENOMEM;

	zero = (u8 *)aead_req + reqsize;
	__aad = zero + IEEE80211_GMAC_MIC_LEN;
	memcpy(__aad, aad, GMAC_AAD_LEN);

	fc = (const __le16 *)aad;
	if (ieee80211_is_beacon(*fc)) {
		/* mask Timestamp field to zero */
		sg_init_table(sg, 5);
		sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN);
		sg_set_buf(&sg[1], zero, 8);
		sg_set_buf(&sg[2], data + 8,
			   data_len - 8 - IEEE80211_GMAC_MIC_LEN);
		sg_set_buf(&sg[3], zero, IEEE80211_GMAC_MIC_LEN);
		sg_set_buf(&sg[4], mic, IEEE80211_GMAC_MIC_LEN);
	} else {
		sg_init_table(sg, 4);
		sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN);
		sg_set_buf(&sg[1], data, data_len - IEEE80211_GMAC_MIC_LEN);
		sg_set_buf(&sg[2], zero, IEEE80211_GMAC_MIC_LEN);
		sg_set_buf(&sg[3], mic, IEEE80211_GMAC_MIC_LEN);
	}

	memcpy(iv, nonce, GMAC_NONCE_LEN);
	memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN);
	iv[AES_BLOCK_SIZE - 1] = 0x01;

	aead_request_set_tfm(aead_req, tfm);
	aead_request_set_crypt(aead_req, sg, sg, 0, iv);
	aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len);

	ret = crypto_aead_encrypt(aead_req);
	kfree_sensitive(aead_req);

	return ret;
}

struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[],
						 size_t key_len)
{
	struct crypto_aead *tfm;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return tfm;

	err = crypto_aead_setkey(tfm, key, key_len);
	if (!err)
		err = crypto_aead_setauthsize(tfm, IEEE80211_GMAC_MIC_LEN);
	if (!err)
		return tfm;

	crypto_free_aead(tfm);
	return ERR_PTR(err);
}

void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm)
{
	crypto_free_aead(tfm);
}
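A remark on the construction above: there is no separate "gmac(aes)" transform here. ieee80211_aes_gmac() derives GMAC from "gcm(aes)" by passing the AAD plus the entire (masked) frame as associated data with a zero-length crypt payload, so crypto_aead_encrypt() emits only the 16-byte tag. The IV handling builds the full 16-byte GCM counter block J0 = nonce || 0...0 || 0x01 for the 12-byte GMAC nonce. The same trick can be sketched in userspace with OpenSSL's EVP API; this is a hedged illustration, not mac80211 code, and OpenSSL forms the same J0 internally from a 12-byte IV.

/* gmac_sketch.c - illustrative only; link with -lcrypto */
#include <string.h>
#include <openssl/evp.h>

/* AES-128 GMAC over `aad` using GCM with empty plaintext: everything is
 * authenticated, nothing is encrypted, and the GCM tag is the MIC.
 */
static int aes128_gmac(const unsigned char key[16], const unsigned char iv[12],
		       const unsigned char *aad, size_t aad_len,
		       unsigned char tag[16])
{
	EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
	unsigned char dummy[16];
	int outl, ok;

	if (!ctx)
		return -1;
	ok = EVP_EncryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, key, iv) &&
	     /* feed all input as AAD, never as plaintext */
	     EVP_EncryptUpdate(ctx, NULL, &outl, aad, (int)aad_len) &&
	     EVP_EncryptFinal_ex(ctx, dummy, &outl) &&
	     EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, 16, tag);
	EVP_CIPHER_CTX_free(ctx);
	return ok ? 0 : -1;
}

For a beacon, the caller would zero the 8-byte Timestamp field before feeding the frame in, mirroring the five-entry scatterlist in the kernel version.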
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro
 *
 * Copyright (C) 2010-2012 esd electronic system design gmbh, Matthias Fuchs <socketcan@esd.eu>
 * Copyright (C) 2022-2024 esd electronics gmbh, Frank Jungclaus <frank.jungclaus@esd.eu>
 */

#include <linux/can.h>
#include <linux/can/dev.h>
#include <linux/can/error.h>
#include <linux/err.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/units.h>
#include <linux/usb.h>

MODULE_AUTHOR("Matthias Fuchs <socketcan@esd.eu>");
MODULE_AUTHOR("Frank Jungclaus <frank.jungclaus@esd.eu>");
MODULE_DESCRIPTION("CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro interfaces");
MODULE_LICENSE("GPL v2");

/* USB vendor and product ID */
#define ESD_USB_ESDGMBH_VENDOR_ID	0x0ab4
#define ESD_USB_CANUSB2_PRODUCT_ID	0x0010
#define ESD_USB_CANUSBM_PRODUCT_ID	0x0011
#define ESD_USB_CANUSB3_PRODUCT_ID	0x0014

/* CAN controller clock frequencies */
#define ESD_USB_2_CAN_CLOCK	(60 * MEGA) /* Hz */
#define ESD_USB_M_CAN_CLOCK	(36 * MEGA) /* Hz */
#define ESD_USB_3_CAN_CLOCK	(80 * MEGA) /* Hz */

/* Maximum number of CAN nets */
#define ESD_USB_MAX_NETS	2

/* USB commands */
#define ESD_USB_CMD_VERSION	1 /* also used for VERSION_REPLY */
#define ESD_USB_CMD_CAN_RX	2 /* device to host only */
#define ESD_USB_CMD_CAN_TX	3 /* also used for TX_DONE */
#define ESD_USB_CMD_SETBAUD	4 /* also used for SETBAUD_REPLY */
#define ESD_USB_CMD_TS		5 /* also used for TS_REPLY */
#define ESD_USB_CMD_IDADD	6 /* also used for IDADD_REPLY */

/* esd CAN message flags - dlc field */
#define ESD_USB_RTR	BIT(4)
#define ESD_USB_NO_BRS	BIT(4)
#define ESD_USB_ESI	BIT(5)
#define ESD_USB_FD	BIT(7)

/* esd CAN message flags - id field */
#define ESD_USB_EXTID	BIT(29)
#define ESD_USB_EVENT	BIT(30)
#define ESD_USB_IDMASK	GENMASK(28, 0)

/* esd CAN event ids */
#define ESD_USB_EV_CAN_ERROR_EXT	2 /* CAN controller specific diagnostic data */

/* baudrate message flags */
#define ESD_USB_LOM	BIT(30) /* Listen Only Mode */
#define ESD_USB_UBR	BIT(31) /* User Bit Rate (controller BTR) in bits 0..27 */
#define ESD_USB_NO_BAUDRATE	GENMASK(30, 0) /* bit rate unconfigured */

/* bit timing esd CAN-USB */
#define ESD_USB_2_TSEG1_SHIFT	16
#define ESD_USB_2_TSEG2_SHIFT	20
#define ESD_USB_2_SJW_SHIFT	14
#define ESD_USB_M_SJW_SHIFT	24
#define ESD_USB_TRIPLE_SAMPLES	BIT(23)

/* Transmitter Delay Compensation */
#define ESD_USB_3_TDC_MODE_AUTO	0

/* esd IDADD message */
#define ESD_USB_ID_ENABLE	BIT(7)
#define ESD_USB_MAX_ID_SEGMENT	64

/* SJA1000 ECC register (emulated by usb firmware) */
#define ESD_USB_SJA1000_ECC_SEG		GENMASK(4, 0)
#define ESD_USB_SJA1000_ECC_DIR		BIT(5)
#define ESD_USB_SJA1000_ECC_ERR		GENMASK(2, 1)
#define ESD_USB_SJA1000_ECC_BIT		0x00
#define ESD_USB_SJA1000_ECC_FORM	BIT(6)
#define ESD_USB_SJA1000_ECC_STUFF	BIT(7)
#define ESD_USB_SJA1000_ECC_MASK	GENMASK(7, 6)

/* esd bus state event codes */
#define ESD_USB_BUSSTATE_MASK		GENMASK(7, 6)
#define ESD_USB_BUSSTATE_WARN		BIT(6)
#define ESD_USB_BUSSTATE_ERRPASSIVE	BIT(7)
#define ESD_USB_BUSSTATE_BUSOFF		GENMASK(7, 6)

#define ESD_USB_RX_BUFFER_SIZE	1024
#define ESD_USB_MAX_RX_URBS	4
#define ESD_USB_MAX_TX_URBS	16 /* must be power of 2 */

/* Modes for CAN-USB/3, to be used for esd_usb_3_set_baudrate_msg_x.mode */
#define ESD_USB_3_BAUDRATE_MODE_DISABLE		0 /* remove from bus */
#define ESD_USB_3_BAUDRATE_MODE_INDEX		1 /* ESD (CiA) bit rate idx */
#define ESD_USB_3_BAUDRATE_MODE_BTR_CTRL	2 /* BTR values (controller) */
#define ESD_USB_3_BAUDRATE_MODE_BTR_CANONICAL	3 /* BTR values (canonical) */
#define ESD_USB_3_BAUDRATE_MODE_NUM		4 /* numerical bit rate */
#define ESD_USB_3_BAUDRATE_MODE_AUTOBAUD	5 /* autobaud */

/* Flags for CAN-USB/3, to be used for esd_usb_3_set_baudrate_msg_x.flags */
#define ESD_USB_3_BAUDRATE_FLAG_FD	BIT(0) /* enable CAN FD mode */
#define ESD_USB_3_BAUDRATE_FLAG_LOM	BIT(1) /* enable listen only mode */
#define ESD_USB_3_BAUDRATE_FLAG_STM	BIT(2) /* enable self test mode */
#define ESD_USB_3_BAUDRATE_FLAG_TRS	BIT(3) /* enable triple sampling */
#define ESD_USB_3_BAUDRATE_FLAG_TXP	BIT(4) /* enable transmit pause */

struct esd_usb_header_msg {
	u8 len; /* total message length in 32bit words */
	u8 cmd;
	u8 rsvd[2];
};

struct esd_usb_version_msg {
	u8 len; /* total message length in 32bit words */
	u8 cmd;
	u8 rsvd;
	u8 flags;
	__le32 drv_version;
};

struct esd_usb_version_reply_msg {
	u8 len; /* total message length in 32bit words */
	u8 cmd;
	u8 nets;
	u8 features;
	__le32 version;
	u8 name[16];
	__le32 rsvd;
	__le32 ts;
};

struct esd_usb_rx_msg {
	u8 len; /* total message length in 32bit words */
	u8 cmd;
	u8 net;
	u8 dlc;
	__le32 ts;
	__le32 id;
/* upper 3 bits contain flags */ union { u8 data[CAN_MAX_DLEN]; u8 data_fd[CANFD_MAX_DLEN]; struct { u8 status; /* CAN Controller Status */ u8 ecc; /* Error Capture Register */ u8 rec; /* RX Error Counter */ u8 tec; /* TX Error Counter */ } ev_can_err_ext; /* For ESD_EV_CAN_ERROR_EXT */ }; }; struct esd_usb_tx_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 dlc; u32 hnd; /* opaque handle, not used by device */ __le32 id; /* upper 3 bits contain flags */ union { u8 data[CAN_MAX_DLEN]; u8 data_fd[CANFD_MAX_DLEN]; }; }; struct esd_usb_tx_done_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 status; u32 hnd; /* opaque handle, not used by device */ __le32 ts; }; struct esd_usb_id_filter_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 option; __le32 mask[ESD_USB_MAX_ID_SEGMENT + 1]; /* +1 for 29bit extended IDs */ }; struct esd_usb_set_baudrate_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 rsvd; __le32 baud; }; /* CAN-USB/3 baudrate configuration, used for nominal as well as for data bit rate */ struct esd_usb_3_baudrate_cfg { __le16 brp; /* bit rate pre-scaler */ __le16 tseg1; /* time segment before sample point */ __le16 tseg2; /* time segment after sample point */ __le16 sjw; /* synchronization jump Width */ }; /* In principle, the esd CAN-USB/3 supports Transmitter Delay Compensation (TDC), * but currently only the automatic TDC mode is supported by this driver. * An implementation for manual TDC configuration will follow. * * For information about struct esd_usb_3_tdc_cfg, see * NTCAN Application Developers Manual, 6.2.25 NTCAN_TDC_CFG + related chapters * https://esd.eu/fileadmin/esd/docs/manuals/NTCAN_Part1_Function_API_Manual_en_56.pdf */ struct esd_usb_3_tdc_cfg { u8 tdc_mode; /* transmitter delay compensation mode */ u8 ssp_offset; /* secondary sample point offset in mtq */ s8 ssp_shift; /* secondary sample point shift in mtq */ u8 tdc_filter; /* TDC filter in mtq */ }; /* Extended version of the above set_baudrate_msg for a CAN-USB/3 * to define the CAN bit timing configuration of the CAN controller in * CAN FD mode as well as in Classical CAN mode. * * The payload of this command is a NTCAN_BAUDRATE_X structure according to * esd electronics gmbh, NTCAN Application Developers Manual, 6.2.15 NTCAN_BAUDRATE_X * https://esd.eu/fileadmin/esd/docs/manuals/NTCAN_Part1_Function_API_Manual_en_56.pdf */ struct esd_usb_3_set_baudrate_msg_x { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 rsvd; /*reserved */ /* Payload ... 
*/ __le16 mode; /* mode word, see ESD_USB_3_BAUDRATE_MODE_xxx */ __le16 flags; /* control flags, see ESD_USB_3_BAUDRATE_FLAG_xxx */ struct esd_usb_3_tdc_cfg tdc; /* TDC configuration */ struct esd_usb_3_baudrate_cfg nom; /* nominal bit rate */ struct esd_usb_3_baudrate_cfg data; /* data bit rate */ }; /* Main message type used between library and application */ union __packed esd_usb_msg { struct esd_usb_header_msg hdr; struct esd_usb_version_msg version; struct esd_usb_version_reply_msg version_reply; struct esd_usb_rx_msg rx; struct esd_usb_tx_msg tx; struct esd_usb_tx_done_msg txdone; struct esd_usb_set_baudrate_msg setbaud; struct esd_usb_3_set_baudrate_msg_x setbaud_x; struct esd_usb_id_filter_msg filter; }; static struct usb_device_id esd_usb_table[] = { {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSB2_PRODUCT_ID)}, {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSBM_PRODUCT_ID)}, {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSB3_PRODUCT_ID)}, {} }; MODULE_DEVICE_TABLE(usb, esd_usb_table); struct esd_usb_net_priv; struct esd_tx_urb_context { struct esd_usb_net_priv *priv; u32 echo_index; }; struct esd_usb { struct usb_device *udev; struct esd_usb_net_priv *nets[ESD_USB_MAX_NETS]; struct usb_anchor rx_submitted; int net_count; u32 version; int rxinitdone; int in_usb_disconnect; void *rxbuf[ESD_USB_MAX_RX_URBS]; dma_addr_t rxbuf_dma[ESD_USB_MAX_RX_URBS]; }; struct esd_usb_net_priv { struct can_priv can; /* must be the first member */ atomic_t active_tx_jobs; struct usb_anchor tx_submitted; struct esd_tx_urb_context tx_contexts[ESD_USB_MAX_TX_URBS]; struct esd_usb *usb; struct net_device *netdev; int index; u8 old_state; struct can_berr_counter bec; }; static void esd_usb_rx_event(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; struct sk_buff *skb; u32 id = le32_to_cpu(msg->rx.id) & ESD_USB_IDMASK; if (id == ESD_USB_EV_CAN_ERROR_EXT) { u8 state = msg->rx.ev_can_err_ext.status; u8 ecc = msg->rx.ev_can_err_ext.ecc; priv->bec.rxerr = msg->rx.ev_can_err_ext.rec; priv->bec.txerr = msg->rx.ev_can_err_ext.tec; netdev_dbg(priv->netdev, "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", msg->rx.dlc, state, ecc, priv->bec.rxerr, priv->bec.txerr); /* if berr-reporting is off, only pass through on state change ... */ if (!(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) && state == priv->old_state) return; skb = alloc_can_err_skb(priv->netdev, &cf); if (!skb) stats->rx_dropped++; if (state != priv->old_state) { enum can_state tx_state, rx_state; enum can_state new_state = CAN_STATE_ERROR_ACTIVE; priv->old_state = state; switch (state & ESD_USB_BUSSTATE_MASK) { case ESD_USB_BUSSTATE_BUSOFF: new_state = CAN_STATE_BUS_OFF; can_bus_off(priv->netdev); break; case ESD_USB_BUSSTATE_WARN: new_state = CAN_STATE_ERROR_WARNING; break; case ESD_USB_BUSSTATE_ERRPASSIVE: new_state = CAN_STATE_ERROR_PASSIVE; break; default: new_state = CAN_STATE_ERROR_ACTIVE; priv->bec.txerr = 0; priv->bec.rxerr = 0; break; } if (new_state != priv->can.state) { tx_state = (priv->bec.txerr >= priv->bec.rxerr) ? new_state : 0; rx_state = (priv->bec.txerr <= priv->bec.rxerr) ? 
new_state : 0; can_change_state(priv->netdev, cf, tx_state, rx_state); } } else if (skb) { priv->can.can_stats.bus_error++; stats->rx_errors++; cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; switch (ecc & ESD_USB_SJA1000_ECC_MASK) { case ESD_USB_SJA1000_ECC_BIT: cf->data[2] |= CAN_ERR_PROT_BIT; break; case ESD_USB_SJA1000_ECC_FORM: cf->data[2] |= CAN_ERR_PROT_FORM; break; case ESD_USB_SJA1000_ECC_STUFF: cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: break; } /* Error occurred during transmission? */ if (!(ecc & ESD_USB_SJA1000_ECC_DIR)) cf->data[2] |= CAN_ERR_PROT_TX; /* Bit stream position in CAN frame as the error was detected */ cf->data[3] = ecc & ESD_USB_SJA1000_ECC_SEG; } if (skb) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = priv->bec.txerr; cf->data[7] = priv->bec.rxerr; netif_rx(skb); } } } static void esd_usb_rx_can_msg(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; struct canfd_frame *cfd; struct sk_buff *skb; u32 id; u8 len; if (!netif_device_present(priv->netdev)) return; id = le32_to_cpu(msg->rx.id); if (id & ESD_USB_EVENT) { esd_usb_rx_event(priv, msg); } else { if (msg->rx.dlc & ESD_USB_FD) { skb = alloc_canfd_skb(priv->netdev, &cfd); } else { skb = alloc_can_skb(priv->netdev, &cf); cfd = (struct canfd_frame *)cf; } if (skb == NULL) { stats->rx_dropped++; return; } cfd->can_id = id & ESD_USB_IDMASK; if (msg->rx.dlc & ESD_USB_FD) { /* masking by 0x0F is already done within can_fd_dlc2len() */ cfd->len = can_fd_dlc2len(msg->rx.dlc); len = cfd->len; if ((msg->rx.dlc & ESD_USB_NO_BRS) == 0) cfd->flags |= CANFD_BRS; if (msg->rx.dlc & ESD_USB_ESI) cfd->flags |= CANFD_ESI; } else { can_frame_set_cc_len(cf, msg->rx.dlc & ~ESD_USB_RTR, priv->can.ctrlmode); len = cf->len; if (msg->rx.dlc & ESD_USB_RTR) { cf->can_id |= CAN_RTR_FLAG; len = 0; } } if (id & ESD_USB_EXTID) cfd->can_id |= CAN_EFF_FLAG; memcpy(cfd->data, msg->rx.data_fd, len); stats->rx_bytes += len; stats->rx_packets++; netif_rx(skb); } } static void esd_usb_tx_done_msg(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct net_device *netdev = priv->netdev; struct esd_tx_urb_context *context; if (!netif_device_present(netdev)) return; context = &priv->tx_contexts[msg->txdone.hnd & (ESD_USB_MAX_TX_URBS - 1)]; if (!msg->txdone.status) { stats->tx_packets++; stats->tx_bytes += can_get_echo_skb(netdev, context->echo_index, NULL); } else { stats->tx_errors++; can_free_echo_skb(netdev, context->echo_index, NULL); } /* Release context */ context->echo_index = ESD_USB_MAX_TX_URBS; atomic_dec(&priv->active_tx_jobs); netif_wake_queue(netdev); } static void esd_usb_read_bulk_callback(struct urb *urb) { struct esd_usb *dev = urb->context; int err; int pos = 0; int i; switch (urb->status) { case 0: /* success */ break; case -ENOENT: case -EPIPE: case -EPROTO: case -ESHUTDOWN: return; default: dev_info(dev->udev->dev.parent, "Rx URB aborted (%pe)\n", ERR_PTR(urb->status)); goto resubmit_urb; } while (pos < urb->actual_length) { union esd_usb_msg *msg; msg = (union esd_usb_msg *)(urb->transfer_buffer + pos); switch (msg->hdr.cmd) { case ESD_USB_CMD_CAN_RX: if (msg->rx.net >= dev->net_count) { dev_err(dev->udev->dev.parent, "format error\n"); break; } esd_usb_rx_can_msg(dev->nets[msg->rx.net], msg); break; case ESD_USB_CMD_CAN_TX: if (msg->txdone.net >= dev->net_count) { dev_err(dev->udev->dev.parent, "format error\n"); break; } esd_usb_tx_done_msg(dev->nets[msg->txdone.net], msg); break; 
} pos += msg->hdr.len * sizeof(u32); /* convert to # of bytes */ if (pos > urb->actual_length) { dev_err(dev->udev->dev.parent, "format error\n"); break; } } resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); if (!err) return; usb_unanchor_urb(urb); if (err == -ENODEV) { for (i = 0; i < dev->net_count; i++) { if (dev->nets[i]) netif_device_detach(dev->nets[i]->netdev); } } else { dev_err(dev->udev->dev.parent, "failed resubmitting read bulk urb: %pe\n", ERR_PTR(err)); } } /* callback for bulk IN urb */ static void esd_usb_write_bulk_callback(struct urb *urb) { struct esd_tx_urb_context *context = urb->context; struct esd_usb_net_priv *priv; struct net_device *netdev; size_t size = sizeof(union esd_usb_msg); WARN_ON(!context); priv = context->priv; netdev = priv->netdev; /* free up our allocated buffer */ usb_free_coherent(urb->dev, size, urb->transfer_buffer, urb->transfer_dma); if (!netif_device_present(netdev)) return; if (urb->status) netdev_info(netdev, "Tx URB aborted (%pe)\n", ERR_PTR(urb->status)); netif_trans_update(netdev); } static ssize_t firmware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 12) & 0xf, (dev->version >> 8) & 0xf, dev->version & 0xff); } static DEVICE_ATTR_RO(firmware); static ssize_t hardware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 28) & 0xf, (dev->version >> 24) & 0xf, (dev->version >> 16) & 0xff); } static DEVICE_ATTR_RO(hardware); static ssize_t nets_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d", dev->net_count); } static DEVICE_ATTR_RO(nets); static int esd_usb_send_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), msg, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ &actual_length, 1000); } static int esd_usb_wait_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; return usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, 1), msg, sizeof(*msg), &actual_length, 1000); } static int esd_usb_setup_rx_urbs(struct esd_usb *dev) { int i, err = 0; if (dev->rxinitdone) return 0; for (i = 0; i < ESD_USB_MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { err = -ENOMEM; break; } buf = usb_alloc_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, GFP_KERNEL, &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); err = -ENOMEM; goto freeurb; } urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_KERNEL); if (err) { usb_unanchor_urb(urb); usb_free_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, buf, urb->transfer_dma); goto freeurb; } dev->rxbuf[i] = 
buf; dev->rxbuf_dma[i] = buf_dma; freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); if (err) break; } /* Did we submit any URBs */ if (i == 0) { dev_err(dev->udev->dev.parent, "couldn't setup read URBs\n"); return err; } /* Warn if we've couldn't transmit all the URBs */ if (i < ESD_USB_MAX_RX_URBS) { dev_warn(dev->udev->dev.parent, "rx performance may be slow\n"); } dev->rxinitdone = 1; return 0; } /* Start interface */ static int esd_usb_start(struct esd_usb_net_priv *priv) { struct esd_usb *dev = priv->usb; struct net_device *netdev = priv->netdev; union esd_usb_msg *msg; int err, i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out; } /* Enable all IDs * The IDADD message takes up to 64 32 bit bitmasks (2048 bits). * Each bit represents one 11 bit CAN identifier. A set bit * enables reception of the corresponding CAN identifier. A cleared * bit disabled this identifier. An additional bitmask value * following the CAN 2.0A bits is used to enable reception of * extended CAN frames. Only the LSB of this final mask is checked * for the complete 29 bit ID range. The IDADD message also allows * filter configuration for an ID subset. In this case you can add * the number of the starting bitmask (0..64) to the filter.option * field followed by only some bitmasks. */ msg->hdr.cmd = ESD_USB_CMD_IDADD; msg->hdr.len = sizeof(struct esd_usb_id_filter_msg) / sizeof(u32); /* # of 32bit words */ msg->filter.net = priv->index; msg->filter.option = ESD_USB_ID_ENABLE; /* start with segment 0 */ for (i = 0; i < ESD_USB_MAX_ID_SEGMENT; i++) msg->filter.mask[i] = cpu_to_le32(GENMASK(31, 0)); /* enable 29bit extended IDs */ msg->filter.mask[ESD_USB_MAX_ID_SEGMENT] = cpu_to_le32(BIT(0)); err = esd_usb_send_msg(dev, msg); if (err) goto out; err = esd_usb_setup_rx_urbs(dev); if (err) goto out; priv->can.state = CAN_STATE_ERROR_ACTIVE; out: if (err == -ENODEV) netif_device_detach(netdev); if (err) netdev_err(netdev, "couldn't start device: %pe\n", ERR_PTR(err)); kfree(msg); return err; } static void unlink_all_urbs(struct esd_usb *dev) { struct esd_usb_net_priv *priv; int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); for (i = 0; i < ESD_USB_MAX_RX_URBS; ++i) usb_free_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, dev->rxbuf[i], dev->rxbuf_dma[i]); for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { usb_kill_anchored_urbs(&priv->tx_submitted); atomic_set(&priv->active_tx_jobs, 0); for (j = 0; j < ESD_USB_MAX_TX_URBS; j++) priv->tx_contexts[j].echo_index = ESD_USB_MAX_TX_URBS; } } } static int esd_usb_open(struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); int err; /* common open */ err = open_candev(netdev); if (err) return err; /* finally start device */ err = esd_usb_start(priv); if (err) { close_candev(netdev); return err; } netif_start_queue(netdev); return 0; } static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); struct esd_usb *dev = priv->usb; struct esd_tx_urb_context *context = NULL; struct net_device_stats *stats = &netdev->stats; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; union esd_usb_msg *msg; struct urb *urb; u8 *buf; int i, err; int ret = NETDEV_TX_OK; size_t size = sizeof(union esd_usb_msg); if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { 
stats->tx_dropped++; dev_kfree_skb(skb); goto nourbmem; } buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); stats->tx_dropped++; dev_kfree_skb(skb); goto nobufmem; } msg = (union esd_usb_msg *)buf; /* minimal length as # of 32bit words */ msg->hdr.len = offsetof(struct esd_usb_tx_msg, data) / sizeof(u32); msg->hdr.cmd = ESD_USB_CMD_CAN_TX; msg->tx.net = priv->index; if (can_is_canfd_skb(skb)) { msg->tx.dlc = can_fd_len2dlc(cfd->len); msg->tx.dlc |= ESD_USB_FD; if ((cfd->flags & CANFD_BRS) == 0) msg->tx.dlc |= ESD_USB_NO_BRS; } else { msg->tx.dlc = can_get_cc_dlc((struct can_frame *)cfd, priv->can.ctrlmode); if (cfd->can_id & CAN_RTR_FLAG) msg->tx.dlc |= ESD_USB_RTR; } msg->tx.id = cpu_to_le32(cfd->can_id & CAN_ERR_MASK); if (cfd->can_id & CAN_EFF_FLAG) msg->tx.id |= cpu_to_le32(ESD_USB_EXTID); memcpy(msg->tx.data_fd, cfd->data, cfd->len); /* round up, then divide by 4 to add the payload length as # of 32bit words */ msg->hdr.len += DIV_ROUND_UP(cfd->len, sizeof(u32)); for (i = 0; i < ESD_USB_MAX_TX_URBS; i++) { if (priv->tx_contexts[i].echo_index == ESD_USB_MAX_TX_URBS) { context = &priv->tx_contexts[i]; break; } } /* This may never happen */ if (!context) { netdev_warn(netdev, "couldn't find free context\n"); ret = NETDEV_TX_BUSY; goto releasebuf; } context->priv = priv; context->echo_index = i; /* hnd must not be 0 - MSB is stripped in txdone handling */ msg->tx.hnd = BIT(31) | i; /* returned in TX done message */ usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ esd_usb_write_bulk_callback, context); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&priv->active_tx_jobs); /* Slow down tx path */ if (atomic_read(&priv->active_tx_jobs) >= ESD_USB_MAX_TX_URBS) netif_stop_queue(netdev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { can_free_echo_skb(netdev, context->echo_index, NULL); atomic_dec(&priv->active_tx_jobs); usb_unanchor_urb(urb); stats->tx_dropped++; if (err == -ENODEV) netif_device_detach(netdev); else netdev_warn(netdev, "failed tx_urb %pe\n", ERR_PTR(err)); goto releasebuf; } netif_trans_update(netdev); /* Release our reference to this URB, the USB core will eventually free * it entirely. 
*/ usb_free_urb(urb); return NETDEV_TX_OK; releasebuf: usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); nobufmem: usb_free_urb(urb); nourbmem: return ret; } /* Stop interface */ static int esd_usb_stop(struct esd_usb_net_priv *priv) { union esd_usb_msg *msg; int err; int i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; /* Disable all IDs (see esd_usb_start()) */ msg->hdr.cmd = ESD_USB_CMD_IDADD; msg->hdr.len = sizeof(struct esd_usb_id_filter_msg) / sizeof(u32);/* # of 32bit words */ msg->filter.net = priv->index; msg->filter.option = ESD_USB_ID_ENABLE; /* start with segment 0 */ for (i = 0; i <= ESD_USB_MAX_ID_SEGMENT; i++) msg->filter.mask[i] = 0; err = esd_usb_send_msg(priv->usb, msg); if (err < 0) { netdev_err(priv->netdev, "sending idadd message failed: %pe\n", ERR_PTR(err)); goto bail; } /* set CAN controller to reset mode */ msg->hdr.len = sizeof(struct esd_usb_set_baudrate_msg) / sizeof(u32); /* # of 32bit words */ msg->hdr.cmd = ESD_USB_CMD_SETBAUD; msg->setbaud.net = priv->index; msg->setbaud.rsvd = 0; msg->setbaud.baud = cpu_to_le32(ESD_USB_NO_BAUDRATE); err = esd_usb_send_msg(priv->usb, msg); if (err < 0) netdev_err(priv->netdev, "sending setbaud message failed: %pe\n", ERR_PTR(err)); bail: kfree(msg); return err; } static int esd_usb_close(struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); int err = 0; if (!priv->usb->in_usb_disconnect) { /* It's moot to try this in usb_disconnect()! */ err = esd_usb_stop(priv); } priv->can.state = CAN_STATE_STOPPED; netif_stop_queue(netdev); close_candev(netdev); return err; } static const struct net_device_ops esd_usb_netdev_ops = { .ndo_open = esd_usb_open, .ndo_stop = esd_usb_close, .ndo_start_xmit = esd_usb_start_xmit, }; static const struct ethtool_ops esd_usb_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static const struct can_bittiming_const esd_usb_2_bittiming_const = { .name = "esd_usb_2", .tseg1_min = 1, .tseg1_max = 16, .tseg2_min = 1, .tseg2_max = 8, .sjw_max = 4, .brp_min = 1, .brp_max = 1024, .brp_inc = 1, }; static int esd_usb_2_set_bittiming(struct net_device *netdev) { const struct can_bittiming_const *btc = &esd_usb_2_bittiming_const; struct esd_usb_net_priv *priv = netdev_priv(netdev); struct can_bittiming *bt = &priv->can.bittiming; union esd_usb_msg *msg; int err; u32 canbtr; int sjw_shift; canbtr = ESD_USB_UBR; if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) canbtr |= ESD_USB_LOM; canbtr |= (bt->brp - 1) & (btc->brp_max - 1); if (le16_to_cpu(priv->usb->udev->descriptor.idProduct) == ESD_USB_CANUSBM_PRODUCT_ID) sjw_shift = ESD_USB_M_SJW_SHIFT; else sjw_shift = ESD_USB_2_SJW_SHIFT; canbtr |= ((bt->sjw - 1) & (btc->sjw_max - 1)) << sjw_shift; canbtr |= ((bt->prop_seg + bt->phase_seg1 - 1) & (btc->tseg1_max - 1)) << ESD_USB_2_TSEG1_SHIFT; canbtr |= ((bt->phase_seg2 - 1) & (btc->tseg2_max - 1)) << ESD_USB_2_TSEG2_SHIFT; if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) canbtr |= ESD_USB_TRIPLE_SAMPLES; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; msg->hdr.len = sizeof(struct esd_usb_set_baudrate_msg) / sizeof(u32); /* # of 32bit words */ msg->hdr.cmd = ESD_USB_CMD_SETBAUD; msg->setbaud.net = priv->index; msg->setbaud.rsvd = 0; msg->setbaud.baud = cpu_to_le32(canbtr); netdev_dbg(netdev, "setting BTR=%#x\n", canbtr); err = esd_usb_send_msg(priv->usb, msg); kfree(msg); return err; } /* Nominal bittiming constants, see * Microchip SAM E70/S70/V70/V71, Data Sheet, Rev. 
G - 07/2022 * 48.6.8 MCAN Nominal Bit Timing and Prescaler Register */ static const struct can_bittiming_const esd_usb_3_nom_bittiming_const = { .name = "esd_usb_3", .tseg1_min = 2, .tseg1_max = 256, .tseg2_min = 2, .tseg2_max = 128, .sjw_max = 128, .brp_min = 1, .brp_max = 512, .brp_inc = 1, }; /* Data bittiming constants, see * Microchip SAM E70/S70/V70/V71, Data Sheet, Rev. G - 07/2022 * 48.6.4 MCAN Data Bit Timing and Prescaler Register */ static const struct can_bittiming_const esd_usb_3_data_bittiming_const = { .name = "esd_usb_3", .tseg1_min = 2, .tseg1_max = 32, .tseg2_min = 1, .tseg2_max = 16, .sjw_max = 8, .brp_min = 1, .brp_max = 32, .brp_inc = 1, }; static int esd_usb_3_set_bittiming(struct net_device *netdev) { const struct can_bittiming_const *nom_btc = &esd_usb_3_nom_bittiming_const; const struct can_bittiming_const *data_btc = &esd_usb_3_data_bittiming_const; struct esd_usb_net_priv *priv = netdev_priv(netdev); struct can_bittiming *nom_bt = &priv->can.bittiming; struct can_bittiming *data_bt = &priv->can.fd.data_bittiming; struct esd_usb_3_set_baudrate_msg_x *baud_x; union esd_usb_msg *msg; u16 flags = 0; int err; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; baud_x = &msg->setbaud_x; /* Canonical is the most reasonable mode for SocketCAN on CAN-USB/3 ... */ baud_x->mode = cpu_to_le16(ESD_USB_3_BAUDRATE_MODE_BTR_CANONICAL); if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) flags |= ESD_USB_3_BAUDRATE_FLAG_LOM; baud_x->nom.brp = cpu_to_le16(nom_bt->brp & (nom_btc->brp_max - 1)); baud_x->nom.sjw = cpu_to_le16(nom_bt->sjw & (nom_btc->sjw_max - 1)); baud_x->nom.tseg1 = cpu_to_le16((nom_bt->prop_seg + nom_bt->phase_seg1) & (nom_btc->tseg1_max - 1)); baud_x->nom.tseg2 = cpu_to_le16(nom_bt->phase_seg2 & (nom_btc->tseg2_max - 1)); if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { baud_x->data.brp = cpu_to_le16(data_bt->brp & (data_btc->brp_max - 1)); baud_x->data.sjw = cpu_to_le16(data_bt->sjw & (data_btc->sjw_max - 1)); baud_x->data.tseg1 = cpu_to_le16((data_bt->prop_seg + data_bt->phase_seg1) & (data_btc->tseg1_max - 1)); baud_x->data.tseg2 = cpu_to_le16(data_bt->phase_seg2 & (data_btc->tseg2_max - 1)); flags |= ESD_USB_3_BAUDRATE_FLAG_FD; } /* Currently this driver only supports the automatic TDC mode */ baud_x->tdc.tdc_mode = ESD_USB_3_TDC_MODE_AUTO; baud_x->tdc.ssp_offset = 0; baud_x->tdc.ssp_shift = 0; baud_x->tdc.tdc_filter = 0; baud_x->flags = cpu_to_le16(flags); baud_x->net = priv->index; baud_x->rsvd = 0; /* set len as # of 32bit words */ msg->hdr.len = sizeof(struct esd_usb_3_set_baudrate_msg_x) / sizeof(u32); msg->hdr.cmd = ESD_USB_CMD_SETBAUD; netdev_dbg(netdev, "ctrlmode=%#x/%#x, esd-net=%u, esd-mode=%#x, esd-flags=%#x\n", priv->can.ctrlmode, priv->can.ctrlmode_supported, priv->index, le16_to_cpu(baud_x->mode), flags); err = esd_usb_send_msg(priv->usb, msg); kfree(msg); return err; } static int esd_usb_get_berr_counter(const struct net_device *netdev, struct can_berr_counter *bec) { struct esd_usb_net_priv *priv = netdev_priv(netdev); bec->txerr = priv->bec.txerr; bec->rxerr = priv->bec.rxerr; return 0; } static int esd_usb_set_mode(struct net_device *netdev, enum can_mode mode) { switch (mode) { case CAN_MODE_START: netif_wake_queue(netdev); break; default: return -EOPNOTSUPP; } return 0; } static int esd_usb_probe_one_net(struct usb_interface *intf, int index) { struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; struct esd_usb_net_priv *priv; int err = 0; int i; netdev = alloc_candev(sizeof(*priv), ESD_USB_MAX_TX_URBS); if 
(!netdev) { dev_err(&intf->dev, "couldn't alloc candev\n"); err = -ENOMEM; goto done; } priv = netdev_priv(netdev); init_usb_anchor(&priv->tx_submitted); atomic_set(&priv->active_tx_jobs, 0); for (i = 0; i < ESD_USB_MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = ESD_USB_MAX_TX_URBS; priv->usb = dev; priv->netdev = netdev; priv->index = index; priv->can.state = CAN_STATE_STOPPED; priv->can.ctrlmode_supported = CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_BERR_REPORTING; switch (le16_to_cpu(dev->udev->descriptor.idProduct)) { case ESD_USB_CANUSB3_PRODUCT_ID: priv->can.clock.freq = ESD_USB_3_CAN_CLOCK; priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.bittiming_const = &esd_usb_3_nom_bittiming_const; priv->can.fd.data_bittiming_const = &esd_usb_3_data_bittiming_const; priv->can.do_set_bittiming = esd_usb_3_set_bittiming; priv->can.fd.do_set_data_bittiming = esd_usb_3_set_bittiming; break; case ESD_USB_CANUSBM_PRODUCT_ID: priv->can.clock.freq = ESD_USB_M_CAN_CLOCK; priv->can.bittiming_const = &esd_usb_2_bittiming_const; priv->can.do_set_bittiming = esd_usb_2_set_bittiming; break; case ESD_USB_CANUSB2_PRODUCT_ID: default: priv->can.clock.freq = ESD_USB_2_CAN_CLOCK; priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; priv->can.bittiming_const = &esd_usb_2_bittiming_const; priv->can.do_set_bittiming = esd_usb_2_set_bittiming; break; } priv->can.do_set_mode = esd_usb_set_mode; priv->can.do_get_berr_counter = esd_usb_get_berr_counter; netdev->flags |= IFF_ECHO; /* we support local echo */ netdev->netdev_ops = &esd_usb_netdev_ops; netdev->ethtool_ops = &esd_usb_ethtool_ops; SET_NETDEV_DEV(netdev, &intf->dev); netdev->dev_id = index; err = register_candev(netdev); if (err) { dev_err(&intf->dev, "couldn't register CAN device: %pe\n", ERR_PTR(err)); free_candev(netdev); err = -ENOMEM; goto done; } dev->nets[index] = priv; netdev_info(netdev, "registered\n"); done: return err; } /* probe function for new USB devices * * check version information and number of available * CAN interfaces */ static int esd_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct esd_usb *dev; union esd_usb_msg *msg; int i, err; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { err = -ENOMEM; goto done; } dev->udev = interface_to_usbdev(intf); init_usb_anchor(&dev->rx_submitted); usb_set_intfdata(intf, dev); msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto free_msg; } /* query number of CAN interfaces (nets) */ msg->hdr.cmd = ESD_USB_CMD_VERSION; msg->hdr.len = sizeof(struct esd_usb_version_msg) / sizeof(u32); /* # of 32bit words */ msg->version.rsvd = 0; msg->version.flags = 0; msg->version.drv_version = 0; err = esd_usb_send_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "sending version message failed\n"); goto free_msg; } err = esd_usb_wait_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "no version message answer\n"); goto free_msg; } dev->net_count = (int)msg->version_reply.nets; dev->version = le32_to_cpu(msg->version_reply.version); if (device_create_file(&intf->dev, &dev_attr_firmware)) dev_err(&intf->dev, "Couldn't create device file for firmware\n"); if (device_create_file(&intf->dev, &dev_attr_hardware)) dev_err(&intf->dev, "Couldn't create device file for hardware\n"); if (device_create_file(&intf->dev, &dev_attr_nets)) dev_err(&intf->dev, "Couldn't create device file for nets\n"); /* do per device probing */ for (i = 0; i < dev->net_count; i++) esd_usb_probe_one_net(intf, i); free_msg: kfree(msg); if (err) kfree(dev); 
done: return err; } /* called by the usb core when the device is removed from the system */ static void esd_usb_disconnect(struct usb_interface *intf) { struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; int i; device_remove_file(&intf->dev, &dev_attr_firmware); device_remove_file(&intf->dev, &dev_attr_hardware); device_remove_file(&intf->dev, &dev_attr_nets); usb_set_intfdata(intf, NULL); if (dev) { dev->in_usb_disconnect = 1; for (i = 0; i < dev->net_count; i++) { if (dev->nets[i]) { netdev = dev->nets[i]->netdev; netdev_info(netdev, "unregister\n"); unregister_netdev(netdev); free_candev(netdev); } } unlink_all_urbs(dev); kfree(dev); } } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver esd_usb_driver = { .name = KBUILD_MODNAME, .probe = esd_usb_probe, .disconnect = esd_usb_disconnect, .id_table = esd_usb_table, }; module_usb_driver(esd_usb_driver);
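/*
 * Editor's illustrative sketch (not part of esd_usb.c): module_usb_driver()
 * above hides the module init/exit boilerplate. It expands to roughly the
 * pair below -- usb_register() hooks esd_usb_probe()/esd_usb_disconnect()
 * into the USB core, which then matches newly attached devices against
 * esd_usb_table. The function names here are illustrative; the real macro
 * derives them from the driver variable name.
 */
static int __init esd_usb_sketch_init(void)
{
	/* register with the USB core; probe() runs on matching devices */
	return usb_register(&esd_usb_driver);
}
module_init(esd_usb_sketch_init);

static void __exit esd_usb_sketch_exit(void)
{
	/* disconnect() runs for each bound interface before removal */
	usb_deregister(&esd_usb_driver);
}
module_exit(esd_usb_sketch_exit);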
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* Here we keep miscellaneous functions which are used all over the UBI code */ #include "ubi.h" /** * ubi_calc_data_len - calculate how much real data is stored in a buffer. * @ubi: UBI device description object * @buf: a buffer with the contents of the physical eraseblock * @length: the buffer length * * This function calculates how much "real data" is stored in @buf and returns * the length. Continuous 0xFF bytes at the end of the buffer are not * considered as "real data". */ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length) { int i; ubi_assert(!(length & (ubi->min_io_size - 1))); for (i = length - 1; i >= 0; i--) if (((const uint8_t *)buf)[i] != 0xFF) break; /* The resulting length must be aligned to the minimum flash I/O size */ length = ALIGN(i + 1, ubi->min_io_size); return length; } /** * ubi_check_volume - check the contents of a static volume. * @ubi: UBI device description object * @vol_id: ID of the volume to check * * This function checks if static volume @vol_id is corrupted by fully reading * it and checking data CRC. This function returns %0 if the volume is not * corrupted, %1 if it is corrupted and a negative error code in case of * failure. Dynamic volumes are not checked and zero is returned immediately. */ int ubi_check_volume(struct ubi_device *ubi, int vol_id) { void *buf; int err = 0, i; struct ubi_volume *vol = ubi->volumes[vol_id]; if (vol->vol_type != UBI_STATIC_VOLUME) return 0; buf = vmalloc(vol->usable_leb_size); if (!buf) return -ENOMEM; for (i = 0; i < vol->used_ebs; i++) { int size; cond_resched(); if (i == vol->used_ebs - 1) size = vol->last_eb_bytes; else size = vol->usable_leb_size; err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); if (err) { if (mtd_is_eccerr(err)) err = 1; break; } } vfree(buf); return err; } /** * ubi_update_reserved - update bad eraseblock handling accounting data. * @ubi: UBI device description object * * This function calculates the gap between current number of PEBs reserved for * bad eraseblock handling and the required level of PEBs that must be * reserved, and if necessary, reserves more PEBs to fill that gap, according * to availability. Should be called with ubi->volumes_lock held. */ void ubi_update_reserved(struct ubi_device *ubi) { int need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; if (need <= 0 || ubi->avail_pebs == 0) return; need = min_t(int, need, ubi->avail_pebs); ubi->avail_pebs -= need; ubi->rsvd_pebs += need; ubi->beb_rsvd_pebs += need; ubi_msg(ubi, "reserved more %d PEBs for bad PEB handling", need); } /** * ubi_calculate_reserved - calculate how many PEBs must be reserved for bad * eraseblock handling.
* @ubi: UBI device description object */ void ubi_calculate_reserved(struct ubi_device *ubi) { /* * Calculate the actual number of PEBs currently needed to be reserved * for future bad eraseblock handling. */ ubi->beb_rsvd_level = ubi->bad_peb_limit - ubi->bad_peb_count; if (ubi->beb_rsvd_level < 0) { ubi->beb_rsvd_level = 0; ubi_warn(ubi, "number of bad PEBs (%d) is above the expected limit (%d), not reserving any PEBs for bad PEB handling, will use available PEBs (if any)", ubi->bad_peb_count, ubi->bad_peb_limit); } } /** * ubi_check_pattern - check if buffer contains only a certain byte pattern. * @buf: buffer to check * @patt: the pattern to check * @size: buffer size in bytes * * This function returns %1 if there are only @patt bytes in @buf, and %0 if * something else was also found. */ int ubi_check_pattern(const void *buf, uint8_t patt, int size) { int i; for (i = 0; i < size; i++) if (((const uint8_t *)buf)[i] != patt) return 0; return 1; } /* Normal UBI messages */ void ubi_msg(const struct ubi_device *ubi, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_notice(UBI_NAME_STR "%d: %pV\n", ubi->ubi_num, &vaf); va_end(args); } /* UBI warning messages */ void ubi_warn(const struct ubi_device *ubi, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_warn(UBI_NAME_STR "%d warning: %ps: %pV\n", ubi->ubi_num, __builtin_return_address(0), &vaf); va_end(args); } /* UBI error messages */ void ubi_err(const struct ubi_device *ubi, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err(UBI_NAME_STR "%d error: %ps: %pV\n", ubi->ubi_num, __builtin_return_address(0), &vaf); va_end(args); }
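/*
 * Editor's illustrative sketch (user-space, not kernel code): how the
 * trailing-0xFF trimming in ubi_calc_data_len() above behaves. The buffer
 * size and the 512-byte minimum I/O size are made-up example values;
 * ALIGN_UP mirrors the kernel's ALIGN() for power-of-two alignments.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static int calc_data_len(const uint8_t *buf, int length, int min_io_size)
{
	int i;

	/* Scan backwards past the erased (0xFF) tail of the eraseblock. */
	for (i = length - 1; i >= 0; i--)
		if (buf[i] != 0xFF)
			break;

	/* Round the remaining payload up to the minimum flash I/O size. */
	return ALIGN_UP(i + 1, min_io_size);
}

int main(void)
{
	uint8_t peb[2048];

	memset(peb, 0xFF, sizeof(peb));	/* fully erased eraseblock */
	memset(peb, 0xAB, 700);		/* 700 bytes of "real data" */

	/* Prints 1024: 700 bytes rounded up to a 512-byte I/O boundary. */
	printf("%d\n", calc_data_len(peb, (int)sizeof(peb), 512));
	return 0;
}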
/* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/if_vlan.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <rdma/ib_cache.h> #include "core_priv.h" struct ib_pkey_cache { int table_len; u16 table[] __counted_by(table_len); }; struct ib_update_work { struct work_struct work; struct ib_event event; bool enforce_security; }; union ib_gid zgid; EXPORT_SYMBOL(zgid); enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID = 1UL << 0, GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; enum gid_table_entry_state { GID_TABLE_ENTRY_INVALID = 1, GID_TABLE_ENTRY_VALID = 2, /* * Indicates that entry is pending to be removed, there may * be active users of this GID entry. * When last user of the GID entry releases reference to it, * GID entry is detached from the table. */ GID_TABLE_ENTRY_PENDING_DEL = 3, }; struct roce_gid_ndev_storage { struct rcu_head rcu_head; struct net_device *ndev; }; struct ib_gid_table_entry { struct kref kref; struct work_struct del_work; struct ib_gid_attr attr; void *context; /* Store the ndev pointer to release reference later on in * call_rcu context because by that time gid_table_entry * and attr might be already freed. So keep a copy of it. * ndev_storage is freed by rcu callback. */ struct roce_gid_ndev_storage *ndev_storage; enum gid_table_entry_state state; }; struct ib_gid_table { int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID already exists. * (b) Find a free space. * (c) Write the new GID * * Delete requires different set of operations: * (a) Find the GID * (b) Delete it.
*/ /* Any writer to data_vec must hold this lock and the write side of * rwlock. Readers must hold only rwlock. All writers must be in a * sleepable context. */ struct mutex lock; /* rwlock protects data_vec[ix]->state and entry pointer. */ rwlock_t rwlock; struct ib_gid_table_entry **data_vec; /* bit field, each bit indicates the index of default GID */ u32 default_gid_indices; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port) { struct ib_event event; event.device = ib_dev; event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for * user space compatibility reasons. */ [IB_GID_TYPE_IB] = "IB/RoCE v1", [IB_GID_TYPE_ROCE] = "IB/RoCE v1", [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", }; const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) { if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type]) return gid_type_str[gid_type]; return "Invalid GID type"; } EXPORT_SYMBOL(ib_cache_gid_type_str); /** rdma_is_zero_gid - Check if given GID is zero or not. * @gid: GID to check * Returns true if given GID is zero, returns false otherwise. */ bool rdma_is_zero_gid(const union ib_gid *gid) { return !memcmp(gid, &zgid, sizeof(*gid)); } EXPORT_SYMBOL(rdma_is_zero_gid); /** is_gid_index_default - Check if a given index belongs to * reserved default GIDs or not. * @table: GID table pointer * @index: Index to check in GID table * Returns true if index is one of the reserved default GID indices, otherwise * returns false. */ static bool is_gid_index_default(const struct ib_gid_table *table, unsigned int index) { return index < 32 && (BIT(index) & table->default_gid_indices); } int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; size_t len; int err = -EINVAL; len = strlen(buf); if (len == 0) return -EINVAL; if (buf[len - 1] == '\n') len--; for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i) if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) && len == strlen(gid_type_str[i])) { err = i; break; } return err; } EXPORT_SYMBOL(ib_cache_gid_parse_type_str); static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port) { return device->port_data[port].cache.gid; } static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) { return !entry; } static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) { return entry && entry->state == GID_TABLE_ENTRY_VALID; } static void schedule_free_gid(struct kref *kref) { struct ib_gid_table_entry *entry = container_of(kref, struct ib_gid_table_entry, kref); queue_work(ib_wq, &entry->del_work); } static void put_gid_ndev(struct rcu_head *head) { struct roce_gid_ndev_storage *storage = container_of(head, struct roce_gid_ndev_storage, rcu_head); WARN_ON(!storage->ndev); /* At this point it's safe to release netdev reference, * as all callers working on gid_attr->ndev are done * using this netdev. */ dev_put(storage->ndev); kfree(storage); } static void free_gid_entry_locked(struct ib_gid_table_entry *entry) { struct ib_device *device = entry->attr.device; u32 port_num = entry->attr.port_num; struct ib_gid_table *table = rdma_gid_table(device, port_num); dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__, port_num, entry->attr.index, entry->attr.gid.raw); write_lock_irq(&table->rwlock); /* * The only way to avoid overwriting NULL in table is * by comparing if it is the same entry in table or not!
* If new entry in table is added by the time we free here, * don't overwrite the table entry. */ if (entry == table->data_vec[entry->attr.index]) table->data_vec[entry->attr.index] = NULL; /* Now this index is ready to be allocated */ write_unlock_irq(&table->rwlock); if (entry->ndev_storage) call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev); kfree(entry); } static void free_gid_entry(struct kref *kref) { struct ib_gid_table_entry *entry = container_of(kref, struct ib_gid_table_entry, kref); free_gid_entry_locked(entry); } /** * free_gid_work - Release reference to the GID entry * @work: Work structure to refer to GID entry which needs to be * deleted. * * free_gid_work() frees the entry from the HCA's hardware table * if provider supports it. It releases reference to netdevice. */ static void free_gid_work(struct work_struct *work) { struct ib_gid_table_entry *entry = container_of(work, struct ib_gid_table_entry, del_work); struct ib_device *device = entry->attr.device; u32 port_num = entry->attr.port_num; struct ib_gid_table *table = rdma_gid_table(device, port_num); mutex_lock(&table->lock); free_gid_entry_locked(entry); mutex_unlock(&table->lock); } static struct ib_gid_table_entry * alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; struct net_device *ndev; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return NULL; ndev = rcu_dereference_protected(attr->ndev, 1); if (ndev) { entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage), GFP_KERNEL); if (!entry->ndev_storage) { kfree(entry); return NULL; } dev_hold(ndev); entry->ndev_storage->ndev = ndev; } kref_init(&entry->kref); memcpy(&entry->attr, attr, sizeof(*attr)); INIT_WORK(&entry->del_work, free_gid_work); entry->state = GID_TABLE_ENTRY_INVALID; return entry; } static void store_gid_entry(struct ib_gid_table *table, struct ib_gid_table_entry *entry) { entry->state = GID_TABLE_ENTRY_VALID; dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n", __func__, entry->attr.port_num, entry->attr.index, entry->attr.gid.raw); lockdep_assert_held(&table->lock); write_lock_irq(&table->rwlock); table->data_vec[entry->attr.index] = entry; write_unlock_irq(&table->rwlock); } static void get_gid_entry(struct ib_gid_table_entry *entry) { kref_get(&entry->kref); } static void put_gid_entry(struct ib_gid_table_entry *entry) { kref_put(&entry->kref, schedule_free_gid); } static void put_gid_entry_locked(struct ib_gid_table_entry *entry) { kref_put(&entry->kref, free_gid_entry); } static int add_roce_gid(struct ib_gid_table_entry *entry) { const struct ib_gid_attr *attr = &entry->attr; int ret; if (!attr->ndev) { dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n", __func__, attr->port_num, attr->index); return -EINVAL; } if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { ret = attr->device->ops.add_gid(attr, &entry->context); if (ret) { dev_err(&attr->device->dev, "%s GID add failed port=%u index=%u\n", __func__, attr->port_num, attr->index); return ret; } } return 0; } /** * del_gid - Delete GID table entry * * @ib_dev: IB device whose GID entry to be deleted * @port: Port number of the IB device * @table: GID table of the IB device for a port * @ix: GID entry index to delete * */ static void del_gid(struct ib_device *ib_dev, u32 port, struct ib_gid_table *table, int ix) { struct roce_gid_ndev_storage *ndev_storage; struct ib_gid_table_entry *entry; lockdep_assert_held(&table->lock); dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port, ix, 
table->data_vec[ix]->attr.gid.raw); write_lock_irq(&table->rwlock); entry = table->data_vec[ix]; entry->state = GID_TABLE_ENTRY_PENDING_DEL; /* * For non RoCE protocol, GID entry slot is ready to use. */ if (!rdma_protocol_roce(ib_dev, port)) table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); if (rdma_cap_roce_gid_table(ib_dev, port)) ib_dev->ops.del_gid(&entry->attr, &entry->context); ndev_storage = entry->ndev_storage; if (ndev_storage) { entry->ndev_storage = NULL; rcu_assign_pointer(entry->attr.ndev, NULL); call_rcu(&ndev_storage->rcu_head, put_gid_ndev); } put_gid_entry_locked(entry); } /** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified * @attr: Attributes of the GID * * Returns 0 on success or appropriate error code. It accepts zero * GID addition for non RoCE ports for HCA's who report them as valid * GID. However such zero GIDs are not added to the cache. */ static int add_modify_gid(struct ib_gid_table *table, const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; int ret = 0; /* * Invalidate any old entry in the table to make it safe to write to * this index. */ if (is_gid_entry_valid(table->data_vec[attr->index])) del_gid(attr->device, attr->port_num, table, attr->index); /* * Some HCA's report multiple GID entries with only one valid GID, and * leave other unused entries as the zero GID. Convert zero GIDs to * empty table entries instead of storing them. */ if (rdma_is_zero_gid(&attr->gid)) return 0; entry = alloc_gid_entry(attr); if (!entry) return -ENOMEM; if (rdma_protocol_roce(attr->device, attr->port_num)) { ret = add_roce_gid(entry); if (ret) goto done; } store_gid_entry(table, entry); return 0; done: put_gid_entry(entry); return ret; } /* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, unsigned long mask, int *pempty) { int i = 0; int found = -1; int empty = pempty ? -1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { struct ib_gid_table_entry *data = table->data_vec[i]; struct ib_gid_attr *attr; int curr_index = i; i++; /* find_gid() is used during GID addition where it is expected * to return a free entry slot which is not duplicate. * Free entry slot is requested and returned if pempty is set, * so lookup free slot only if requested. */ if (pempty && empty < 0) { if (is_gid_entry_free(data) && default_gid == is_gid_index_default(table, curr_index)) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our * found slot must be one of the DEFAULT * reserved slots or we fail. * This ensures that only DEFAULT reserved * slots are used for default property GIDs. */ empty = curr_index; } } /* * Additionally find_gid() is used to find valid entry during * lookup operation; so ignore the entries which are marked as * pending for removal and the entries which are marked as * invalid. 
*/ if (!is_gid_entry_valid(data)) continue; if (found >= 0) continue; attr = &data->attr; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && memcmp(gid, &data->attr.gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && attr->ndev != val->ndev) continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && is_gid_index_default(table, curr_index) != default_gid) continue; found = curr_index; } if (pempty) *pempty = empty; return found; } static void make_default_gid(struct net_device *dev, union ib_gid *gid) { gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); addrconf_ifid_eui48(&gid->raw[8], dev); } static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *attr, unsigned long mask, bool default_gid) { struct ib_gid_table *table; int ret = 0; int empty; int ix; /* Do not allow adding zero GID in support of * IB spec version 1.3 section 4.1.1 point (6) and * section 12.7.10 and section 12.7.20 */ if (rdma_is_zero_gid(gid)) return -EINVAL; table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); ix = find_gid(table, gid, attr, default_gid, mask, &empty); if (ix >= 0) goto out_unlock; if (empty < 0) { ret = -ENOSPC; goto out_unlock; } attr->device = ib_dev; attr->index = empty; attr->port_num = port; attr->gid = *gid; ret = add_modify_gid(table, attr); if (!ret) dispatch_gid_change_event(ib_dev, port); out_unlock: mutex_unlock(&table->lock); if (ret) pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", __func__, gid->raw, ret); return ret; } int ib_cache_gid_add(struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *attr) { unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_NETDEV; return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); } static int _ib_cache_gid_del(struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *attr, unsigned long mask, bool default_gid) { struct ib_gid_table *table; int ret = 0; int ix; table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); ix = find_gid(table, gid, attr, default_gid, mask, NULL); if (ix < 0) { ret = -EINVAL; goto out_unlock; } del_gid(ib_dev, port, table, ix); dispatch_gid_change_event(ib_dev, port); out_unlock: mutex_unlock(&table->lock); if (ret) pr_debug("%s: can't delete gid %pI6 error=%d\n", __func__, gid->raw, ret); return ret; } int ib_cache_gid_del(struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *attr) { unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false); } int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { struct ib_gid_table *table; int ix; bool deleted = false; table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); for (ix = 0; ix < table->sz; ix++) { if (is_gid_entry_valid(table->data_vec[ix]) && table->data_vec[ix]->attr.ndev == ndev) { del_gid(ib_dev, port, table, ix); deleted = true; } } mutex_unlock(&table->lock); if (deleted) dispatch_gid_change_event(ib_dev, port); return 0; } /** * rdma_find_gid_by_port - Returns the GID entry attributes when it finds * a valid GID entry for given search parameters. It searches for the specified * GID value in the local software cache. * @ib_dev: The device to query. * @gid: The GID value to search for. 
* @gid_type: The GID type to search for. * @port: The port number of the device where the GID value should be searched. * @ndev: In RoCE, the net device of the device. NULL means ignore. * * Returns sgid attributes if the GID is found with valid reference or * returns ERR_PTR for the error. * The caller must invoke rdma_put_gid_attr() to release the reference. */ const struct ib_gid_attr * rdma_find_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, u32 port, struct net_device *ndev) { int local_index; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; const struct ib_gid_attr *attr; unsigned long flags; if (!rdma_is_port_valid(ib_dev, port)) return ERR_PTR(-ENOENT); table = rdma_gid_table(ib_dev, port); if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { get_gid_entry(table->data_vec[local_index]); attr = &table->data_vec[local_index]->attr; read_unlock_irqrestore(&table->rwlock, flags); return attr; } read_unlock_irqrestore(&table->rwlock, flags); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL(rdma_find_gid_by_port); /** * rdma_find_gid_by_filter - Returns the GID table attribute where a * specified GID value occurs * @ib_dev: The device to query. * @gid: The GID value to search for. * @port: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. * If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. * @context: Private data to pass into the call-back. * * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. 
* */ const struct ib_gid_attr *rdma_find_gid_by_filter( struct ib_device *ib_dev, const union ib_gid *gid, u32 port, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context) { const struct ib_gid_attr *res = ERR_PTR(-ENOENT); struct ib_gid_table *table; unsigned long flags; unsigned int i; if (!rdma_is_port_valid(ib_dev, port)) return ERR_PTR(-EINVAL); table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { struct ib_gid_table_entry *entry = table->data_vec[i]; if (!is_gid_entry_valid(entry)) continue; if (memcmp(gid, &entry->attr.gid, sizeof(*gid))) continue; if (filter(gid, &entry->attr, context)) { get_gid_entry(entry); res = &entry->attr; break; } } read_unlock_irqrestore(&table->rwlock, flags); return res; } static struct ib_gid_table *alloc_gid_table(int sz) { struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL); if (!table->data_vec) goto err_free_table; mutex_init(&table->lock); table->sz = sz; rwlock_init(&table->rwlock); return table; err_free_table: kfree(table); return NULL; } static void release_gid_table(struct ib_device *device, struct ib_gid_table *table) { int i; if (!table) return; for (i = 0; i < table->sz; i++) { if (is_gid_entry_free(table->data_vec[i])) continue; WARN_ONCE(true, "GID entry ref leak for dev %s index %d ref=%u\n", dev_name(&device->dev), i, kref_read(&table->data_vec[i]->kref)); } mutex_destroy(&table->lock); kfree(table->data_vec); kfree(table); } static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port, struct ib_gid_table *table) { int i; if (!table) return; mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { if (is_gid_entry_valid(table->data_vec[i])) del_gid(ib_dev, port, table, i); } mutex_unlock(&table->lock); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port, struct net_device *ndev, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode) { union ib_gid gid = { }; struct ib_gid_attr gid_attr; unsigned int gid_type; unsigned long mask; mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { if (1UL << gid_type & ~gid_type_mask) continue; gid_attr.gid_type = gid_type; if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { make_default_gid(ndev, &gid); __ib_cache_gid_add(ib_dev, port, &gid, &gid_attr, mask, true); } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) { _ib_cache_gid_del(ib_dev, port, &gid, &gid_attr, mask, true); } } } static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port, struct ib_gid_table *table) { unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); /* Reserve starting indices for default GIDs */ for (i = 0; i < num_default_gids && i < table->sz; i++) table->default_gid_indices |= BIT(i); } static void gid_table_release_one(struct ib_device *ib_dev) { u32 p; rdma_for_each_port (ib_dev, p) { release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid); ib_dev->port_data[p].cache.gid = NULL; } } static int _gid_table_setup_one(struct ib_device *ib_dev) { struct ib_gid_table *table; u32 rdma_port; rdma_for_each_port (ib_dev, rdma_port) { table = alloc_gid_table( 
ib_dev->port_data[rdma_port].immutable.gid_tbl_len); if (!table) goto rollback_table_setup; gid_table_reserve_default(ib_dev, rdma_port, table); ib_dev->port_data[rdma_port].cache.gid = table; } return 0; rollback_table_setup: gid_table_release_one(ib_dev); return -ENOMEM; } static void gid_table_cleanup_one(struct ib_device *ib_dev) { u32 p; rdma_for_each_port (ib_dev, p) cleanup_gid_table_port(ib_dev, p, ib_dev->port_data[p].cache.gid); } static int gid_table_setup_one(struct ib_device *ib_dev) { int err; err = _gid_table_setup_one(ib_dev); if (err) return err; rdma_roce_rescan_device(ib_dev); return err; } /** * rdma_query_gid - Read the GID content from the GID software cache * @device: Device to query the GID * @port_num: Port number of the device * @index: Index of the GID table entry to read * @gid: Pointer to GID where to store the entry's GID * * rdma_query_gid() only reads the GID entry content for requested device, * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't * hold any reference to the GID table entry in the HCA or software cache. * * Returns 0 on success or appropriate error code. * */ int rdma_query_gid(struct ib_device *device, u32 port_num, int index, union ib_gid *gid) { struct ib_gid_table *table; unsigned long flags; int res; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); if (index < 0 || index >= table->sz) { res = -EINVAL; goto done; } if (!is_gid_entry_valid(table->data_vec[index])) { res = -ENOENT; goto done; } memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); res = 0; done: read_unlock_irqrestore(&table->rwlock, flags); return res; } EXPORT_SYMBOL(rdma_query_gid); /** * rdma_read_gid_hw_context - Read the HW GID context from GID attribute * @attr: Pointer to the GID attribute * * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding * to the SGID attr. Callers are required to already be holding the reference * to an existing GID entry. * * Returns the HW GID context * */ void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr) { return container_of(attr, struct ib_gid_table_entry, attr)->context; } EXPORT_SYMBOL(rdma_read_gid_hw_context); /** * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. * * rdma_find_gid() searches for the specified GID value in the software cache. * * Returns GID attributes if a valid GID is found or returns ERR_PTR for the * error. The caller must invoke rdma_put_gid_attr() to release the reference.
* */ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev) { unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; u32 p; if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; rdma_for_each_port(device, p) { struct ib_gid_table *table; unsigned long flags; int index; table = device->port_data[p].cache.gid; read_lock_irqsave(&table->rwlock, flags); index = find_gid(table, gid, &gid_attr_val, false, mask, NULL); if (index >= 0) { const struct ib_gid_attr *attr; get_gid_entry(table->data_vec[index]); attr = &table->data_vec[index]->attr; read_unlock_irqrestore(&table->rwlock, flags); return attr; } read_unlock_irqrestore(&table->rwlock, flags); } return ERR_PTR(-ENOENT); } EXPORT_SYMBOL(rdma_find_gid); int ib_get_cached_pkey(struct ib_device *device, u32 port_num, int index, u16 *pkey) { struct ib_pkey_cache *cache; unsigned long flags; int ret = 0; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; if (!cache || index < 0 || index >= cache->table_len) ret = -EINVAL; else *pkey = cache->table[index]; read_unlock_irqrestore(&device->cache_lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_pkey); void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num, u64 *sn_pfx) { unsigned long flags; read_lock_irqsave(&device->cache_lock, flags); *sn_pfx = device->port_data[port_num].cache.subnet_prefix; read_unlock_irqrestore(&device->cache_lock, flags); } EXPORT_SYMBOL(ib_get_cached_subnet_prefix); int ib_find_cached_pkey(struct ib_device *device, u32 port_num, u16 pkey, u16 *index) { struct ib_pkey_cache *cache; unsigned long flags; int i; int ret = -ENOENT; int partial_ix = -1; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; if (!cache) { ret = -EINVAL; goto err; } *index = -1; for (i = 0; i < cache->table_len; ++i) if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { if (cache->table[i] & 0x8000) { *index = i; ret = 0; break; } else { partial_ix = i; } } if (ret && partial_ix >= 0) { *index = partial_ix; ret = 0; } err: read_unlock_irqrestore(&device->cache_lock, flags); return ret; } EXPORT_SYMBOL(ib_find_cached_pkey); int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc) { unsigned long flags; int ret = 0; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache_lock, flags); *lmc = device->port_data[port_num].cache.lmc; read_unlock_irqrestore(&device->cache_lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_lmc); int ib_get_cached_port_state(struct ib_device *device, u32 port_num, enum ib_port_state *port_state) { unsigned long flags; int ret = 0; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache_lock, flags); *port_state = device->port_data[port_num].cache.port_state; read_unlock_irqrestore(&device->cache_lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_port_state); /** * rdma_get_gid_attr - Returns GID attributes for a port of a device * at a requested gid_index, if a valid GID entry exists. * @device: The device to query. * @port_num: The port number on the device where the GID value * is to be queried. * @index: Index of the GID table entry whose attributes are to * be queried. 
* * rdma_get_gid_attr() acquires reference count of gid attributes from the * cached GID table. Caller must invoke rdma_put_gid_attr() to release * reference to gid attribute regardless of link layer. * * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error * code. */ const struct ib_gid_attr * rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index) { const struct ib_gid_attr *attr = ERR_PTR(-ENODATA); struct ib_gid_table *table; unsigned long flags; if (!rdma_is_port_valid(device, port_num)) return ERR_PTR(-EINVAL); table = rdma_gid_table(device, port_num); if (index < 0 || index >= table->sz) return ERR_PTR(-EINVAL); read_lock_irqsave(&table->rwlock, flags); if (!is_gid_entry_valid(table->data_vec[index])) goto done; get_gid_entry(table->data_vec[index]); attr = &table->data_vec[index]->attr; done: read_unlock_irqrestore(&table->rwlock, flags); return attr; } EXPORT_SYMBOL(rdma_get_gid_attr); /** * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries. * @device: The device to query. * @entries: Entries where GID entries are returned. * @max_entries: Maximum number of entries that can be returned. * Entries array must be allocated to hold max_entries number of entries. * * Returns number of entries on success or appropriate error code. */ ssize_t rdma_query_gid_table(struct ib_device *device, struct ib_uverbs_gid_entry *entries, size_t max_entries) { const struct ib_gid_attr *gid_attr; ssize_t num_entries = 0, ret; struct ib_gid_table *table; u32 port_num, i; struct net_device *ndev; unsigned long flags; rdma_for_each_port(device, port_num) { table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { if (!is_gid_entry_valid(table->data_vec[i])) continue; if (num_entries >= max_entries) { ret = -EINVAL; goto err; } gid_attr = &table->data_vec[i]->attr; memcpy(&entries->gid, &gid_attr->gid, sizeof(gid_attr->gid)); entries->gid_index = gid_attr->index; entries->port_num = gid_attr->port_num; entries->gid_type = gid_attr->gid_type; ndev = rcu_dereference_protected( gid_attr->ndev, lockdep_is_held(&table->rwlock)); if (ndev) entries->netdev_ifindex = ndev->ifindex; num_entries++; entries++; } read_unlock_irqrestore(&table->rwlock, flags); } return num_entries; err: read_unlock_irqrestore(&table->rwlock, flags); return ret; } EXPORT_SYMBOL(rdma_query_gid_table); /** * rdma_put_gid_attr - Release reference to the GID attribute * @attr: Pointer to the GID attribute whose reference * needs to be released. * * rdma_put_gid_attr() must be used to release reference whose * reference is acquired using rdma_get_gid_attr() or any APIs * which returns a pointer to the ib_gid_attr regardless of link layer * of IB or RoCE. * */ void rdma_put_gid_attr(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry = container_of(attr, struct ib_gid_table_entry, attr); put_gid_entry(entry); } EXPORT_SYMBOL(rdma_put_gid_attr); /** * rdma_hold_gid_attr - Get reference to existing GID attribute * * @attr: Pointer to the GID attribute whose reference * needs to be taken. * * Increase the reference count to a GID attribute to keep it from being * freed. Callers are required to already be holding a reference to attribute. 
* */ void rdma_hold_gid_attr(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry = container_of(attr, struct ib_gid_table_entry, attr); get_gid_entry(entry); } EXPORT_SYMBOL(rdma_hold_gid_attr); /** * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice * which must be in UP state. * * @attr:Pointer to the GID attribute * * Returns pointer to netdevice if the netdevice was attached to GID and * netdevice is in UP state. Caller must hold RCU lock as this API * reads the netdev flags which can change while netdevice migrates to * different net namespace. Returns ERR_PTR with error code otherwise. * */ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry = container_of(attr, struct ib_gid_table_entry, attr); struct ib_device *device = entry->attr.device; struct net_device *ndev = ERR_PTR(-EINVAL); u32 port_num = entry->attr.port_num; struct ib_gid_table *table; unsigned long flags; bool valid; table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); valid = is_gid_entry_valid(table->data_vec[attr->index]); if (valid) { ndev = rcu_dereference(attr->ndev); if (!ndev) ndev = ERR_PTR(-ENODEV); } read_unlock_irqrestore(&table->rwlock, flags); return ndev; } EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu); static int get_lower_dev_vlan(struct net_device *lower_dev, struct netdev_nested_priv *priv) { u16 *vlan_id = (u16 *)priv->data; if (is_vlan_dev(lower_dev)) *vlan_id = vlan_dev_vlan_id(lower_dev); /* We are interested only in first level vlan device, so * always return 1 to stop iterating over next level devices. */ return 1; } /** * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address * of a GID entry. * * @attr: GID attribute pointer whose L2 fields to be read * @vlan_id: Pointer to vlan id to fill up if the GID entry has * vlan id. It is optional. * @smac: Pointer to smac to fill up for a GID entry. It is optional. * * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id * (if gid entry has vlan) and source MAC, or returns error. */ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, u16 *vlan_id, u8 *smac) { struct netdev_nested_priv priv = { .data = (void *)vlan_id, }; struct net_device *ndev; rcu_read_lock(); ndev = rcu_dereference(attr->ndev); if (!ndev) { rcu_read_unlock(); return -ENODEV; } if (smac) ether_addr_copy(smac, ndev->dev_addr); if (vlan_id) { *vlan_id = 0xffff; if (is_vlan_dev(ndev)) { *vlan_id = vlan_dev_vlan_id(ndev); } else { /* If the netdev is upper device and if it's lower * device is vlan device, consider vlan id of * the lower vlan device for this gid entry. 
*/ netdev_walk_all_lower_dev_rcu(attr->ndev, get_lower_dev_vlan, &priv); } } rcu_read_unlock(); return 0; } EXPORT_SYMBOL(rdma_read_gid_l2_fields); static int config_non_roce_gid_cache(struct ib_device *device, u32 port, struct ib_port_attr *tprops) { struct ib_gid_attr gid_attr = {}; struct ib_gid_table *table; int ret = 0; int i; gid_attr.device = device; gid_attr.port_num = port; table = rdma_gid_table(device, port); mutex_lock(&table->lock); for (i = 0; i < tprops->gid_tbl_len; ++i) { if (!device->ops.query_gid) continue; ret = device->ops.query_gid(device, port, i, &gid_attr.gid); if (ret) { dev_warn(&device->dev, "query_gid failed (%d) for index %d\n", ret, i); goto err; } if (rdma_protocol_iwarp(device, port)) { struct net_device *ndev; ndev = ib_device_get_netdev(device, port); if (!ndev) continue; RCU_INIT_POINTER(gid_attr.ndev, ndev); dev_put(ndev); } gid_attr.index = i; tprops->subnet_prefix = be64_to_cpu(gid_attr.gid.global.subnet_prefix); add_modify_gid(table, &gid_attr); } err: mutex_unlock(&table->lock); return ret; } static int ib_cache_update(struct ib_device *device, u32 port, bool update_gids, bool update_pkeys, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL; struct ib_pkey_cache *old_pkey_cache = NULL; int i; int ret; if (!rdma_is_port_valid(device, port)) return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret); goto err; } if (!rdma_protocol_roce(device, port) && update_gids) { ret = config_non_roce_gid_cache(device, port, tprops); if (ret) goto err; } update_pkeys &= !!tprops->pkey_tbl_len; if (update_pkeys) { pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); if (!pkey_cache) { ret = -ENOMEM; goto err; } pkey_cache->table_len = tprops->pkey_tbl_len; for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { dev_warn(&device->dev, "ib_query_pkey failed (%d) for index %d\n", ret, i); goto err; } } } write_lock_irq(&device->cache_lock); if (update_pkeys) { old_pkey_cache = device->port_data[port].cache.pkey; device->port_data[port].cache.pkey = pkey_cache; } device->port_data[port].cache.lmc = tprops->lmc; if (device->port_data[port].cache.port_state != IB_PORT_NOP && device->port_data[port].cache.port_state != tprops->state) ibdev_info(device, "Port: %d Link %s\n", port, ib_port_state_to_str(tprops->state)); device->port_data[port].cache.port_state = tprops->state; device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; write_unlock_irq(&device->cache_lock); if (enforce_security) ib_security_cache_change(device, port, tprops->subnet_prefix); kfree(old_pkey_cache); kfree(tprops); return 0; err: kfree(pkey_cache); kfree(tprops); return ret; } static void ib_cache_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); int ret; /* Before distributing the cache update event, first sync * the cache. */ ret = ib_cache_update(work->event.device, work->event.element.port_num, work->event.event == IB_EVENT_GID_CHANGE, work->event.event == IB_EVENT_PKEY_CHANGE, work->enforce_security); /* GID event is notified already for individual GID entries by * dispatch_gid_change_event(). Hence, notify for the rest of the * events.
*/ if (!ret && work->event.event != IB_EVENT_GID_CHANGE) ib_dispatch_event_clients(&work->event); kfree(work); } static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); ib_dispatch_event_clients(&work->event); kfree(work); } static bool is_cache_update_event(const struct ib_event *event) { return (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_GID_CHANGE); } /** * ib_dispatch_event - Dispatch an asynchronous event * @event:Event to dispatch * * Low-level drivers must call ib_dispatch_event() to dispatch the * event to all registered event handlers when an asynchronous event * occurs. */ void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; if (is_cache_update_event(event)) INIT_WORK(&work->work, ib_cache_event_task); else INIT_WORK(&work->work, ib_generic_event_task); work->event = *event; if (event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_GID_CHANGE) work->enforce_security = true; queue_work(ib_wq, &work->work); } EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { u32 p; int err; err = gid_table_setup_one(device); if (err) return err; rdma_for_each_port (device, p) { err = ib_cache_update(device, p, true, true, true); if (err) { gid_table_cleanup_one(device); return err; } } return 0; } void ib_cache_release_one(struct ib_device *device) { u32 p; /* * The release function frees all the cache elements. * This function should be called as part of freeing * all the device's resources when the cache could no * longer be accessed. */ rdma_for_each_port (device, p) kfree(device->port_data[p].cache.pkey); gid_table_release_one(device); } void ib_cache_cleanup_one(struct ib_device *device) { /* The cleanup function waits for all in-progress workqueue * elements and cleans up the GID cache. This function should be * called after the device was removed from the devices list and * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. */ flush_workqueue(ib_wq); gid_table_cleanup_one(device); /* * Flush the wq second time for any pending GID delete work. */ flush_workqueue(ib_wq); }
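/*
 * Editor's illustrative sketch (user-space, not kernel code): the P_Key
 * matching rule implemented by ib_find_cached_pkey() above. Bit 15 of a
 * P_Key marks full membership and the low 15 bits name the partition, so a
 * full-member match is preferred and a limited-member ("partial") match is
 * used only as a fallback. The table contents are made-up example values.
 */
#include <stdio.h>
#include <stdint.h>

static int find_pkey(const uint16_t *table, int len, uint16_t pkey)
{
	int i, partial_ix = -1;

	for (i = 0; i < len; i++) {
		if ((table[i] & 0x7fff) != (pkey & 0x7fff))
			continue;	/* different partition */
		if (table[i] & 0x8000)
			return i;	/* full member: best possible match */
		if (partial_ix < 0)
			partial_ix = i;	/* remember first limited member */
	}
	return partial_ix;		/* -1 if the partition is absent */
}

int main(void)
{
	const uint16_t pkeys[] = { 0xffff, 0x0001, 0x8001 };

	/* Prints 2: full-member 0x8001 beats limited-member 0x0001. */
	printf("%d\n", find_pkey(pkeys, 3, 0x0001));
	return 0;
}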
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 
1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 
2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 
3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 
3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 // SPDX-License-Identifier: ISC /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include <linux/export.h> #include <linux/module.h> #include <linux/firmware.h> #include <linux/of.h> #include <linux/property.h> #include <linux/dmi.h> #include <linux/ctype.h> #include <linux/pm_qos.h> #include <linux/nvmem-consumer.h> #include <asm/byteorder.h> #include "core.h" #include "mac.h" #include "htc.h" #include "hif.h" #include "wmi.h" #include "bmi.h" #include "debug.h" #include "htt.h" #include "testmode.h" #include "wmi-ops.h" #include "coredump.h" #include "leds.h" unsigned int ath10k_debug_mask; EXPORT_SYMBOL(ath10k_debug_mask); static unsigned int ath10k_cryptmode_param; static bool uart_print; static bool skip_otp; static bool fw_diag_log; /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */ unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI; unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) | BIT(ATH10K_FW_CRASH_DUMP_CE_DATA); /* FIXME: most of these should be readonly */ module_param_named(debug_mask, ath10k_debug_mask, uint, 0644); module_param_named(cryptmode, ath10k_cryptmode_param, uint, 0644); module_param(uart_print, bool, 0644); module_param(skip_otp, bool, 0644); module_param(fw_diag_log, bool, 0644); module_param_named(frame_mode, ath10k_frame_mode, uint, 0644); module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444); MODULE_PARM_DESC(debug_mask, "Debugging mask"); MODULE_PARM_DESC(uart_print, "Uart target debugging"); MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode"); MODULE_PARM_DESC(cryptmode, "Crypto mode: 0-hardware, 1-software"); MODULE_PARM_DESC(frame_mode, "Datapath frame mode (0: raw, 1: native wifi (default), 2: ethernet)"); MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file"); MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging"); static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca988x hw2.0", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 1, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID_UBNT, .name = "qca988x hw2.0 ubiquiti", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, 
.led_pin = 0, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9887_HW_1_0_VERSION, .dev_id = QCA9887_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9887 hw1.0", .patch_load_addr = QCA9887_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 1, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_3_2_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca6174 hw3.2 sdio", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 19, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 0, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_sdio_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = 10, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .tx_stats_over_pktlog = false, .credit_size_workaround = false, .bmi_large_size_download = true, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6164_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6164 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, 
.hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_0_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.0", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.2", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, 
.ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = true, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, .dev_id = QCA99X0_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca99x0 hw2.0", .patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA99X0_HW_2_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9984_HW_1_0_DEV_VERSION, .dev_id = QCA9984_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9984/qca9994 hw1.0", .patch_load_addr = QCA9984_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA9984_HW_1_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, .ext_board_size = QCA99X0_EXT_BOARD_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 2x2 VHT160 or 80+80. 1560Mbps is 4x4 80Mhz * or 2x2 160Mhz, long-guard-interval. 
*/ .vht160_mcs_rx_highest = 1560, .vht160_mcs_tx_highest = 1560, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9888_HW_2_0_DEV_VERSION, .dev_id = QCA9888_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9888 hw2.0", .patch_load_addr = QCA9888_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA9888_HW_2_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 1x1 VHT160 or 80+80. 780Mbps is 2x2 80Mhz or * 1x1 160Mhz, long-guard-interval. */ .vht160_mcs_rx_highest = 780, .vht160_mcs_tx_highest = 780, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_0_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.0", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, 
.target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca9377 hw1.1 sdio", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 19, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = TARGET_QCA9377_HL_NUM_PEERS, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .credit_size_workaround = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA4019_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_AHB, .name = "qca4019 hw1.0", .patch_load_addr = QCA4019_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 0, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x0010000, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA4019_HW_1_0_FW_DIR, .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = WCN3990_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_SNOC, .name = "wcn3990 hw1.0", .led_pin = 0, .continuous_frag_desc = true, .tx_chain_mask = 0x7, .rx_chain_mask = 0x7, .max_spatial_stream = 4, .fw = { .dir = WCN3990_HW_1_0_FW_DIR, .board_size = WCN3990_BOARD_DATA_SZ, .board_ext_size = WCN3990_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &wcn3990_rx_desc_ops, .hw_ops = &wcn3990_ops, .decap_align_bytes = 1, .num_peers = TARGET_HL_TLV_NUM_PEERS, .n_cipher_suites = 11, .ast_skid_limit = TARGET_HL_TLV_AST_SKID_LIMIT, .num_wds_entries = TARGET_HL_TLV_NUM_WDS_ENTRIES, .target_64bit = true, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL_DUAL_MAC, .shadow_reg_support = true, 
.rri_on_ddr = true, .hw_filter_reset_required = false, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = true, .hw_restart_disconnect = true, .use_fw_tx_credits = false, .delay_unmap_buffer = true, .mcast_frame_registration = false, }, }; static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_EXT_WMI_MGMT_RX] = "wmi-mgmt-rx", [ATH10K_FW_FEATURE_WMI_10X] = "wmi-10.x", [ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX] = "has-wmi-mgmt-tx", [ATH10K_FW_FEATURE_NO_P2P] = "no-p2p", [ATH10K_FW_FEATURE_WMI_10_2] = "wmi-10.2", [ATH10K_FW_FEATURE_MULTI_VIF_PS_SUPPORT] = "multi-vif-ps", [ATH10K_FW_FEATURE_WOWLAN_SUPPORT] = "wowlan", [ATH10K_FW_FEATURE_IGNORE_OTP_RESULT] = "ignore-otp", [ATH10K_FW_FEATURE_NO_NWIFI_DECAP_4ADDR_PADDING] = "no-4addr-pad", [ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT] = "skip-clock-init", [ATH10K_FW_FEATURE_RAW_MODE_SUPPORT] = "raw-mode", [ATH10K_FW_FEATURE_SUPPORTS_ADAPTIVE_CCA] = "adaptive-cca", [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", [ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST] = "allows-mesh-bcast", [ATH10K_FW_FEATURE_NO_PS] = "no-ps", [ATH10K_FW_FEATURE_MGMT_TX_BY_REF] = "mgmt-tx-by-reference", [ATH10K_FW_FEATURE_NON_BMI] = "non-bmi", [ATH10K_FW_FEATURE_SINGLE_CHAN_INFO_PER_CHANNEL] = "single-chan-info-per-channel", [ATH10K_FW_FEATURE_PEER_FIXED_RATE] = "peer-fixed-rate", [ATH10K_FW_FEATURE_IRAM_RECOVERY] = "iram-recovery", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, size_t buf_len, enum ath10k_fw_features feat) { /* make sure that ath10k_core_fw_feature_str[] gets updated */ BUILD_BUG_ON(ARRAY_SIZE(ath10k_core_fw_feature_str) != ATH10K_FW_FEATURE_COUNT); if (feat >= ARRAY_SIZE(ath10k_core_fw_feature_str) || WARN_ON(!ath10k_core_fw_feature_str[feat])) { return scnprintf(buf, buf_len, "bit%d", feat); } return scnprintf(buf, buf_len, "%s", ath10k_core_fw_feature_str[feat]); } void ath10k_core_get_fw_features_str(struct ath10k *ar, char *buf, size_t buf_len) { size_t len = 0; int i; for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { if (test_bit(i, ar->normal_mode_fw.fw_file.fw_features)) { if (len > 0) len += scnprintf(buf + len, buf_len - len, ","); len += ath10k_core_get_fw_feature_str(buf + len, buf_len - len, i); } } } static void ath10k_send_suspend_complete(struct ath10k *ar) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot suspend complete\n"); complete(&ar->target_suspend); } static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode) { bool mtu_workaround = ar->hw_params.credit_size_workaround; int ret; u32 param = 0; ret = ath10k_bmi_write32(ar, hi_mbox_io_block_sz, 256); if (ret) return ret; ret = ath10k_bmi_write32(ar, hi_mbox_isr_yield_limit, 99); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_acs_flags, &param); if (ret) return ret; param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET; if (mode == ATH10K_FIRMWARE_MODE_NORMAL && !mtu_workaround) param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; else param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; if (mode == ATH10K_FIRMWARE_MODE_UTF) param &= ~HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; else param |= HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; ret = ath10k_bmi_write32(ar, hi_acs_flags, param); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_option_flag2, &param); if (ret) return ret; param |= HI_OPTION_SDIO_CRASH_DUMP_ENHANCEMENT_HOST; ret = ath10k_bmi_write32(ar, 
hi_option_flag2, param); if (ret) return ret; return 0; } static int ath10k_init_configure_target(struct ath10k *ar) { u32 param_host; int ret; /* tell target which HTC version is used */ ret = ath10k_bmi_write32(ar, hi_app_host_interest, HTC_PROTOCOL_VERSION); if (ret) { ath10k_err(ar, "setting HTC version failed\n"); return ret; } /* set the firmware mode to STA/IBSS/AP */ ret = ath10k_bmi_read32(ar, hi_option_flag, &param_host); if (ret) { ath10k_err(ar, "setting firmware mode (1/2) failed\n"); return ret; } /* TODO: the following parameters need to be revisited. */ /* num_device */ param_host |= (1 << HI_OPTION_NUM_DEV_SHIFT); /* Firmware mode */ /* FIXME: why FW_MODE_AP? */ param_host |= (HI_OPTION_FW_MODE_AP << HI_OPTION_FW_MODE_SHIFT); /* mac_addr_method */ param_host |= (1 << HI_OPTION_MAC_ADDR_METHOD_SHIFT); /* firmware_bridge */ param_host |= (0 << HI_OPTION_FW_BRIDGE_SHIFT); /* fwsubmode */ param_host |= (0 << HI_OPTION_FW_SUBMODE_SHIFT); ret = ath10k_bmi_write32(ar, hi_option_flag, param_host); if (ret) { ath10k_err(ar, "setting firmware mode (2/2) failed\n"); return ret; } /* We do all byte-swapping on the host */ ret = ath10k_bmi_write32(ar, hi_be, 0); if (ret) { ath10k_err(ar, "setting host CPU BE mode failed\n"); return ret; } /* FW descriptor/Data swap flags */ ret = ath10k_bmi_write32(ar, hi_fw_swap, 0); if (ret) { ath10k_err(ar, "setting FW data/desc swap flags failed\n"); return ret; } /* Some devices have a special sanity check that verifies the PCI * Device ID is written to this host interest var. It is known to be * required to boot QCA6164. */ ret = ath10k_bmi_write32(ar, hi_hci_uart_pwr_mgmt_params_ext, ar->dev_id); if (ret) { ath10k_err(ar, "failed to set pwr_mgmt_params: %d\n", ret); return ret; } return 0; } static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, const char *dir, const char *file) { char filename[100]; const struct firmware *fw; int ret; if (file == NULL) return ERR_PTR(-ENOENT); if (dir == NULL) dir = "."; if (ar->board_name) { snprintf(filename, sizeof(filename), "%s/%s/%s", dir, ar->board_name, file); ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", filename, ret); if (!ret) return fw; } snprintf(filename, sizeof(filename), "%s/%s", dir, file); ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", filename, ret); if (ret) return ERR_PTR(ret); return fw; } static int ath10k_push_board_ext_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 board_ext_data_size = ar->hw_params.fw.board_ext_size; u32 board_ext_data_addr; int ret; ret = ath10k_bmi_read32(ar, hi_board_ext_data, &board_ext_data_addr); if (ret) { ath10k_err(ar, "could not read board ext data addr (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot push board extended data addr 0x%x\n", board_ext_data_addr); if (board_ext_data_addr == 0) return 0; if (data_len != (board_data_size + board_ext_data_size)) { ath10k_err(ar, "invalid board (ext) data sizes %zu != %d+%d\n", data_len, board_data_size, board_ext_data_size); return -EINVAL; } ret = ath10k_bmi_write_memory(ar, board_ext_data_addr, data + board_data_size, board_ext_data_size); if (ret) { ath10k_err(ar, "could not write board ext data (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_board_ext_data_config, (board_ext_data_size << 16) | 1); if (ret) { ath10k_err(ar, "could not write board ext data
bit (%d)\n", ret); return ret; } return 0; } static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 board_id, chip_id; bool ext_bid_support; int ret, bmi_board_id_param; address = ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve board id because of invalid otp\n"); return -ENODATA; } if (ar->id.bmi_ids_valid) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot already acquired valid otp board id, skip download, board_id %d chip_id %d\n", ar->id.bmi_board_id, ar->id.bmi_chip_id); goto skip_otp_download; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for board id check: %d\n", ret); return ret; } if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_board_id_param = BMI_PARAM_GET_FLASH_BOARD_ID; else bmi_board_id_param = BMI_PARAM_GET_EEPROM_BOARD_ID; ret = ath10k_bmi_execute(ar, address, bmi_board_id_param, &result); if (ret) { ath10k_err(ar, "could not execute otp for board id check: %d\n", ret); return ret; } board_id = MS(result, ATH10K_BMI_BOARD_ID_FROM_OTP); chip_id = MS(result, ATH10K_BMI_CHIP_ID_FROM_OTP); ext_bid_support = (result & ATH10K_BMI_EXT_BOARD_ID_SUPPORT); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp board id result 0x%08x board_id %d chip_id %d ext_bid_support %d\n", result, board_id, chip_id, ext_bid_support); ar->id.ext_bid_supported = ext_bid_support; if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0 || (board_id == 0)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ar->id.bmi_ids_valid = true; ar->id.bmi_board_id = board_id; ar->id.bmi_chip_id = chip_id; skip_otp_download: return 0; } static void ath10k_core_check_bdfext(const struct dmi_header *hdr, void *data) { struct ath10k *ar = data; const char *bdf_ext; const char *magic = ATH10K_SMBIOS_BDF_EXT_MAGIC; u8 bdf_enabled; int i; if (hdr->type != ATH10K_SMBIOS_BDF_EXT_TYPE) return; if (hdr->length != ATH10K_SMBIOS_BDF_EXT_LENGTH) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "wrong smbios bdf ext type length (%d).\n", hdr->length); return; } bdf_enabled = *((u8 *)hdr + ATH10K_SMBIOS_BDF_EXT_OFFSET); if (!bdf_enabled) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not found.\n"); return; } /* Only one string exists (per spec) */ bdf_ext = (char *)hdr + hdr->length; if (memcmp(bdf_ext, magic, strlen(magic)) != 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant magic does not match.\n"); return; } for (i = 0; i < strlen(bdf_ext); i++) { if (!isascii(bdf_ext[i]) || !isprint(bdf_ext[i])) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name contains non-ASCII chars.\n"); return; } } /* Copy the extension name, stripping the magic prefix */ if (strscpy(ar->id.bdf_ext, bdf_ext + strlen(magic), sizeof(ar->id.bdf_ext)) < 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", bdf_ext); return; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "found and validated bdf variant smbios_type 0x%x bdf %s\n", ATH10K_SMBIOS_BDF_EXT_TYPE, bdf_ext); } static int ath10k_core_check_smbios(struct ath10k *ar) { ar->id.bdf_ext[0] = '\0'; dmi_walk(ath10k_core_check_bdfext, ar); if (ar->id.bdf_ext[0] == '\0')
return -ENODATA; return 0; } int ath10k_core_check_dt(struct ath10k *ar) { struct device_node *node; const char *variant = NULL; node = ar->dev->of_node; if (!node) return -ENOENT; of_property_read_string(node, "qcom,calibration-variant", &variant); if (!variant) of_property_read_string(node, "qcom,ath10k-calibration-variant", &variant); if (!variant) return -ENODATA; if (strscpy(ar->id.bdf_ext, variant, sizeof(ar->id.bdf_ext)) < 0) ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", variant); return 0; } EXPORT_SYMBOL(ath10k_core_check_dt); static int ath10k_download_fw(struct ath10k *ar) { u32 address, data_len; const void *data; int ret; struct pm_qos_request latency_qos = {}; address = ar->hw_params.patch_load_addr; data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot uploading firmware image %p len %d\n", data, data_len); /* Check if the device supports downloading firmware via the * diag copy engine. Downloading firmware via the diag CE * greatly reduces the time to download firmware. */ if (ar->hw_params.fw_diag_ce_download) { ret = ath10k_hw_diag_fast_download(ar, address, data, data_len); if (ret == 0) /* firmware upload via diag ce was successful */ return 0; ath10k_warn(ar, "failed to upload firmware via diag ce, trying BMI: %d", ret); } cpu_latency_qos_add_request(&latency_qos, 0); ret = ath10k_bmi_fast_download(ar, address, data, data_len); cpu_latency_qos_remove_request(&latency_qos); return ret; } void ath10k_core_free_board_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.board)) release_firmware(ar->normal_mode_fw.board); if (!IS_ERR(ar->normal_mode_fw.ext_board)) release_firmware(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.board = NULL; ar->normal_mode_fw.board_data = NULL; ar->normal_mode_fw.board_len = 0; ar->normal_mode_fw.ext_board = NULL; ar->normal_mode_fw.ext_board_data = NULL; ar->normal_mode_fw.ext_board_len = 0; } EXPORT_SYMBOL(ath10k_core_free_board_files); static void ath10k_core_free_firmware_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.fw_file.firmware)) release_firmware(ar->normal_mode_fw.fw_file.firmware); if (!IS_ERR(ar->cal_file)) release_firmware(ar->cal_file); if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; ar->normal_mode_fw.fw_file.firmware = NULL; ar->normal_mode_fw.fw_file.firmware_data = NULL; ar->normal_mode_fw.fw_file.firmware_len = 0; ar->cal_file = NULL; ar->pre_cal_file = NULL; } static int ath10k_fetch_cal_file(struct ath10k *ar) { char filename[100]; /* pre-cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "pre-cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->pre_cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (!IS_ERR(ar->pre_cal_file)) goto success; /* cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (IS_ERR(ar->cal_file)) /* calibration file is optional, don't print any warnings */ return PTR_ERR(ar->cal_file); success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found calibration file
%s/%s\n", ATH10K_FW_DIR, filename); return 0; } static int ath10k_core_fetch_board_data_api_1(struct ath10k *ar, int bd_ie_type) { const struct firmware *fw; char boardname[100]; if (bd_ie_type == ATH10K_BD_IE_BOARD) { scnprintf(boardname, sizeof(boardname), "board-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, boardname); if (IS_ERR(ar->normal_mode_fw.board)) { fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ATH10K_BOARD_DATA_FILE); ar->normal_mode_fw.board = fw; } if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); ar->normal_mode_fw.board_data = ar->normal_mode_fw.board->data; ar->normal_mode_fw.board_len = ar->normal_mode_fw.board->size; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ATH10K_EBOARD_DATA_FILE); ar->normal_mode_fw.ext_board = fw; if (IS_ERR(ar->normal_mode_fw.ext_board)) return PTR_ERR(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.ext_board_data = ar->normal_mode_fw.ext_board->data; ar->normal_mode_fw.ext_board_len = ar->normal_mode_fw.ext_board->size; } return 0; } static int ath10k_core_parse_bd_ie_board(struct ath10k *ar, const void *buf, size_t buf_len, const char *boardname, int bd_ie_type) { const struct ath10k_fw_ie *hdr; bool name_match_found; int ret, board_ie_id; size_t board_ie_len; const void *board_ie_data; name_match_found = false; /* go through ATH10K_BD_IE_BOARD_ elements */ while (buf_len > sizeof(struct ath10k_fw_ie)) { hdr = buf; board_ie_id = le32_to_cpu(hdr->id); board_ie_len = le32_to_cpu(hdr->len); board_ie_data = hdr->data; buf_len -= sizeof(*hdr); buf += sizeof(*hdr); if (buf_len < ALIGN(board_ie_len, 4)) { ath10k_err(ar, "invalid ATH10K_BD_IE_BOARD length: %zu < %zu\n", buf_len, ALIGN(board_ie_len, 4)); ret = -EINVAL; goto out; } switch (board_ie_id) { case ATH10K_BD_IE_BOARD_NAME: ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "board name", "", board_ie_data, board_ie_len); if (board_ie_len != strlen(boardname)) break; ret = memcmp(board_ie_data, boardname, strlen(boardname)); if (ret) break; name_match_found = true; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found match for name '%s'", boardname); break; case ATH10K_BD_IE_BOARD_DATA: if (!name_match_found) /* no match found */ break; if (bd_ie_type == ATH10K_BD_IE_BOARD) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found board data for '%s'", boardname); ar->normal_mode_fw.board_data = board_ie_data; ar->normal_mode_fw.board_len = board_ie_len; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found eboard data for '%s'", boardname); ar->normal_mode_fw.ext_board_data = board_ie_data; ar->normal_mode_fw.ext_board_len = board_ie_len; } ret = 0; goto out; default: ath10k_warn(ar, "unknown ATH10K_BD_IE_BOARD found: %d\n", board_ie_id); break; } /* jump over the padding */ board_ie_len = ALIGN(board_ie_len, 4); buf_len -= board_ie_len; buf += board_ie_len; } /* no match found */ ret = -ENOENT; out: return ret; } static int ath10k_core_search_bd(struct ath10k *ar, const char *boardname, const u8 *data, size_t len) { size_t ie_len; struct ath10k_fw_ie *hdr; int ret = -ENOENT, ie_id; while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data = hdr->data; if (len < ALIGN(ie_len, 4)) { ath10k_err(ar, "invalid length for board ie_id %d ie_len %zu len %zu\n", ie_id, ie_len, len); return -EINVAL; } switch 
(ie_id) { case ATH10K_BD_IE_BOARD: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; case ATH10K_BD_IE_BOARD_EXT: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD_EXT); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } out: /* return result of parse_bd_ie_board() or -ENOENT */ return ret; } static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, const char *boardname, const char *fallback_boardname1, const char *fallback_boardname2, const char *filename) { size_t len, magic_len; const u8 *data; int ret; /* Skip if already fetched during board data download */ if (!ar->normal_mode_fw.board) ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, filename); if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); data = ar->normal_mode_fw.board->data; len = ar->normal_mode_fw.board->size; /* magic has extra null byte padded */ magic_len = strlen(ATH10K_BOARD_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "failed to find magic value in %s/%s, file too short: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_BOARD_MAGIC, magic_len)) { ath10k_err(ar, "found invalid board magic\n"); ret = -EINVAL; goto err; } /* magic is padded to 4 bytes */ magic_len = ALIGN(magic_len, 4); if (len < magic_len) { ath10k_err(ar, "failed: %s/%s too small to contain board data, len: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } data += magic_len; len -= magic_len; /* attempt to find boardname in the IE list */ ret = ath10k_core_search_bd(ar, boardname, data, len); /* if we didn't find it and have a fallback name, try that */ if (ret == -ENOENT && fallback_boardname1) ret = ath10k_core_search_bd(ar, fallback_boardname1, data, len); if (ret == -ENOENT && fallback_boardname2) ret = ath10k_core_search_bd(ar, fallback_boardname2, data, len); if (ret == -ENOENT) { ath10k_err(ar, "failed to fetch board data for %s from %s/%s\n", boardname, ar->hw_params.fw.dir, filename); ret = -ENODATA; } if (ret) goto err; return 0; err: ath10k_core_free_board_files(ar); return ret; } static int ath10k_core_create_board_name(struct ath10k *ar, char *name, size_t name_len, bool with_variant, bool with_chip_id) { /* strlen(',variant=') + strlen(ar->id.bdf_ext) */ char variant[9 + ATH10K_SMBIOS_BDF_EXT_STR_LENGTH] = {}; if (with_variant && ar->id.bdf_ext[0] != '\0') scnprintf(variant, sizeof(variant), ",variant=%s", ar->id.bdf_ext); if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-board-id=%d%s", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_board_id, variant); goto out; } if (ar->id.qmi_ids_valid) { if (with_chip_id) scnprintf(name, name_len, "bus=%s,qmi-board-id=%x,qmi-chip-id=%x%s", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id, ar->id.qmi_chip_id, variant); else scnprintf(name, name_len, "bus=%s,qmi-board-id=%x", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id); goto out; } scnprintf(name, name_len, "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x%s", ath10k_bus_str(ar->hif.bus), ar->id.vendor, ar->id.device, ar->id.subsystem_vendor, ar->id.subsystem_device, variant); out: ath10k_dbg(ar, 
ATH10K_DBG_BOOT, "boot using board name '%s'\n", name); return 0; } static int ath10k_core_create_eboard_name(struct ath10k *ar, char *name, size_t name_len) { if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-eboard-id=%d", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_eboard_id); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using eboard name '%s'\n", name); return 0; } /* Fallback if returned board id is zero */ return -1; } int ath10k_core_fetch_board_file(struct ath10k *ar, int bd_ie_type) { char boardname[100], fallback_boardname1[100], fallback_boardname2[100]; int ret; if (bd_ie_type == ATH10K_BD_IE_BOARD) { /* With variant and chip id */ ret = ath10k_core_create_board_name(ar, boardname, sizeof(boardname), true, true); if (ret) { ath10k_err(ar, "failed to create board name: %d", ret); return ret; } /* Without variant and only chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname1, sizeof(boardname), false, true); if (ret) { ath10k_err(ar, "failed to create 1st fallback board name: %d", ret); return ret; } /* Without variant and without chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname2, sizeof(boardname), false, false); if (ret) { ath10k_err(ar, "failed to create 2nd fallback board name: %d", ret); return ret; } } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ret = ath10k_core_create_eboard_name(ar, boardname, sizeof(boardname)); if (ret) { ath10k_err(ar, "fallback to eboard.bin since board id 0"); goto fallback; } } ar->bd_api = 2; ret = ath10k_core_fetch_board_data_api_n(ar, boardname, fallback_boardname1, fallback_boardname2, ATH10K_BOARD_API2_FILE); if (!ret) goto success; fallback: ar->bd_api = 1; ret = ath10k_core_fetch_board_data_api_1(ar, bd_ie_type); if (ret) { ath10k_err(ar, "failed to fetch board-2.bin or board.bin from %s\n", ar->hw_params.fw.dir); return ret; } success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using board api %d\n", ar->bd_api); return 0; } EXPORT_SYMBOL(ath10k_core_fetch_board_file); static int ath10k_core_get_ext_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 ext_board_id; int ret; address = ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve extended board id due to otp binary missing\n"); return -ENODATA; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for ext board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for ext board id check: %d\n", ret); return ret; } ret = ath10k_bmi_execute(ar, address, BMI_PARAM_GET_EXT_BOARD_ID, &result); if (ret) { ath10k_err(ar, "could not execute otp for ext board id check: %d\n", ret); return ret; } if (!result) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "ext board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ext_board_id = result & ATH10K_BMI_EBOARD_ID_STATUS_MASK; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp ext board id result 0x%08x ext_board_id %d\n", result, ext_board_id); ar->id.bmi_eboard_id = ext_board_id; return 0; } static int ath10k_download_board_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 eboard_data_size = ar->hw_params.fw.ext_board_size; u32 board_address; u32 ext_board_address; int ret; ret = ath10k_push_board_ext_data(ar, data, data_len); if (ret) { 
ath10k_err(ar, "could not push board ext data (%d)\n", ret); goto exit; } ret = ath10k_bmi_read32(ar, hi_board_data, &board_address); if (ret) { ath10k_err(ar, "could not read board data addr (%d)\n", ret); goto exit; } ret = ath10k_bmi_write_memory(ar, board_address, data, min_t(u32, board_data_size, data_len)); if (ret) { ath10k_err(ar, "could not write board data (%d)\n", ret); goto exit; } ret = ath10k_bmi_write32(ar, hi_board_data_initialized, 1); if (ret) { ath10k_err(ar, "could not write board data bit (%d)\n", ret); goto exit; } if (!ar->id.ext_bid_supported) goto exit; /* Extended board data download */ ret = ath10k_core_get_ext_board_id_from_otp(ar); if (ret == -EOPNOTSUPP) { /* Not fetching ext_board_data if ext board id is 0 */ ath10k_dbg(ar, ATH10K_DBG_BOOT, "otp returned ext board id 0\n"); return 0; } else if (ret) { ath10k_err(ar, "failed to get extended board id: %d\n", ret); goto exit; } ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD_EXT); if (ret) goto exit; if (ar->normal_mode_fw.ext_board_data) { ext_board_address = board_address + EXT_BOARD_ADDRESS_OFFSET; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot writing ext board data to addr 0x%x", ext_board_address); ret = ath10k_bmi_write_memory(ar, ext_board_address, ar->normal_mode_fw.ext_board_data, min_t(u32, eboard_data_size, data_len)); if (ret) ath10k_err(ar, "failed to write ext board data: %d\n", ret); } exit: return ret; } static int ath10k_download_and_run_otp(struct ath10k *ar) { u32 result, address = ar->hw_params.patch_load_addr; u32 bmi_otp_exe_param = ar->hw_params.otp_exe_param; int ret; ret = ath10k_download_board_data(ar, ar->running_fw->board_data, ar->running_fw->board_len); if (ret) { ath10k_err(ar, "failed to download board data: %d\n", ret); return ret; } /* OTP is optional */ if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd\n", address, ar->running_fw->fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp (%d)\n", ret); return ret; } /* As of now pre-cal is valid for 10_4 variants */ if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_otp_exe_param = BMI_PARAM_FLASH_SECTION_ALL; ret = ath10k_bmi_execute(ar, address, bmi_otp_exe_param, &result); if (ret) { ath10k_err(ar, "could not execute otp (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot otp execute result %d\n", result); if (!(skip_otp || test_bit(ATH10K_FW_FEATURE_IGNORE_OTP_RESULT, ar->running_fw->fw_file.fw_features)) && result != 0) { ath10k_err(ar, "otp calibration failed: %d", result); return -EINVAL; } return 0; } static int ath10k_download_cal_file(struct ath10k *ar, const struct firmware *file) { int ret; if (!file) return -ENOENT; if (IS_ERR(file)) return PTR_ERR(file); ret = ath10k_download_board_data(ar, file->data, file->size); if (ret) { ath10k_err(ar, "failed to download cal_file data: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot cal file downloaded\n"); return 0; } static int ath10k_download_cal_dt(struct ath10k *ar, const char *dt_name) { struct device_node *node; int data_len; void *data; int ret; node = ar->dev->of_node; 
if (!node) /* Device Tree is optional, don't print any warnings if * there's no node for ath10k. */ return -ENOENT; if (!of_get_property(node, dt_name, &data_len)) { /* The calibration data node is optional */ return -ENOENT; } if (data_len != ar->hw_params.cal_data_len) { ath10k_warn(ar, "invalid calibration data length in DT: %d\n", data_len); ret = -EMSGSIZE; goto out; } data = kmalloc(data_len, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto out; } ret = of_property_read_u8_array(node, dt_name, data, data_len); if (ret) { ath10k_warn(ar, "failed to read calibration data from DT: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from Device Tree: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); out: return ret; } static int ath10k_download_cal_eeprom(struct ath10k *ar) { size_t data_len; void *data = NULL; int ret; ret = ath10k_hif_fetch_cal_eeprom(ar, &data, &data_len); if (ret) { if (ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to read calibration data from EEPROM: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from EEPROM: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); return ret; } static int ath10k_download_cal_nvmem(struct ath10k *ar, const char *cell_name) { struct nvmem_cell *cell; void *buf; size_t len; int ret; cell = devm_nvmem_cell_get(ar->dev, cell_name); if (IS_ERR(cell)) { ret = PTR_ERR(cell); return ret; } buf = nvmem_cell_read(cell, &len); if (IS_ERR(buf)) return PTR_ERR(buf); if (ar->hw_params.cal_data_len != len) { kfree(buf); ath10k_warn(ar, "invalid calibration data length in nvmem-cell '%s': %zu != %u\n", cell_name, len, ar->hw_params.cal_data_len); return -EMSGSIZE; } ret = ath10k_download_board_data(ar, buf, len); kfree(buf); if (ret) ath10k_warn(ar, "failed to download calibration data from nvmem-cell '%s': %d\n", cell_name, ret); return ret; } int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, struct ath10k_fw_file *fw_file) { size_t magic_len, len, ie_len; int ie_id, i, index, bit, ret; struct ath10k_fw_ie *hdr; const u8 *data; __le32 *timestamp, *version; /* first fetch the firmware file (firmware-*.bin) */ fw_file->firmware = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, name); if (IS_ERR(fw_file->firmware)) return PTR_ERR(fw_file->firmware); data = fw_file->firmware->data; len = fw_file->firmware->size; /* magic also includes the null byte, check that as well */ magic_len = strlen(ATH10K_FIRMWARE_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "firmware file '%s/%s' too small to contain magic: %zu\n", ar->hw_params.fw.dir, name, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_FIRMWARE_MAGIC, magic_len) != 0) { ath10k_err(ar, "invalid firmware magic\n"); ret = -EINVAL; goto err; } /* jump over the padding */ magic_len = ALIGN(magic_len, 4); len -= magic_len; data += magic_len; /* loop elements */ while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data += sizeof(*hdr); if (len < ie_len) { ath10k_err(ar, "invalid length for FW IE %d (%zu < %zu)\n", ie_id, len, ie_len); ret = -EINVAL; goto err; } switch (ie_id) { case ATH10K_FW_IE_FW_VERSION: if (ie_len > sizeof(fw_file->fw_version) - 1) break; memcpy(fw_file->fw_version, data, ie_len); fw_file->fw_version[ie_len] = '\0'; ath10k_dbg(ar, 
ATH10K_DBG_BOOT, "found fw version %s\n", fw_file->fw_version); break; case ATH10K_FW_IE_TIMESTAMP: if (ie_len != sizeof(u32)) break; timestamp = (__le32 *)data; ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw timestamp %d\n", le32_to_cpup(timestamp)); break; case ATH10K_FW_IE_FEATURES: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found firmware features ie (%zd B)\n", ie_len); for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { index = i / 8; bit = i % 8; if (index == ie_len) break; if (data[index] & (1 << bit)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "Enabling feature bit: %i\n", i); __set_bit(i, fw_file->fw_features); } } ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "", fw_file->fw_features, sizeof(fw_file->fw_features)); break; case ATH10K_FW_IE_FW_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw image ie (%zd B)\n", ie_len); fw_file->firmware_data = data; fw_file->firmware_len = ie_len; break; case ATH10K_FW_IE_OTP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found otp image ie (%zd B)\n", ie_len); fw_file->otp_data = data; fw_file->otp_len = ie_len; break; case ATH10K_FW_IE_WMI_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->wmi_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie wmi op version %d\n", fw_file->wmi_op_version); break; case ATH10K_FW_IE_HTT_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->htt_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie htt op version %d\n", fw_file->htt_op_version); break; case ATH10K_FW_IE_FW_CODE_SWAP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw code swap image ie (%zd B)\n", ie_len); fw_file->codeswap_data = data; fw_file->codeswap_len = ie_len; break; default: ath10k_warn(ar, "Unknown FW IE: %u\n", le32_to_cpu(hdr->id)); break; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, fw_file->fw_features) && (!fw_file->firmware_data || !fw_file->firmware_len)) { ath10k_warn(ar, "No ATH10K_FW_IE_FW_IMAGE found from '%s/%s', skipping\n", ar->hw_params.fw.dir, name); ret = -ENOMEDIUM; goto err; } return 0; err: ath10k_core_free_firmware_files(ar); return ret; } static void ath10k_core_get_fw_name(struct ath10k *ar, char *fw_name, size_t fw_name_len, int fw_api) { switch (ar->hif.bus) { case ATH10K_BUS_SDIO: case ATH10K_BUS_USB: scnprintf(fw_name, fw_name_len, "%s-%s-%d.bin", ATH10K_FW_FILE_BASE, ath10k_bus_str(ar->hif.bus), fw_api); break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_SNOC: scnprintf(fw_name, fw_name_len, "%s-%d.bin", ATH10K_FW_FILE_BASE, fw_api); break; } } static int ath10k_core_fetch_firmware_files(struct ath10k *ar) { int ret, i; char fw_name[100]; /* calibration file is optional, don't check for any errors */ ath10k_fetch_cal_file(ar); for (i = ATH10K_FW_API_MAX; i >= ATH10K_FW_API_MIN; i--) { ar->fw_api = i; ath10k_dbg(ar, ATH10K_DBG_BOOT, "trying fw api %d\n", ar->fw_api); ath10k_core_get_fw_name(ar, fw_name, sizeof(fw_name), ar->fw_api); ret = ath10k_core_fetch_firmware_api_n(ar, fw_name, &ar->normal_mode_fw.fw_file); if (!ret) goto success; } /* we end up here if we couldn't fetch any firmware */ ath10k_err(ar, "Failed to find firmware-N.bin (N between %d and %d) from %s: %d", ATH10K_FW_API_MIN, ATH10K_FW_API_MAX, ar->hw_params.fw.dir, ret); return ret; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using fw api %d\n", ar->fw_api); return 0; } static int ath10k_core_pre_cal_download(struct ath10k *ar) { int ret; ret = 
ath10k_download_cal_nvmem(ar, "pre-calibration"); if (ret == 0) { ar->cal_mode = ATH10K_PRE_CAL_MODE_NVMEM; goto success; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre-calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->pre_cal_file); if (ret == 0) { ar->cal_mode = ATH10K_PRE_CAL_MODE_FILE; goto success; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,pre-calibration-data"); if (ret == -ENOENT) ret = ath10k_download_cal_dt(ar, "qcom,ath10k-pre-calibration-data"); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "unable to load pre cal data from DT: %d\n", ret); return ret; } ar->cal_mode = ATH10K_PRE_CAL_MODE_DT; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static int ath10k_core_pre_cal_config(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_download(ar); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to load pre cal data: %d\n", ret); return ret; } ret = ath10k_core_get_board_id_from_otp(ar); if (ret) { ath10k_err(ar, "failed to get board id: %d\n", ret); return ret; } ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal configuration done successfully\n"); return 0; } static int ath10k_download_cal_data(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_config(ar); if (ret == 0) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal download procedure failed, try cal file: %d\n", ret); ret = ath10k_download_cal_nvmem(ar, "calibration"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_NVMEM; goto done; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->cal_file); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_FILE; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,calibration-data"); if (ret == -ENOENT) ret = ath10k_download_cal_dt(ar, "qcom,ath10k-calibration-data"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_DT; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find DT entry, try target EEPROM next: %d\n", ret); ret = ath10k_download_cal_eeprom(ar); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_EEPROM; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find target EEPROM entry, try OTP next: %d\n", ret); ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ar->cal_mode = ATH10K_CAL_MODE_OTP; done: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static void ath10k_core_fetch_btcoex_dt(struct ath10k *ar) { struct device_node *node; u8 coex_support = 0; int ret; node = ar->dev->of_node; if (!node) goto out; ret = of_property_read_u8(node, "qcom,coexist-support", &coex_support); if (ret) { ar->coex_support = true; goto out; } if (coex_support) { ar->coex_support = true; } else { ar->coex_support = false; ar->coex_gpio_pin = -1; goto out; } ret = of_property_read_u32(node, "qcom,coexist-gpio-pin", &ar->coex_gpio_pin); if (ret) ar->coex_gpio_pin = -1; out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot coex_support %d coex_gpio_pin %d\n", 
ar->coex_support, ar->coex_gpio_pin); } static int ath10k_init_uart(struct ath10k *ar) { int ret; /* * Explicitly setting UART prints to zero as target turns it on * based on scratch registers. */ ret = ath10k_bmi_write32(ar, hi_serial_enable, 0); if (ret) { ath10k_warn(ar, "could not disable UART prints (%d)\n", ret); return ret; } if (!uart_print) { if (ar->hw_params.uart_pin_workaround) { ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "failed to set UART TX pin: %d", ret); return ret; } } return 0; } ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_serial_enable, 1); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } /* Set the UART baud rate to 19200. */ ret = ath10k_bmi_write32(ar, hi_desired_baud_rate, 19200); if (ret) { ath10k_warn(ar, "could not set the baud rate (%d)\n", ret); return ret; } ath10k_info(ar, "UART prints enabled\n"); return 0; } static int ath10k_init_hw_params(struct ath10k *ar) { const struct ath10k_hw_params *hw_params; int i; for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) { hw_params = &ath10k_hw_params_list[i]; if (hw_params->bus == ar->hif.bus && hw_params->id == ar->target_version && hw_params->dev_id == ar->dev_id) break; } if (i == ARRAY_SIZE(ath10k_hw_params_list)) { ath10k_err(ar, "Unsupported hardware version: 0x%x\n", ar->target_version); return -EINVAL; } ar->hw_params = *hw_params; ath10k_dbg(ar, ATH10K_DBG_BOOT, "Hardware name %s version 0x%x\n", ar->hw_params.name, ar->target_version); return 0; } static void ath10k_core_recovery_check_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, recovery_check_work); long time_left; /* Recovery can fail repeatedly; once the consecutive failure count * reaches the limit, stop retrying to avoid an endless recovery loop. */ if (atomic_read(&ar->fail_cont_count) >= ATH10K_RECOVERY_MAX_FAIL_COUNT) { ath10k_err(ar, "recovery failed %d consecutive times, will shut down the driver!", atomic_read(&ar->fail_cont_count)); ar->state = ATH10K_STATE_WEDGED; return; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "total recovery count: %d", ++ar->recovery_count); if (atomic_read(&ar->pending_recovery)) { /* A new recovery request can arrive before the previous one has * completed, and the second would then tear down state the first is * still using; wait for the previous recovery here to avoid that. */ time_left = wait_for_completion_timeout(&ar->driver_recovery, ATH10K_RECOVERY_TIMEOUT_HZ); if (time_left) { ath10k_warn(ar, "previous recovery succeeded, skip this!\n"); return; } /* Record the consecutive recovery failure count when recovery fails. */ atomic_inc(&ar->fail_cont_count); /* Avoid having multiple recoveries at the same time.
*/ return; } atomic_inc(&ar->pending_recovery); queue_work(ar->workqueue, &ar->restart_work); } void ath10k_core_start_recovery(struct ath10k *ar) { /* Use workqueue_aux to avoid blocking recovery tracking */ queue_work(ar->workqueue_aux, &ar->recovery_check_work); } EXPORT_SYMBOL(ath10k_core_start_recovery); void ath10k_core_napi_enable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_enable(&ar->napi); set_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_enable); void ath10k_core_napi_sync_disable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (!test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_synchronize(&ar->napi); napi_disable(&ar->napi); clear_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_sync_disable); static void ath10k_core_restart(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, restart_work); int ret; reinit_completion(&ar->driver_recovery); set_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); /* Place a barrier to make sure the compiler doesn't reorder * CRASH_FLUSH and calling other functions. */ barrier(); ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); complete(&ar->scan.started); complete(&ar->scan.completed); complete(&ar->scan.on_channel); complete(&ar->offchan_tx_completed); complete(&ar->install_key_done); complete(&ar->vdev_setup_done); complete(&ar->vdev_delete_done); complete(&ar->thermal.wmi_sync); complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); /* TODO: We could cancel coverage_class_work in a single place by moving * the cancel into ath10k_halt(), so that both stop() and restart() would * go through it. However, ath10k_halt() takes conf_mutex, and calling * cancel_work_sync() while holding conf_mutex would deadlock.
*/ cancel_work_sync(&ar->set_coverage_class_work); mutex_lock(&ar->conf_mutex); switch (ar->state) { case ATH10K_STATE_ON: ar->state = ATH10K_STATE_RESTARTING; ath10k_halt(ar); ath10k_scan_finish(ar); ieee80211_restart_hw(ar->hw); break; case ATH10K_STATE_OFF: /* this can happen if driver is being unloaded * or if the crash happens during FW probing */ ath10k_warn(ar, "cannot restart a device that hasn't been started\n"); break; case ATH10K_STATE_RESTARTING: /* hw restart might be requested from multiple places */ break; case ATH10K_STATE_RESTARTED: ar->state = ATH10K_STATE_WEDGED; fallthrough; case ATH10K_STATE_WEDGED: ath10k_warn(ar, "device is wedged, will not restart\n"); break; case ATH10K_STATE_UTF: ath10k_warn(ar, "firmware restart in UTF mode not supported\n"); break; } mutex_unlock(&ar->conf_mutex); ret = ath10k_coredump_submit(ar); if (ret) ath10k_warn(ar, "failed to send firmware crash dump via devcoredump: %d", ret); } static void ath10k_core_set_coverage_class_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, set_coverage_class_work); if (ar->hw_params.hw_ops->set_coverage_class) ar->hw_params.hw_ops->set_coverage_class(ar, -1, -1); } static int ath10k_core_init_firmware_features(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; int max_num_peers; if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features) && !test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { ath10k_err(ar, "feature bits corrupted: 10.2 feature requires 10.x feature to be set as well"); return -EINVAL; } if (fw_file->wmi_op_version >= ATH10K_FW_WMI_OP_VERSION_MAX) { ath10k_err(ar, "unsupported WMI OP version (max %d): %d\n", ATH10K_FW_WMI_OP_VERSION_MAX, fw_file->wmi_op_version); return -EINVAL; } ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_NATIVE_WIFI; switch (ath10k_cryptmode_param) { case ATH10K_CRYPT_MODE_HW: clear_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); clear_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; case ATH10K_CRYPT_MODE_SW: if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "cryptmode > 0 requires raw mode support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); set_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; default: ath10k_info(ar, "invalid cryptmode: %d\n", ath10k_cryptmode_param); return -EINVAL; } ar->htt.max_num_amsdu = ATH10K_HTT_MAX_NUM_AMSDU_DEFAULT; ar->htt.max_num_ampdu = ATH10K_HTT_MAX_NUM_AMPDU_DEFAULT; if (ath10k_frame_mode == ATH10K_HW_TXRX_RAW) { if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "rawmode = 1 requires support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); } if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_RAW; /* Workaround: * * Firmware A-MSDU aggregation breaks with RAW Tx encap mode * and causes enormous performance issues (malformed frames, * etc). * * Disabling A-MSDU makes RAW mode stable with heavy traffic * albeit a bit slower compared to regular operation. */ ar->htt.max_num_amsdu = 1; } /* Backwards compatibility for firmwares without * ATH10K_FW_IE_WMI_OP_VERSION. 
*/ if (fw_file->wmi_op_version == ATH10K_FW_WMI_OP_VERSION_UNSET) { if (test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features)) fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_2; else fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_1; } else { fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_MAIN; } } switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: max_num_peers = TARGET_NUM_PEERS; ar->max_num_stations = TARGET_NUM_STATIONS; ar->max_num_vdevs = TARGET_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PDEV | WMI_STAT_VDEV | WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: if (ath10k_peer_stats_enabled(ar)) { max_num_peers = TARGET_10X_TX_STATS_NUM_PEERS; ar->max_num_stations = TARGET_10X_TX_STATS_NUM_STATIONS; } else { max_num_peers = TARGET_10X_NUM_PEERS; ar->max_num_stations = TARGET_10X_NUM_STATIONS; } ar->max_num_vdevs = TARGET_10X_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_10X_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_TLV: max_num_peers = TARGET_TLV_NUM_PEERS; ar->max_num_stations = TARGET_TLV_NUM_STATIONS; ar->max_num_vdevs = TARGET_TLV_NUM_VDEVS; ar->max_num_tdls_vdevs = TARGET_TLV_NUM_TDLS_VDEVS; if (ar->hif.bus == ATH10K_BUS_SDIO) ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC_HL; else ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC; ar->wow.max_num_patterns = TARGET_TLV_NUM_WOW_PATTERNS; ar->fw_stats_req_mask = WMI_TLV_STAT_PDEV | WMI_TLV_STAT_VDEV | WMI_TLV_STAT_PEER | WMI_TLV_STAT_PEER_EXTD; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; ar->wmi.mgmt_max_num_pending_tx = TARGET_TLV_MGMT_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_10_4: max_num_peers = TARGET_10_4_NUM_PEERS; ar->max_num_stations = TARGET_10_4_NUM_STATIONS; ar->num_active_peers = TARGET_10_4_ACTIVE_PEERS; ar->max_num_vdevs = TARGET_10_4_NUM_VDEVS; ar->num_tids = TARGET_10_4_TGT_NUM_TIDS; ar->fw_stats_req_mask = WMI_10_4_STAT_PEER | WMI_10_4_STAT_PEER_EXTD | WMI_10_4_STAT_VDEV_EXTD; ar->max_spatial_stream = ar->hw_params.max_spatial_stream; ar->max_num_tdls_vdevs = TARGET_10_4_NUM_TDLS_VDEVS; if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, fw_file->fw_features)) ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC_PFC; else ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: default: WARN_ON(1); return -EINVAL; } if (ar->hw_params.num_peers) ar->max_num_peers = ar->hw_params.num_peers; else ar->max_num_peers = max_num_peers; /* Backwards compatibility for firmwares without * ATH10K_FW_IE_HTT_OP_VERSION. 
*/ if (fw_file->htt_op_version == ATH10K_FW_HTT_OP_VERSION_UNSET) { switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_MAIN; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_10_1; break; case ATH10K_FW_WMI_OP_VERSION_TLV: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_TLV; break; case ATH10K_FW_WMI_OP_VERSION_10_4: case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: ath10k_err(ar, "htt op version not found from fw meta data"); return -EINVAL; } } return 0; } static int ath10k_core_reset_rx_filter(struct ath10k *ar) { int ret; int vdev_id; int vdev_type; int vdev_subtype; const u8 *vdev_addr; vdev_id = 0; vdev_type = WMI_VDEV_TYPE_STA; vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); vdev_addr = ar->mac_addr; ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, vdev_addr); if (ret) { ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); return ret; } ret = ath10k_wmi_vdev_delete(ar, vdev_id); if (ret) { ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); return ret; } /* WMI and HTT may use separate HIF pipes and are not guaranteed to be * serialized properly implicitly. * * Moreover (most) WMI commands have no explicit acknowledgements. It is * possible to infer it implicitly by poking firmware with echo * command - getting a reply means all preceding commands have been * (mostly) processed. * * In case of vdev create/delete this is sufficient. * * Without this it's possible to end up with a race when HTT Rx ring is * started before vdev create/delete hack is complete allowing a short * window of opportunity to receive (and Tx ACK) a bunch of frames.
*/ ret = ath10k_wmi_barrier(ar); if (ret) { ath10k_err(ar, "failed to ping firmware: %d\n", ret); return ret; } return 0; } static int ath10k_core_compat_services(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; /* all 10.x firmware versions support thermal throttling but don't * advertise the support via service flags so we have to hardcode * it here */ switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: case ATH10K_FW_WMI_OP_VERSION_10_4: set_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map); break; default: break; } return 0; } #define TGT_IRAM_READ_PER_ITR (8 * 1024) static int ath10k_core_copy_target_iram(struct ath10k *ar) { const struct ath10k_hw_mem_layout *hw_mem; const struct ath10k_mem_region *tmp, *mem_region = NULL; dma_addr_t paddr; void *vaddr = NULL; u8 num_read_itr; int i, ret; u32 len, remaining_len; /* the copy-target-iram feature must work even when * ATH10K_FW_CRASH_DUMP_RAM_DATA is disabled, so use * _ath10k_coredump_get_mem_layout() to accomplish that */ hw_mem = _ath10k_coredump_get_mem_layout(ar); if (!hw_mem) /* if CONFIG_DEV_COREDUMP is disabled we get NULL, then * just silently disable the feature by doing nothing */ return 0; for (i = 0; i < hw_mem->region_table.size; i++) { tmp = &hw_mem->region_table.regions[i]; if (tmp->type == ATH10K_MEM_REGION_TYPE_REG) { mem_region = tmp; break; } } if (!mem_region) return -ENOMEM; for (i = 0; i < ar->wmi.num_mem_chunks; i++) { if (ar->wmi.mem_chunks[i].req_id == WMI_IRAM_RECOVERY_HOST_MEM_REQ_ID) { vaddr = ar->wmi.mem_chunks[i].vaddr; len = ar->wmi.mem_chunks[i].len; break; } } if (!vaddr || !len) { ath10k_warn(ar, "no memory allocated for IRAM backup"); return -ENOMEM; } len = (len < mem_region->len) ? len : mem_region->len; paddr = mem_region->start; num_read_itr = len / TGT_IRAM_READ_PER_ITR; remaining_len = len % TGT_IRAM_READ_PER_ITR; for (i = 0; i < num_read_itr; i++) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, TGT_IRAM_READ_PER_ITR); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } paddr += TGT_IRAM_READ_PER_ITR; vaddr += TGT_IRAM_READ_PER_ITR; } if (remaining_len) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, remaining_len); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "target IRAM backup completed\n"); return 0; } int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { int status; u32 val; lockdep_assert_held(&ar->conf_mutex); clear_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); ar->running_fw = fw; if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { ath10k_bmi_start(ar); /* Enable hardware clock to speed up firmware download */ if (ar->hw_params.hw_ops->enable_pll_clk) { status = ar->hw_params.hw_ops->enable_pll_clk(ar); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot enable pll ret %d\n", status); } if (ath10k_init_configure_target(ar)) { status = -EINVAL; goto err; } status = ath10k_download_cal_data(ar); if (status) goto err; /* Some qca988x solutions have a global reset issue during target * initialization. Bypassing the PLL setting before downloading * firmware and letting the SoC run on REF_CLK fixes the problem. * A corresponding firmware change is also needed to set the clock * source once the target is initialized.
*/ if (test_bit(ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_write32(ar, hi_skip_clock_init, 1); if (status) { ath10k_err(ar, "could not write to skip_clock_init: %d\n", status); goto err; } } status = ath10k_download_fw(ar); if (status) goto err; status = ath10k_init_uart(ar); if (status) goto err; if (ar->hif.bus == ATH10K_BUS_SDIO) { status = ath10k_init_sdio(ar, mode); if (status) { ath10k_err(ar, "failed to init SDIO: %d\n", status); goto err; } } } ar->htc.htc_ops.target_send_suspend_complete = ath10k_send_suspend_complete; status = ath10k_htc_init(ar); if (status) { ath10k_err(ar, "could not init HTC (%d)\n", status); goto err; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_done(ar); if (status) goto err; } status = ath10k_wmi_attach(ar); if (status) { ath10k_err(ar, "WMI attach failed: %d\n", status); goto err; } status = ath10k_htt_init(ar); if (status) { ath10k_err(ar, "failed to init htt: %d\n", status); goto err_wmi_detach; } status = ath10k_htt_tx_start(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt tx: %d\n", status); goto err_wmi_detach; } /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER, ar->wmi.svc_map)); status = ath10k_htt_rx_alloc(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt rx: %d\n", status); goto err_htt_tx_detach; } status = ath10k_hif_start(ar); if (status) { ath10k_err(ar, "could not start HIF: %d\n", status); goto err_htt_rx_detach; } status = ath10k_htc_wait_target(&ar->htc); if (status) { ath10k_err(ar, "failed to connect to HTC: %d\n", status); goto err_hif_stop; } status = ath10k_hif_start_post(ar); if (status) { ath10k_err(ar, "failed to swap mailbox: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_connect(&ar->htt); if (status) { ath10k_err(ar, "failed to connect htt (%d)\n", status); goto err_hif_stop; } } status = ath10k_wmi_connect(ar); if (status) { ath10k_err(ar, "could not connect wmi: %d\n", status); goto err_hif_stop; } status = ath10k_htc_start(&ar->htc); if (status) { ath10k_err(ar, "failed to start htc: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_wmi_wait_for_service_ready(ar); if (status) { ath10k_warn(ar, "wmi service ready event not received"); goto err_hif_stop; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "firmware %s booted\n", ar->hw->wiphy->fw_version); if (test_bit(ATH10K_FW_FEATURE_IRAM_RECOVERY, ar->running_fw->fw_file.fw_features)) { status = ath10k_core_copy_target_iram(ar); if (status) { ath10k_warn(ar, "failed to copy target iram contents: %d", status); goto err_hif_stop; } } if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map) && mode == ATH10K_FIRMWARE_MODE_NORMAL) { val = 0; if (ath10k_peer_stats_enabled(ar)) val = WMI_10_4_PEER_STATS; /* Enable vdev stats by default */ val |= WMI_10_4_VDEV_STATS; if (test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map)) val |= WMI_10_4_BSS_CHANNEL_INFO_64; ath10k_core_fetch_btcoex_dt(ar); /* 10.4 firmware supports BT-Coex without reloading firmware * via pdev param. To support Bluetooth coexistence pdev param, * WMI_COEX_GPIO_SUPPORT of extended resource config should be * enabled always. 
* * We can still enable BTCOEX if firmware has the support * even though btcoex_support value is * ATH10K_DT_BTCOEX_NOT_FOUND */ if (test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map) && test_bit(ATH10K_FW_FEATURE_BTCOEX_PARAM, ar->running_fw->fw_file.fw_features) && ar->coex_support) val |= WMI_10_4_COEX_GPIO_SUPPORT; if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_EXPLICIT_MODE_ONLY; if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_UAPSD_BUFFER_STA; if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map)) val |= WMI_10_4_TX_DATA_ACK_RSSI; if (test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) val |= WMI_10_4_REPORT_AIRTIME; if (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) val |= WMI_10_4_EXT_PEER_TID_CONFIGS_SUPPORT; status = ath10k_mac_ext_resource_config(ar, val); if (status) { ath10k_err(ar, "failed to send ext resource cfg command: %d\n", status); goto err_hif_stop; } } status = ath10k_wmi_cmd_init(ar); if (status) { ath10k_err(ar, "could not send WMI init command (%d)\n", status); goto err_hif_stop; } status = ath10k_wmi_wait_for_unified_ready(ar); if (status) { ath10k_err(ar, "wmi unified ready event not received\n"); goto err_hif_stop; } status = ath10k_core_compat_services(ar); if (status) { ath10k_err(ar, "compat services failed: %d\n", status); goto err_hif_stop; } status = ath10k_wmi_pdev_set_base_macaddr(ar, ar->mac_addr); if (status && status != -EOPNOTSUPP) { ath10k_err(ar, "failed to set base mac address: %d\n", status); goto err_hif_stop; } /* Some firmware revisions do not properly set up hardware rx filter * registers. * * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK * is filled with 0s instead of 1s allowing HW to respond with ACKs to * any frame that matches MAC_PCU_RX_FILTER which is also * misconfigured to accept anything. * * ADDR1 is programmed using an internal firmware structure field and * can't be (easily/sanely) reached from the driver explicitly. It is * possible to implicitly make it correct by creating a dummy vdev and * then deleting it.
*/ if (ar->hw_params.hw_filter_reset_required && mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_core_reset_rx_filter(ar); if (status) { ath10k_err(ar, "failed to reset rx filter: %d\n", status); goto err_hif_stop; } } status = ath10k_htt_rx_ring_refill(ar); if (status) { ath10k_err(ar, "failed to refill htt rx ring: %d\n", status); goto err_hif_stop; } if (ar->max_num_vdevs >= 64) ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; else ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); /* we don't care about HTT in UTF mode */ if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_setup(&ar->htt); if (status) { ath10k_err(ar, "failed to setup htt: %d\n", status); goto err_hif_stop; } } status = ath10k_debug_start(ar); if (status) goto err_hif_stop; status = ath10k_hif_set_target_log_mode(ar, fw_diag_log); if (status && status != -EOPNOTSUPP) { ath10k_warn(ar, "set target log mode failed: %d\n", status); goto err_hif_stop; } status = ath10k_leds_start(ar); if (status) goto err_hif_stop; return 0; err_hif_stop: ath10k_hif_stop(ar); err_htt_rx_detach: ath10k_htt_rx_free(&ar->htt); err_htt_tx_detach: ath10k_htt_tx_free(&ar->htt); err_wmi_detach: ath10k_wmi_detach(ar); err: return status; } EXPORT_SYMBOL(ath10k_core_start); int ath10k_wait_for_suspend(struct ath10k *ar, u32 suspend_opt) { int ret; unsigned long time_left; reinit_completion(&ar->target_suspend); ret = ath10k_wmi_pdev_suspend_target(ar, suspend_opt); if (ret) { ath10k_warn(ar, "could not suspend target (%d)\n", ret); return ret; } time_left = wait_for_completion_timeout(&ar->target_suspend, 1 * HZ); if (!time_left) { ath10k_warn(ar, "suspend timed out - target pause event never came\n"); return -ETIMEDOUT; } return 0; } void ath10k_core_stop(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); ath10k_debug_stop(ar); /* try to suspend target */ if (ar->state != ATH10K_STATE_RESTARTING && ar->state != ATH10K_STATE_UTF) ath10k_wait_for_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR); ath10k_hif_stop(ar); ath10k_htt_tx_stop(&ar->htt); ath10k_htt_rx_free(&ar->htt); ath10k_wmi_detach(ar); ar->id.bmi_ids_valid = false; } EXPORT_SYMBOL(ath10k_core_stop); /* mac80211 manages fw/hw initialization through start/stop hooks. 
However in * order to know what hw capabilities should be advertised to mac80211 it is * necessary to load the firmware (and tear it down immediately since start * hook will try to init it again) before registering */ static int ath10k_core_probe_fw(struct ath10k *ar) { struct bmi_target_info target_info = {}; int ret = 0; ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); if (ret) { ath10k_err(ar, "could not power on hif bus (%d)\n", ret); return ret; } switch (ar->hif.bus) { case ATH10K_BUS_SDIO: ret = ath10k_bmi_get_target_info_sdio(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_USB: ret = ath10k_bmi_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_SNOC: ret = ath10k_hif_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; default: ath10k_err(ar, "incorrect hif bus type: %d\n", ar->hif.bus); } ret = ath10k_init_hw_params(ar); if (ret) { ath10k_err(ar, "could not get hw params (%d)\n", ret); goto err_power_down; } ret = ath10k_core_fetch_firmware_files(ar); if (ret) { ath10k_err(ar, "could not fetch firmware files (%d)\n", ret); goto err_power_down; } BUILD_BUG_ON(sizeof(ar->hw->wiphy->fw_version) != sizeof(ar->normal_mode_fw.fw_file.fw_version)); memcpy(ar->hw->wiphy->fw_version, ar->normal_mode_fw.fw_file.fw_version, sizeof(ar->hw->wiphy->fw_version)); ath10k_debug_print_hwfw_info(ar); if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_core_pre_cal_download(ar); if (ret) { /* pre calibration data download is not necessary * for all the chipsets. Ignore failures and continue. 
*/ ath10k_dbg(ar, ATH10K_DBG_BOOT, "could not load pre cal data: %d\n", ret); } ret = ath10k_core_get_board_id_from_otp(ar); if (ret && ret != -EOPNOTSUPP) { ath10k_err(ar, "failed to get board id from otp: %d\n", ret); goto err_free_firmware_files; } ret = ath10k_core_check_smbios(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "SMBIOS bdf variant name not set.\n"); ret = ath10k_core_check_dt(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "DT bdf variant name not set.\n"); ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD); if (ret) { ath10k_err(ar, "failed to fetch board file: %d\n", ret); goto err_free_firmware_files; } ath10k_debug_print_board_info(ar); } device_get_mac_address(ar->dev, ar->mac_addr); ret = ath10k_core_init_firmware_features(ar); if (ret) { ath10k_err(ar, "fatal problem with firmware features: %d\n", ret); goto err_free_firmware_files; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); goto err_free_firmware_files; } } mutex_lock(&ar->conf_mutex); ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_NORMAL, &ar->normal_mode_fw); if (ret) { ath10k_err(ar, "could not init core (%d)\n", ret); goto err_unlock; } ath10k_debug_print_boot_info(ar); ath10k_core_stop(ar); mutex_unlock(&ar->conf_mutex); ath10k_hif_power_down(ar); return 0; err_unlock: mutex_unlock(&ar->conf_mutex); err_free_firmware_files: ath10k_core_free_firmware_files(ar); err_power_down: ath10k_hif_power_down(ar); return ret; } static void ath10k_core_register_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; /* peer stats are enabled by default */ set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); goto err; } status = ath10k_mac_register(ar); if (status) { ath10k_err(ar, "could not register to mac80211 (%d)\n", status); goto err_release_fw; } status = ath10k_coredump_register(ar); if (status) { ath10k_err(ar, "unable to register coredump\n"); goto err_unregister_mac; } status = ath10k_debug_register(ar); if (status) { ath10k_err(ar, "unable to initialize debugfs\n"); goto err_unregister_coredump; } status = ath10k_spectral_create(ar); if (status) { ath10k_err(ar, "failed to initialize spectral\n"); goto err_debug_destroy; } status = ath10k_thermal_register(ar); if (status) { ath10k_err(ar, "could not register thermal device: %d\n", status); goto err_spectral_destroy; } status = ath10k_leds_register(ar); if (status) { ath10k_err(ar, "could not register leds: %d\n", status); goto err_thermal_unregister; } set_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags); return; err_thermal_unregister: ath10k_thermal_unregister(ar); err_spectral_destroy: ath10k_spectral_destroy(ar); err_debug_destroy: ath10k_debug_destroy(ar); err_unregister_coredump: ath10k_coredump_unregister(ar); err_unregister_mac: ath10k_mac_unregister(ar); err_release_fw: ath10k_core_free_firmware_files(ar); err: /* TODO: It's probably a good idea to release device from the driver * but calling device_release_driver() here will cause a deadlock. 
*/ return; } int ath10k_core_register(struct ath10k *ar, const struct ath10k_bus_params *bus_params) { ar->bus_param = *bus_params; queue_work(ar->workqueue, &ar->register_work); return 0; } EXPORT_SYMBOL(ath10k_core_register); void ath10k_core_unregister(struct ath10k *ar) { cancel_work_sync(&ar->register_work); if (!test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) return; ath10k_leds_unregister(ar); ath10k_thermal_unregister(ar); /* Stop spectral before unregistering from mac80211 to remove the * relayfs debugfs file cleanly. Otherwise the parent debugfs tree * would already be freed recursively, leading to a double free. */ ath10k_spectral_destroy(ar); /* We must unregister from mac80211 before we stop HTC and HIF. * Otherwise we will fail to submit commands to FW and mac80211 will be * unhappy about callback failures. */ ath10k_mac_unregister(ar); ath10k_testmode_destroy(ar); ath10k_core_free_firmware_files(ar); ath10k_core_free_board_files(ar); ath10k_debug_unregister(ar); } EXPORT_SYMBOL(ath10k_core_unregister); struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, enum ath10k_bus bus, enum ath10k_hw_rev hw_rev, const struct ath10k_hif_ops *hif_ops) { struct ath10k *ar; int ret; ar = ath10k_mac_create(priv_size); if (!ar) return NULL; ar->ath_common.priv = ar; ar->ath_common.hw = ar->hw; ar->dev = dev; ar->hw_rev = hw_rev; ar->hif.ops = hif_ops; ar->hif.bus = bus; switch (hw_rev) { case ATH10K_HW_QCA988X: case ATH10K_HW_QCA9887: ar->regs = &qca988x_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca988x_values; break; case ATH10K_HW_QCA6174: case ATH10K_HW_QCA9377: ar->regs = &qca6174_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca6174_values; break; case ATH10K_HW_QCA99X0: case ATH10K_HW_QCA9984: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca99x0_values; break; case ATH10K_HW_QCA9888: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca9888_values; break; case ATH10K_HW_QCA4019: ar->regs = &qca4019_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca4019_values; break; case ATH10K_HW_WCN3990: ar->regs = &wcn3990_regs; ar->hw_ce_regs = &wcn3990_ce_regs; ar->hw_values = &wcn3990_values; break; default: ath10k_err(ar, "unsupported core hardware revision %d\n", hw_rev); ret = -EOPNOTSUPP; goto err_free_mac; } init_completion(&ar->scan.started); init_completion(&ar->scan.completed); init_completion(&ar->scan.on_channel); init_completion(&ar->target_suspend); init_completion(&ar->driver_recovery); init_completion(&ar->wow.wakeup_completed); init_completion(&ar->install_key_done); init_completion(&ar->vdev_setup_done); init_completion(&ar->vdev_delete_done); init_completion(&ar->thermal.wmi_sync); init_completion(&ar->bss_survey_done); init_completion(&ar->peer_delete_done); init_completion(&ar->peer_stats_info_complete); INIT_DELAYED_WORK(&ar->scan.timeout, ath10k_scan_timeout_work); ar->workqueue = create_singlethread_workqueue("ath10k_wq"); if (!ar->workqueue) goto err_free_mac; ar->workqueue_aux = create_singlethread_workqueue("ath10k_aux_wq"); if (!ar->workqueue_aux) goto err_free_wq; ar->workqueue_tx_complete = create_singlethread_workqueue("ath10k_tx_complete_wq"); if (!ar->workqueue_tx_complete) goto err_free_aux_wq; mutex_init(&ar->conf_mutex); mutex_init(&ar->dump_mutex); spin_lock_init(&ar->data_lock); for (int ac = 0; ac < IEEE80211_NUM_ACS; ac++) spin_lock_init(&ar->queue_lock[ac]); INIT_LIST_HEAD(&ar->peers); init_waitqueue_head(&ar->peer_mapping_wq);
init_waitqueue_head(&ar->htt.empty_tx_wq); init_waitqueue_head(&ar->wmi.tx_credits_wq); skb_queue_head_init(&ar->htt.rx_indication_head); init_completion(&ar->offchan_tx_completed); INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work); skb_queue_head_init(&ar->offchan_tx_queue); INIT_WORK(&ar->wmi_mgmt_tx_work, ath10k_mgmt_over_wmi_tx_work); skb_queue_head_init(&ar->wmi_mgmt_tx_queue); INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); INIT_WORK(&ar->recovery_check_work, ath10k_core_recovery_check_work); INIT_WORK(&ar->set_coverage_class_work, ath10k_core_set_coverage_class_work); ar->napi_dev = alloc_netdev_dummy(0); if (!ar->napi_dev) goto err_free_tx_complete; ret = ath10k_coredump_create(ar); if (ret) goto err_free_netdev; ret = ath10k_debug_create(ar); if (ret) goto err_free_coredump; return ar; err_free_coredump: ath10k_coredump_destroy(ar); err_free_netdev: free_netdev(ar->napi_dev); err_free_tx_complete: destroy_workqueue(ar->workqueue_tx_complete); err_free_aux_wq: destroy_workqueue(ar->workqueue_aux); err_free_wq: destroy_workqueue(ar->workqueue); err_free_mac: ath10k_mac_destroy(ar); return NULL; } EXPORT_SYMBOL(ath10k_core_create); void ath10k_core_destroy(struct ath10k *ar) { destroy_workqueue(ar->workqueue); destroy_workqueue(ar->workqueue_aux); destroy_workqueue(ar->workqueue_tx_complete); free_netdev(ar->napi_dev); ath10k_debug_destroy(ar); ath10k_coredump_destroy(ar); ath10k_htt_tx_destroy(&ar->htt); ath10k_wmi_free_host_mem(ar); ath10k_mac_destroy(ar); } EXPORT_SYMBOL(ath10k_core_destroy); MODULE_AUTHOR("Qualcomm Atheros"); MODULE_DESCRIPTION("Core module for Qualcomm Atheros 802.11ac wireless LAN cards."); MODULE_LICENSE("Dual BSD/GPL");
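/*
 * Illustrative sketch, not part of either kernel file in this dump: the
 * firmware-N.bin container walked by ath10k_core_fetch_firmware_api_n()
 * above is a magic string followed by little-endian TLV elements -- a 32-bit
 * id and a 32-bit length, then a payload padded to a 4-byte boundary. The
 * standalone walker below mirrors the driver's bounds checks; the struct and
 * function names are invented for the example and are not kernel API.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct fw_ie_hdr {
	uint32_t id;	/* element type, e.g. one of the ATH10K_FW_IE_* ids */
	uint32_t len;	/* payload length, header not included */
};

static int fw_ie_walk(const uint8_t *data, size_t len,
		      void (*cb)(uint32_t id, const uint8_t *payload, size_t n))
{
	struct fw_ie_hdr hdr;
	size_t ie_len;

	while (len > sizeof(hdr)) {
		memcpy(&hdr, data, sizeof(hdr));	/* avoid unaligned loads */
		data += sizeof(hdr);
		len -= sizeof(hdr);

		ie_len = hdr.len;	/* assumes a little-endian host; the
					 * driver uses le32_to_cpu() instead */
		if (ie_len > len)
			return -1;	/* truncated element; -EINVAL above */

		cb(hdr.id, data, ie_len);

		ie_len = (ie_len + 3) & ~(size_t)3;	/* jump over padding */
		if (ie_len > len)
			break;
		data += ie_len;
		len -= ie_len;
	}
	return 0;
}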
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linear symmetric key cipher operations. * * Generic encrypt/decrypt wrapper for ciphers.
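* * An lskcipher ("linear" skcipher) operates on a contiguous virtual-address buffer rather than on a scatterlist; crypto_lskcipher_crypt_sg() below bridges scatterlist-based skcipher requests onto this flat interface.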
* * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au> */ #include <linux/cryptouser.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <net/netlink.h> #include "skcipher.h" static inline struct crypto_lskcipher *__crypto_lskcipher_cast( struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_lskcipher, base); } static inline struct lskcipher_alg *__crypto_lskcipher_alg( struct crypto_alg *alg) { return container_of(alg, struct lskcipher_alg, co.base); } static int lskcipher_setkey_unaligned(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { unsigned long alignmask = crypto_lskcipher_alignmask(tfm); struct lskcipher_alg *cipher = crypto_lskcipher_alg(tfm); u8 *buffer, *alignbuffer; unsigned long absize; int ret; absize = keylen + alignmask; buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cipher->setkey(tfm, alignbuffer, keylen); kfree_sensitive(buffer); return ret; } int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { unsigned long alignmask = crypto_lskcipher_alignmask(tfm); struct lskcipher_alg *cipher = crypto_lskcipher_alg(tfm); if (keylen < cipher->co.min_keysize || keylen > cipher->co.max_keysize) return -EINVAL; if ((unsigned long)key & alignmask) return lskcipher_setkey_unaligned(tfm, key, keylen); else return cipher->setkey(tfm, key, keylen); } EXPORT_SYMBOL_GPL(crypto_lskcipher_setkey); static int crypto_lskcipher_crypt_unaligned( struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, int (*crypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, u32 flags)) { unsigned statesize = crypto_lskcipher_statesize(tfm); unsigned ivsize = crypto_lskcipher_ivsize(tfm); unsigned bs = crypto_lskcipher_blocksize(tfm); unsigned cs = crypto_lskcipher_chunksize(tfm); int err; u8 *tiv; u8 *p; BUILD_BUG_ON(MAX_CIPHER_BLOCKSIZE > PAGE_SIZE || MAX_CIPHER_ALIGNMASK >= PAGE_SIZE); tiv = kmalloc(PAGE_SIZE, GFP_ATOMIC); if (!tiv) return -ENOMEM; memcpy(tiv, iv, ivsize + statesize); p = kmalloc(PAGE_SIZE, GFP_ATOMIC); err = -ENOMEM; if (!p) goto out; while (len >= bs) { unsigned chunk = min((unsigned)PAGE_SIZE, len); if (chunk > cs) chunk &= ~(cs - 1); memcpy(p, src, chunk); err = crypt(tfm, p, p, chunk, tiv, CRYPTO_LSKCIPHER_FLAG_FINAL); if (err) goto out; memcpy(dst, p, chunk); src += chunk; dst += chunk; len -= chunk; } err = len ?
-EINVAL : 0; out: memcpy(iv, tiv, ivsize + statesize); kfree_sensitive(p); kfree_sensitive(tiv); return err; } static int crypto_lskcipher_crypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, int (*crypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, u32 flags)) { unsigned long alignmask = crypto_lskcipher_alignmask(tfm); if (((unsigned long)src | (unsigned long)dst | (unsigned long)iv) & alignmask) return crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, crypt); return crypt(tfm, src, dst, len, iv, CRYPTO_LSKCIPHER_FLAG_FINAL); } int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv) { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->encrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_encrypt); int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv) { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->decrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_decrypt); static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, int (*crypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *ivs, u32 flags)) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); u8 *ivs = skcipher_request_ctx(req); struct crypto_lskcipher *tfm = *ctx; struct skcipher_walk walk; unsigned ivsize; u32 flags; int err; ivsize = crypto_lskcipher_ivsize(tfm); ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(skcipher) + 1); memcpy(ivs, req->iv, ivsize); flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; if (req->base.flags & CRYPTO_SKCIPHER_REQ_CONT) flags |= CRYPTO_LSKCIPHER_FLAG_CONT; if (!(req->base.flags & CRYPTO_SKCIPHER_REQ_NOTFINAL)) flags |= CRYPTO_LSKCIPHER_FLAG_FINAL; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { err = crypt(tfm, walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, ivs, flags & ~(walk.nbytes == walk.total ? 
0 : CRYPTO_LSKCIPHER_FLAG_FINAL)); err = skcipher_walk_done(&walk, err); flags |= CRYPTO_LSKCIPHER_FLAG_CONT; } memcpy(req->iv, ivs, ivsize); return err; } int crypto_lskcipher_encrypt_sg(struct skcipher_request *req) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); struct lskcipher_alg *alg = crypto_lskcipher_alg(*ctx); return crypto_lskcipher_crypt_sg(req, alg->encrypt); } int crypto_lskcipher_decrypt_sg(struct skcipher_request *req) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); struct lskcipher_alg *alg = crypto_lskcipher_alg(*ctx); return crypto_lskcipher_crypt_sg(req, alg->decrypt); } static void crypto_lskcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_lskcipher *skcipher = __crypto_lskcipher_cast(tfm); struct lskcipher_alg *alg = crypto_lskcipher_alg(skcipher); alg->exit(skcipher); } static int crypto_lskcipher_init_tfm(struct crypto_tfm *tfm) { struct crypto_lskcipher *skcipher = __crypto_lskcipher_cast(tfm); struct lskcipher_alg *alg = crypto_lskcipher_alg(skcipher); if (alg->exit) skcipher->base.exit = crypto_lskcipher_exit_tfm; if (alg->init) return alg->init(skcipher); return 0; } static void crypto_lskcipher_free_instance(struct crypto_instance *inst) { struct lskcipher_instance *skcipher = container_of(inst, struct lskcipher_instance, s.base); skcipher->free(skcipher); } static void __maybe_unused crypto_lskcipher_show( struct seq_file *m, struct crypto_alg *alg) { struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); seq_printf(m, "type : lskcipher\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "min keysize : %u\n", skcipher->co.min_keysize); seq_printf(m, "max keysize : %u\n", skcipher->co.max_keysize); seq_printf(m, "ivsize : %u\n", skcipher->co.ivsize); seq_printf(m, "chunksize : %u\n", skcipher->co.chunksize); seq_printf(m, "statesize : %u\n", skcipher->co.statesize); } static int __maybe_unused crypto_lskcipher_report( struct sk_buff *skb, struct crypto_alg *alg) { struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); struct crypto_report_blkcipher rblkcipher; memset(&rblkcipher, 0, sizeof(rblkcipher)); strscpy(rblkcipher.type, "lskcipher", sizeof(rblkcipher.type)); strscpy(rblkcipher.geniv, "<none>", sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = skcipher->co.min_keysize; rblkcipher.max_keysize = skcipher->co.max_keysize; rblkcipher.ivsize = skcipher->co.ivsize; return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, sizeof(rblkcipher), &rblkcipher); } static const struct crypto_type crypto_lskcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_lskcipher_init_tfm, .free = crypto_lskcipher_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_lskcipher_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_lskcipher_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_LSKCIPHER, .tfmsize = offsetof(struct crypto_lskcipher, base), .algsize = offsetof(struct lskcipher_alg, co.base), }; static void crypto_lskcipher_exit_tfm_sg(struct crypto_tfm *tfm) { struct crypto_lskcipher **ctx = crypto_tfm_ctx(tfm); crypto_free_lskcipher(*ctx); } int crypto_init_lskcipher_ops_sg(struct crypto_tfm *tfm) { struct crypto_lskcipher **ctx = crypto_tfm_ctx(tfm); struct crypto_alg *calg = tfm->__crt_alg; struct crypto_lskcipher *skcipher; if (!crypto_mod_get(calg)) 
return -EAGAIN; skcipher = crypto_create_tfm(calg, &crypto_lskcipher_type); if (IS_ERR(skcipher)) { crypto_mod_put(calg); return PTR_ERR(skcipher); } *ctx = skcipher; tfm->exit = crypto_lskcipher_exit_tfm_sg; return 0; } int crypto_grab_lskcipher(struct crypto_lskcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_lskcipher_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_lskcipher); struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_lskcipher_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_lskcipher); static int lskcipher_prepare_alg(struct lskcipher_alg *alg) { struct crypto_alg *base = &alg->co.base; int err; err = skcipher_prepare_alg_common(&alg->co); if (err) return err; if (alg->co.chunksize & (alg->co.chunksize - 1)) return -EINVAL; base->cra_type = &crypto_lskcipher_type; base->cra_flags |= CRYPTO_ALG_TYPE_LSKCIPHER; return 0; } int crypto_register_lskcipher(struct lskcipher_alg *alg) { struct crypto_alg *base = &alg->co.base; int err; err = lskcipher_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_lskcipher); void crypto_unregister_lskcipher(struct lskcipher_alg *alg) { crypto_unregister_alg(&alg->co.base); } EXPORT_SYMBOL_GPL(crypto_unregister_lskcipher); int crypto_register_lskciphers(struct lskcipher_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_lskcipher(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_lskcipher(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_lskciphers); void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_lskcipher(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_lskciphers); int lskcipher_register_instance(struct crypto_template *tmpl, struct lskcipher_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = lskcipher_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, lskcipher_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(lskcipher_register_instance); static int lskcipher_setkey_simple(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { struct crypto_lskcipher *cipher = lskcipher_cipher_simple(tfm); crypto_lskcipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK); crypto_lskcipher_set_flags(cipher, crypto_lskcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); return crypto_lskcipher_setkey(cipher, key, keylen); } static int lskcipher_init_tfm_simple(struct crypto_lskcipher *tfm) { struct lskcipher_instance *inst = lskcipher_alg_instance(tfm); struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); struct crypto_lskcipher_spawn *spawn; struct crypto_lskcipher *cipher; spawn = lskcipher_instance_ctx(inst); cipher = crypto_spawn_lskcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); *ctx = cipher; return 0; } static void lskcipher_exit_tfm_simple(struct crypto_lskcipher *tfm) { struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); crypto_free_lskcipher(*ctx); } static void lskcipher_free_instance_simple(struct lskcipher_instance *inst) { crypto_drop_lskcipher(lskcipher_instance_ctx(inst)); kfree(inst); } /** * lskcipher_alloc_instance_simple - allocate instance of simple block cipher * * Allocate an lskcipher_instance for a simple block cipher mode of 
operation, * e.g. cbc or ecb. The instance context will have just a single crypto_spawn, * that for the underlying cipher. The {min,max}_keysize, ivsize, blocksize, * alignmask, and priority are set from the underlying cipher but can be * overridden if needed. The tfm context defaults to * struct crypto_lskcipher *, and default ->setkey(), ->init(), and * ->exit() methods are installed. * * @tmpl: the template being instantiated * @tb: the template parameters * * Return: a pointer to the new instance, or an ERR_PTR(). The caller still * needs to register the instance. */ struct lskcipher_instance *lskcipher_alloc_instance_simple( struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct lskcipher_instance *inst; struct crypto_lskcipher_spawn *spawn; char ecb_name[CRYPTO_MAX_ALG_NAME]; struct lskcipher_alg *cipher_alg; const char *cipher_name; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_LSKCIPHER, &mask); if (err) return ERR_PTR(err); cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) return ERR_CAST(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); spawn = lskcipher_instance_ctx(inst); err = crypto_grab_lskcipher(spawn, lskcipher_crypto_instance(inst), cipher_name, 0, mask); ecb_name[0] = 0; if (err == -ENOENT && !!memcmp(tmpl->name, "ecb", 4)) { err = -ENAMETOOLONG; if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; err = crypto_grab_lskcipher(spawn, lskcipher_crypto_instance(inst), ecb_name, 0, mask); } if (err) goto err_free_inst; cipher_alg = crypto_lskcipher_spawn_alg(spawn); err = crypto_inst_setname(lskcipher_crypto_instance(inst), tmpl->name, &cipher_alg->co.base); if (err) goto err_free_inst; if (ecb_name[0]) { int len; err = -EINVAL; len = strscpy(ecb_name, &cipher_alg->co.base.cra_name[4], sizeof(ecb_name)); if (len < 2) goto err_free_inst; if (ecb_name[len - 1] != ')') goto err_free_inst; ecb_name[len - 1] = 0; err = -ENAMETOOLONG; if (snprintf(inst->alg.co.base.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, ecb_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (strcmp(ecb_name, cipher_name) && snprintf(inst->alg.co.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; } else { /* Don't allow nesting. */ err = -ELOOP; if ((cipher_alg->co.base.cra_flags & CRYPTO_ALG_INSTANCE)) goto err_free_inst; } err = -EINVAL; if (cipher_alg->co.ivsize) goto err_free_inst; inst->free = lskcipher_free_instance_simple; /* Default algorithm properties, can be overridden */ inst->alg.co.base.cra_blocksize = cipher_alg->co.base.cra_blocksize; inst->alg.co.base.cra_alignmask = cipher_alg->co.base.cra_alignmask; inst->alg.co.base.cra_priority = cipher_alg->co.base.cra_priority; inst->alg.co.min_keysize = cipher_alg->co.min_keysize; inst->alg.co.max_keysize = cipher_alg->co.max_keysize; inst->alg.co.ivsize = cipher_alg->co.base.cra_blocksize; inst->alg.co.statesize = cipher_alg->co.statesize; /* Use struct crypto_lskcipher * by default, can be overridden */ inst->alg.co.base.cra_ctxsize = sizeof(struct crypto_lskcipher *); inst->alg.setkey = lskcipher_setkey_simple; inst->alg.init = lskcipher_init_tfm_simple; inst->alg.exit = lskcipher_exit_tfm_simple; return inst; err_free_inst: lskcipher_free_instance_simple(inst); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(lskcipher_alloc_instance_simple);
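A minimal sketch of how a block cipher mode template might build on the helpers above. This mirrors the lskcipher_alloc_instance_simple() + lskcipher_register_instance() pattern documented above; crypto_xxx_create() and the crypto_xxx_encrypt()/crypto_xxx_decrypt() mode implementations are hypothetical names, not code from this file:

static int crypto_xxx_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct lskcipher_instance *inst;
	int err;

	/* Allocates the instance and grabs the underlying cipher spawn. */
	inst = lskcipher_alloc_instance_simple(tmpl, tb);
	if (IS_ERR(inst))
		return PTR_ERR(inst);

	/* Wire up the mode's encrypt/decrypt (hypothetical helpers). */
	inst->alg.encrypt = crypto_xxx_encrypt;
	inst->alg.decrypt = crypto_xxx_decrypt;

	/* ->free() was installed by lskcipher_alloc_instance_simple(). */
	err = lskcipher_register_instance(tmpl, inst);
	if (err)
		inst->free(inst);

	return err;
}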
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/lockdep.h> #include <linux/printk.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct serial_port_device; struct device; struct gpio_desc; /** * struct uart_ops -- interface between serial_core and the driver * * This structure describes all the operations that can be done on the * physical hardware. * * @tx_empty: ``unsigned int ()(struct uart_port *port)`` * * This function tests whether the transmitter fifo and shifter for the * @port is empty. If it is empty, this function should return * %TIOCSER_TEMT, otherwise return 0. If the port does not support this * operation, then it should return %TIOCSER_TEMT. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @set_mctrl: ``void ()(struct uart_port *port, unsigned int mctrl)`` * * This function sets the modem control lines for @port to the state * described by @mctrl. The relevant bits of @mctrl are: * * - %TIOCM_RTS RTS signal.
* - %TIOCM_DTR DTR signal. * - %TIOCM_OUT1 OUT1 signal. * - %TIOCM_OUT2 OUT2 signal. * - %TIOCM_LOOP Set the port into loopback mode. * * If the appropriate bit is set, the signal should be driven * active. If the bit is clear, the signal should be driven * inactive. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @get_mctrl: ``unsigned int ()(struct uart_port *port)`` * * Returns the current state of modem control inputs of @port. The state * of the outputs should not be returned, since the core keeps track of * their state. The state information should include: * * - %TIOCM_CAR state of DCD signal * - %TIOCM_CTS state of CTS signal * - %TIOCM_DSR state of DSR signal * - %TIOCM_RI state of RI signal * * The bit is set if the signal is currently driven active. If * the port does not support CTS, DCD or DSR, the driver should * indicate that the signal is permanently active. If RI is * not available, the signal should not be indicated as active. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_tx: ``void ()(struct uart_port *port)`` * * Stop transmitting characters. This might be due to the CTS line * becoming inactive or the tty layer indicating we want to stop * transmission due to an %XOFF character. * * The driver should stop transmitting characters as soon as possible. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @start_tx: ``void ()(struct uart_port *port)`` * * Start transmitting characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @throttle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that input buffers for the line discipline are * close to full, and it should somehow signal that no more characters * should be sent to the serial port. * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @unthrottle() and termios modification by the * tty layer. * * @unthrottle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that characters can now be sent to the serial * port without fear of overrunning the input buffers of the line * disciplines. * * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @throttle() and termios modification by the * tty layer. * * @send_xchar: ``void ()(struct uart_port *port, char ch)`` * * Transmit a high priority character, even if the port is stopped. This * is used to implement XON/XOFF flow control and tcflow(). If the serial * driver does not implement this function, the tty core will append the * character to the circular buffer and then call start_tx() / stop_tx() * to flush the data out. * * Do not transmit if @ch == '\0' (%__DISABLED_CHAR). * * Locking: none. * Interrupts: caller dependent. * * @start_rx: ``void ()(struct uart_port *port)`` * * Start receiving characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @enable_ms: ``void ()(struct uart_port *port)`` * * Enable the modem status interrupts. * * This method may be called multiple times. Modem status interrupts * should be disabled when the @shutdown() method is called. * * Locking: @port->lock taken. 
* Interrupts: locally disabled. * This call must not sleep * * @break_ctl: ``void ()(struct uart_port *port, int ctl)`` * * Control the transmission of a break signal. If @ctl is nonzero, the * break signal should be transmitted. The signal should be terminated * when another call is made with a zero @ctl. * * Locking: caller holds tty_port->mutex * * @startup: ``int ()(struct uart_port *port)`` * * Grab any interrupt resources and initialise any low level driver state. * Enable the port for reception. It should not activate RTS nor DTR; * this will be done via a separate call to @set_mctrl(). * * This method will only be called when the port is initially opened. * * Locking: port_sem taken. * Interrupts: globally disabled. * * @shutdown: ``void ()(struct uart_port *port)`` * * Disable the @port, disable any break condition that may be in effect, * and free any interrupt resources. It should not disable RTS nor DTR; * this will have already been done via a separate call to @set_mctrl(). * * Drivers must not access @port->state once this call has completed. * * This method will only be called when there are no more users of this * @port. * * Locking: port_sem taken. * Interrupts: caller dependent. * * @flush_buffer: ``void ()(struct uart_port *port)`` * * Flush any write buffers, reset any DMA state and stop any ongoing DMA * transfers. * * This will be called whenever the @port->state->xmit circular buffer is * cleared. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @set_termios: ``void ()(struct uart_port *port, struct ktermios *new, * struct ktermios *old)`` * * Change the @port parameters, including word length, parity, stop bits. * Update @port->read_status_mask and @port->ignore_status_mask to * indicate the types of events we are interested in receiving. Relevant * ktermios::c_cflag bits are: * * - %CSIZE - word size * - %CSTOPB - 2 stop bits * - %PARENB - parity enable * - %PARODD - odd parity (when %PARENB is in force) * - %ADDRB - address bit (changed through uart_port::rs485_config()). * - %CREAD - enable reception of characters (if not set, still receive * characters from the port, but throw them away). * - %CRTSCTS - if set, enable CTS status change reporting. * - %CLOCAL - if not set, enable modem status change reporting. * * Relevant ktermios::c_iflag bits are: * * - %INPCK - enable frame and parity error events to be passed to the TTY * layer. * - %BRKINT / %PARMRK - both of these enable break events to be passed to * the TTY layer. * - %IGNPAR - ignore parity and framing errors. * - %IGNBRK - ignore break errors. If %IGNPAR is also set, ignore overrun * errors as well. * * The interaction of the ktermios::c_iflag bits is as follows (parity * error given as an example): * * ============ ======= ======= ========================================= * Parity error INPCK IGNPAR * ============ ======= ======= ========================================= * n/a 0 n/a character received, marked as %TTY_NORMAL * None 1 n/a character received, marked as %TTY_NORMAL * Yes 1 0 character received, marked as %TTY_PARITY * Yes 1 1 character discarded * ============ ======= ======= ========================================= * * Other flags may be used (eg, xon/xoff characters) if your hardware * supports hardware "soft" flow control. * * Locking: caller holds tty_port->mutex * Interrupts: caller dependent. * This call must not sleep * * @set_ldisc: ``void ()(struct uart_port *port, struct ktermios *termios)`` * * Notifier for discipline change. 
See * Documentation/driver-api/tty/tty_ldisc.rst. * * Locking: caller holds tty_port->mutex * * @pm: ``void ()(struct uart_port *port, unsigned int state, * unsigned int oldstate)`` * * Perform any power management related activities on the specified @port. * @state indicates the new state (defined by enum uart_pm_state), * @oldstate indicates the previous state. * * This function should not be used to grab any resources. * * This will be called when the @port is initially opened and finally * closed, except when the @port is also the system console. This will * occur even if %CONFIG_PM is not set. * * Locking: none. * Interrupts: caller dependent. * * @type: ``const char *()(struct uart_port *port)`` * * Return a pointer to a string constant describing the specified @port, * or return %NULL, in which case the string 'unknown' is substituted. * * Locking: none. * Interrupts: caller dependent. * * @release_port: ``void ()(struct uart_port *port)`` * * Release any memory and IO region resources currently in use by the * @port. * * Locking: none. * Interrupts: caller dependent. * * @request_port: ``int ()(struct uart_port *port)`` * * Request any memory and IO region resources required by the port. If any * fail, no resources should be registered when this function returns, and * it should return -%EBUSY on failure. * * Locking: none. * Interrupts: caller dependent. * * @config_port: ``void ()(struct uart_port *port, int type)`` * * Perform any autoconfiguration steps required for the @port. @type * contains a bit mask of the required configuration. %UART_CONFIG_TYPE * indicates that the port requires detection and identification. * @port->type should be set to the type found, or %PORT_UNKNOWN if no * port was detected. * * %UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, * which should be probed using standard kernel autoprobing techniques. * This is not necessary on platforms where ports have interrupts * internally hard wired (eg, system on a chip implementations). * * Locking: none. * Interrupts: caller dependent. * * @verify_port: ``int ()(struct uart_port *port, * struct serial_struct *serinfo)`` * * Verify the new serial port information contained within @serinfo is * suitable for this port type. * * Locking: none. * Interrupts: caller dependent. * * @ioctl: ``int ()(struct uart_port *port, unsigned int cmd, * unsigned long arg)`` * * Perform any port specific IOCTLs. IOCTL commands must be defined using * the standard numbering system found in <asm/ioctl.h>. * * Locking: none. * Interrupts: caller dependent. * * @poll_init: ``int ()(struct uart_port *port)`` * * Called by kgdb to perform the minimal hardware initialization needed to * support @poll_put_char() and @poll_get_char(). Unlike @startup(), this * should not request interrupts. * * Locking: %tty_mutex and tty_port->mutex taken. * Interrupts: n/a. * * @poll_put_char: ``void ()(struct uart_port *port, unsigned char ch)`` * * Called by kgdb to write a single character @ch directly to the serial * @port. It can and should block until there is space in the TX FIFO. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @poll_get_char: ``int ()(struct uart_port *port)`` * * Called by kgdb to read a single character directly from the serial * port. If data is available, it should be returned; otherwise the * function should return %NO_POLL_CHAR immediately. * * Locking: none. * Interrupts: caller dependent. 
* This call must not sleep */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*start_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); const char *(*type)(struct uart_port *); void (*release_port)(struct uart_port *); int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; enum uart_iotype { UPIO_UNKNOWN = -1, UPIO_PORT = SERIAL_IO_PORT, /* 8b I/O port access */ UPIO_HUB6 = SERIAL_IO_HUB6, /* Hub6 ISA card */ UPIO_MEM = SERIAL_IO_MEM, /* driver-specific */ UPIO_MEM32 = SERIAL_IO_MEM32, /* 32b little endian */ UPIO_AU = SERIAL_IO_AU, /* Au1x00 and RT288x type IO */ UPIO_TSI = SERIAL_IO_TSI, /* Tsi108/109 type IO */ UPIO_MEM32BE = SERIAL_IO_MEM32BE, /* 32b big endian */ UPIO_MEM16 = SERIAL_IO_MEM16, /* 16b little endian */ }; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ u32 (*serial_in)(struct uart_port *, unsigned int offset); void (*serial_out)(struct uart_port *, unsigned int offset, u32 val); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct ktermios *termios, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int ctrl_id; /* optional serial core controller id */ unsigned int port_id; /* optional serial core port id */ unsigned int irq; /* irq number */ unsigned long irqflags; /* irq 
flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) enum uart_iotype iotype; /* io access style */ unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..ASYNCB_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #else #define UPF_FOURPORT 0 #endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) #define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) #define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) #define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) #define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) #define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) #define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) #define UPF_DEAD ((__force upf_t) BIT_ULL(30)) #define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) #define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. 
*/ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) bool hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int frame_time; /* frame timing in ns */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* serial port physical parent device */ struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); } /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { spin_unlock_irqrestore(&up->lock, flags); } /** * uart_port_set_cons - Safely set the @cons field for a uart * @up: The uart port to set * @con: The new console to set to * * This function must be used to set @up->cons. It uses the port lock to * synchronize with the port lock wrappers in order to ensure that the console * cannot change or disappear while another context is holding the port lock. */ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) { unsigned long flags; __uart_port_lock_irqsave(up, &flags); up->cons = con; __uart_port_unlock_irqrestore(up, flags); } /* Only for internal port lock wrapper usage. */ static inline bool __uart_port_using_nbcon(struct uart_port *up) { lockdep_assert_held_once(&up->lock); if (likely(!uart_console(up))) return false; /* * @up->cons is only modified under the port lock. Therefore it is * certain that it cannot disappear here. * * @up->cons->node is added/removed from the console list under the * port lock. Therefore it is certain that the registration status * cannot change here, thus @up->cons->flags can be read directly. */ if (hlist_unhashed_lockless(&up->cons->node) || !(up->cons->flags & CON_NBCON) || !up->cons->write_atomic) { return false; } return true; } /* Only for internal port lock wrapper usage. 
*/ static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return true; return nbcon_device_try_acquire(up->cons); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; while (!nbcon_device_try_acquire(up->cons)) cpu_relax(); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_release(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; nbcon_device_release(up->cons); } /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irq - Lock the UART port and disable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage */ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); __uart_port_nbcon_acquire(up); } /** * uart_port_trylock - Try to lock the UART port * @up: Pointer to UART port structure * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock(struct uart_port *up) { if (!spin_trylock(&up->lock)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock(&up->lock); return false; } return true; } /** * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { if (!spin_trylock_irqsave(&up->lock, *flags)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock_irqrestore(&up->lock, *flags); return false; } return true; } /** * uart_port_unlock - Unlock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_unlock(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock(&up->lock); } /** * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_unlock_irq(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } /** * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), uart_port_unlock_irq(_T)); DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, uart_port_lock_irqsave(_T->lock, &_T->flags), uart_port_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags); DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, uart_port_trylock_irqsave(_T->lock, &_T->flags)); static inline int serial_port_in(struct uart_port 
*up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 /** * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars * @up: uart_port structure describing the port * @chars: number of characters sent * * This function advances the tail of circular xmit buffer by the number of * @chars transmitted and handles accounting of transmitted bytes (into * @up's icount.tx). */ static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) { struct tty_port *tport = &up->state->port; kfifo_skip_count(&tport->xmit_fifo, chars); up->icount.tx += chars; } static inline unsigned int uart_fifo_out(struct uart_port *up, unsigned char *buf, unsigned int chars) { struct tty_port *tport = &up->state->port; chars = kfifo_out(&tport->xmit_fifo, buf, chars); up->icount.tx += chars; return chars; } static inline unsigned int uart_fifo_get(struct uart_port *up, unsigned char *ch) { struct tty_port *tport = &up->state->port; unsigned int chars; chars = kfifo_get(&tport->xmit_fifo, ch); up->icount.tx += chars; return chars; } struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /** * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() * * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer */ enum UART_TX_FLAGS { UART_TX_NOSTOP = BIT(0), }; #define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct tty_port *__tport = &__port->state->port; \ unsigned int pending; \ \ for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ if (__port->x_char) { \ (ch) = __port->x_char; \ (put_char); \ __port->x_char = 0; \ continue; \ } \ \ if (uart_tx_stopped(__port)) \ break; \ \ if (!kfifo_get(&__tport->xmit_fifo, &(ch))) \ break; \ \ (put_char); \ } \ \ (tx_done); \ \ pending = kfifo_len(&__tport->xmit_fifo); \ if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ pending; \ }) /** * uart_port_tx_limited -- transmit helper for uart_port with count limiting * @port: uart port * @ch: variable to store a character to be written to the HW * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * This helper transmits characters from the xmit buffer to the hardware using * 
@put_char(). It does so until @count characters are sent and while @tx_ready * evaluates to true. * * Returns: the number of characters in the xmit buffer when done. * * The expression in macro parameters shall be designed as follows: * * **tx_ready:** should evaluate to true if the HW can accept more data to * be sent. This parameter can be %true, which means the HW is always ready. * * **put_char:** shall write @ch to the device of @port. * * **tx_done:** when the write loop is done, this can perform arbitrary * action before potential invocation of ops->stop_tx() happens. If the * driver does not need to do anything, use e.g. ({}). * * For all of them, @port->lock is held, interrupts are locally disabled and * the expressions must not sleep. */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx -- transmit helper for uart_port * @port: uart port * @ch: variable to store a character to be written to the HW * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) /** * uart_port_tx_flags -- transmit helper for uart_port with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* * Calculates FIFO drain time. */ static inline unsigned long uart_fifo_timeout(struct uart_port *port) { u64 fifo_timeout = (u64)READ_ONCE(port->frame_time) * port->fifosize; /* Add .02 seconds of slop */ fifo_timeout += 20 * NSEC_PER_MSEC; return max(nsecs_to_jiffies(fifo_timeout), 1UL); } /* Base timer interval for polling */ static inline unsigned long uart_poll_timeout(struct uart_port *port) { unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. 
*/ struct earlycon_device { struct console *con; struct uart_port port; char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id __earlycon_table[]; extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define OF_EARLYCON_DECLARE(_name, compat, fn) \ static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif /* Variant of uart_console_registered() when the console_list_lock is held. */ static inline bool uart_console_registered_locked(struct uart_port *port) { return uart_console(port) && console_is_registered_locked(port->cons); } static inline bool uart_console_registered(struct uart_port *port) { return uart_console(port) && console_is_registered(port->cons); } int uart_parse_earlycon(char *p, enum uart_iotype *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, unsigned char)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_read_port_properties(struct uart_port *port); int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->flow.stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. 
*/ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */
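An illustrative sketch (not part of this header) of how a UART driver's interrupt handler might combine the port lock wrappers with the uart_port_tx() helper defined above; the MY_STAT/MY_TX register offsets and the STAT_TX_RDY bit are hypothetical hardware details:

static irqreturn_t my_uart_irq(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	unsigned long flags;
	u8 ch;

	uart_port_lock_irqsave(port, &flags);

	/*
	 * Push characters from the xmit FIFO while the (hypothetical)
	 * status register reports room; uart_port_tx() handles x_char,
	 * the tx-stopped check, tx accounting and the write wakeup.
	 */
	uart_port_tx(port, ch,
		     readl(port->membase + MY_STAT) & STAT_TX_RDY,
		     writel(ch, port->membase + MY_TX));

	uart_port_unlock_irqrestore(port, flags);

	return IRQ_HANDLED;
}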
// SPDX-License-Identifier: GPL-2.0 /* * linux/mm/mlock.c * * (C) Copyright 1995 Linus Torvalds * (C) Copyright 2002 Christoph Hellwig */ #include <linux/capability.h> #include <linux/mman.h> #include <linux/mm.h> #include <linux/sched/user.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/pagewalk.h> #include <linux/mempolicy.h> #include <linux/syscalls.h> #include <linux/sched.h> #include <linux/export.h> #include <linux/rmap.h> #include <linux/mmzone.h> #include <linux/hugetlb.h> #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/secretmem.h> #include "internal.h" struct mlock_fbatch { local_lock_t lock; struct folio_batch fbatch; }; static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = { .lock = INIT_LOCAL_LOCK(lock), }; bool can_do_mlock(void) { if (rlimit(RLIMIT_MEMLOCK) != 0) return true; if (capable(CAP_IPC_LOCK)) return true; return false; } EXPORT_SYMBOL(can_do_mlock); /* * Mlocked folios are marked with the PG_mlocked flag for efficient testing * in vmscan and, possibly, the fault path; and to support semi-accurate * statistics. * * An mlocked folio [folio_test_mlocked(folio)] is unevictable. As such, it * will be ostensibly placed on the LRU "unevictable" list (actually no such * list exists), rather than the [in]active lists. PG_unevictable is set to * indicate the unevictable state. */ static struct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec) { /* There is nothing more we can do while it's off LRU */ if (!folio_test_clear_lru(folio)) return lruvec; lruvec = folio_lruvec_relock_irq(folio, lruvec); if (unlikely(folio_evictable(folio))) { /* * This is a little surprising, but quite possible: PG_mlocked * must have got cleared already by another CPU. Could this * folio be unevictable? I'm not sure, but move it now if so.
*/ if (folio_test_unevictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_unevictable(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGRESCUED, folio_nr_pages(folio)); } goto out; } if (folio_test_unevictable(folio)) { if (folio_test_mlocked(folio)) folio->mlock_count++; goto out; } lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_set_unevictable(folio); folio->mlock_count = !!folio_test_mlocked(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); out: folio_set_lru(folio); return lruvec; } static struct lruvec *__mlock_new_folio(struct folio *folio, struct lruvec *lruvec) { VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); lruvec = folio_lruvec_relock_irq(folio, lruvec); /* As above, this is a little surprising, but possible */ if (unlikely(folio_evictable(folio))) goto out; folio_set_unevictable(folio); folio->mlock_count = !!folio_test_mlocked(folio); __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); out: lruvec_add_folio(lruvec, folio); folio_set_lru(folio); return lruvec; } static struct lruvec *__munlock_folio(struct folio *folio, struct lruvec *lruvec) { int nr_pages = folio_nr_pages(folio); bool isolated = false; if (!folio_test_clear_lru(folio)) goto munlock; isolated = true; lruvec = folio_lruvec_relock_irq(folio, lruvec); if (folio_test_unevictable(folio)) { /* Then mlock_count is maintained, but might undercount */ if (folio->mlock_count) folio->mlock_count--; if (folio->mlock_count) goto out; } /* else assume that was the last mlock: reclaim will fix it if not */ munlock: if (folio_test_clear_mlocked(folio)) { __zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); if (isolated || !folio_test_unevictable(folio)) __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); else __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); } /* folio_evictable() has to be checked *after* clearing Mlocked */ if (isolated && folio_test_unevictable(folio) && folio_evictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_unevictable(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } out: if (isolated) folio_set_lru(folio); return lruvec; } /* * Flags held in the low bits of a struct folio pointer on the mlock_fbatch. */ #define LRU_FOLIO 0x1 #define NEW_FOLIO 0x2 static inline struct folio *mlock_lru(struct folio *folio) { return (struct folio *)((unsigned long)folio + LRU_FOLIO); } static inline struct folio *mlock_new(struct folio *folio) { return (struct folio *)((unsigned long)folio + NEW_FOLIO); } /* * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can * make use of such folio pointer flags in future, but for now just keep it for * mlock. We could use three separate folio batches instead, but one feels * better (munlocking a full folio batch does not need to drain mlocking folio * batches first). 
*/ static void mlock_folio_batch(struct folio_batch *fbatch) { struct lruvec *lruvec = NULL; unsigned long mlock; struct folio *folio; int i; for (i = 0; i < folio_batch_count(fbatch); i++) { folio = fbatch->folios[i]; mlock = (unsigned long)folio & (LRU_FOLIO | NEW_FOLIO); folio = (struct folio *)((unsigned long)folio - mlock); fbatch->folios[i] = folio; if (mlock & LRU_FOLIO) lruvec = __mlock_folio(folio, lruvec); else if (mlock & NEW_FOLIO) lruvec = __mlock_new_folio(folio, lruvec); else lruvec = __munlock_folio(folio, lruvec); } if (lruvec) unlock_page_lruvec_irq(lruvec); folios_put(fbatch); } void mlock_drain_local(void) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); if (folio_batch_count(fbatch)) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } void mlock_drain_remote(int cpu) { struct folio_batch *fbatch; WARN_ON_ONCE(cpu_online(cpu)); fbatch = &per_cpu(mlock_fbatch.fbatch, cpu); if (folio_batch_count(fbatch)) mlock_folio_batch(fbatch); } bool need_mlock_drain(int cpu) { return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu)); } /** * mlock_folio - mlock a folio already on (or temporarily off) LRU * @folio: folio to be mlocked. */ void mlock_folio(struct folio *folio) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); if (!folio_test_set_mlocked(folio)) { int nr_pages = folio_nr_pages(folio); zone_stat_mod_folio(folio, NR_MLOCK, nr_pages); __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); } folio_get(folio); if (!folio_batch_add(fbatch, mlock_lru(folio)) || !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } /** * mlock_new_folio - mlock a newly allocated folio not yet on LRU * @folio: folio to be mlocked, either normal or a THP head. */ void mlock_new_folio(struct folio *folio) { struct folio_batch *fbatch; int nr_pages = folio_nr_pages(folio); local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); folio_set_mlocked(folio); zone_stat_mod_folio(folio, NR_MLOCK, nr_pages); __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); folio_get(folio); if (!folio_batch_add(fbatch, mlock_new(folio)) || !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } /** * munlock_folio - munlock a folio * @folio: folio to be munlocked, either normal or a THP head. */ void munlock_folio(struct folio *folio) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); /* * folio_test_clear_mlocked(folio) must be left to __munlock_folio(), * which will check whether the folio is multiply mlocked. */ folio_get(folio); if (!folio_batch_add(fbatch, folio) || !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } static inline unsigned int folio_mlock_step(struct folio *folio, pte_t *pte, unsigned long addr, unsigned long end) { unsigned int count = (end - addr) >> PAGE_SHIFT; pte_t ptent = ptep_get(pte); if (!folio_test_large(folio)) return 1; return folio_pte_batch(folio, pte, ptent, count); } static inline bool allow_mlock_munlock(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned int step) { /* * For unlock, allow munlock large folio which is partially * mapped to VMA. As it's possible that large folio is * mlocked and VMA is split later. 
* * During memory pressure, such kind of large folio can * be split. And the pages are not in VM_LOCKed VMA * can be reclaimed. */ if (!(vma->vm_flags & VM_LOCKED)) return true; /* folio_within_range() cannot take KSM, but any small folio is OK */ if (!folio_test_large(folio)) return true; /* folio not in range [start, end), skip mlock */ if (!folio_within_range(folio, vma, start, end)) return false; /* folio is not fully mapped, skip mlock */ if (step != folio_nr_pages(folio)) return false; return true; } static int mlock_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *start_pte, *pte; pte_t ptent; struct folio *folio; unsigned int step = 1; unsigned long start = addr; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (!pmd_present(*pmd)) goto out; if (is_huge_zero_pmd(*pmd)) goto out; folio = pmd_folio(*pmd); if (folio_is_zone_device(folio)) goto out; if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else munlock_folio(folio); goto out; } start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!start_pte) { walk->action = ACTION_AGAIN; return 0; } for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (!pte_present(ptent)) continue; folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; step = folio_mlock_step(folio, pte, addr, end); if (!allow_mlock_munlock(folio, vma, start, end, step)) goto next_entry; if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else munlock_folio(folio); next_entry: pte += step - 1; addr += (step - 1) << PAGE_SHIFT; } pte_unmap(start_pte); out: spin_unlock(ptl); cond_resched(); return 0; } /* * mlock_vma_pages_range() - mlock any pages already in the range, * or munlock all pages in the range. * @vma - vma containing range to be mlock()ed or munlock()ed * @start - start address in @vma of the range * @end - end of range in @vma * @newflags - the new set of flags for @vma. * * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED; * called for munlock() and munlockall(), to clear VM_LOCKED from @vma. */ static void mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, vm_flags_t newflags) { static const struct mm_walk_ops mlock_walk_ops = { .pmd_entry = mlock_pte_range, .walk_lock = PGWALK_WRLOCK_VERIFY, }; /* * There is a slight chance that concurrent page migration, * or page reclaim finding a page of this now-VM_LOCKED vma, * will call mlock_vma_folio() and raise page's mlock_count: * double counting, leaving the page unevictable indefinitely. * Communicate this danger to mlock_vma_folio() with VM_IO, * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas. * mmap_lock is held in write mode here, so this weird * combination should not be visible to other mmap_lock users; * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED. */ if (newflags & VM_LOCKED) newflags |= VM_IO; vma_start_write(vma); vm_flags_reset_once(vma, newflags); lru_add_drain(); walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL); lru_add_drain(); if (newflags & VM_IO) { newflags &= ~VM_IO; vm_flags_reset_once(vma, newflags); } } /* * mlock_fixup - handle mlock[all]/munlock[all] requests. * * Filters out "special" vmas -- VM_LOCKED never gets set for these, and * munlock is a no-op. However, for some special vmas, we go ahead and * populate the ptes. * * For vmas that pass the filters, merge/split as appropriate. 
*/ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; int nr_pages; int ret = 0; vm_flags_t oldflags = vma->vm_flags; if (newflags == oldflags || (oldflags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; } /* * Keep track of amount of locked VM. */ nr_pages = (end - start) >> PAGE_SHIFT; if (!(newflags & VM_LOCKED)) nr_pages = -nr_pages; else if (oldflags & VM_LOCKED) nr_pages = 0; mm->locked_vm += nr_pages; /* * vm_flags is protected by the mmap_lock held in write mode. * It's okay if try_to_unmap_one unmaps a page just after we * set VM_LOCKED, populate_vma_page_range will bring it back. */ if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) { /* No work to do, and mlocking twice would be wrong */ vma_start_write(vma); vm_flags_reset(vma, newflags); } else { mlock_vma_pages_range(vma, start, end, newflags); } out: *prev = vma; return ret; } static int apply_vma_lock_flags(unsigned long start, size_t len, vm_flags_t flags) { unsigned long nstart, end, tmp; struct vm_area_struct *vma, *prev; VMA_ITERATOR(vmi, current->mm, start); VM_BUG_ON(offset_in_page(start)); VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; vma = vma_iter_load(&vmi); if (!vma) return -ENOMEM; prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; nstart = start; tmp = vma->vm_start; for_each_vma_range(vmi, vma, end) { int error; vm_flags_t newflags; if (vma->vm_start != tmp) return -ENOMEM; newflags = vma->vm_flags & ~VM_LOCKED_MASK; newflags |= flags; /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ tmp = vma->vm_end; if (tmp > end) tmp = end; error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags); if (error) return error; tmp = vma_iter_end(&vmi); nstart = tmp; } if (tmp < end) return -ENOMEM; return 0; } /* * Go through vma areas and sum size of mlocked * vma pages, as return value. * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT) * is also counted. 
* Return value: previously mlocked page counts */ static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm, unsigned long start, size_t len) { struct vm_area_struct *vma; unsigned long count = 0; unsigned long end; VMA_ITERATOR(vmi, mm, start); /* Don't overflow past ULONG_MAX */ if (unlikely(ULONG_MAX - len < start)) end = ULONG_MAX; else end = start + len; for_each_vma_range(vmi, vma, end) { if (vma->vm_flags & VM_LOCKED) { if (start > vma->vm_start) count -= (start - vma->vm_start); if (end < vma->vm_end) { count += end - vma->vm_start; break; } count += vma->vm_end - vma->vm_start; } } return count >> PAGE_SHIFT; } /* * convert get_user_pages() return value to posix mlock() error */ static int __mlock_posix_error_return(long retval) { if (retval == -EFAULT) retval = -ENOMEM; else if (retval == -ENOMEM) retval = -EAGAIN; return retval; } static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) { unsigned long locked; unsigned long lock_limit; int error = -ENOMEM; start = untagged_addr(start); if (!can_do_mlock()) return -EPERM; len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; locked = len >> PAGE_SHIFT; if (mmap_write_lock_killable(current->mm)) return -EINTR; locked += current->mm->locked_vm; if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { /* * It is possible that the regions requested intersect with * previously mlocked areas, that part area in "mm->locked_vm" * should not be counted to new mlock increment count. So check * and adjust locked count if necessary. */ locked -= count_mm_mlocked_page_nr(current->mm, start, len); } /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = apply_vma_lock_flags(start, len, flags); mmap_write_unlock(current->mm); if (error) return error; error = __mm_populate(start, len, 0); if (error) return __mlock_posix_error_return(error); return 0; } SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { return do_mlock(start, len, VM_LOCKED); } SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) { vm_flags_t vm_flags = VM_LOCKED; if (flags & ~MLOCK_ONFAULT) return -EINVAL; if (flags & MLOCK_ONFAULT) vm_flags |= VM_LOCKONFAULT; return do_mlock(start, len, vm_flags); } SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; start = untagged_addr(start); len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = apply_vma_lock_flags(start, len, 0); mmap_write_unlock(current->mm); return ret; } /* * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) * and translate into the appropriate modifications to mm->def_flags and/or the * flags for all current VMAs. * * There are a couple of subtleties with this. If mlockall() is called multiple * times with different flags, the values do not necessarily stack. If mlockall * is called once including the MCL_FUTURE flag and then a second time without * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. 
*/ static int apply_mlockall_flags(int flags) { VMA_ITERATOR(vmi, current->mm, 0); struct vm_area_struct *vma, *prev = NULL; vm_flags_t to_add = 0; current->mm->def_flags &= ~VM_LOCKED_MASK; if (flags & MCL_FUTURE) { current->mm->def_flags |= VM_LOCKED; if (flags & MCL_ONFAULT) current->mm->def_flags |= VM_LOCKONFAULT; if (!(flags & MCL_CURRENT)) goto out; } if (flags & MCL_CURRENT) { to_add |= VM_LOCKED; if (flags & MCL_ONFAULT) to_add |= VM_LOCKONFAULT; } for_each_vma(vmi, vma) { int error; vm_flags_t newflags; newflags = vma->vm_flags & ~VM_LOCKED_MASK; newflags |= to_add; error = mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end, newflags); /* Ignore errors, but prev needs fixing up. */ if (error) prev = vma; cond_resched(); } out: return 0; } SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret; if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || flags == MCL_ONFAULT) return -EINVAL; if (!can_do_mlock()) return -EPERM; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = -ENOMEM; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = apply_mlockall_flags(flags); mmap_write_unlock(current->mm); if (!ret && (flags & MCL_CURRENT)) mm_populate(0, TASK_SIZE); return ret; } SYSCALL_DEFINE0(munlockall) { int ret; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = apply_mlockall_flags(0); mmap_write_unlock(current->mm); return ret; } /* * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB * shm segments) get accounted against the user_struct instead. */ static DEFINE_SPINLOCK(shmlock_user_lock); int user_shm_lock(size_t size, struct ucounts *ucounts) { unsigned long lock_limit, locked; long memlock; int allowed = 0; locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK); if (lock_limit != RLIM_INFINITY) lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); goto out; } if (!get_ucounts(ucounts)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); allowed = 0; goto out; } allowed = 1; out: spin_unlock(&shmlock_user_lock); return allowed; } void user_shm_unlock(size_t size, struct ucounts *ucounts) { spin_lock(&shmlock_user_lock); dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); spin_unlock(&shmlock_user_lock); put_ucounts(ucounts); }
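The syscall entry points above (mlock, mlock2, munlock, mlockall) are easiest to see from userspace. Below is a minimal sketch, assuming a Linux system that provides SYS_mlock2: it locks an anonymous mapping lazily with MLOCK_ONFAULT (pages are charged against RLIMIT_MEMLOCK, but each page is only mlocked as it faults in), touches the pages, then unlocks them. The raw syscall() form is used in case the libc lacks an mlock2() wrapper; error handling is kept minimal.

/*
 * Illustrative userspace sketch, not part of the kernel source above.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

#ifndef MLOCK_ONFAULT
#define MLOCK_ONFAULT 0x01	/* from <linux/mman.h>, if not exposed */
#endif

int main(void)
{
	size_t len = 4 * sysconf(_SC_PAGESIZE);
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Sets VM_LOCKED|VM_LOCKONFAULT on the vma; no up-front populate. */
	if (syscall(SYS_mlock2, buf, len, MLOCK_ONFAULT)) {
		perror("mlock2");
		return 1;
	}

	memset(buf, 0, len);	/* each fault now mlocks one page */

	if (munlock(buf, len)) {	/* clears VM_LOCKED again */
		perror("munlock");
		return 1;
	}
	munmap(buf, len);
	return 0;
}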
// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 virtual tunneling interface * * Copyright (C) 2013 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> * * Based on: * net/ipv6/ip6_tunnel.c */ #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/sockios.h> #include <linux/icmp.h> #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/route.h> #include <linux/rtnetlink.h> #include <linux/netfilter_ipv6.h> #include <linux/slab.h> #include <linux/hash.h> #include <linux/uaccess.h> #include <linux/atomic.h> #include <net/icmp.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/ip6_tunnel.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/netdev_lock.h> #include <linux/etherdevice.h> #define IP6_VTI_HASH_SIZE_SHIFT 5 #define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash =
ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } static int vti6_dev_init(struct net_device *dev); static void vti6_dev_setup(struct net_device *dev); static struct rtnl_link_ops vti6_link_ops __read_mostly; static unsigned int vti6_net_id __read_mostly; struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) /** * vti6_tnl_lookup - fetch tunnel matching the end-point addresses * @net: network namespace * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point * * Return: * tunnel matching given end-points if found, * else fallback tunnel if its device is up, * else %NULL **/ static struct ip6_tnl * vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); struct in6_addr any; for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } memset(&any, 0, sizeof(any)); hash = HASH(&any, local); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } hash = HASH(remote, &any); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } /** * vti6_tnl_bucket - get head of list matching given tunnel parameters * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: * vti6_tnl_bucket() returns the head of the list matching the * &struct in6_addr entries laddr and raddr in @p. 
* * Return: head of IPv6 tunnel list **/ static struct ip6_tnl __rcu ** vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; unsigned int h = 0; int prio = 0; if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } static void vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms); rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } static void vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; for (tp = vti6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static int vti6_tnl_create2(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); int err; dev->rtnl_link_ops = &vti6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); vti6_tnl_link(ip6n, t); return 0; out: return err; } static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; int err; if (p->name[0]) { if (!dev_valid_name(p->name)) goto failed; strscpy(name, p->name, IFNAMSIZ); } else { sprintf(name, "ip6_vti%%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (!dev) goto failed; dev_net_set(dev, net); t = netdev_priv(dev); t->parms = *p; t->net = dev_net(dev); err = vti6_tnl_create2(dev); if (err < 0) goto failed_free; return t; failed_free: free_netdev(dev); failed: return NULL; } /** * vti6_locate - find or create tunnel matching given parameters * @net: network namespace * @p: tunnel parameters * @create: != 0 if allowed to create new tunnel if no match found * * Description: * vti6_locate() first tries to locate an existing tunnel * based on @parms. If this is unsuccessful, but @create is set a new * tunnel device is created and registered for use. 
* * Return: * matching tunnel or NULL **/ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; struct ip6_tnl __rcu **tp; struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); for (tp = vti6_tnl_bucket(ip6n, p); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr)) { if (create) return NULL; return t; } } if (!create) return NULL; return vti6_tnl_create(net, p); } /** * vti6_dev_uninit - tunnel device uninitializer * @dev: the device to be destroyed * * Description: * vti6_dev_uninit() removes tunnel from its list **/ static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); netdev_put(dev, &t->dev_tracker); } static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); if (t) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; } if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { rcu_read_unlock(); goto discard; } ipv6h = ipv6_hdr(skb); if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { DEV_STATS_INC(t->dev, rx_dropped); rcu_read_unlock(); goto discard; } rcu_read_unlock(); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, encap_type); } rcu_read_unlock(); return -EINVAL; discard: kfree_skb(skb); return 0; } static int vti6_rcv(struct sk_buff *skb) { int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; return vti6_input_proto(skb, nexthdr, 0, 0); } static int vti6_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; struct net_device *dev; struct xfrm_state *x; const struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; if (!t) return 1; dev = t->dev; if (err) { DEV_STATS_INC(dev, rx_errors); DEV_STATS_INC(dev, rx_dropped); return 0; } x = xfrm_input_state(skb); inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); if (inner_mode == NULL) { XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINSTATEMODEERROR); return -EINVAL; } } family = inner_mode->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); skb->mark = orig_mark; if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); return 0; } /** * vti6_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device * @hdr: IPv6 header from the incoming packet * * Description: * Avoid trivial tunneling loop by checking that tunnel exit-point * doesn't match source of incoming packet. 
* * Return: * 1 if conflict, * 0 else **/ static inline bool vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) { return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } static bool vti6_state_check(const struct xfrm_state *x, const struct in6_addr *dst, const struct in6_addr *src) { xfrm_address_t *daddr = (xfrm_address_t *)dst; xfrm_address_t *saddr = (xfrm_address_t *)src; /* if there is no transform then this tunnel is not functional. * Or if the xfrm is not mode tunnel. */ if (!x || x->props.mode != XFRM_MODE_TUNNEL || x->props.family != AF_INET6) return false; if (ipv6_addr_any(dst)) return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6); if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6)) return false; return true; } /** * vti6_xmit - send a packet * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device * @fl: the flow informations for the xfrm_lookup **/ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct ip6_tnl *t = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; struct xfrm_state *x; int pkt_len = skb->len; int err = -1; int mtu; if (!dst) { switch (skb->protocol) { case htons(ETH_P_IP): { struct rtable *rt; fl->u.ip4.flowi4_oif = dev->ifindex; fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); if (IS_ERR(rt)) goto tx_err_link_failure; dst = &rt->dst; skb_dst_set(skb, dst); break; } case htons(ETH_P_IPV6): fl->u.ip6.flowi6_oif = dev->ifindex; fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); if (dst->error) { dst_release(dst); dst = NULL; goto tx_err_link_failure; } skb_dst_set(skb, dst); break; default: goto tx_err_link_failure; } } dst_hold(dst); dst = xfrm_lookup_route(t->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto tx_err_link_failure; } if (dst->flags & DST_XFRM_QUEUE) goto xmit; x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr, (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; tdev = dst_dev(dst); if (tdev == dev) { DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", t->parms.name); goto tx_err_dst_release; } mtu = dst_mtu(dst); if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } err = -EMSGSIZE; goto tx_err_dst_release; } xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = dst_dev(dst); err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = pkt_len; iptunnel_xmit_stats(dev, err); return 0; tx_err_link_failure: DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); return err; } static netdev_tx_t vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct flowi fl; int ret; if (!pskb_inet_may_pull(skb)) goto tx_err; memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IPV6): if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; memset(IP6CB(skb), 0, 
sizeof(*IP6CB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; default: goto tx_err; } /* override mark with tunnel output key */ fl.flowi_mark = be32_to_cpu(t->parms.o_key); ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __be32 spi; __u32 mark; struct xfrm_state *x; struct ip6_tnl *t; struct ip_esp_hdr *esph; struct ip_auth_hdr *ah; struct ip_comp_hdr *ipch; struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; int protocol = iph->nexthdr; t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); if (!t) return -1; mark = be32_to_cpu(t->parms.o_key); switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); spi = esph->spi; break; case IPPROTO_AH: ah = (struct ip_auth_hdr *)(skb->data + offset); spi = ah->spi; break; case IPPROTO_COMP: ipch = (struct ip_comp_hdr *)(skb->data + offset); spi = htonl(ntohs(ipch->cpi)); break; default: return 0; } if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) { struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct net_device *tdev = NULL; int mtu; __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; if (keep_mtu && dev->mtu) { WRITE_ONCE(dev->mtu, clamp(dev->mtu, dev->min_mtu, dev->max_mtu)); return; } if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, NULL, strict); if (rt) tdev = rt->dst.dev; ip6_rt_put(rt); } if (!tdev && p->link) tdev = __dev_get_by_index(t->net, p->link); if (tdev) mtu = tdev->mtu - sizeof(struct ipv6hdr); else mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr); dev->mtu = max_t(int, mtu, IPV4_MIN_MTU); } /** * vti6_tnl_change - update the tunnel parameters * @t: tunnel to be changed * @p: tunnel configuration parameters * @keep_mtu: MTU was set from userspace, don't re-compute it * * Description: * vti6_tnl_change() updates the tunnel parameters **/ static int vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, bool keep_mtu) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; t->parms.link = p->link; t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; t->parms.proto = p->proto; t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); vti6_link_config(t, keep_mtu); return 0; } static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm 
*p, bool keep_mtu) { struct net *net = dev_net(t->dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; vti6_tnl_unlink(ip6n, t); synchronize_net(); err = vti6_tnl_change(t, p, keep_mtu); vti6_tnl_link(ip6n, t); netdev_state_change(t->dev); return err; } static void vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u) { p->laddr = u->laddr; p->raddr = u->raddr; p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; p->proto = u->proto; memcpy(p->name, u->name, sizeof(u->name)); } static void vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) { u->laddr = p->laddr; u->raddr = p->raddr; u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; if (u->i_key) u->i_flags |= GRE_KEY; if (u->o_key) u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); } /** * vti6_siocdevprivate - configure vti6 tunnels from userspace * @dev: virtual device associated with tunnel * @ifr: unused * @data: parameters passed from userspace * @cmd: command to be performed * * Description: * vti6_siocdevprivate() is used for managing vti6 tunnels * from userspace. * * The possible commands are the following: * %SIOCGETTUNNEL: get tunnel parameters for device * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters * %SIOCCHGTUNNEL: change tunnel parameters to those given * %SIOCDELTUNNEL: delete tunnel * * The fallback device "ip6_vti0", created during module * initialization, can be used for creating other tunnel devices. * * Return: * 0 on success, * %-EFAULT if unable to copy data to or from userspace, * %-EPERM if current process hasn't %CAP_NET_ADMIN set * %-EINVAL if passed tunnel parameters are invalid, * %-EEXIST if changing a tunnel's parameters would cause a conflict * %-ENODEV if attempting to change or delete a nonexisting device **/ static int vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; struct ip6_tnl *t = NULL; struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); memset(&p1, 0, sizeof(p1)); switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, 0); } else { memset(&p, 0, sizeof(p)); } if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != 0) break; vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { if (t) { if (t->dev != dev) { err = -EEXIST; break; } } else t = netdev_priv(dev); err = vti6_update(t, &p1, false); } if (t) { err = 0; vti6_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); break; case SIOCDELTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, 0); if (!t) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) break; dev = t->dev; } err = 0; unregister_netdevice(dev); break; default: err = -EINVAL; } return err; } static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_siocdevprivate = vti6_siocdevprivate, .ndo_get_iflink = ip6_tnl_get_iflink, }; /** * vti6_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel * * Description: * Initialize function pointers and device parameters **/ static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->type = ARPHRD_TUNNEL6; dev->min_mtu = IPV4_MIN_MTU; dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); } /** * vti6_dev_init_gen - general initializer for all tunnel devices * @dev: virtual device associated with tunnel **/ static inline int vti6_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; } /** * vti6_dev_init - initializer for all non fallback tunnel devices * @dev: virtual device associated with tunnel **/ static int vti6_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int err = vti6_dev_init_gen(dev); if (err) return err; vti6_link_config(t, true); return 0; } /** * vti6_fb_tnl_dev_init - initializer for fallback tunnel device * @dev: fallback device * * Return: 0 **/ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); t->net = net; t->parms.proto = IPPROTO_IPV6; rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } static int vti6_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { return 0; } static void vti6_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { memset(parms, 0, sizeof(*parms)); if (!data) return; if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); if (data[IFLA_VTI_LOCAL]) parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]); if (data[IFLA_VTI_IKEY]) parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); if (data[IFLA_VTI_OKEY]) parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); if (data[IFLA_VTI_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } static int vti6_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct nlattr **data = params->data; struct ip6_tnl *nt; struct net *net; net = params->link_net ? 
: dev_net(dev); nt = netdev_priv(dev); vti6_netlink_parms(data, &nt->parms); nt->parms.proto = IPPROTO_IPV6; nt->net = net; if (vti6_locate(net, &nt->parms, 0)) return -EEXIST; return vti6_tnl_create2(dev); } static void vti6_dellink(struct net_device *dev, struct list_head *head) { struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); if (dev != ip6n->fb_tnl_dev) unregister_netdevice_queue(dev, head); } static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *t; struct __ip6_tnl_parm p; struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) return -EINVAL; vti6_netlink_parms(data, &p); t = vti6_locate(net, &p, 0); if (t) { if (t->dev != dev) return -EEXIST; } else t = netdev_priv(dev); return vti6_update(t, &p, tb && tb[IFLA_MTU]); } static size_t vti6_get_size(const struct net_device *dev) { return /* IFLA_VTI_LINK */ nla_total_size(4) + /* IFLA_VTI_LOCAL */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VTI_REMOTE */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VTI_IKEY */ nla_total_size(4) + /* IFLA_VTI_OKEY */ nla_total_size(4) + /* IFLA_VTI_FWMARK */ nla_total_size(4) + 0; } static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) || nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_LINK] = { .type = NLA_U32 }, [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) }, [IFLA_VTI_IKEY] = { .type = NLA_U32 }, [IFLA_VTI_OKEY] = { .type = NLA_U32 }, [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vti6_link_ops __read_mostly = { .kind = "vti6", .maxtype = IFLA_VTI_MAX, .policy = vti6_policy, .priv_size = sizeof(struct ip6_tnl), .setup = vti6_dev_setup, .validate = vti6_validate, .newlink = vti6_newlink, .dellink = vti6_dellink, .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_exit_rtnl_net(struct net *net, struct list_head *list) { struct vti6_net *ip6n = net_generic(net, vti6_net_id); struct ip6_tnl *t; int h; for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, list); t = rtnl_net_dereference(net, t->next); } } t = rtnl_net_dereference(net, ip6n->tnls_wc[0]); if (t) unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) { struct vti6_net *ip6n = net_generic(net, vti6_net_id); struct ip6_tnl *t = NULL; int err; ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; if (!net_has_fallback_tunnels(net)) return 0; err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", NET_NAME_UNKNOWN, vti6_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops; err = 
vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) goto err_register; err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; t = netdev_priv(ip6n->fb_tnl_dev); strcpy(t->parms.name, ip6n->fb_tnl_dev->name); return 0; err_register: free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, .exit_rtnl = vti6_exit_rtnl_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { .handler = vti6_rcv, .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, }; static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { .handler = vti6_rcv, .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, }; static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .handler = vti6_rcv, .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, }; #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int vti6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; __be32 spi; saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); return vti6_input_proto(skb, IPPROTO_IPV6, spi, 0); } static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = { .handler = vti6_rcv_tunnel, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 0, }; static struct xfrm6_tunnel vti_ip6ip_handler __read_mostly = { .handler = vti6_rcv_tunnel, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 0, }; #endif /** * vti6_tunnel_init - register protocol and reserve needed resources * * Return: 0 on success **/ static int __init vti6_tunnel_init(void) { const char *msg; int err; msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) goto pernet_dev_failed; msg = "tunnel protocols"; err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) goto xfrm_proto_esp_failed; err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); if (err < 0) goto xfrm_proto_ah_failed; err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) msg = "ipv6 tunnel"; err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) goto vti_tunnel_ipv6_failed; err = xfrm6_tunnel_register(&vti_ip6ip_handler, AF_INET); if (err < 0) goto vti_tunnel_ip6ip_failed; #endif msg = "netlink interface"; err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; return 0; rtnl_link_failed: #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); vti_tunnel_ipv6_failed: #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm_proto_ah_failed: xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: pr_err("vti6 init: failed to register %s\n", msg); return err; } /** * vti6_tunnel_cleanup - free resources and unregister protocol **/ static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); 
xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); } module_init(vti6_tunnel_init); module_exit(vti6_tunnel_cleanup); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("vti6"); MODULE_ALIAS_NETDEV("ip6_vti0"); MODULE_AUTHOR("Steffen Klassert"); MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
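For reference, the bucket selection performed by HASH() and consumed by vti6_tnl_lookup() above can be reproduced in a few lines of userspace C. This is a sketch under stated assumptions, not the kernel code itself: addr_fold() mirrors ipv6_addr_hash() (XOR of the address's four 32-bit words) and the multiply-and-shift mirrors hash_32() with the kernel's 32-bit golden-ratio constant.

/*
 * Userspace sketch of vti6 hash-bucket selection; illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define HASH_SHIFT	5		/* IP6_VTI_HASH_SIZE_SHIFT */
#define GOLDEN_RATIO_32	0x61C88647u	/* constant used by hash_32() */

static uint32_t addr_fold(const struct in6_addr *a)
{
	uint32_t w[4];

	memcpy(w, a, sizeof(w));		/* 16 bytes -> four words */
	return w[0] ^ w[1] ^ w[2] ^ w[3];	/* like ipv6_addr_hash() */
}

static unsigned int bucket(const struct in6_addr *r, const struct in6_addr *l)
{
	uint32_t h = addr_fold(r) ^ addr_fold(l);

	/* hash_32(h, HASH_SHIFT): keep the top HASH_SHIFT bits */
	return (h * GOLDEN_RATIO_32) >> (32 - HASH_SHIFT);
}

int main(void)
{
	struct in6_addr remote, local;

	inet_pton(AF_INET6, "2001:db8::1", &remote);
	inet_pton(AF_INET6, "2001:db8::2", &local);
	printf("bucket = %u of %u\n", bucket(&remote, &local),
	       1u << HASH_SHIFT);
	return 0;
}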
// SPDX-License-Identifier: GPL-2.0-only /* * Persistent Storage - ramfs parts.
* * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com> */ #include <linux/module.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/list.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/ramfs.h> #include <linux/fs_parser.h> #include <linux/fs_context.h> #include <linux/sched.h> #include <linux/magic.h> #include <linux/pstore.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/cleanup.h> #include "internal.h" #define PSTORE_NAMELEN 64 static DEFINE_MUTEX(records_list_lock); static LIST_HEAD(records_list); static DEFINE_MUTEX(pstore_sb_lock); static struct super_block *pstore_sb; DEFINE_FREE(pstore_iput, struct inode *, if (_T) iput(_T)) struct pstore_private { struct list_head list; struct dentry *dentry; struct pstore_record *record; size_t total_size; }; struct pstore_ftrace_seq_data { const void *ptr; size_t off; size_t size; }; #define REC_SIZE sizeof(struct pstore_ftrace_record) static void free_pstore_private(struct pstore_private *private) { if (!private) return; if (private->record) { kvfree(private->record->buf); kfree(private->record->priv); kfree(private->record); } kfree(private); } DEFINE_FREE(pstore_private, struct pstore_private *, free_pstore_private(_T)); static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) { struct pstore_private *ps = s->private; struct pstore_ftrace_seq_data *data __free(kfree) = NULL; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; data->off = ps->total_size % REC_SIZE; data->off += *pos * REC_SIZE; if (data->off + REC_SIZE > ps->total_size) return NULL; return_ptr(data); } static void pstore_ftrace_seq_stop(struct seq_file *s, void *v) { kfree(v); } static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct pstore_private *ps = s->private; struct pstore_ftrace_seq_data *data = v; (*pos)++; data->off += REC_SIZE; if (data->off + REC_SIZE > ps->total_size) return NULL; return data; } static int pstore_ftrace_seq_show(struct seq_file *s, void *v) { struct pstore_private *ps = s->private; struct pstore_ftrace_seq_data *data = v; struct pstore_ftrace_record *rec; if (!data) return 0; rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off); seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n", pstore_ftrace_decode_cpu(rec), pstore_ftrace_read_timestamp(rec), rec->ip, rec->parent_ip, (void *)rec->ip, (void *)rec->parent_ip); return 0; } static const struct seq_operations pstore_ftrace_seq_ops = { .start = pstore_ftrace_seq_start, .next = pstore_ftrace_seq_next, .stop = pstore_ftrace_seq_stop, .show = pstore_ftrace_seq_show, }; static ssize_t pstore_file_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct seq_file *sf = file->private_data; struct pstore_private *ps = sf->private; if (ps->record->type == PSTORE_TYPE_FTRACE) return seq_read(file, userbuf, count, ppos); return simple_read_from_buffer(userbuf, count, ppos, ps->record->buf, ps->total_size); } static int pstore_file_open(struct inode *inode, struct file *file) { struct pstore_private *ps = inode->i_private; struct seq_file *sf; int err; const struct seq_operations *sops = NULL; if (ps->record->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; err = seq_open(file, sops); if (err < 0) return err; sf = file->private_data; sf->private = ps; return 0; } static loff_t pstore_file_llseek(struct file *file, loff_t off, int 
static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
{
	struct seq_file *sf = file->private_data;

	if (sf->op)
		return seq_lseek(file, off, whence);
	return default_llseek(file, off, whence);
}

static const struct file_operations pstore_file_operations = {
	.open		= pstore_file_open,
	.read		= pstore_file_read,
	.llseek		= pstore_file_llseek,
	.release	= seq_release,
};

/*
 * When a file is unlinked from our file system we call the
 * platform driver to erase the record from persistent store.
 */
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
	struct pstore_private *p = d_inode(dentry)->i_private;
	struct pstore_record *record = p->record;

	if (!record->psi->erase)
		return -EPERM;

	/* Make sure we can't race while removing this file. */
	scoped_guard(mutex, &records_list_lock) {
		if (!list_empty(&p->list))
			list_del_init(&p->list);
		else
			return -ENOENT;
		p->dentry = NULL;
	}

	scoped_guard(mutex, &record->psi->read_mutex)
		record->psi->erase(record);

	return simple_unlink(dir, dentry);
}

static void pstore_evict_inode(struct inode *inode)
{
	struct pstore_private *p = inode->i_private;

	clear_inode(inode);
	free_pstore_private(p);
}

static const struct inode_operations pstore_dir_inode_operations = {
	.lookup		= simple_lookup,
	.unlink		= pstore_unlink,
};

static struct inode *pstore_get_inode(struct super_block *sb)
{
	struct inode *inode = new_inode(sb);

	if (inode) {
		inode->i_ino = get_next_ino();
		simple_inode_init_ts(inode);
	}
	return inode;
}

enum {
	Opt_kmsg_bytes
};

static const struct fs_parameter_spec pstore_param_spec[] = {
	fsparam_u32	("kmsg_bytes",	Opt_kmsg_bytes),
	{}
};

struct pstore_context {
	unsigned int kmsg_bytes;
};

static int pstore_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct pstore_context *ctx = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, pstore_param_spec, param, &result);
	/* pstore has historically ignored invalid kmsg_bytes param */
	if (opt < 0)
		return 0;

	switch (opt) {
	case Opt_kmsg_bytes:
		ctx->kmsg_bytes = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * Display the mount options in /proc/mounts.
 */
static int pstore_show_options(struct seq_file *m, struct dentry *root)
{
	if (kmsg_bytes != CONFIG_PSTORE_DEFAULT_KMSG_BYTES)
		seq_printf(m, ",kmsg_bytes=%u", kmsg_bytes);
	return 0;
}

static int pstore_reconfigure(struct fs_context *fc)
{
	struct pstore_context *ctx = fc->fs_private;

	sync_filesystem(fc->root->d_sb);
	pstore_set_kmsg_bytes(ctx->kmsg_bytes);

	return 0;
}

static const struct super_operations pstore_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= inode_just_drop,
	.evict_inode	= pstore_evict_inode,
	.show_options	= pstore_show_options,
};

static struct dentry *psinfo_lock_root(void)
{
	struct dentry *root;

	guard(mutex)(&pstore_sb_lock);
	/*
	 * Having no backend is fine -- no records appear.
	 * Not being mounted is fine -- nothing to do.
	 */
	if (!psinfo || !pstore_sb)
		return NULL;

	root = pstore_sb->s_root;
	inode_lock_nested(d_inode(root), I_MUTEX_PARENT);

	return root;
}

int pstore_put_backend_records(struct pstore_info *psi)
{
	struct pstore_private *pos, *tmp;
	struct dentry *root;

	root = psinfo_lock_root();
	if (!root)
		return 0;

	scoped_guard(mutex, &records_list_lock) {
		list_for_each_entry_safe(pos, tmp, &records_list, list) {
			if (pos->record->psi == psi) {
				list_del_init(&pos->list);
				locked_recursive_removal(pos->dentry, NULL);
				pos->dentry = NULL;
			}
		}
	}

	inode_unlock(d_inode(root));

	return 0;
}
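This file leans heavily on the scope-based cleanup helpers from <linux/cleanup.h> (DEFINE_FREE(), __free(), no_free_ptr(), guard(), scoped_guard()). Outside the kernel the same technique can be modeled with the compiler's cleanup attribute (GCC/Clang). A minimal userspace sketch of the idea; free_buf() and __free_buf are illustrative stand-ins, not kernel names:

#include <stdio.h>
#include <stdlib.h>

/* Runs automatically when the annotated variable goes out of scope. */
static void free_buf(char **p)
{
	free(*p);
}

#define __free_buf __attribute__((cleanup(free_buf)))

static char *make_string(int fail)
{
	char *buf __free_buf = malloc(16);

	if (!buf || fail)
		return NULL;	/* buf is freed automatically on this early exit */

	/* Transfer ownership out of the scope, like no_free_ptr(). */
	char *ret = buf;
	buf = NULL;
	return ret;
}

int main(void)
{
	char *s = make_string(0);

	printf("%s\n", s ? "allocated" : "failed");
	free(s);
	return 0;
}

The payoff, here as in pstore_mkfile() above, is that every error path gets the cleanup for free and only the explicit ownership transfer keeps the object alive.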
/*
 * Make a regular file in the root directory of our file system.
 * Load it up with "size" bytes of data from "buf".
 * Set the mtime & ctime to the date that this record was originally stored.
 */
int pstore_mkfile(struct dentry *root, struct pstore_record *record)
{
	struct dentry		*dentry;
	struct inode		*inode __free(pstore_iput) = NULL;
	char			name[PSTORE_NAMELEN];
	struct pstore_private	*private __free(pstore_private) = NULL, *pos;
	size_t			size = record->size + record->ecc_notice_size;

	if (WARN_ON(!inode_is_locked(d_inode(root))))
		return -EINVAL;

	guard(mutex)(&records_list_lock);

	/* Skip records that are already present in the filesystem. */
	list_for_each_entry(pos, &records_list, list) {
		if (pos->record->type == record->type &&
		    pos->record->id == record->id &&
		    pos->record->psi == record->psi)
			return -EEXIST;
	}

	inode = pstore_get_inode(root->d_sb);
	if (!inode)
		return -ENOMEM;
	inode->i_mode = S_IFREG | 0444;
	inode->i_fop = &pstore_file_operations;
	scnprintf(name, sizeof(name), "%s-%s-%llu%s",
		  pstore_type_to_name(record->type),
		  record->psi->name, record->id,
		  record->compressed ? ".enc.z" : "");

	private = kzalloc(sizeof(*private), GFP_KERNEL);
	if (!private)
		return -ENOMEM;

	dentry = d_alloc_name(root, name);
	if (!dentry)
		return -ENOMEM;

	private->dentry = dentry;	// borrowed
	private->record = record;
	inode->i_size = private->total_size = size;
	inode->i_private = private;

	if (record->time.tv_sec)
		inode_set_mtime_to_ts(inode,
				      inode_set_ctime_to_ts(inode, record->time));

	d_make_persistent(dentry, no_free_ptr(inode));
	dput(dentry);

	list_add(&(no_free_ptr(private))->list, &records_list);

	return 0;
}

/*
 * Read all the records from the persistent store. Create
 * files in our filesystem. Don't warn about -EEXIST errors
 * when we are re-scanning the backing store looking to add new
 * error records.
 */
void pstore_get_records(int quiet)
{
	struct dentry *root;

	root = psinfo_lock_root();
	if (!root)
		return;

	pstore_get_backend_records(psinfo, root, quiet);
	inode_unlock(d_inode(root));
}

static int pstore_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct pstore_context *ctx = fc->fs_private;
	struct inode *inode;

	sb->s_maxbytes		= MAX_LFS_FILESIZE;
	sb->s_blocksize		= PAGE_SIZE;
	sb->s_blocksize_bits	= PAGE_SHIFT;
	sb->s_magic		= PSTOREFS_MAGIC;
	sb->s_op		= &pstore_ops;
	sb->s_time_gran		= 1;

	pstore_set_kmsg_bytes(ctx->kmsg_bytes);

	inode = pstore_get_inode(sb);
	if (inode) {
		inode->i_mode = S_IFDIR | 0750;
		inode->i_op = &pstore_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		inc_nlink(inode);
	}
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	scoped_guard(mutex, &pstore_sb_lock)
		pstore_sb = sb;

	pstore_get_records(0);

	return 0;
}

static int pstore_get_tree(struct fs_context *fc)
{
	if (fc->root)
		return pstore_reconfigure(fc);

	return get_tree_single(fc, pstore_fill_super);
}

static void pstore_free_fc(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations pstore_context_ops = {
	.parse_param	= pstore_parse_param,
	.get_tree	= pstore_get_tree,
	.reconfigure	= pstore_reconfigure,
	.free		= pstore_free_fc,
};

static void pstore_kill_sb(struct super_block *sb)
{
	guard(mutex)(&pstore_sb_lock);
	WARN_ON(pstore_sb && pstore_sb != sb);

	kill_anon_super(sb);
	pstore_sb = NULL;

	guard(mutex)(&records_list_lock);
	INIT_LIST_HEAD(&records_list);
}
static int pstore_init_fs_context(struct fs_context *fc)
{
	struct pstore_context *ctx;

	ctx = kzalloc(sizeof(struct pstore_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * Global kmsg_bytes is initialized to default, and updated
	 * every time we (re)mount the single-sb filesystem with the
	 * option specified.
	 */
	ctx->kmsg_bytes = kmsg_bytes;

	fc->fs_private = ctx;
	fc->ops = &pstore_context_ops;

	return 0;
}

static struct file_system_type pstore_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pstore",
	.kill_sb	= pstore_kill_sb,
	.init_fs_context = pstore_init_fs_context,
	.parameters	= pstore_param_spec,
};

int __init pstore_init_fs(void)
{
	int err;

	/* Create a convenient mount point for people to access pstore */
	err = sysfs_create_mount_point(fs_kobj, "pstore");
	if (err)
		goto out;

	err = register_filesystem(&pstore_fs_type);
	if (err < 0)
		sysfs_remove_mount_point(fs_kobj, "pstore");

out:
	return err;
}

void __exit pstore_exit_fs(void)
{
	unregister_filesystem(&pstore_fs_type);
	sysfs_remove_mount_point(fs_kobj, "pstore");
}
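The mount-option handling (pstore_parse_param(), pstore_reconfigure()) and the record naming scheme in pstore_mkfile() can be exercised from userspace. A minimal sketch, assuming a kernel with pstore and a registered backend such as ramoops; the paths and the option value are illustrative only:

#include <dirent.h>
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Remount with a non-default kmsg_bytes; parsed by
	 * pstore_parse_param() and applied via pstore_reconfigure(). */
	if (mount("pstore", "/sys/fs/pstore", "pstore",
		  MS_REMOUNT, "kmsg_bytes=16384") != 0)
		perror("mount");

	/* Records appear as "<type>-<backend>-<id>", e.g. "dmesg-ramoops-0",
	 * with ".enc.z" appended for compressed records (see pstore_mkfile()). */
	DIR *dir = opendir("/sys/fs/pstore");
	if (!dir)
		return 1;

	struct dirent *de;
	while ((de = readdir(dir)) != NULL) {
		if (de->d_name[0] != '.')
			printf("%s\n", de->d_name);
	}
	closedir(dir);
	return 0;
}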
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2022 Linutronix GmbH, John Ogness
// Copyright (C) 2022 Intel, Thomas Gleixner

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/kdb.h>
#include <linux/kthread.h>
#include <linux/minmax.h>
#include <linux/panic.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>

#include "internal.h"
#include "printk_ringbuffer.h"

/*
 * Printk console printing implementation for consoles which do not depend
 * on the legacy style console_lock mechanism.
 *
 * The state of the console is maintained in the "nbcon_state" atomic
 * variable.
 *
 * The console is locked when:
 *
 * - The 'prio' field contains the priority of the context that owns the
 *   console. Only higher priority contexts are allowed to take over the
 *   lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked.
 *
 * - The 'cpu' field denotes on which CPU the console is locked. It is used
 *   to prevent busy waiting on the same CPU. Also it informs the lock owner
 *   that it has lost the lock in a more complex scenario when the lock was
 *   taken over by a higher priority context, released, and taken on another
 *   CPU with the same priority as the interrupted owner.
 *
 * The acquire mechanism uses a few more fields:
 *
 * - The 'req_prio' field is used by the handover approach to make the
 *   current owner aware that there is a context with a higher priority
 *   waiting for the friendly handover.
 *
 * - The 'unsafe' field allows to take over the console in a safe way in the
 *   middle of emitting a message.
 *   The field is set only when accessing some shared resources or when the
 *   console device is manipulated. It can be cleared, for example, after
 *   emitting one character when the console device is in a consistent state.
 *
 * - The 'unsafe_takeover' field is set when a hostile takeover took the
 *   console in an unsafe state. The console will stay in the unsafe state
 *   until re-initialized.
 *
 * The acquire mechanism uses three approaches:
 *
 * 1) Direct acquire when the console is not owned or is owned by a lower
 *    priority context and is in a safe state.
 *
 * 2) Friendly handover mechanism uses a request/grant handshake. It is used
 *    when the current owner has lower priority and the console is in an
 *    unsafe state.
 *
 *    The requesting context:
 *
 *	a) Sets its priority into the 'req_prio' field.
 *
 *	b) Waits (with a timeout) for the owning context to unlock the
 *	   console.
 *
 *	c) Takes the lock and clears the 'req_prio' field.
 *
 *    The owning context:
 *
 *	a) Observes the 'req_prio' field set on exit from the unsafe
 *	   console state.
 *
 *	b) Gives up console ownership by clearing the 'prio' field.
 *
 * 3) Unsafe hostile takeover allows to take over the lock even when the
 *    console is in an unsafe state. It is used only in panic() by the final
 *    attempt to flush consoles in a try and hope mode.
 *
 *    Note that separate record buffers are used in panic(). As a result,
 *    the messages can be read and formatted without any risk even after
 *    using the hostile takeover in unsafe state.
 *
 * The release function simply clears the 'prio' field.
 *
 * All operations on @console::nbcon_state are atomic cmpxchg based to
 * handle concurrency.
 *
 * The acquire/release functions implement only minimal policies:
 *
 * - Preference for higher priority contexts.
 * - Protection of the panic CPU.
 *
 * All other policy decisions must be made at the call sites:
 *
 * - What is marked as an unsafe section.
 * - Whether to spin-wait if there is already an owner and the console is
 *   in an unsafe state.
 * - Whether to attempt an unsafe hostile takeover.
 *
 * The design allows to implement the well known:
 *
 *	acquire()
 *	output_one_printk_record()
 *	release()
 *
 * The output of one printk record might be interrupted with a higher priority
 * context. The new owner is supposed to reprint the entire interrupted record
 * from scratch.
 */
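The direct-acquire policy described above is, at its core, a priority-gated cmpxchg loop on a single atomic word. A self-contained userspace toy model using C11 atomics; the field layout, the function names, and the omission of handover/unsafe handling are simplifications for illustration, not the kernel's actual encoding:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* prio in the low 8 bits, owning "cpu" in the next 8 bits; 0 == unowned */
static _Atomic uint32_t state;

static bool try_acquire_direct(uint8_t prio, uint8_t cpu)
{
	uint32_t cur = atomic_load(&state);

	do {
		/* Only a strictly higher priority may take over. */
		if (prio <= (cur & 0xff))
			return false;
	} while (!atomic_compare_exchange_weak(&state, &cur,
					       (uint32_t)prio | ((uint32_t)cpu << 8)));
	return true;
}

static void release(uint8_t prio, uint8_t cpu)
{
	uint32_t cur = (uint32_t)prio | ((uint32_t)cpu << 8);

	/* Release only if we still own it; a takeover may have occurred. */
	atomic_compare_exchange_strong(&state, &cur, 0);
}

int main(void)
{
	printf("normal acquire: %d\n", try_acquire_direct(1, 0));	/* 1: acquired */
	printf("equal priority: %d\n", try_acquire_direct(1, 1));	/* 0: refused */
	printf("panic takeover: %d\n", try_acquire_direct(3, 1));	/* 1: taken over */
	release(3, 1);
	return 0;
}

Note how release() also uses a compare-exchange rather than a plain store: just as in nbcon_context_release() below, the owner must tolerate having already lost the lock to a higher priority context.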
/* Counter of active nbcon emergency contexts. */
static atomic_t nbcon_cpu_emergency_cnt = ATOMIC_INIT(0);

/**
 * nbcon_state_set - Helper function to set the console state
 * @con:	Console to update
 * @new:	The new state to write
 *
 * Only to be used when the console is not yet or no longer visible in the
 * system. Otherwise use nbcon_state_try_cmpxchg().
 */
static inline void nbcon_state_set(struct console *con, struct nbcon_state *new)
{
	atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom);
}

/**
 * nbcon_state_read - Helper function to read the console state
 * @con:	Console to read
 * @state:	The state to store the result
 */
static inline void nbcon_state_read(struct console *con, struct nbcon_state *state)
{
	state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state));
}

/**
 * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state
 * @con:	Console to update
 * @cur:	Old/expected state
 * @new:	New state
 *
 * Return: True on success. False on fail and @cur is updated.
 */
static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur,
					   struct nbcon_state *new)
{
	return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
}

/**
 * nbcon_seq_read - Read the current console sequence
 * @con:	Console to read the sequence of
 *
 * Return:	Sequence number of the next record to print on @con.
 */
u64 nbcon_seq_read(struct console *con)
{
	unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));

	return __ulseq_to_u64seq(prb, nbcon_seq);
}

/**
 * nbcon_seq_force - Force console sequence to a specific value
 * @con:	Console to work on
 * @seq:	Sequence number value to set
 *
 * Only to be used during init (before registration) or in extreme situations
 * (such as panic with CONSOLE_REPLAY_ALL).
 */
void nbcon_seq_force(struct console *con, u64 seq)
{
	/*
	 * If the specified record no longer exists, the oldest available record
	 * is chosen. This is especially important on 32bit systems because only
	 * the lower 32 bits of the sequence number are stored. The upper 32 bits
	 * are derived from the sequence numbers available in the ringbuffer.
	 */
	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));

	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
}

/**
 * nbcon_seq_try_update - Try to update the console sequence number
 * @ctxt:	Pointer to an acquire context that contains
 *		all information about the acquire mode
 * @new_seq:	The new sequence number to set
 *
 * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to
 * the 64bit value). This could be a different value than @new_seq if
 * nbcon_seq_force() was used or the current context no longer owns the
 * console. In the latter case, it will stop printing anyway.
 */
static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
{
	unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
	struct console *con = ctxt->console;

	if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
				    __u64seq_to_ulseq(new_seq))) {
		ctxt->seq = new_seq;
	} else {
		ctxt->seq = nbcon_seq_read(con);
	}
}
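On 32-bit kernels only the low 32 bits of the sequence fit into the atomic long, and helpers like __ulseq_to_u64seq() recover the full value relative to a recent ringbuffer sequence. A freestanding sketch of that folding idea; the arithmetic here illustrates the technique and assumes the reference is itself a large 64-bit value, it is not a copy of the kernel helper:

#include <stdint.h>
#include <stdio.h>

/*
 * Expand a truncated 32-bit sequence to 64 bits by picking the value
 * whose low bits match and which is closest to a recent reference.
 */
static uint64_t seq32_to_seq64(uint32_t lo, uint64_t recent)
{
	uint64_t val = (recent & 0xffffffff00000000ull) | lo;

	if (val > recent && val - recent > 0x80000000ull &&
	    val >= 0x100000000ull)
		val -= 0x100000000ull;	/* lo belongs to the previous epoch */
	else if (val < recent && recent - val > 0x80000000ull)
		val += 0x100000000ull;	/* lo belongs to the next epoch */

	return val;
}

int main(void)
{
	/* The reference just crossed an epoch boundary; a slightly older
	 * truncated value must still map below it. */
	uint64_t recent = 0x100000005ull;

	printf("%llx\n", (unsigned long long)seq32_to_seq64(0xfffffffe, recent));
	/* prints fffffffe */
	return 0;
}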
/**
 * nbcon_context_try_acquire_direct - Try to acquire directly
 * @ctxt:		The context of the caller
 * @cur:		The current console state
 * @is_reacquire:	This acquire is a reacquire
 *
 * Acquire the console when it is released. Also acquire the console when
 * the current owner has a lower priority and the console is in a safe state.
 *
 * Return:	0 on success. Otherwise, an error code on failure. Also @cur
 *		is updated to the latest state when failed to modify it.
 *
 * Errors:
 *
 *	-EPERM:		A panic is in progress and this is neither the panic
 *			CPU nor is this a reacquire. Or the current owner or
 *			waiter has the same or higher priority. No acquire
 *			method can be successful in these cases.
 *
 *	-EBUSY:		The current owner has a lower priority but the console
 *			is in an unsafe state. The caller should try using
 *			the handover acquire method.
 */
static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
					    struct nbcon_state *cur, bool is_reacquire)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state new;

	do {
		/*
		 * Panic does not imply that the console is owned. However,
		 * since all non-panic CPUs are stopped during panic(), it
		 * is safer to have them avoid gaining console ownership.
		 *
		 * One exception is when kdb has locked for printing on this CPU.
		 *
		 * Second exception is a reacquire (and an unsafe takeover
		 * has not previously occurred), which is allowed to attempt
		 * a direct acquire in panic. This gives console drivers an
		 * opportunity to perform any necessary cleanup if they were
		 * interrupted by the panic CPU while printing.
		 */
		if (panic_on_other_cpu() && !kdb_printf_on_this_cpu() &&
		    (!is_reacquire || cur->unsafe_takeover)) {
			return -EPERM;
		}

		if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
			return -EPERM;

		if (cur->unsafe)
			return -EBUSY;

		/*
		 * The console should never be safe for a direct acquire
		 * if an unsafe hostile takeover has ever happened.
		 */
		WARN_ON_ONCE(cur->unsafe_takeover);

		new.atom = cur->atom;
		new.prio = ctxt->prio;
		new.req_prio = NBCON_PRIO_NONE;
		new.unsafe = cur->unsafe_takeover;
		new.cpu = cpu;
	} while (!nbcon_state_try_cmpxchg(con, cur, &new));

	return 0;
}

static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
{
	/*
	 * The request context is well defined by the @req_prio because:
	 *
	 * - Only a context with a priority higher than the owner can become
	 *   a waiter.
	 * - Only a context with a priority higher than the waiter can
	 *   directly take over the request.
	 * - There are only three priorities.
	 * - Only one CPU is allowed to request PANIC priority.
	 * - Lower priorities are ignored during panic() until reboot.
	 *
	 * As a result, the following scenario is *not* possible:
	 *
	 * 1. This context is currently a waiter.
	 * 2. Another context with a higher priority than this context
	 *    directly takes ownership.
	 * 3. The higher priority context releases the ownership.
	 * 4. Another lower priority context takes the ownership.
	 * 5. Another context with the same priority as this context
	 *    creates a request and starts waiting.
	 *
	 * Event #1 implies this context is EMERGENCY.
	 * Event #2 implies the new context is PANIC.
	 * Event #3 occurs when panic() has flushed the console.
	 * Event #4 occurs when a non-panic CPU reacquires.
	 * Event #5 is not possible due to the panic_on_other_cpu() check
	 *	    in nbcon_context_try_acquire_handover().
	 */
	return (cur->req_prio == expected_prio);
}

/**
 * nbcon_context_try_acquire_requested - Try to acquire after having
 *					 requested a handover
 * @ctxt:	The context of the caller
 * @cur:	The current console state
 *
 * This is a helper function for nbcon_context_try_acquire_handover().
 * It is called when the console is in an unsafe state. The current
 * owner will release the console on exit from the unsafe region.
 *
 * Return:	0 on success and @cur is updated to the new console state.
 *		Otherwise an error code on failure.
 *
 * Errors:
 *
 *	-EPERM:		A panic is in progress and this is not the panic CPU
 *			or this context is no longer the waiter.
 *
 *	-EBUSY:		The console is still locked. The caller should
 *			continue waiting.
 *
 * Note: The caller must still remove the request when an error has occurred
 *	 except when this context is no longer the waiter.
 */
static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
					       struct nbcon_state *cur)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state new;

	/* Note that the caller must still remove the request! */
	if (panic_on_other_cpu())
		return -EPERM;

	/*
	 * Note that the waiter will also change if there was an unsafe
	 * hostile takeover.
	 */
	if (!nbcon_waiter_matches(cur, ctxt->prio))
		return -EPERM;

	/* If still locked, caller should continue waiting. */
	if (cur->prio != NBCON_PRIO_NONE)
		return -EBUSY;

	/*
	 * The previous owner should have never released ownership
	 * in an unsafe region.
	 */
	WARN_ON_ONCE(cur->unsafe);

	new.atom = cur->atom;
	new.prio = ctxt->prio;
	new.req_prio = NBCON_PRIO_NONE;
	new.unsafe = cur->unsafe_takeover;
	new.cpu = cpu;

	if (!nbcon_state_try_cmpxchg(con, cur, &new)) {
		/*
		 * The acquire could fail only when it has been taken
		 * over by a higher priority context.
		 */
		WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio));
		return -EPERM;
	}

	/* Handover success. This context now owns the console. */
	return 0;
}

/**
 * nbcon_context_try_acquire_handover - Try to acquire via handover
 * @ctxt:	The context of the caller
 * @cur:	The current console state
 *
 * The function must be called only when the context has higher priority
 * than the current owner and the console is in an unsafe state.
 * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY.
 *
 * The function sets the "req_prio" field to make the current owner aware of
 * the request. Then it waits until the current owner releases the console,
 * or an even higher priority context takes over the request, or the timeout
 * expires.
 *
 * The current owner checks the "req_prio" field on exit from the unsafe
 * region and releases the console. It does not touch the "req_prio" field
 * so that the console stays reserved for the waiter.
 *
 * Return:	0 on success. Otherwise, an error code on failure. Also @cur
 *		is updated to the latest state when failed to modify it.
 *
 * Errors:
 *
 *	-EPERM:		A panic is in progress and this is not the panic CPU.
 *			Or a higher priority context has taken over the
 *			console or the handover request.
 *
 *	-EBUSY:		The current owner is on the same CPU so that the
 *			handshake could not work. Or the current owner is not
 *			willing to wait (zero timeout). Or the console does
 *			not enter the safe state before the timeout passed.
 *			The caller might still use the unsafe hostile takeover
 *			when allowed.
 *
 *	-EAGAIN:	@cur has changed when creating the handover request.
 *			The caller should retry with direct acquire.
 */
static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
					      struct nbcon_state *cur)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state new;
	int timeout;
	int request_err = -EBUSY;

	/*
	 * Check that the handover is called when the direct acquire failed
	 * with -EBUSY.
	 */
	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
	WARN_ON_ONCE(!cur->unsafe);

	/*
	 * Panic does not imply that the console is owned. However, it
	 * is critical that non-panic CPUs during panic are unable to
	 * wait for a handover in order to satisfy the assumptions of
	 * nbcon_waiter_matches(). In particular, the assumption that
	 * lower priorities are ignored during panic.
	 */
	if (panic_on_other_cpu())
		return -EPERM;

	/* Handover is not possible on the same CPU. */
	if (cur->cpu == cpu)
		return -EBUSY;

	/*
	 * Console stays unsafe after an unsafe takeover until re-initialized.
	 * Waiting is not going to help in this case.
	 */
	if (cur->unsafe_takeover)
		return -EBUSY;

	/* Is the caller willing to wait? */
	if (ctxt->spinwait_max_us == 0)
		return -EBUSY;

	/*
	 * Setup a request for the handover. The caller should try to acquire
	 * the console directly when the current state has been modified.
	 */
	new.atom = cur->atom;
	new.req_prio = ctxt->prio;
	if (!nbcon_state_try_cmpxchg(con, cur, &new))
		return -EAGAIN;

	cur->atom = new.atom;

	/* Wait until there is no owner and then acquire the console. */
	for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) {
		/* On successful acquire, this request is cleared.
		 */
		request_err = nbcon_context_try_acquire_requested(ctxt, cur);
		if (!request_err)
			return 0;

		/*
		 * If the acquire should be aborted, it must be ensured
		 * that the request is removed before returning to caller.
		 */
		if (request_err == -EPERM)
			break;

		udelay(1);

		/* Re-read the state because some time has passed. */
		nbcon_state_read(con, cur);
	}

	/* Timed out or aborted. Carefully remove handover request. */
	do {
		/*
		 * No need to remove request if there is a new waiter. This
		 * can only happen if a higher priority context has taken over
		 * the console or the handover request.
		 */
		if (!nbcon_waiter_matches(cur, ctxt->prio))
			return -EPERM;

		/* Unset request for handover. */
		new.atom = cur->atom;
		new.req_prio = NBCON_PRIO_NONE;
		if (nbcon_state_try_cmpxchg(con, cur, &new)) {
			/*
			 * Request successfully unset. Report failure of
			 * acquiring via handover.
			 */
			cur->atom = new.atom;
			return request_err;
		}

		/*
		 * Unable to remove request. Try to acquire in case
		 * the owner has released the lock.
		 */
	} while (nbcon_context_try_acquire_requested(ctxt, cur));

	/* Lucky timing. The acquire succeeded while removing the request. */
	return 0;
}

/**
 * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover
 * @ctxt:	The context of the caller
 * @cur:	The current console state
 *
 * Acquire the console even in the unsafe state.
 *
 * It can be permitted by setting the 'allow_unsafe_takeover' field only
 * by the final attempt to flush messages in panic().
 *
 * Return:	0 on success. -EPERM when not allowed by the context.
 */
static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt,
					     struct nbcon_state *cur)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state new;

	if (!ctxt->allow_unsafe_takeover)
		return -EPERM;

	/* Ensure caller is allowed to perform unsafe hostile takeovers. */
	if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC))
		return -EPERM;

	/*
	 * Check that try_acquire_direct() and try_acquire_handover() returned
	 * -EBUSY in the right situation.
	 */
	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
	WARN_ON_ONCE(cur->unsafe != true);

	do {
		new.atom = cur->atom;
		new.cpu = cpu;
		new.prio = ctxt->prio;
		new.unsafe |= cur->unsafe_takeover;
		new.unsafe_takeover |= cur->unsafe;
	} while (!nbcon_state_try_cmpxchg(con, cur, &new));

	return 0;
}

static struct printk_buffers panic_nbcon_pbufs;

/**
 * nbcon_context_try_acquire - Try to acquire nbcon console
 * @ctxt:		The context of the caller
 * @is_reacquire:	This acquire is a reacquire
 *
 * Context:	Under @ctxt->con->device_lock() or local_irq_save().
 * Return:	True if the console was acquired. False otherwise.
 *
 * If the caller allowed an unsafe hostile takeover, on success the
 * caller should check the current console state to see if it is
 * in an unsafe state. Otherwise, on success the caller may assume
 * the console is not in an unsafe state.
 */
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
{
	struct console *con = ctxt->console;
	struct nbcon_state cur;
	int err;

	nbcon_state_read(con, &cur);
try_again:
	err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire);
	if (err != -EBUSY)
		goto out;

	err = nbcon_context_try_acquire_handover(ctxt, &cur);
	if (err == -EAGAIN)
		goto try_again;
	if (err != -EBUSY)
		goto out;

	err = nbcon_context_try_acquire_hostile(ctxt, &cur);
out:
	if (err)
		return false;

	/* Acquire succeeded. */

	/* Assign the appropriate buffer for this context.
	 */
	if (panic_on_this_cpu())
		ctxt->pbufs = &panic_nbcon_pbufs;
	else
		ctxt->pbufs = con->pbufs;

	/* Set the record sequence for this context to print. */
	ctxt->seq = nbcon_seq_read(ctxt->console);

	return true;
}

static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu,
				int expected_prio)
{
	/*
	 * A similar function, nbcon_waiter_matches(), only deals with
	 * EMERGENCY and PANIC priorities. However, this function must also
	 * deal with the NORMAL priority, which requires additional checks
	 * and constraints.
	 *
	 * For the case where preemption and interrupts are disabled, it is
	 * enough to also verify that the owning CPU has not changed.
	 *
	 * For the case where preemption or interrupts are enabled, an
	 * external synchronization method *must* be used. In particular,
	 * the driver-specific locking mechanism used in device_lock()
	 * (including disabling migration) should be used. It prevents
	 * scenarios such as:
	 *
	 * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and
	 *    is scheduled out.
	 * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
	 *    and releases it.
	 * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
	 *    and is scheduled out.
	 * 4. [Task A] gets running on [CPU X] and sees that the console is
	 *    still owned by a task on [CPU X] with NBCON_PRIO_NORMAL. Thus
	 *    [Task A] thinks it is the owner when it is not.
	 */
	if (cur->prio != expected_prio)
		return false;

	if (cur->cpu != expected_cpu)
		return false;

	return true;
}

/**
 * nbcon_context_release - Release the console
 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 */
static void nbcon_context_release(struct nbcon_context *ctxt)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state cur;
	struct nbcon_state new;

	nbcon_state_read(con, &cur);

	do {
		if (!nbcon_owner_matches(&cur, cpu, ctxt->prio))
			break;

		new.atom = cur.atom;
		new.prio = NBCON_PRIO_NONE;

		/*
		 * If @unsafe_takeover is set, it is kept set so that
		 * the state remains permanently unsafe.
		 */
		new.unsafe |= cur.unsafe_takeover;
	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));

	ctxt->pbufs = NULL;
}

/**
 * nbcon_context_can_proceed - Check whether ownership can proceed
 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 * @cur:	The current console state
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * Must be invoked when entering the unsafe state to make sure that it still
 * owns the lock. Also must be invoked when exiting the unsafe context
 * to eventually free the lock for a higher priority context which asked
 * for the friendly handover.
 *
 * It can be called inside an unsafe section when the console is just
 * temporarily in a safe state instead of exiting and entering the unsafe
 * state.
 *
 * Also it can be called in the safe context before doing an expensive
 * safe operation. It does not make sense to do the operation when
 * a higher priority context took the lock.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context.
 */
static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur)
{
	unsigned int cpu = smp_processor_id();

	/* Make sure this context still owns the console.
	 */
	if (!nbcon_owner_matches(cur, cpu, ctxt->prio))
		return false;

	/* The console owner can proceed if there is no waiter. */
	if (cur->req_prio == NBCON_PRIO_NONE)
		return true;

	/*
	 * A console owner within an unsafe region is always allowed to
	 * proceed, even if there are waiters. It can perform a handover
	 * when exiting the unsafe region. Otherwise the waiter will
	 * need to perform an unsafe hostile takeover.
	 */
	if (cur->unsafe)
		return true;

	/* Waiters always have higher priorities than owners. */
	WARN_ON_ONCE(cur->req_prio <= cur->prio);

	/*
	 * Having a safe point for take over and eventually a few
	 * duplicated characters or a full line is way better than a
	 * hostile takeover. Post processing can take care of the garbage.
	 * Release and hand over.
	 */
	nbcon_context_release(ctxt);

	/*
	 * It is not clear whether the waiter really took over ownership. The
	 * outermost callsite must make the final decision whether console
	 * ownership is needed for it to proceed. If yes, it must reacquire
	 * ownership (possibly hostile) before carefully proceeding.
	 *
	 * The calling context no longer owns the console so go back all the
	 * way instead of trying to implement reacquire heuristics in tons of
	 * places.
	 */
	return false;
}

/**
 * nbcon_can_proceed - Check whether ownership can proceed
 * @wctxt:	The write context that was handed to the write function
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * It is used in nbcon_enter_unsafe() to make sure that it still owns the
 * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock
 * for a higher priority context which asked for the friendly handover.
 *
 * It can be called inside an unsafe section when the console is just
 * temporarily in a safe state instead of exiting and entering the unsafe state.
 *
 * Also it can be called in the safe context before doing an expensive safe
 * operation. It does not make sense to do the operation when a higher
 * priority context took the lock.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context.
 */
bool nbcon_can_proceed(struct nbcon_write_context *wctxt)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	struct console *con = ctxt->console;
	struct nbcon_state cur;

	nbcon_state_read(con, &cur);

	return nbcon_context_can_proceed(ctxt, &cur);
}
EXPORT_SYMBOL_GPL(nbcon_can_proceed);

#define nbcon_context_enter_unsafe(c)	__nbcon_context_update_unsafe(c, true)
#define nbcon_context_exit_unsafe(c)	__nbcon_context_update_unsafe(c, false)

/**
 * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state
 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 * @unsafe:	The new value for the unsafe bit
 *
 * Return:	True if the unsafe state was updated and this context still
 *		owns the console. Otherwise false if ownership was handed
 *		over or taken.
 *
 * This function allows console owners to modify the unsafe status of the
 * console.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context.
 *
 * Internal helper to avoid duplicated code.
 */
static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe)
{
	struct console *con = ctxt->console;
	struct nbcon_state cur;
	struct nbcon_state new;

	nbcon_state_read(con, &cur);

	do {
		/*
		 * The unsafe bit must not be cleared if an
		 * unsafe hostile takeover has occurred.
		 */
		if (!unsafe && cur.unsafe_takeover)
			goto out;

		if (!nbcon_context_can_proceed(ctxt, &cur))
			return false;

		new.atom = cur.atom;
		new.unsafe = unsafe;
	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));

	cur.atom = new.atom;
out:
	return nbcon_context_can_proceed(ctxt, &cur);
}

void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
				 char *buf, unsigned int len)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	struct console *con = ctxt->console;
	struct nbcon_state cur;

	wctxt->outbuf = buf;
	wctxt->len = len;
	nbcon_state_read(con, &cur);
	wctxt->unsafe_takeover = cur.unsafe_takeover;
}

/**
 * nbcon_enter_unsafe - Enter an unsafe region in the driver
 * @wctxt:	The write context that was handed to the write function
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context.
 */
bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	bool is_owner;

	is_owner = nbcon_context_enter_unsafe(ctxt);
	if (!is_owner)
		nbcon_write_context_set_buf(wctxt, NULL, 0);

	return is_owner;
}
EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);

/**
 * nbcon_exit_unsafe - Exit an unsafe region in the driver
 * @wctxt:	The write context that was handed to the write function
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context.
 */
bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	bool ret;

	ret = nbcon_context_exit_unsafe(ctxt);
	if (!ret)
		nbcon_write_context_set_buf(wctxt, NULL, 0);

	return ret;
}
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);

/**
 * nbcon_reacquire_nobuf - Reacquire a console after losing ownership
 *			   while printing
 * @wctxt:	The write context that was handed to the write callback
 *
 * Since ownership can be lost at any time due to handover or takeover, a
 * printing context _must_ be prepared to back out immediately and
 * carefully. However, there are scenarios where the printing context must
 * reacquire ownership in order to finalize or revert hardware changes.
 *
 * This function allows a printing context to reacquire ownership using the
 * same priority as its previous ownership.
 *
 * Note that after a successful reacquire the printing context will have no
 * output buffer because that has been lost. This function cannot be used to
 * resume printing.
 */
void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);

	while (!nbcon_context_try_acquire(ctxt, true))
		cpu_relax();

	nbcon_write_context_set_buf(wctxt, NULL, 0);
}
EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf);

/**
 * nbcon_emit_next_record - Emit a record in the acquired context
 * @wctxt:	The write context that will be handed to the write function
 * @use_atomic:	True if the write_atomic() callback is to be used
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context. If the caller
 * wants to do more it must reacquire the console first.
 *
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
 * still records pending in the ringbuffer.
 */
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	struct console *con = ctxt->console;
	bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
	struct printk_message pmsg = {
		.pbufs = ctxt->pbufs,
	};
	unsigned long con_dropped;
	struct nbcon_state cur;
	unsigned long dropped;
	unsigned long ulseq;

	/*
	 * This function should never be called for consoles that have not
	 * implemented the necessary callback for writing: i.e. legacy
	 * consoles and, when atomic, nbcon consoles with no write_atomic().
	 * Handle it as if ownership was lost and try to continue.
	 *
	 * Note that for nbcon consoles the write_thread() callback is
	 * mandatory and was already checked in nbcon_alloc().
	 */
	if (WARN_ON_ONCE((use_atomic && !con->write_atomic) ||
			 !(console_srcu_read_flags(con) & CON_NBCON))) {
		nbcon_context_release(ctxt);
		return false;
	}

	/*
	 * The printk buffers are filled within an unsafe section. This
	 * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from
	 * clobbering each other.
	 */
	if (!nbcon_context_enter_unsafe(ctxt))
		return false;

	ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true);
	if (!ctxt->backlog)
		return nbcon_context_exit_unsafe(ctxt);

	/*
	 * @con->dropped is not protected in case of an unsafe hostile
	 * takeover. In that situation the update can be racy so
	 * annotate it accordingly.
	 */
	con_dropped = data_race(READ_ONCE(con->dropped));

	dropped = con_dropped + pmsg.dropped;
	if (dropped && !is_extended)
		console_prepend_dropped(&pmsg, dropped);

	/*
	 * If the previous owner was assigned the same record, this context
	 * has taken over ownership and is replaying the record. Prepend a
	 * message to let the user know the record is replayed.
	 */
	ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq));
	if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) {
		console_prepend_replay(&pmsg);
	} else {
		/*
		 * Ensure this context is still the owner before trying to
		 * update @nbcon_prev_seq. Otherwise the value in @ulseq may
		 * not be from the previous owner and instead be some later
		 * value from the context that took over ownership.
		 */
		nbcon_state_read(con, &cur);
		if (!nbcon_context_can_proceed(ctxt, &cur))
			return false;

		atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq,
					__u64seq_to_ulseq(pmsg.seq));
	}

	if (!nbcon_context_exit_unsafe(ctxt))
		return false;

	/* For skipped records just update seq/dropped in @con.
	 */
	if (pmsg.outbuf_len == 0)
		goto update_con;

	/* Initialize the write context for driver callbacks. */
	nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);

	if (use_atomic)
		con->write_atomic(con, wctxt);
	else
		con->write_thread(con, wctxt);

	if (!wctxt->outbuf) {
		/*
		 * Ownership was lost and reacquired by the driver. Handle it
		 * as if ownership was lost.
		 */
		nbcon_context_release(ctxt);
		return false;
	}

	/*
	 * Ownership may have been lost but _not_ reacquired by the driver.
	 * This case is detected and handled when entering unsafe to update
	 * dropped/seq values.
	 */

	/*
	 * Since any dropped message was successfully output, reset the
	 * dropped count for the console.
	 */
	dropped = 0;
update_con:
	/*
	 * The dropped count and the sequence number are updated within an
	 * unsafe section. This limits update races to the panic context and
	 * allows the panic context to win.
	 */
	if (!nbcon_context_enter_unsafe(ctxt))
		return false;

	if (dropped != con_dropped) {
		/* Counterpart to the READ_ONCE() above. */
		WRITE_ONCE(con->dropped, dropped);
	}

	nbcon_seq_try_update(ctxt, pmsg.seq + 1);

	return nbcon_context_exit_unsafe(ctxt);
}

/*
 * nbcon_emit_one - Print one record for an nbcon console using the
 *		    specified callback
 * @wctxt:	An initialized write context struct to use for this context
 * @use_atomic:	True if the write_atomic() callback is to be used
 *
 * Return:	True, when a record has been printed and there are still
 *		pending records. The caller might want to continue flushing.
 *
 *		False, when there is no pending record, or when the console
 *		context cannot be acquired, or the ownership has been lost.
 *		The caller should give up. Either the job is done, cannot be
 *		done, or will be handled by the owning context.
 *
 * This is an internal helper to handle the locking of the console before
 * calling nbcon_emit_next_record().
 */
static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	struct console *con = ctxt->console;
	unsigned long flags;
	bool ret = false;

	if (!use_atomic) {
		con->device_lock(con, &flags);

		/*
		 * Ensure this stays on the CPU to make handover and
		 * takeover possible.
		 */
		cant_migrate();
	}

	if (!nbcon_context_try_acquire(ctxt, false))
		goto out;

	/*
	 * nbcon_emit_next_record() returns false when the console was
	 * handed over or taken over. In both cases the context is no
	 * longer valid.
	 *
	 * The higher priority printing context takes over responsibility
	 * to print the pending records.
	 */
	if (!nbcon_emit_next_record(wctxt, use_atomic))
		goto out;

	nbcon_context_release(ctxt);

	ret = ctxt->backlog;
out:
	if (!use_atomic)
		con->device_unlock(con, flags);

	return ret;
}
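The return contract of nbcon_emit_one() (true only when a record was printed and more are pending) makes the caller's flush loop trivial. A freestanding sketch of that pattern; emit_one() and pending here are stand-ins for illustration, not the kernel functions:

#include <stdbool.h>
#include <stdio.h>

static int pending = 3;	/* pretend three records are queued */

/* Mimics nbcon_emit_one(): print one record, report remaining backlog. */
static bool emit_one(void)
{
	if (pending == 0)
		return false;	/* nothing to do, or ownership was lost */
	printf("record %d\n", pending--);
	return pending > 0;	/* true: caller may continue flushing */
}

int main(void)
{
	while (emit_one())
		;	/* flush until no backlog remains */
	return 0;
}

Folding "ownership lost" and "no backlog" into the same false return is deliberate: in both cases this caller must stop, because either the job is done or the new owner has taken over responsibility for the pending records.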
/**
 * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
 * @con:	Console to operate on
 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 *
 * Return:	True if the thread should shutdown or if the console is
 *		allowed to print and a record is available. False otherwise.
 *
 * After the thread wakes up, it must first check if it should shutdown before
 * attempting any printing.
 */
static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
{
	bool ret = false;
	short flags;
	int cookie;

	if (kthread_should_stop())
		return true;

	/*
	 * Block the kthread when the system is in an emergency or panic mode.
	 * It increases the chance that these contexts would be able to show
	 * the messages directly. And it reduces the risk of interrupted writes
	 * where the context with a higher priority takes over the nbcon console
	 * ownership in the middle of a message.
	 */
	if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) ||
	    unlikely(panic_in_progress()))
		return false;

	cookie = console_srcu_read_lock();

	flags = console_srcu_read_flags(con);
	if (console_is_usable(con, flags, false)) {
		/* Bring the sequence in @ctxt up to date */
		ctxt->seq = nbcon_seq_read(con);
		ret = prb_read_valid(prb, ctxt->seq, NULL);
	}

	console_srcu_read_unlock(cookie);

	return ret;
}

/**
 * nbcon_kthread_func - The printer thread function
 * @__console:	Console to operate on
 *
 * Return:	0
 */
static int nbcon_kthread_func(void *__console)
{
	struct console *con = __console;
	struct nbcon_write_context wctxt = {
		.ctxt.console	= con,
		.ctxt.prio	= NBCON_PRIO_NORMAL,
	};
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
	short con_flags;
	bool backlog;
	int cookie;

wait_for_event:
	/*
	 * Guarantee this task is visible on the rcuwait before
	 * checking the wake condition.
	 *
	 * The full memory barrier within set_current_state() of
	 * ___rcuwait_wait_event() pairs with the full memory
	 * barrier within rcuwait_has_sleeper().
	 *
	 * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
	 */
	rcuwait_wait_event(&con->rcuwait,
			   nbcon_kthread_should_wakeup(con, ctxt),
			   TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */

	do {
		if (kthread_should_stop())
			return 0;

		/*
		 * Block the kthread when the system is in an emergency or panic
		 * mode. See nbcon_kthread_should_wakeup() for more details.
		 */
		if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) ||
		    unlikely(panic_in_progress()))
			goto wait_for_event;

		backlog = false;

		/*
		 * Keep the srcu read lock around the entire operation so that
		 * synchronize_srcu() can guarantee that the kthread stopped
		 * or suspended printing.
		 */
		cookie = console_srcu_read_lock();

		con_flags = console_srcu_read_flags(con);

		if (console_is_usable(con, con_flags, false))
			backlog = nbcon_emit_one(&wctxt, false);

		console_srcu_read_unlock(cookie);

		cond_resched();

	} while (backlog);

	goto wait_for_event;
}

/**
 * nbcon_irq_work - irq work to wake console printer thread
 * @irq_work:	The irq work to operate on
 */
static void nbcon_irq_work(struct irq_work *irq_work)
{
	struct console *con = container_of(irq_work, struct console, irq_work);

	nbcon_kthread_wake(con);
}

static inline bool rcuwait_has_sleeper(struct rcuwait *w)
{
	/*
	 * Guarantee any new records can be seen by tasks preparing to wait
	 * before this context checks if the rcuwait is empty.
	 *
	 * This full memory barrier pairs with the full memory barrier within
	 * set_current_state() of ___rcuwait_wait_event(), which is called
	 * after prepare_to_rcuwait() adds the waiter but before it has
	 * checked the wait condition.
	 *
	 * This pairs with nbcon_kthread_func:A.
	 */
	smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
	return rcuwait_active(w);
}

/**
 * nbcon_kthreads_wake - Wake up printing threads using irq_work
 */
void nbcon_kthreads_wake(void)
{
	struct console *con;
	int cookie;

	if (!printk_kthreads_running)
		return;

	/*
	 * It is not allowed to call this function when console irq_work
	 * is blocked.
	 */
	if (WARN_ON_ONCE(console_irqwork_blocked))
		return;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		if (!(console_srcu_read_flags(con) & CON_NBCON))
			continue;

		/*
		 * Only schedule irq_work if the printing thread is
		 * actively waiting. If not waiting, the thread will
		 * notice by itself that it has work to do.
		 */
		if (rcuwait_has_sleeper(&con->rcuwait))
			irq_work_queue(&con->irq_work);
	}
	console_srcu_read_unlock(cookie);
}

/*
 * nbcon_kthread_stop - Stop a console printer thread
 * @con:	Console to operate on
 */
void nbcon_kthread_stop(struct console *con)
{
	lockdep_assert_console_list_lock_held();

	if (!con->kthread)
		return;

	kthread_stop(con->kthread);
	con->kthread = NULL;
}

/**
 * nbcon_kthread_create - Create a console printer thread
 * @con:	Console to operate on
 *
 * Return:	True if the kthread was started or already exists.
 *		Otherwise false and @con must not be registered.
 *
 * This function is called when it will be expected that nbcon consoles are
 * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL
 * will no longer be flushed by the legacy loop. This is why failure must
 * be fatal for console registration.
 *
 * If @con was already registered and this function fails, @con must be
 * unregistered before the global state variable @printk_kthreads_running
 * can be set.
 */
bool nbcon_kthread_create(struct console *con)
{
	struct task_struct *kt;

	lockdep_assert_console_list_lock_held();

	if (con->kthread)
		return true;

	kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
	if (WARN_ON(IS_ERR(kt))) {
		con_printk(KERN_ERR, con, "failed to start printing thread\n");
		return false;
	}

	con->kthread = kt;

	/*
	 * It is important that console printing threads are scheduled
	 * shortly after a printk call and with generous runtime budgets.
	 */
	sched_set_normal(con->kthread, -20);

	return true;
}

/* Track the nbcon emergency nesting per CPU. */
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;

/**
 * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
 *
 * Context:	For reading, any context. For writing, any context which could
 *		not be migrated to another CPU.
 * Return:	Either a pointer to the per CPU emergency nesting counter of
 *		the current CPU or to the init data during early boot.
 *
 * The function is safe for reading per-CPU variables in any context because
 * preemption is disabled if the current CPU is in the emergency state. See
 * also nbcon_cpu_emergency_enter().
 */
static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
{
	/*
	 * The value of __printk_percpu_data_ready gets set in normal
	 * context and before SMP initialization. As a result it could
	 * never change while inside an nbcon emergency section.
	 */
	if (!printk_percpu_data_ready())
		return &early_nbcon_pcpu_emergency_nesting;

	return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
}

/**
 * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
 *			    printing on the current CPU
 *
 * Context:	Any context.
 * Return:	The nbcon_prio to use for acquiring an nbcon console in this
 *		context for printing.
 *
 * The function is safe for reading per-CPU data in any context because
 * preemption is disabled if the current CPU is in the emergency or panic
 * state.
 */
enum nbcon_prio nbcon_get_default_prio(void)
{
	unsigned int *cpu_emergency_nesting;

	if (panic_on_this_cpu())
		return NBCON_PRIO_PANIC;

	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
	if (*cpu_emergency_nesting)
		return NBCON_PRIO_EMERGENCY;

	return NBCON_PRIO_NORMAL;
}

/*
 * Track if it is allowed to perform unsafe hostile takeovers of console
 * ownership. When true, console drivers might perform unsafe actions while
 * printing. It is externally available via nbcon_allow_unsafe_takeover().
*/ static bool panic_nbcon_allow_unsafe_takeover; /** * nbcon_allow_unsafe_takeover - Check if unsafe console takeovers are allowed * * Return: True, when it is permitted to perform unsafe console printing * * This is also used by console_is_usable() to determine if it is allowed to * call write_atomic() callbacks flagged as unsafe (CON_NBCON_ATOMIC_UNSAFE). */ bool nbcon_allow_unsafe_takeover(void) { return panic_on_this_cpu() && panic_nbcon_allow_unsafe_takeover; } /** * nbcon_legacy_emit_next_record - Print one record for an nbcon console * in legacy contexts * @con: The console to print on * @handover: Will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding * both the console_lock and the SRCU read lock. Otherwise it * is set to false. * @cookie: The cookie from the SRCU read lock. * @use_atomic: Set true when called in an atomic or unknown context. * It affects which nbcon callback will be used: write_atomic() * or write_thread(). * * When false, the write_thread() callback is used and would be * called in a preemptible context unless disabled by the * device_lock. The legacy handover is not allowed in this mode. * * Context: Any context except NMI. * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. * * False, when there is no pending record, or when the console * context cannot be acquired, or the ownership has been lost. * The caller should give up. Either the job is done, cannot be * done, or will be handled by the owning context. * * This function is meant to be called by console_flush_all() to print records * on nbcon consoles from legacy context (printing via console unlocking). * Essentially it is the nbcon version of console_emit_next_record(). */ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); unsigned long flags; bool progress; ctxt->console = con; ctxt->prio = nbcon_get_default_prio(); if (use_atomic) { /* * In an atomic or unknown context, use the same procedure as * in console_emit_next_record(). It allows handover. */ printk_safe_enter_irqsave(flags); console_lock_spinning_enable(); stop_critical_timings(); } progress = nbcon_emit_one(&wctxt, use_atomic); if (use_atomic) { start_critical_timings(); *handover = console_lock_spinning_disable_and_check(cookie); printk_safe_exit_irqrestore(flags); } else { /* Non-atomic does not perform legacy spinning handovers. */ *handover = false; } return progress; } /** * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * * Return: 0 if @con was flushed up to @stop_seq. Otherwise, error code on * failure. * * Errors: * * -EPERM: Unable to acquire console ownership. * * -EAGAIN: Another context took over ownership while printing. * * -ENOENT: A record before @stop_seq is not available. * * If flushing up to @stop_seq was not successful, it only makes sense for the * caller to try again when -EAGAIN was returned. When -EPERM is returned, * this context is not allowed to acquire the console. When -ENOENT is * returned, it cannot be expected that the unfinalized record will become * available.
*/ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); int err = 0; ctxt->console = con; ctxt->spinwait_max_us = 2000; ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = nbcon_allow_unsafe_takeover(); while (nbcon_seq_read(con) < stop_seq) { /* * Atomic flushing does not use console driver synchronization * (i.e. it does not hold the port lock for uart consoles). * Therefore IRQs must be disabled to avoid being interrupted * and then calling into a driver that will deadlock trying * to acquire console ownership. */ scoped_guard(irqsave) { if (!nbcon_context_try_acquire(ctxt, false)) return -EPERM; /* * nbcon_emit_next_record() returns false when * the console was handed over or taken over. * In both cases the context is no longer valid. */ if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; nbcon_context_release(ctxt); } if (!ctxt->backlog) { /* Are there reserved but not yet finalized records? */ if (nbcon_seq_read(con) < stop_seq) err = -ENOENT; break; } } return err; } /** * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * * This will stop flushing before @stop_seq if another context has ownership. * That context is then responsible for the flushing. Likewise, if new records * are added while this context was flushing and there is no other context * to handle the printing, this context must also flush those records. */ static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) { struct console_flush_type ft; int err; again: err = __nbcon_atomic_flush_pending_con(con, stop_seq); /* * If there was a new owner (-EPERM, -EAGAIN), that context is * responsible for completing. * * Do not wait for records not yet finalized (-ENOENT) to avoid a * possible deadlock. They will either get flushed by the writer or * eventually skipped on the panic CPU. */ if (err) return; /* * If flushing was successful but more records are available, this * context must flush those remaining records if the printer thread * is not available to do it. */ printk_get_console_flush_type(&ft); if (!ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { stop_seq = prb_next_reserve_seq(prb); goto again; } } /** * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * @stop_seq: Flush up until this record */ static void __nbcon_atomic_flush_pending(u64 stop_seq) { struct console *con; int cookie; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { short flags = console_srcu_read_flags(con); if (!(flags & CON_NBCON)) continue; if (!console_is_usable(con, flags, true)) continue; if (nbcon_seq_read(con) >= stop_seq) continue; nbcon_atomic_flush_pending_con(con, stop_seq); } console_srcu_read_unlock(cookie); } /** * nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * * Flush the backlog up through the currently newest record. Any new * records added while flushing will not be flushed if there is another * context available to handle the flushing. This is to avoid one CPU * printing unbounded because other CPUs continue to add records.
*/ void nbcon_atomic_flush_pending(void) { __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); } /** * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their * write_atomic() callback and allowing unsafe hostile takeovers * * Flush the backlog up through the currently newest record. Unsafe hostile * takeovers will be performed, if necessary. */ void nbcon_atomic_flush_unsafe(void) { panic_nbcon_allow_unsafe_takeover = true; __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); panic_nbcon_allow_unsafe_takeover = false; } /** * nbcon_cpu_emergency_enter - Enter an emergency section where printk() * messages for that CPU are flushed directly * * Context: Any context. Disables preemption. * * When within an emergency section, printk() calls will attempt to flush any * pending messages in the ringbuffer. */ void nbcon_cpu_emergency_enter(void) { unsigned int *cpu_emergency_nesting; preempt_disable(); atomic_inc(&nbcon_cpu_emergency_cnt); cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); (*cpu_emergency_nesting)++; } /** * nbcon_cpu_emergency_exit - Exit an emergency section * * Context: Within an emergency section. Enables preemption. */ void nbcon_cpu_emergency_exit(void) { unsigned int *cpu_emergency_nesting; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) (*cpu_emergency_nesting)--; /* * Wake up kthreads because there might be some pending messages * added by other CPUs with normal priority since the last flush * in the emergency context. */ if (!WARN_ON_ONCE(atomic_read(&nbcon_cpu_emergency_cnt) == 0)) { if (atomic_dec_return(&nbcon_cpu_emergency_cnt) == 0) { struct console_flush_type ft; printk_get_console_flush_type(&ft); if (ft.nbcon_offload) nbcon_kthreads_wake(); } } preempt_enable(); } /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize * * Return: True if the console was fully allocated and initialized. * Otherwise @con must not be registered. * * When allocation and init were successful, the console must be properly * freed using nbcon_free() once it is no longer needed. */ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; /* Synchronize the kthread start. */ lockdep_assert_console_list_lock_held(); /* The write_thread() callback is mandatory. */ if (WARN_ON(!con->write_thread)) return false; rcuwait_init(&con->rcuwait); init_irq_work(&con->irq_work, nbcon_irq_work); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); /* * Initialize @nbcon_seq to the highest possible sequence number so * that practically speaking it will have nothing to print until a * desired initial sequence number has been set via nbcon_seq_force(). */ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); if (con->flags & CON_BOOT) { /* * Boot console printing is synchronized with legacy console * printing, so boot consoles can share the same global printk * buffers. */ con->pbufs = &printk_shared_pbufs; } else { con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); if (!con->pbufs) { con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); return false; } if (printk_kthreads_ready && !have_boot_console) { if (!nbcon_kthread_create(con)) { kfree(con->pbufs); con->pbufs = NULL; return false; } /* Might be the first kthread.
*/ printk_kthreads_running = true; } } return true; } /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data * * Important: @have_nbcon_console must be updated before calling * this function. In particular, it can be set only when there * is still another nbcon console registered. */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; /* Synchronize the kthread stop. */ lockdep_assert_console_list_lock_held(); if (printk_kthreads_running) { nbcon_kthread_stop(con); /* Might be the last nbcon console. * * Do not rely on printk_kthreads_check_locked(). It is not * called in some code paths, see nbcon_free() callers. */ if (!have_nbcon_console) printk_kthreads_running = false; } nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ if (!(con->flags & CON_BOOT)) kfree(con->pbufs); con->pbufs = NULL; } /** * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe * section * @con: The nbcon console to acquire * * Context: Under the locking mechanism implemented in * @con->device_lock() including disabling migration. * Return: True if the console was acquired. False otherwise. * * Console drivers will usually use their own internal synchronization * mechanism to synchronize between console printing and non-printing * activities (such as setting baud rates). However, nbcon console drivers * supporting atomic consoles may also want to mark unsafe sections when * performing non-printing activities in order to synchronize against their * atomic_write() callback. * * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL * and marks it unsafe for handover/takeover. */ bool nbcon_device_try_acquire(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); cant_migrate(); memset(ctxt, 0, sizeof(*ctxt)); ctxt->console = con; ctxt->prio = NBCON_PRIO_NORMAL; if (!nbcon_context_try_acquire(ctxt, false)) return false; if (!nbcon_context_enter_unsafe(ctxt)) return false; return true; } EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); /** * nbcon_device_release - Exit unsafe section and release the nbcon console * @con: The nbcon console acquired in nbcon_device_try_acquire() */ void nbcon_device_release(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); struct console_flush_type ft; int cookie; if (!nbcon_context_exit_unsafe(ctxt)) return; nbcon_context_release(ctxt); /* * This context must flush any new records added while the console * was locked if the printer thread is not available to do it. The * console_srcu_read_lock must be taken to ensure the console is * usable throughout flushing. */ cookie = console_srcu_read_lock(); printk_get_console_flush_type(&ft); if (console_is_usable(con, console_srcu_read_flags(con), true) && !ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { /* * If nbcon_atomic flushing is not available, fall back to * using the legacy loop.
*/ if (ft.nbcon_atomic) { __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb)); } else if (ft.legacy_direct) { if (console_trylock()) console_unlock(); } else if (ft.legacy_offload) { defer_console_output(); } } console_srcu_read_unlock(cookie); } EXPORT_SYMBOL_GPL(nbcon_device_release); /** * nbcon_kdb_try_acquire - Try to acquire nbcon console and enter unsafe * section * @con: The nbcon console to acquire * @wctxt: The nbcon write context to be used on success * * Context: Under console_srcu_read_lock() for emitting a single kdb message * using the given con->write_atomic() callback. Can be called * only when the console is usable at the moment. * * Return: True if the console was acquired. False otherwise. * * kdb emits messages on consoles registered for printk() without * storing them into the ring buffer. It has to acquire the console * ownership so that it can call the con->write_atomic() callback in a * safe way. * * This function acquires the nbcon console using priority NBCON_PRIO_EMERGENCY * and marks it unsafe for handover/takeover. */ bool nbcon_kdb_try_acquire(struct console *con, struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); memset(ctxt, 0, sizeof(*ctxt)); ctxt->console = con; ctxt->prio = NBCON_PRIO_EMERGENCY; if (!nbcon_context_try_acquire(ctxt, false)) return false; if (!nbcon_context_enter_unsafe(ctxt)) return false; return true; } /** * nbcon_kdb_release - Exit unsafe section and release the nbcon console * * @wctxt: The nbcon write context initialized by a successful * nbcon_kdb_try_acquire() */ void nbcon_kdb_release(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); if (!nbcon_context_exit_unsafe(ctxt)) return; nbcon_context_release(ctxt); /* * Flush any new printk() messages added when the console was blocked. * Only the console used by the given write context was blocked. * The console was locked only when the write_atomic() callback * was usable. */ __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb)); }
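A minimal usage sketch of the device-lock API above (nbcon_device_try_acquire()/nbcon_device_release()). Everything prefixed my_ is a hypothetical driver helper, and the driver's con->device_lock() callback is assumed to take the same port lock with migration disabled, as the kernel-doc requires:

#include <linux/console.h>

/*
 * Sketch only: bracket a non-printing hardware operation (a baud-rate
 * change) so it cannot interleave with the console's atomic printing
 * callback. my_port_lock()/my_port_unlock()/my_uart_set_baud() and
 * struct my_uart_port are illustrative placeholders.
 */
static void my_uart_change_baud(struct my_uart_port *port,
				struct console *con, unsigned int baud)
{
	unsigned long flags;

	my_port_lock(port, &flags);	/* same lock as con->device_lock() */

	if (nbcon_device_try_acquire(con)) {
		/* Owner + unsafe section: printing cannot take over here. */
		my_uart_set_baud(port, baud);
		nbcon_device_release(con);
	}

	my_port_unlock(port, &flags);
}

As the source above shows, nbcon_device_release() itself flushes (or defers flushing of) any records added while the console was held, so the driver does not need to trigger a flush explicitly.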
// SPDX-License-Identifier: GPL-2.0 #include <linux/proc_fs.h> #include <linux/nsproxy.h> #include <linux/ptrace.h> #include <linux/namei.h> #include <linux/file.h> #include <linux/utsname.h> #include <net/net_namespace.h> #include <linux/ipc_namespace.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include "internal.h" static const struct proc_ns_operations *const ns_entries[] = { #ifdef CONFIG_NET_NS &netns_operations, #endif #ifdef CONFIG_UTS_NS &utsns_operations, #endif #ifdef CONFIG_IPC_NS &ipcns_operations, #endif #ifdef CONFIG_PID_NS &pidns_operations, &pidns_for_children_operations, #endif #ifdef CONFIG_USER_NS &userns_operations, #endif &mntns_operations, #ifdef CONFIG_CGROUPS &cgroupns_operations, #endif #ifdef CONFIG_TIME_NS &timens_operations, &timens_for_children_operations, #endif }; static const char *proc_ns_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; struct path ns_path; int error = -EACCES; if (!dentry) return ERR_PTR(-ECHILD); task = get_proc_task(inode); if (!task) return ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out; error = ns_get_path(&ns_path, task, ns_ops); if (error) goto out; error = nd_jump_link(&ns_path); out: put_task_struct(task); return ERR_PTR(error); } static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; char name[50]; int res = -EACCES; task = get_proc_task(inode); if (!task) return res; if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { res = ns_get_name(name, sizeof(name), task, ns_ops); if (res >= 0) res = readlink_copy(buffer, buflen, name, strlen(name)); } put_task_struct(task); return res; } static const struct inode_operations proc_ns_link_inode_operations = { .readlink = proc_ns_readlink, .get_link = proc_ns_get_link, .setattr = proc_setattr, }; static struct dentry *proc_ns_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct proc_ns_operations *ns_ops = ptr; struct inode *inode; struct proc_inode *ei; inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); inode->i_op = &proc_ns_link_inode_operations; ei->ns_ops = ns_ops; pid_update_inode(task, inode); return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); } static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) { struct task_struct *task = get_proc_task(file_inode(file)); const struct proc_ns_operations *const *entry, *const *last; if (!task) return -ENOENT; if (!dir_emit_dots(file, ctx)) goto out; if (ctx->pos >= 2 +
ARRAY_SIZE(ns_entries)) goto out; entry = ns_entries + (ctx->pos - 2); last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; while (entry <= last) { const struct proc_ns_operations *ops = *entry; if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), proc_ns_instantiate, task, ops)) break; ctx->pos++; entry++; } out: put_task_struct(task); return 0; } const struct file_operations proc_ns_dir_operations = { .read = generic_read_dir, .iterate_shared = proc_ns_dir_readdir, .llseek = generic_file_llseek, }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct task_struct *task = get_proc_task(dir); const struct proc_ns_operations *const *entry, *const *last; unsigned int len = dentry->d_name.len; struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; last = &ns_entries[ARRAY_SIZE(ns_entries)]; for (entry = ns_entries; entry < last; entry++) { if (strlen((*entry)->name) != len) continue; if (!memcmp(dentry->d_name.name, (*entry)->name, len)) break; } if (entry == last) goto out; res = proc_ns_instantiate(dentry, task, *entry); out: put_task_struct(task); out_no_task: return res; } const struct inode_operations proc_ns_dir_inode_operations = { .lookup = proc_ns_dir_lookup, .getattr = pid_getattr, .setattr = proc_setattr, };
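The ns_get_name()-backed readlink above is what userspace sees when it inspects the /proc/<pid>/ns/ directory served by this file: each entry is a magic symlink that reads back as "type:[inode]". A small hypothetical userspace demo (not part of this kernel source):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	/* readlink() on an ns/ entry returns the "type:[inode]" string
	 * built by ns_get_name(), e.g. "net:[4026531840]". Two processes
	 * that read back identical strings share that namespace. */
	ssize_t n = readlink("/proc/self/ns/net", buf, sizeof(buf) - 1);

	if (n < 0) {
		perror("readlink");
		return 1;
	}
	buf[n] = '\0';
	printf("%s\n", buf);
	return 0;
}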
/* * videobuf2-v4l2.h - V4L2 driver helper framework * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #ifndef _MEDIA_VIDEOBUF2_V4L2_H #define _MEDIA_VIDEOBUF2_V4L2_H #include <linux/videodev2.h> #include <media/videobuf2-core.h> #if VB2_MAX_FRAME != VIDEO_MAX_FRAME #error VB2_MAX_FRAME != VIDEO_MAX_FRAME #endif #if VB2_MAX_PLANES != VIDEO_MAX_PLANES #error VB2_MAX_PLANES != VIDEO_MAX_PLANES #endif struct video_device; /** * struct vb2_v4l2_buffer - video buffer information for v4l2. * * @vb2_buf: embedded struct &vb2_buffer. * @flags: buffer informational flags. * @field: field order of the image in the buffer, as defined by * &enum v4l2_field. * @timecode: frame timecode. * @sequence: sequence count of this frame. * @request_fd: the request_fd associated with this buffer * @is_held: if true, then this capture buffer was held * @planes: plane information (userptr/fd, length, bytesused, data_offset). * * Should contain enough information to be able to cover all the fields * of &struct v4l2_buffer at ``videodev2.h``. */ struct vb2_v4l2_buffer { struct vb2_buffer vb2_buf; __u32 flags; __u32 field; struct v4l2_timecode timecode; __u32 sequence; __s32 request_fd; bool is_held; struct vb2_plane planes[VB2_MAX_PLANES]; }; /* VB2 V4L2 flags as set in vb2_queue.subsystem_flags */ #define VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF (1 << 0) /* * to_vb2_v4l2_buffer() - cast struct vb2_buffer * to struct vb2_v4l2_buffer * */ #define to_vb2_v4l2_buffer(vb) \ container_of(vb, struct vb2_v4l2_buffer, vb2_buf) /** * vb2_find_buffer() - Find a buffer with given timestamp * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @timestamp: the timestamp to find. * * Returns the buffer with the given @timestamp, or NULL if not found.
*/ struct vb2_buffer *vb2_find_buffer(struct vb2_queue *q, u64 timestamp); int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b); /** * vb2_reqbufs() - Wrapper for vb2_core_reqbufs() that also verifies * the memory and type values. * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @req: &struct v4l2_requestbuffers passed from userspace to * &v4l2_ioctl_ops->vidioc_reqbufs handler in driver. */ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req); /** * vb2_create_bufs() - Wrapper for vb2_core_create_bufs() that also verifies * the memory and type values. * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @create: creation parameters, passed from userspace to * &v4l2_ioctl_ops->vidioc_create_bufs handler in driver */ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create); /** * vb2_prepare_buf() - Pass ownership of a buffer from userspace to the kernel * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @mdev: pointer to &struct media_device, may be NULL. * @b: buffer structure passed from userspace to * &v4l2_ioctl_ops->vidioc_prepare_buf handler in driver * * Should be called from &v4l2_ioctl_ops->vidioc_prepare_buf ioctl handler * of a driver. * * This function: * * #) verifies the passed buffer, * #) calls &vb2_ops->buf_prepare callback in the driver (if provided), * in which driver-specific buffer initialization can be performed. * #) if @b->request_fd is non-zero and @mdev->ops->req_queue is set, * then bind the prepared buffer to the request. * * The return values from this function are intended to be directly returned * from &v4l2_ioctl_ops->vidioc_prepare_buf handler in driver. */ int vb2_prepare_buf(struct vb2_queue *q, struct media_device *mdev, struct v4l2_buffer *b); /** * vb2_qbuf() - Queue a buffer from userspace * @q: pointer to &struct vb2_queue with videobuf2 queue. * @mdev: pointer to &struct media_device, may be NULL. * @b: buffer structure passed from userspace to * &v4l2_ioctl_ops->vidioc_qbuf handler in driver * * Should be called from &v4l2_ioctl_ops->vidioc_qbuf handler of a driver. * * This function: * * #) verifies the passed buffer; * #) if @b->request_fd is non-zero and @mdev->ops->req_queue is set, * then bind the buffer to the request. * #) if necessary, calls &vb2_ops->buf_prepare callback in the driver * (if provided), in which driver-specific buffer initialization can * be performed; * #) if streaming is on, queues the buffer in driver by the means of * &vb2_ops->buf_queue callback for processing. * * The return values from this function are intended to be directly returned * from &v4l2_ioctl_ops->vidioc_qbuf handler in driver. */ int vb2_qbuf(struct vb2_queue *q, struct media_device *mdev, struct v4l2_buffer *b); /** * vb2_expbuf() - Export a buffer as a file descriptor * @q: pointer to &struct vb2_queue with videobuf2 queue. * @eb: export buffer structure passed from userspace to * &v4l2_ioctl_ops->vidioc_expbuf handler in driver * * The return values from this function are intended to be directly returned * from &v4l2_ioctl_ops->vidioc_expbuf handler in driver. */ int vb2_expbuf(struct vb2_queue *q, struct v4l2_exportbuffer *eb); /** * vb2_dqbuf() - Dequeue a buffer to the userspace * @q: pointer to &struct vb2_queue with videobuf2 queue. * @b: buffer structure passed from userspace to * &v4l2_ioctl_ops->vidioc_dqbuf handler in driver * @nonblocking: if true, this call will not sleep waiting for a buffer if no * buffers ready for dequeuing are present. 
Normally the driver * would be passing (&file->f_flags & %O_NONBLOCK) here * * Should be called from &v4l2_ioctl_ops->vidioc_dqbuf ioctl handler * of a driver. * * This function: * * #) verifies the passed buffer; * #) calls &vb2_ops->buf_finish callback in the driver (if provided), in which * driver can perform any additional operations that may be required before * returning the buffer to userspace, such as cache sync; * #) the buffer struct members are filled with relevant information for * the userspace. * * The return values from this function are intended to be directly returned * from &v4l2_ioctl_ops->vidioc_dqbuf handler in driver. */ int vb2_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b, bool nonblocking); /** * vb2_streamon - start streaming * @q: pointer to &struct vb2_queue with videobuf2 queue. * @type: type argument passed from userspace to vidioc_streamon handler, * as defined by &enum v4l2_buf_type. * * Should be called from &v4l2_ioctl_ops->vidioc_streamon handler of a driver. * * This function: * * 1) verifies current state * 2) passes any previously queued buffers to the driver and starts streaming * * The return values from this function are intended to be directly returned * from &v4l2_ioctl_ops->vidioc_streamon handler in the driver. */ int vb2_streamon(struct vb2_queue *q, enum v4l2_buf_type type); /** * vb2_streamoff - stop streaming * @q: pointer to &struct vb2_queue with videobuf2 queue. * @type: type argument passed from userspace to vidioc_streamoff handler * * Should be called from vidioc_streamoff handler of a driver. * * This function: * * #) verifies current state, * #) stops streaming and dequeues any queued buffers, including those previously * passed to the driver (after waiting for the driver to finish). * * This call can be used for pausing playback. * The return values from this function are intended to be directly returned * from vidioc_streamoff handler in the driver. */ int vb2_streamoff(struct vb2_queue *q, enum v4l2_buf_type type); /** * vb2_queue_init() - initialize a videobuf2 queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * * The vb2_queue structure should be allocated by the driver. The driver is * responsible for clearing its content and setting initial values for some * required entries before calling this function. * q->ops, q->mem_ops, q->type and q->io_modes are mandatory. Please refer * to the struct vb2_queue description in include/media/videobuf2-core.h * for more information. */ int __must_check vb2_queue_init(struct vb2_queue *q); /** * vb2_queue_init_name() - initialize a videobuf2 queue with a name * @q: pointer to &struct vb2_queue with videobuf2 queue. * @name: the queue name * * This function initializes the vb2_queue exactly like vb2_queue_init(), * and additionally sets the queue name. The queue name is used for logging * purposes, and should uniquely identify the queue within the context of the * device it belongs to. This is useful to attribute kernel log messages to the * right queue for m2m devices or other devices that handle multiple queues. */ int __must_check vb2_queue_init_name(struct vb2_queue *q, const char *name); /** * vb2_queue_release() - stop streaming, release the queue and free memory * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This function stops streaming and performs necessary cleanups, including * freeing video buffer memory. The driver is responsible for freeing * the vb2_queue structure itself.
*/ void vb2_queue_release(struct vb2_queue *q); /** * vb2_queue_change_type() - change the type of an inactive vb2_queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * @type: the type to change to (V4L2_BUF_TYPE_VIDEO_*) * * This function changes the type of the vb2_queue. This is only possible * if the queue is not busy (i.e. no buffers have been allocated). * * vb2_queue_change_type() can be used to support multiple buffer types using * the same queue. The driver can implement v4l2_ioctl_ops.vidioc_reqbufs and * v4l2_ioctl_ops.vidioc_create_bufs functions and call vb2_queue_change_type() * before calling vb2_ioctl_reqbufs() or vb2_ioctl_create_bufs(), and thus * "lock" the buffer type until the buffers have been released. */ int vb2_queue_change_type(struct vb2_queue *q, unsigned int type); /** * vb2_poll() - implements poll userspace operation * @q: pointer to &struct vb2_queue with videobuf2 queue. * @file: file argument passed to the poll file operation handler * @wait: wait argument passed to the poll file operation handler * * This function implements poll file operation handler for a driver. * For CAPTURE queues, if a buffer is ready to be dequeued, the userspace will * be informed that the file descriptor of a video device is available for * reading. * For OUTPUT queues, if a buffer is ready to be dequeued, the file descriptor * will be reported as available for writing. * * If the driver uses struct v4l2_fh, then vb2_poll() will also check for any * pending events. * * The return values from this function are intended to be directly returned * from poll handler in driver. */ __poll_t vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait); /* * The following functions are not part of the vb2 core API, but are simple * helper functions that you can use in your struct v4l2_file_operations, * struct v4l2_ioctl_ops and struct vb2_ops. They will serialize if vb2_queue->lock * or video_device->lock is set, and they will set and test the queue owner * (vb2_queue->owner) to check if the calling filehandle is permitted to do the * queuing operation. */ /** * vb2_queue_is_busy() - check if the queue is busy * @q: pointer to &struct vb2_queue with videobuf2 queue. * @file: file through which the vb2 queue access is performed * * The queue is considered busy if it has an owner and the owner is not the * @file. * * Queue ownership is acquired and checked by some of the v4l2_ioctl_ops helpers * below. Drivers can also use this function directly when they need to * open-code ioctl handlers, for instance to add additional checks between the * queue ownership test and the call to the corresponding vb2 operation. 
*/ static inline bool vb2_queue_is_busy(struct vb2_queue *q, struct file *file) { return q->owner && q->owner != file->private_data; } /* struct v4l2_ioctl_ops helpers */ int vb2_ioctl_reqbufs(struct file *file, void *priv, struct v4l2_requestbuffers *p); int vb2_ioctl_create_bufs(struct file *file, void *priv, struct v4l2_create_buffers *p); int vb2_ioctl_prepare_buf(struct file *file, void *priv, struct v4l2_buffer *p); int vb2_ioctl_querybuf(struct file *file, void *priv, struct v4l2_buffer *p); int vb2_ioctl_qbuf(struct file *file, void *priv, struct v4l2_buffer *p); int vb2_ioctl_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p); int vb2_ioctl_streamon(struct file *file, void *priv, enum v4l2_buf_type i); int vb2_ioctl_streamoff(struct file *file, void *priv, enum v4l2_buf_type i); int vb2_ioctl_expbuf(struct file *file, void *priv, struct v4l2_exportbuffer *p); int vb2_ioctl_remove_bufs(struct file *file, void *priv, struct v4l2_remove_buffers *p); /* struct v4l2_file_operations helpers */ int vb2_fop_mmap(struct file *file, struct vm_area_struct *vma); int vb2_fop_release(struct file *file); int _vb2_fop_release(struct file *file, struct mutex *lock); ssize_t vb2_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos); ssize_t vb2_fop_read(struct file *file, char __user *buf, size_t count, loff_t *ppos); __poll_t vb2_fop_poll(struct file *file, poll_table *wait); #ifndef CONFIG_MMU unsigned long vb2_fop_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif /** * vb2_video_unregister_device - unregister the video device and release queue * * @vdev: pointer to &struct video_device * * If the driver uses vb2_fop_release()/_vb2_fop_release(), then it should use * vb2_video_unregister_device() instead of video_unregister_device(). * * This function will call video_unregister_device() and then release the * vb2_queue if streaming is in progress. This will stop streaming, which * will simplify the unbind sequence since after this call all subdevs * will have stopped streaming as well. */ void vb2_video_unregister_device(struct video_device *vdev); /** * vb2_ops_wait_prepare - helper function to lock a struct &vb2_queue * * @vq: pointer to &struct vb2_queue * * .. note:: only use if vq->lock is non-NULL. */ void vb2_ops_wait_prepare(struct vb2_queue *vq); /** * vb2_ops_wait_finish - helper function to unlock a struct &vb2_queue * * @vq: pointer to &struct vb2_queue * * .. note:: only use if vq->lock is non-NULL. */ void vb2_ops_wait_finish(struct vb2_queue *vq); struct media_request; int vb2_request_validate(struct media_request *req); void vb2_request_queue(struct media_request *req); #endif /* _MEDIA_VIDEOBUF2_V4L2_H */
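The helper declarations above map one-to-one onto a driver's ops tables. As an illustrative sketch (assumed driver boilerplate, not part of this header): a capture driver that serializes through vb2_queue->lock can wire the helpers in directly. my_querycap is a hypothetical driver handler; v4l2_fh_open and video_ioctl2 are the usual V4L2 core helpers:

static const struct v4l2_ioctl_ops my_ioctl_ops = {
	.vidioc_querycap	= my_querycap,	/* driver-specific */
	.vidioc_reqbufs		= vb2_ioctl_reqbufs,
	.vidioc_create_bufs	= vb2_ioctl_create_bufs,
	.vidioc_prepare_buf	= vb2_ioctl_prepare_buf,
	.vidioc_querybuf	= vb2_ioctl_querybuf,
	.vidioc_qbuf		= vb2_ioctl_qbuf,
	.vidioc_dqbuf		= vb2_ioctl_dqbuf,
	.vidioc_expbuf		= vb2_ioctl_expbuf,
	.vidioc_streamon	= vb2_ioctl_streamon,
	.vidioc_streamoff	= vb2_ioctl_streamoff,
};

static const struct v4l2_file_operations my_fops = {
	.owner		= THIS_MODULE,
	.open		= v4l2_fh_open,
	.release	= vb2_fop_release,	/* releases queue on last close */
	.read		= vb2_fop_read,
	.poll		= vb2_fop_poll,
	.mmap		= vb2_fop_mmap,
	.unlocked_ioctl	= video_ioctl2,
};

Because these helpers serialize on vb2_queue->lock (or video_device->lock) and enforce queue ownership via vb2_queue_is_busy(), the driver does not need its own locking in these paths.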
// SPDX-License-Identifier: GPL-2.0-only /* * DVB USB library - provides a generic interface for a DVB USB device driver. * * dvb-usb-init.c * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dvb-usb-common.h" /* debug */ int dvb_usb_debug; module_param_named(debug, dvb_usb_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,pll=4,ts=8,err=16,rc=32,fw=64,mem=128,uxfer=256 (or-able))." DVB_USB_DEBUG_STATUS); int dvb_usb_disable_rc_polling; module_param_named(disable_rc_polling, dvb_usb_disable_rc_polling, int, 0644); MODULE_PARM_DESC(disable_rc_polling, "disable remote control polling (default: 0)."); static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; adap->id = n; memcpy(&adap->props, &d->props.adapter[n], sizeof(struct dvb_usb_adapter_properties)); for (o = 0; o < adap->props.num_frontends; o++) { struct dvb_usb_adapter_fe_properties *props = &adap->props.fe[o]; /* speed - when running at FULL speed we need a HW PID filter */ if (d->udev->speed == USB_SPEED_FULL && !(props->caps & DVB_USB_ADAP_HAS_PID_FILTER)) { err("This USB2.0 device cannot be run on a USB1.1 port.
(it lacks a hardware PID filter)"); return -ENODEV; } if ((d->udev->speed == USB_SPEED_FULL && props->caps & DVB_USB_ADAP_HAS_PID_FILTER) || (props->caps & DVB_USB_ADAP_NEED_PID_FILTERING)) { info("will use the device's hardware PID filter (table count: %d).", props->pid_filter_count); adap->fe_adap[o].pid_filtering = 1; adap->fe_adap[o].max_feed_count = props->pid_filter_count; } else { info("will pass the complete MPEG2 transport stream to the software demuxer."); adap->fe_adap[o].pid_filtering = 0; adap->fe_adap[o].max_feed_count = 255; } if (!adap->fe_adap[o].pid_filtering && dvb_usb_force_pid_filter_usage && props->caps & DVB_USB_ADAP_HAS_PID_FILTER) { info("pid filter enabled by module option."); adap->fe_adap[o].pid_filtering = 1; adap->fe_adap[o].max_feed_count = props->pid_filter_count; } if (props->size_of_priv > 0) { adap->fe_adap[o].priv = kzalloc(props->size_of_priv, GFP_KERNEL); if (adap->fe_adap[o].priv == NULL) { err("no memory for priv for adapter %d fe %d.", n, o); return -ENOMEM; } } } if (adap->props.size_of_priv > 0) { adap->priv = kzalloc(adap->props.size_of_priv, GFP_KERNEL); if (adap->priv == NULL) { err("no memory for priv for adapter %d.", n); return -ENOMEM; } } ret = dvb_usb_adapter_stream_init(adap); if (ret) goto stream_init_err; ret = dvb_usb_adapter_dvb_init(adap, adapter_nrs); if (ret) goto dvb_init_err; ret = dvb_usb_adapter_frontend_init(adap); if (ret) goto frontend_init_err; /* use exclusive FE lock if there are multiple shared FEs */ if (adap->fe_adap[1].fe && adap->dvb_adap.mfe_shared < 1) adap->dvb_adap.mfe_shared = 1; d->num_adapters_initialized++; d->state |= DVB_USB_STATE_DVB; } /* * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ if (d->props.generic_bulk_ctrl_endpoint != 0) { usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); } return 0; frontend_init_err: dvb_usb_adapter_dvb_exit(adap); dvb_init_err: dvb_usb_adapter_stream_exit(adap); stream_init_err: kfree(adap->priv); return ret; } static int dvb_usb_adapter_exit(struct dvb_usb_device *d) { int n; for (n = 0; n < d->num_adapters_initialized; n++) { dvb_usb_adapter_frontend_exit(&d->adapter[n]); dvb_usb_adapter_dvb_exit(&d->adapter[n]); dvb_usb_adapter_stream_exit(&d->adapter[n]); kfree(d->adapter[n].priv); } d->num_adapters_initialized = 0; d->state &= ~DVB_USB_STATE_DVB; return 0; } /* general initialization functions */ static int dvb_usb_exit(struct dvb_usb_device *d) { deb_info("state before exiting everything: %x\n", d->state); dvb_usb_remote_exit(d); dvb_usb_adapter_exit(d); dvb_usb_i2c_exit(d); deb_info("state should be zero now: %x\n", d->state); d->state = DVB_USB_STATE_INIT; if (d->priv != NULL && d->props.priv_destroy != NULL) d->props.priv_destroy(d); kfree(d->priv); kfree(d); return 0; } static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums) { int ret = 0; mutex_init(&d->data_mutex); mutex_init(&d->usb_mutex); mutex_init(&d->i2c_mutex); d->state = DVB_USB_STATE_INIT; if (d->props.size_of_priv > 0) { d->priv = kzalloc(d->props.size_of_priv, GFP_KERNEL); if (d->priv == NULL) { err("no memory for priv in 'struct dvb_usb_device'"); return -ENOMEM; } if (d->props.priv_init != NULL) { ret = d->props.priv_init(d); if (ret != 0) goto err_priv_init; } } /* check the capabilities and set appropriate variables */ dvb_usb_device_power_ctrl(d, 1); ret = dvb_usb_i2c_init(d); if (ret) goto err_i2c_init; ret =
dvb_usb_adapter_init(d, adapter_nums); if (ret) goto err_adapter_init; if ((ret = dvb_usb_remote_init(d))) err("could not initialize remote control."); dvb_usb_device_power_ctrl(d, 0); return 0; err_adapter_init: dvb_usb_adapter_exit(d); dvb_usb_i2c_exit(d); err_i2c_init: if (d->priv && d->props.priv_destroy) d->props.priv_destroy(d); err_priv_init: kfree(d->priv); d->priv = NULL; return ret; } /* determine the name and the state of the just found USB device */ static const struct dvb_usb_device_description *dvb_usb_find_device(struct usb_device *udev, const struct dvb_usb_device_properties *props, int *cold) { int i, j; const struct dvb_usb_device_description *desc = NULL; *cold = -1; for (i = 0; i < props->num_device_descs; i++) { for (j = 0; j < DVB_USB_ID_MAX_NUM && props->devices[i].cold_ids[j] != NULL; j++) { deb_info("check for cold %x %x\n", props->devices[i].cold_ids[j]->idVendor, props->devices[i].cold_ids[j]->idProduct); if (props->devices[i].cold_ids[j]->idVendor == le16_to_cpu(udev->descriptor.idVendor) && props->devices[i].cold_ids[j]->idProduct == le16_to_cpu(udev->descriptor.idProduct)) { *cold = 1; desc = &props->devices[i]; break; } } if (desc != NULL) break; for (j = 0; j < DVB_USB_ID_MAX_NUM && props->devices[i].warm_ids[j] != NULL; j++) { deb_info("check for warm %x %x\n", props->devices[i].warm_ids[j]->idVendor, props->devices[i].warm_ids[j]->idProduct); if (props->devices[i].warm_ids[j]->idVendor == le16_to_cpu(udev->descriptor.idVendor) && props->devices[i].warm_ids[j]->idProduct == le16_to_cpu(udev->descriptor.idProduct)) { *cold = 0; desc = &props->devices[i]; break; } } } if (desc != NULL && props->identify_state != NULL) props->identify_state(udev, props, &desc, cold); return desc; } int dvb_usb_device_power_ctrl(struct dvb_usb_device *d, int onoff) { if (onoff) d->powered++; else d->powered--; if (d->powered == 0 || (onoff && d->powered == 1)) { /* when switching from 1 to 0 or from 0 to 1 */ deb_info("power control: %d\n", onoff); if (d->props.power_ctrl) return d->props.power_ctrl(d, onoff); } return 0; } /* * USB */ int dvb_usb_device_init(struct usb_interface *intf, const struct dvb_usb_device_properties *props, struct module *owner, struct dvb_usb_device **du, short *adapter_nums) { struct usb_device *udev = interface_to_usbdev(intf); struct dvb_usb_device *d = NULL; const struct dvb_usb_device_description *desc = NULL; int ret = -ENOMEM, cold = 0; if (du != NULL) *du = NULL; d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) { err("no memory for 'struct dvb_usb_device'"); return -ENOMEM; } memcpy(&d->props, props, sizeof(struct dvb_usb_device_properties)); desc = dvb_usb_find_device(udev, &d->props, &cold); if (!desc) { deb_err("something went very wrong, device was not found in current device list - let's see what comes next.\n"); ret = -ENODEV; goto error; } if (cold) { info("found a '%s' in cold state, will try to load a firmware", desc->name); ret = dvb_usb_download_firmware(udev, props); if (!props->no_reconnect || ret != 0) goto error; } info("found a '%s' in warm state.", desc->name); d->udev = udev; d->desc = desc; d->owner = owner; usb_set_intfdata(intf, d); ret = dvb_usb_init(d, adapter_nums); if (ret) { info("%s error while loading driver (%d)", desc->name, ret); goto error; } if (du) *du = d; info("%s successfully initialized and connected.", desc->name); return 0; error: usb_set_intfdata(intf, NULL); kfree(d); return ret; } EXPORT_SYMBOL(dvb_usb_device_init); void dvb_usb_device_exit(struct usb_interface *intf) { struct dvb_usb_device *d = 
usb_get_intfdata(intf); const char *default_name = "generic DVB-USB module"; char name[40]; usb_set_intfdata(intf, NULL); if (d != NULL && d->desc != NULL) { strscpy(name, d->desc->name, sizeof(name)); dvb_usb_exit(d); } else { strscpy(name, default_name, sizeof(name)); } info("%s successfully deinitialized and disconnected.", name); } EXPORT_SYMBOL(dvb_usb_device_exit); MODULE_VERSION("1.0"); MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>"); MODULE_DESCRIPTION("A library module containing commonly used USB and DVB functions for USB DVB devices"); MODULE_LICENSE("GPL");
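To show how the exported entry points above are typically consumed, here is a hedged sketch of a minimal driver probe/disconnect pair. my_properties and the my_* names are hypothetical; dvb_usb_device_init()/dvb_usb_device_exit() are the interfaces defined in this file, and DVB_DEFINE_MOD_OPT_ADAPTER_NR() is assumed to be the usual dvbdev helper for the adapter_nums module option:

#include "dvb-usb.h"

/* Module option letting users pin DVB adapter numbers. */
DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);

static int my_probe(struct usb_interface *intf,
		    const struct usb_device_id *id)
{
	/* Hands the interface to the dvb-usb core: firmware download for
	 * cold devices, then adapter/frontend/remote initialization. */
	return dvb_usb_device_init(intf, &my_properties, THIS_MODULE,
				   NULL, adapter_nr);
}

static void my_disconnect(struct usb_interface *intf)
{
	dvb_usb_device_exit(intf);
}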
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/cache.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/pid_namespace.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/completion.h> #include <linux/poll.h> #include <linux/printk.h> #include <linux/file.h> #include <linux/limits.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> #include <linux/bug.h> #include
"internal.h" static void proc_evict_inode(struct inode *inode) { struct ctl_table_header *head; struct proc_inode *ei = PROC_I(inode); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); /* Stop tracking associated processes */ if (ei->pid) proc_pid_evict_inode(ei); head = ei->sysctl; if (head) { WRITE_ONCE(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct kmem_cache *pde_opener_cache __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; ei = alloc_inode_sb(sb, proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; INIT_HLIST_NODE(&ei->sibling_inodes); ei->ns_ops = NULL; return &ei->vfs_inode; } static void proc_free_inode(struct inode *inode) { struct proc_inode *ei = PROC_I(inode); if (ei->pid) put_pid(ei->pid); /* Let go of any associated proc directory entry */ if (ei->pde) pde_put(ei->pde); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; inode_init_once(&ei->vfs_inode); } void __init proc_init_kmemcache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT| SLAB_PANIC), init_once); pde_opener_cache = kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, SLAB_ACCOUNT|SLAB_PANIC, NULL); proc_dir_entry_cache = kmem_cache_create_usercopy( "proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC, offsetof(struct proc_dir_entry, inline_name), SIZEOF_PDE_INLINE_NAME, NULL); BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE); } void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock) { struct hlist_node *node; struct super_block *old_sb = NULL; rcu_read_lock(); while ((node = hlist_first_rcu(inodes))) { struct proc_inode *ei = hlist_entry(node, struct proc_inode, sibling_inodes); struct super_block *sb; struct inode *inode; spin_lock(lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(lock); inode = &ei->vfs_inode; sb = inode->i_sb; if ((sb != old_sb) && !atomic_inc_not_zero(&sb->s_active)) continue; inode = igrab(inode); rcu_read_unlock(); if (sb != old_sb) { if (old_sb) deactivate_super(old_sb); old_sb = sb; } if (unlikely(!inode)) { rcu_read_lock(); continue; } if (S_ISDIR(inode->i_mode)) { struct dentry *dir = d_find_any_alias(inode); if (dir) { d_invalidate(dir); dput(dir); } } else { struct dentry *dentry; while ((dentry = d_find_alias(inode))) { d_invalidate(dentry); dput(dentry); } } iput(inode); rcu_read_lock(); } rcu_read_unlock(); if (old_sb) deactivate_super(old_sb); } static inline const char *hidepid2str(enum proc_hidepid v) { switch (v) { case HIDEPID_OFF: return "off"; case HIDEPID_NO_ACCESS: return "noaccess"; case HIDEPID_INVISIBLE: return "invisible"; case HIDEPID_NOT_PTRACEABLE: return "ptraceable"; } WARN_ONCE(1, "bad hide_pid value: %d\n", v); return "unknown"; } static int proc_show_options(struct seq_file *seq, struct dentry *root) { struct proc_fs_info *fs_info = proc_sb_info(root->d_sb); if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid)); if (fs_info->hide_pid != HIDEPID_OFF) seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid)); if (fs_info->pidonly != PROC_PIDONLY_OFF) seq_printf(seq, ",subset=pid"); return 0; } const struct 
super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .free_inode = proc_free_inode, .drop_inode = inode_just_drop, .evict_inode = proc_evict_inode, .statfs = simple_statfs, .show_options = proc_show_options, }; enum {BIAS = -1U<<31}; static inline int use_pde(struct proc_dir_entry *pde) { return likely(atomic_inc_unless_negative(&pde->in_use)); } static void unuse_pde(struct proc_dir_entry *pde) { if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) complete(pde->pde_unload_completion); } /* * At most 2 contexts can enter this function: the one doing the last * close on the descriptor and whoever is deleting PDE itself. * * First to enter calls ->proc_release hook and signals its completion * to the second one which waits and then does nothing. * * PDE is locked on entry, unlocked on exit. */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) __releases(&pde->pde_unload_lock) { /* * close() (proc_reg_release()) can't delete an entry and proceed: * ->release hook needs to be available at the right moment. * * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: * "struct file" needs to be available at the right moment. */ if (pdeo->closing) { /* somebody else is doing that, just wait */ DECLARE_COMPLETION_ONSTACK(c); pdeo->c = &c; spin_unlock(&pde->pde_unload_lock); wait_for_completion(&c); } else { struct file *file; struct completion *c; pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; pde->proc_ops->proc_release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); /* Strictly after ->proc_release, see above. */ list_del(&pdeo->lh); c = pdeo->c; spin_unlock(&pde->pde_unload_lock); if (unlikely(c)) complete(c); kmem_cache_free(pde_opener_cache, pdeo); } } void proc_entry_rundown(struct proc_dir_entry *de) { DECLARE_COMPLETION_ONSTACK(c); /* Wait until all existing callers into module are done. */ de->pde_unload_completion = &c; if (atomic_add_return(BIAS, &de->in_use) != BIAS) wait_for_completion(&c); /* ->pde_openers list can't grow from now on. 
*/ spin_lock(&de->pde_unload_lock); while (!list_empty(&de->pde_openers)) { struct pde_opener *pdeo; pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); close_pdeo(de, pdeo); spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); } static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) { struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; if (pde_is_permanent(pde)) { return pde->proc_ops->proc_lseek(file, offset, whence); } else if (use_pde(pde)) { rv = pde->proc_ops->proc_lseek(file, offset, whence); unuse_pde(pde); } return rv; } static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp)); ssize_t ret; if (pde_is_permanent(pde)) return pde->proc_ops->proc_read_iter(iocb, iter); if (!use_pde(pde)) return -EIO; ret = pde->proc_ops->proc_read_iter(iocb, iter); unuse_pde(pde); return ret; } static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) { const auto read = pde->proc_ops->proc_read; if (read) return read(file, buf, count, ppos); return -EIO; } static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (pde_is_permanent(pde)) { return pde_read(pde, file, buf, count, ppos); } else if (use_pde(pde)) { rv = pde_read(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos) { const auto write = pde->proc_ops->proc_write; if (write) return write(file, buf, count, ppos); return -EIO; } static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (pde_is_permanent(pde)) { return pde_write(pde, file, buf, count, ppos); } else if (use_pde(pde)) { rv = pde_write(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts) { const auto poll = pde->proc_ops->proc_poll; if (poll) return poll(file, pts); return DEFAULT_POLLMASK; } static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file_inode(file)); __poll_t rv = DEFAULT_POLLMASK; if (pde_is_permanent(pde)) { return pde_poll(pde, file, pts); } else if (use_pde(pde)) { rv = pde_poll(pde, file, pts); unuse_pde(pde); } return rv; } static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { const auto ioctl = pde->proc_ops->proc_ioctl; if (ioctl) return ioctl(file, cmd, arg); return -ENOTTY; } static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (pde_is_permanent(pde)) { return pde_ioctl(pde, file, cmd, arg); } else if (use_pde(pde)) { rv = pde_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #ifdef CONFIG_COMPAT static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { const auto compat_ioctl = pde->proc_ops->proc_compat_ioctl; if (compat_ioctl) return compat_ioctl(file, cmd, arg); return -ENOTTY; } static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct 
proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (pde_is_permanent(pde)) { return pde_compat_ioctl(pde, file, cmd, arg); } else if (use_pde(pde)) { rv = pde_compat_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #endif static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma) { const auto mmap = pde->proc_ops->proc_mmap; if (mmap) return mmap(file, vma); return -EIO; } static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; if (pde_is_permanent(pde)) { return pde_mmap(pde, file, vma); } else if (use_pde(pde)) { rv = pde_mmap(pde, file, vma); unuse_pde(pde); } return rv; } static unsigned long pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { if (pde->proc_ops->proc_get_unmapped_area) return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags); #ifdef CONFIG_MMU return mm_get_unmapped_area(file, orig_addr, len, pgoff, flags); #endif return orig_addr; } static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned long rv = -EIO; if (pde_is_permanent(pde)) { return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); } else if (use_pde(pde)) { rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; } static int proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; typeof_member(struct proc_ops, proc_open) open; struct pde_opener *pdeo; if (!pde_has_proc_lseek(pde)) file->f_mode &= ~FMODE_LSEEK; if (pde_is_permanent(pde)) { open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); return rv; } /* * Ensure that * 1) PDE's ->release hook will be called no matter what * either normally by close()/->release, or forcefully by * rmmod/remove_proc_entry. * * 2) rmmod isn't blocked by opening file in /proc and sitting on * the descriptor (including "rmmod foo </proc/foo" scenario). * * Save every "struct file" with custom ->release hook. */ if (!use_pde(pde)) return -ENOENT; const auto release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { rv = -ENOMEM; goto out_unuse; } } open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); if (release) { if (rv == 0) { /* To know what to release. 
*/ pdeo->file = file; pdeo->closing = false; pdeo->c = NULL; spin_lock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers); spin_unlock(&pde->pde_unload_lock); } else kmem_cache_free(pde_opener_cache, pdeo); } out_unuse: unuse_pde(pde); return rv; } static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); struct pde_opener *pdeo; if (pde_is_permanent(pde)) { const auto release = pde->proc_ops->proc_release; if (release) return release(inode, file); return 0; } spin_lock(&pde->pde_unload_lock); list_for_each_entry(pdeo, &pde->pde_openers, lh) { if (pdeo->file == file) { close_pdeo(pde, pdeo); return 0; } } spin_unlock(&pde->pde_unload_lock); return 0; } static const struct file_operations proc_reg_file_ops = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; static const struct file_operations proc_iter_file_ops = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .write = proc_reg_write, .splice_read = copy_splice_read, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; #ifdef CONFIG_COMPAT static const struct file_operations proc_reg_file_ops_compat = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; static const struct file_operations proc_iter_file_ops_compat = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .splice_read = copy_splice_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; #endif static void proc_put_link(void *p) { unuse_pde(p); } static const char *proc_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct proc_dir_entry *pde = PDE(inode); if (!use_pde(pde)) return ERR_PTR(-EINVAL); set_delayed_call(done, proc_put_link, pde); return pde->data; } const struct inode_operations proc_link_inode_operations = { .get_link = proc_get_link, }; struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { struct inode *inode = new_inode(sb); if (!inode) { pde_put(de); return NULL; } inode->i_private = de->data; inode->i_ino = de->low_ino; simple_inode_init_ts(inode); PROC_I(inode)->pde = de; if (is_empty_pde(de)) { make_empty_dir_inode(inode); return inode; } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; inode->i_gid = de->gid; } if (de->size) inode->i_size = de->size; if (de->nlink) set_nlink(inode, de->nlink); if (S_ISREG(inode->i_mode)) { inode->i_op = de->proc_iops; if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops; else inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT if (pde_has_proc_compat_ioctl(de)) { if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops_compat; else inode->i_fop = &proc_reg_file_ops_compat; } #endif } else if (S_ISDIR(inode->i_mode)) { 
inode->i_op = de->proc_iops; inode->i_fop = de->proc_dir_ops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = de->proc_iops; inode->i_fop = NULL; } else { BUG(); } return inode; }
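As a concrete illustration of the machinery above, here is a minimal sketch (not part of this file; the "demo" names are hypothetical) of a module-created /proc entry. Because a module entry is not marked permanent, every operation on it is dispatched through the proc_reg_* wrappers under use_pde()/unuse_pde(), and its ->proc_release hook is what proc_reg_open() records in pde_openers so that proc_entry_rundown() can force releases on rmmod.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from /proc/demo\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct proc_ops demo_proc_ops = {
	.proc_open	= demo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,	/* tracked via pde_openers */
};

static struct proc_dir_entry *demo_pde;

static int __init demo_init(void)
{
	demo_pde = proc_create("demo", 0444, NULL, &demo_proc_ops);
	return demo_pde ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	/*
	 * proc_entry_rundown() above is what makes this safe even if a
	 * reader still has the file open ("rmmod foo < /proc/demo").
	 */
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");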
// SPDX-License-Identifier: GPL-2.0 #include <linux/proc_fs.h> #include <linux/ethtool.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/bonding.h> #include "bonding_priv.h" static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; loff_t off = 0; rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; bond_for_each_slave_rcu(bond, slave, iter) if (++off == *pos) return slave; return NULL; } static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; bool found = false; ++*pos; if (v == SEQ_START_TOKEN) return bond_first_slave_rcu(bond); bond_for_each_slave_rcu(bond, slave, iter) { if (found) return slave; if (slave == v) found = true; } return NULL; } static void bond_info_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void bond_info_show_master(struct seq_file *seq) { struct bonding *bond = pde_data(file_inode(seq->file)); const struct bond_opt_value *optval; struct slave *curr, *primary; int i; curr = rcu_dereference(bond->curr_active_slave); seq_printf(seq, "Bonding Mode: %s", bond_mode_name(BOND_MODE(bond))); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac) { optval = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, bond->params.fail_over_mac); seq_printf(seq, " (fail_over_mac %s)", optval->string); } seq_printf(seq, "\n"); if (bond_mode_uses_xmit_hash(bond)) { optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", optval->string, bond->params.xmit_policy); } if (bond_uses_primary(bond)) { primary = rcu_dereference(bond->primary_slave); seq_printf(seq, "Primary Slave: %s", primary ? primary->dev->name : "None"); if (primary) { optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, bond->params.primary_reselect); seq_printf(seq, " (primary_reselect %s)", optval->string); } seq_printf(seq, "\nCurrently Active Slave: %s\n", (curr) ? curr->dev->name : "None"); } seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
"up" : "down"); seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); seq_printf(seq, "Up Delay (ms): %d\n", bond->params.updelay * bond->params.miimon); seq_printf(seq, "Down Delay (ms): %d\n", bond->params.downdelay * bond->params.miimon); seq_printf(seq, "Peer Notification Delay (ms): %d\n", bond->params.peer_notif_delay * bond->params.miimon); /* ARP information */ if (bond->params.arp_interval > 0) { int printed = 0; seq_printf(seq, "ARP Polling Interval (ms): %d\n", bond->params.arp_interval); seq_printf(seq, "ARP Missed Max: %u\n", bond->params.missed_max); seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { if (!bond->params.arp_targets[i]) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); printed = 1; } seq_printf(seq, "\n"); #if IS_ENABLED(CONFIG_IPV6) printed = 0; seq_printf(seq, "NS IPv6 target/s (xx::xx form):"); for (i = 0; (i < BOND_MAX_NS_TARGETS); i++) { if (ipv6_addr_any(&bond->params.ns_targets[i])) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI6c", &bond->params.ns_targets[i]); printed = 1; } seq_printf(seq, "\n"); #endif } if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); seq_printf(seq, "LACP active: %s\n", (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); optval = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); if (capable(CAP_NET_ADMIN)) { seq_printf(seq, "System priority: %d\n", BOND_AD_INFO(bond).system.sys_priority); seq_printf(seq, "System MAC address: %pM\n", &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", bond->dev->name); } else { seq_printf(seq, "Active Aggregator Info:\n"); seq_printf(seq, "\tAggregator ID: %d\n", ad_info.aggregator_id); seq_printf(seq, "\tNumber of ports: %d\n", ad_info.ports); seq_printf(seq, "\tActor Key: %d\n", ad_info.actor_key); seq_printf(seq, "\tPartner Key: %d\n", ad_info.partner_key); seq_printf(seq, "\tPartner Mac Address: %pM\n", ad_info.partner_system); } } } } static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { struct bonding *bond = pde_data(file_inode(seq->file)); seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); if (slave->speed == SPEED_UNKNOWN) seq_printf(seq, "Speed: %s\n", "Unknown"); else seq_printf(seq, "Speed: %d Mbps\n", slave->speed); if (slave->duplex == DUPLEX_UNKNOWN) seq_printf(seq, "Duplex: %s\n", "Unknown"); else seq_printf(seq, "Duplex: %s\n", slave->duplex ? 
"full" : "half"); seq_printf(seq, "Link Failure Count: %u\n", slave->link_failure_count); seq_printf(seq, "Permanent HW addr: %*phC\n", slave->dev->addr_len, slave->perm_hwaddr); seq_printf(seq, "Slave queue ID: %d\n", READ_ONCE(slave->queue_id)); if (BOND_MODE(bond) == BOND_MODE_8023AD) { const struct port *port = &SLAVE_AD_INFO(slave)->port; const struct aggregator *agg = port->aggregator; if (agg) { seq_printf(seq, "Aggregator ID: %d\n", agg->aggregator_identifier); seq_printf(seq, "Actor Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_actor_state)); seq_printf(seq, "Partner Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_partner_state)); seq_printf(seq, "Actor Churned Count: %d\n", port->churn_actor_count); seq_printf(seq, "Partner Churned Count: %d\n", port->churn_partner_count); if (capable(CAP_NET_ADMIN)) { seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); seq_printf(seq, " system mac address: %pM\n", &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", port->actor_port_priority); seq_printf(seq, " port number: %d\n", port->actor_port_number); seq_printf(seq, " port state: %d\n", port->actor_oper_port_state); seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); seq_printf(seq, " system mac address: %pM\n", &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", port->partner_oper.port_priority); seq_printf(seq, " port number: %d\n", port->partner_oper.port_number); seq_printf(seq, " port state: %d\n", port->partner_oper.port_state); } } else { seq_puts(seq, "Aggregator ID: N/A\n"); } } } static int bond_info_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_printf(seq, "%s\n", bond_version); bond_info_show_master(seq); } else bond_info_show_slave(seq, v); return 0; } static const struct seq_operations bond_info_seq_ops = { .start = bond_info_seq_start, .next = bond_info_seq_next, .stop = bond_info_seq_stop, .show = bond_info_seq_show, }; void bond_create_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir) { bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444, bn->proc_dir, &bond_info_seq_ops, bond); if (bond->proc_entry == NULL) netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n", DRV_NAME, bond_dev->name); else memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } } void bond_remove_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir && bond->proc_entry) { remove_proc_entry(bond->proc_file_name, bn->proc_dir); memset(bond->proc_file_name, 0, IFNAMSIZ); bond->proc_entry = NULL; } } /* Create the bonding directory under /proc/net, if doesn't exist yet. * Caller must hold rtnl_lock. */ void __net_init bond_create_proc_dir(struct bond_net *bn) { if (!bn->proc_dir) { bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); if (!bn->proc_dir) pr_warn("Warning: Cannot create /proc/net/%s\n", DRV_NAME); } } /* Destroy the bonding directory under /proc/net, if empty. */ void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { if (bn->proc_dir) { remove_proc_entry(DRV_NAME, bn->net->proc_net); bn->proc_dir = NULL; } }
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_LOG_PRIV_H__ #define __XFS_LOG_PRIV_H__ #include "xfs_extent_busy.h" /* for struct xfs_busy_extents */ struct xfs_buf; struct xlog; struct xlog_ticket; struct xfs_mount; /* * get client id from packed copy. * * this hack is here because the xlog_pack code copies four bytes * of xlog_op_header containing the fields oh_clientid, oh_flags * and oh_res2 into the packed copy. * * later on this four byte chunk is treated as an int and the * client id is pulled out. * * this has endian issues, of course.
*/ static inline uint xlog_get_client_id(__be32 i) { return be32_to_cpu(i) >> 24; } /* * In core log state */ enum xlog_iclog_state { XLOG_STATE_ACTIVE, /* Current IC log being written to */ XLOG_STATE_WANT_SYNC, /* Want to sync this iclog; no more writes */ XLOG_STATE_SYNCING, /* This IC log is syncing */ XLOG_STATE_DONE_SYNC, /* Done syncing to disk */ XLOG_STATE_CALLBACK, /* Callback functions now */ XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */ }; #define XLOG_STATE_STRINGS \ { XLOG_STATE_ACTIVE, "XLOG_STATE_ACTIVE" }, \ { XLOG_STATE_WANT_SYNC, "XLOG_STATE_WANT_SYNC" }, \ { XLOG_STATE_SYNCING, "XLOG_STATE_SYNCING" }, \ { XLOG_STATE_DONE_SYNC, "XLOG_STATE_DONE_SYNC" }, \ { XLOG_STATE_CALLBACK, "XLOG_STATE_CALLBACK" }, \ { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" } /* * In core log flags */ #define XLOG_ICL_NEED_FLUSH (1u << 0) /* iclog needs REQ_PREFLUSH */ #define XLOG_ICL_NEED_FUA (1u << 1) /* iclog needs REQ_FUA */ #define XLOG_ICL_STRINGS \ { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } /* * Log ticket flags */ #define XLOG_TIC_PERM_RESERV (1u << 0) /* permanent reservation */ #define XLOG_TIC_FLAGS \ { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } /* * Below are states for covering allocation transactions. * By covering, we mean changing the h_tail_lsn in the last on-disk * log write such that no allocation transactions will be re-done during * recovery after a system crash. Recovery starts at the last on-disk * log write. * * These states are used to insert dummy log entries to cover * space allocation transactions which can undo non-transactional changes * after a crash. Writes to a file with space * already allocated do not result in any transactions. Allocations * might include space beyond the EOF. So if we just push the EOF a * little, the last transaction for the file could contain the wrong * size. If there is no file system activity, after an allocation * transaction, and the system crashes, the allocation transaction * will get replayed and the file will be truncated. This could * be hours/days/... after the allocation occurred. * * The fix for this is to do two dummy transactions when the * system is idle. We need two dummy transactions because the h_tail_lsn * in the log record header needs to point beyond the last possible * non-dummy transaction. The first dummy changes the h_tail_lsn to * the first transaction before the dummy. The second dummy causes * h_tail_lsn to point to the first dummy. Recovery starts at h_tail_lsn. * * These dummy transactions get committed when everything * is idle (after there has been some activity). * * There are 5 states used to control this. * * IDLE -- no logging has been done on the file system or * we are done covering previous transactions. * NEED -- logging has occurred and we need a dummy transaction * when the log becomes idle. * DONE -- we were in the NEED state and have committed a dummy * transaction. * NEED2 -- we detected that a dummy transaction has gone to the * on disk log with no other transactions. * DONE2 -- we committed a dummy transaction when in the NEED2 state. * * There are two places where we switch states: * * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2. * We commit the dummy transaction and switch to DONE or DONE2, * respectively. In all other states, we don't do anything. * * 2.) When we finish writing the on-disk log (xlog_state_clean_log).
* * No matter what state we are in, if this isn't the dummy * transaction going out, the next state is NEED. * So, if we aren't in the DONE or DONE2 states, the next state * is NEED. We can't be finishing a write of the dummy record * unless it was committed and the state switched to DONE or DONE2. * * If we are in the DONE state and this was a write of the * dummy transaction, we move to NEED2. * * If we are in the DONE2 state and this was a write of the * dummy transaction, we move to IDLE. * * * If only one dummy transaction is written, it can get appended to * a file space allocation. When this happens, the log recovery * code replays the space allocation and a file could be truncated. * This is why we have the NEED2 and DONE2 states before going idle. */ #define XLOG_STATE_COVER_IDLE 0 #define XLOG_STATE_COVER_NEED 1 #define XLOG_STATE_COVER_DONE 2 #define XLOG_STATE_COVER_NEED2 3 #define XLOG_STATE_COVER_DONE2 4 #define XLOG_COVER_OPS 5 struct xlog_ticket { struct list_head t_queue; /* reserve/write queue */ struct task_struct *t_task; /* task that owns this ticket */ xlog_tid_t t_tid; /* transaction identifier */ atomic_t t_ref; /* ticket reference count */ int t_curr_res; /* current reservation */ int t_unit_res; /* unit reservation */ char t_ocnt; /* original unit count */ char t_cnt; /* current unit count */ uint8_t t_flags; /* properties of reservation */ int t_iclog_hdrs; /* iclog hdrs in t_curr_res */ }; /* * In-core log structure. * * - ic_force_wait is used to implement synchronous forcing of the iclog to disk. * - ic_next is the pointer to the next iclog in the ring. * - ic_log is a pointer back to the global log structure. * - ic_size is the full size of the log buffer, minus the cycle headers. * - ic_offset is the current number of bytes written to in this iclog. * - ic_refcnt is bumped when someone is writing to the log. * - ic_state is the state of the iclog. * * Because of cacheline contention on large machines, we need to separate * various resources onto different cachelines. To start with, make the * structure cacheline aligned. The following fields can be contended on * by independent processes: * * - ic_callbacks * - ic_refcnt * - fields protected by the global l_icloglock * * so we need to ensure that these fields are located in separate cachelines. * We'll put all the read-only and l_icloglock fields in the first cacheline, * and move everything else out to subsequent cachelines. */ struct xlog_in_core { wait_queue_head_t ic_force_wait; wait_queue_head_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xlog *ic_log; u32 ic_size; u32 ic_offset; enum xlog_iclog_state ic_state; unsigned int ic_flags; void *ic_datap; /* pointer to iclog data */ struct list_head ic_callbacks; /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; struct xlog_rec_header *ic_header; #ifdef DEBUG bool ic_fail_crc : 1; #endif struct semaphore ic_sema; struct work_struct ic_end_io_work; struct bio ic_bio; struct bio_vec ic_bvec[]; }; /* * The CIL context is used to aggregate per-transaction details as well as be * passed to the iclog for checkpoint post-commit processing. After being * passed to the iclog, another context needs to be allocated for tracking the * next set of transactions to be aggregated into a checkpoint.
*/ struct xfs_cil; struct xfs_cil_ctx { struct xfs_cil *cil; xfs_csn_t sequence; /* chkpt sequence # */ xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ atomic_t space_used; /* aggregate size of regions */ struct xfs_busy_extents busy_extents; struct list_head log_items; /* log items in chkpt */ struct list_head lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct push_work; atomic_t order_id; /* * CPUs that could have added items to the percpu CIL data. Access is * coordinated with xc_ctx_lock. */ struct cpumask cil_pcpmask; }; /* * Per-cpu CIL tracking items */ struct xlog_cil_pcp { int32_t space_used; uint32_t space_reserved; struct list_head busy_extents; struct list_head log_items; }; /* * Committed Item List structure * * This structure is used to track log items that have been committed but not * yet written into the log. It is used only when the delayed logging mount * option is enabled. * * This structure tracks the list of committing checkpoint contexts so * we can avoid the problem of having to hold out new transactions during a * flush until we have the commit record LSN of the checkpoint. We can * traverse the list of committing contexts in xlog_cil_push_lsn() to find a * sequence match and extract the commit LSN directly from there. If the * checkpoint is still in the process of committing, we can block waiting for * the commit LSN to be determined as well. This should make synchronous * operations almost as efficient as the old logging methods. */ struct xfs_cil { struct xlog *xc_log; unsigned long xc_flags; atomic_t xc_iclog_hdrs; struct workqueue_struct *xc_push_wq; struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp; struct xfs_cil_ctx *xc_ctx; spinlock_t xc_push_lock ____cacheline_aligned_in_smp; xfs_csn_t xc_push_seq; bool xc_push_commit_stable; struct list_head xc_committing; wait_queue_head_t xc_commit_wait; wait_queue_head_t xc_start_wait; xfs_csn_t xc_current_sequence; wait_queue_head_t xc_push_wait; /* background push throttle */ void __percpu *xc_pcp; /* percpu CIL structures */ } ____cacheline_aligned_in_smp; /* xc_flags bit values */ #define XLOG_CIL_EMPTY 1 #define XLOG_CIL_PCP_SPACE 2 /* * The amount of log space we allow the CIL to aggregate is difficult to size. * Whatever we choose, we have to make sure we can get a reservation for the * log space effectively, that it is large enough to capture sufficient * relogging to reduce log buffer IO significantly, but it is not too large for * the log or induces too much latency when writing out through the iclogs. We * track both space consumed and the number of vectors in the checkpoint * context, so we need to decide which to use for limiting. * * Every log buffer we write out during a push needs a header reserved, which * is at least one sector and more for v2 logs. Hence we need a reservation of * at least 512 bytes per 32k of log space just for the LR headers. That means * 16KB of reservation per megabyte of delayed logging space we will consume, * plus various headers. The number of headers will vary based on the number of * io vectors, so limiting on a specific number of vectors is going to result * in transactions of varying size.
IOWs, it is more consistent to track and * limit space consumed in the log rather than by the number of objects being * logged in order to prevent checkpoint ticket overruns. * * Further, use of static reservations through the log grant mechanism is * problematic. It introduces a lot of complexity (e.g. reserve grant vs write * grant) and a significant deadlock potential because regranting write space * can block on log pushes. Hence if we have to regrant log space during a log * push, we can deadlock. * * However, we can avoid this by use of a dynamic "reservation stealing" * technique during transaction commit whereby unused reservation space in the * transaction ticket is transferred to the CIL ctx commit ticket to cover the * space needed by the checkpoint transaction. This means that we never need to * specifically reserve space for the CIL checkpoint transaction, nor do we * need to regrant space once the checkpoint completes. This also means the * checkpoint transaction ticket is specific to the checkpoint context, rather * than the CIL itself. * * With dynamic reservations, we can effectively make up arbitrary limits for * the checkpoint size so long as they don't violate any other size rules. * Recovery imposes a rule that no transaction exceed half the log, so we are * limited by that. Furthermore, the log transaction reservation subsystem * tries to keep 25% of the log free, so we need to keep below that limit or we * risk running out of free log space to start any new transactions. * * In order to keep background CIL push efficient, we only need to ensure the * CIL is large enough to maintain sufficient in-memory relogging to avoid * repeated physical writes of frequently modified metadata. If we allow the CIL * to grow to a substantial fraction of the log, then we may be pinning hundreds * of megabytes of metadata in memory until the CIL flushes. This can cause * issues when we are running low on memory - pinned memory cannot be reclaimed, * and the CIL consumes a lot of memory. Hence we need to set an upper physical * size limit for the CIL that limits the maximum amount of memory pinned by the * CIL but does not limit performance by reducing relogging efficiency * significantly. * * As such, the CIL push threshold ends up being the smaller of two thresholds: * - a threshold large enough that it allows CIL to be pushed and progress to be * made without excessive blocking of incoming transaction commits. This is * defined to be 12.5% of the log space - half the 25% push threshold of the * AIL. * - small enough that it doesn't pin excessive amounts of memory but maintains * close to peak relogging efficiency. This is defined to be 16x the iclog * buffer window (32MB) as measurements have shown this to be roughly the * point of diminishing performance increases under highly concurrent * modification workloads. * * To prevent the CIL from overflowing upper commit size bounds, we introduce a * new threshold at which we block committing transactions until the background * CIL commit commences and switches to a new context. While this is not a hard * limit, it forces the process committing a transaction to the CIL to block and * yield the CPU, giving the CIL push work a chance to be scheduled and start * work. This prevents a process running lots of transactions from overfilling * the CIL because it is not yielding the CPU. We set the blocking limit at * twice the background push space threshold so we keep in line with the AIL * push thresholds.
* * Note: this is not a -hard- limit as blocking is applied after the transaction * is inserted into the CIL and the push has been triggered. It is largely a * throttling mechanism that allows the CIL push to be scheduled and run. A hard * limit will be difficult to implement without introducing global serialisation * in the CIL commit fast path, and it's not at all clear that we actually need * such hard limits given the ~7 years we've run without a hard limit before * finding the first situation where a checkpoint size overflow actually * occurred. Hence the simple throttle, and an ASSERT check to tell us that * we've overrun the max size. */ #define XLOG_CIL_SPACE_LIMIT(log) \ min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) #define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \ (XLOG_CIL_SPACE_LIMIT(log) * 2) /* * ticket grant locks, queues and accounting have their own cachelines * as these are quite hot and can be operated on concurrently. */ struct xlog_grant_head { spinlock_t lock ____cacheline_aligned_in_smp; struct list_head waiters; atomic64_t grant; }; /* * The reservation head lsn is not made up of a cycle number and block number. * Instead, it uses a cycle number and byte number. Logs don't expect to * overflow 31 bits worth of byte offset, so using a byte number will mean * that round off problems won't occur when releasing partial reservations. */ struct xlog { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ struct xfs_cil *l_cilp; /* CIL log is working with */ struct xfs_buftarg *l_targ; /* buftarg of log */ struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ struct delayed_work l_work; /* background flush work */ long l_opstate; /* operational state */ uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ struct list_head *l_buf_cancel_table; struct list_head r_dfops; /* recovered log intent items */ int l_iclog_hsize; /* size of iclog header */ uint l_sectBBsize; /* sector size in BBs (2^n) */ int l_iclog_size; /* size of iclog in bytes */ int l_iclog_bufs; /* number of iclog buffers */ xfs_daddr_t l_logBBstart; /* start block of log */ int l_logsize; /* size of log in bytes */ int l_logBBsize; /* size of log in BB chunks */ /* The following block of fields are changed while holding icloglock */ wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; /* waiting for iclog flush */ int l_covered_state;/* state of "covering disk * log entries" */ struct xlog_in_core *l_iclog; /* head log queue */ spinlock_t l_icloglock; /* grab to change iclog state */ int l_curr_cycle; /* Cycle number of log writes */ int l_prev_cycle; /* Cycle number before last * block increment */ int l_curr_block; /* current logical log block */ int l_prev_block; /* previous logical log block */ /* * l_tail_lsn is atomic so it can be set and read without needing to * hold specific locks. To avoid operations contending with other hot * objects, it is kept on a separate cacheline.
*/ /* lsn of 1st LR with unflushed * buffers */ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; struct xlog_grant_head l_reserve_head; struct xlog_grant_head l_write_head; uint64_t l_tail_space; struct xfs_kobj l_kobj; /* log recovery lsn tracking (for buffer submission) */ xfs_lsn_t l_recovery_lsn; uint32_t l_iclog_roundoff;/* padding roundoff */ }; /* * Bits for operational state */ #define XLOG_ACTIVE_RECOVERY 0 /* in the middle of recovery */ #define XLOG_RECOVERY_NEEDED 1 /* log was recovered */ #define XLOG_IO_ERROR 2 /* log hit an I/O error, and being shutdown */ #define XLOG_TAIL_WARN 3 /* log tail verify warning issued */ #define XLOG_SHUTDOWN_STARTED 4 /* xlog_force_shutdown() exclusion */ static inline bool xlog_recovery_needed(struct xlog *log) { return test_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); } static inline bool xlog_in_recovery(struct xlog *log) { return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); } static inline bool xlog_is_shutdown(struct xlog *log) { return test_bit(XLOG_IO_ERROR, &log->l_opstate); } /* * Wait until xlog_force_shutdown() has marked the log as shut down * so xlog_is_shutdown() will always return true. */ static inline void xlog_shutdown_wait( struct xlog *log) { wait_var_event(&log->l_opstate, xlog_is_shutdown(log)); } /* common routines */ extern int xlog_recover( struct xlog *log); extern int xlog_recover_finish( struct xlog *log); extern void xlog_recover_cancel(struct xlog *); __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, unsigned int hdrsize, unsigned int size); extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, int count, bool permanent); void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct list_head *lv_chain, struct xlog_ticket *tic, uint32_t len); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, int eventual_size); int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, struct xlog_ticket *ticket); /* * When we crack an atomic LSN, we sample it first so that the value will not * change while we are cracking it into the component values. This means we * will always get consistent component values to work from. This should always * be used to sample and crack LSNs that are stored and updated in atomic * variables. */ static inline void xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) { xfs_lsn_t val = atomic64_read(lsn); *cycle = CYCLE_LSN(val); *block = BLOCK_LSN(val); } /* * Calculate and assign a value to an atomic LSN variable from component pieces.
*/ static inline void xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) { atomic64_set(lsn, xlog_assign_lsn(cycle, block)); } /* * Committed Item List interfaces */ int xlog_cil_init(struct xlog *log); void xlog_cil_init_post_recovery(struct xlog *log); void xlog_cil_destroy(struct xlog *log); bool xlog_cil_empty(struct xlog *log); void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, xfs_csn_t *commit_seq, bool regrant); void xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx, struct xlog_in_core *iclog); /* * CIL force routines */ void xlog_cil_flush(struct xlog *log); xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); static inline void xlog_cil_force(struct xlog *log) { xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence); } /* * Wrapper function for waiting on a wait queue serialised against wakeups * by a spinlock. This matches the semantics of all the wait queues used in the * log code. */ static inline void xlog_wait( struct wait_queue_head *wq, struct spinlock *lock) __releases(lock) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(wq, &wait); __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(lock); schedule(); remove_wait_queue(wq, &wait); } int xlog_wait_on_iclog(struct xlog_in_core *iclog) __releases(iclog->ic_log->l_icloglock); /* Calculate the distance between two LSNs in bytes */ static inline uint64_t xlog_lsn_sub( struct xlog *log, xfs_lsn_t high, xfs_lsn_t low) { uint32_t hi_cycle = CYCLE_LSN(high); uint32_t hi_block = BLOCK_LSN(high); uint32_t lo_cycle = CYCLE_LSN(low); uint32_t lo_block = BLOCK_LSN(low); if (hi_cycle == lo_cycle) return BBTOB(hi_block - lo_block); ASSERT((hi_cycle == lo_cycle + 1) || xlog_is_shutdown(log)); return (uint64_t)log->l_logsize - BBTOB(lo_block - hi_block); } void xlog_grant_return_space(struct xlog *log, xfs_lsn_t old_head, xfs_lsn_t new_head); /* * The LSN is valid so long as it is behind the current LSN. If it isn't, this * means that the next log record that includes this metadata could have a * smaller LSN. In turn, this means that the modification in the log would not * replay. */ static inline bool xlog_valid_lsn( struct xlog *log, xfs_lsn_t lsn) { int cur_cycle; int cur_block; bool valid = true; /* * First, sample the current lsn without locking to avoid added * contention from metadata I/O. The current cycle and block are updated * (in xlog_state_switch_iclogs()) and read here in a particular order * to avoid false negatives (e.g., thinking the metadata LSN is valid * when it is not). * * The current block is always rewound before the cycle is bumped in * xlog_state_switch_iclogs() to ensure the current LSN is never seen in * a transiently forward state. Instead, we can see the LSN in a * transiently behind state if we happen to race with a cycle wrap. */ cur_cycle = READ_ONCE(log->l_curr_cycle); smp_rmb(); cur_block = READ_ONCE(log->l_curr_block); if ((CYCLE_LSN(lsn) > cur_cycle) || (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { /* * If the metadata LSN appears invalid, it's possible the check * above raced with a wrap to the next log cycle. Grab the lock * to check for sure. */ spin_lock(&log->l_icloglock); cur_cycle = log->l_curr_cycle; cur_block = log->l_curr_block; spin_unlock(&log->l_icloglock); if ((CYCLE_LSN(lsn) > cur_cycle) || (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) valid = false; } return valid; } /* * Log vector and shadow buffers can be large, so we need to use kvmalloc() here * to ensure success. 
Unfortunately, kvmalloc() only allows GFP_KERNEL contexts * to fall back to vmalloc, so we can't actually do anything useful with gfp * flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() * will do direct reclaim and compaction in the slow path, both of which are * horrendously expensive. We just want kmalloc to fail fast and fall back to * vmalloc if it can't get something straight away from the free lists or * buddy allocator. Hence we have to open code kvmalloc ourselves here. * * This assumes that the caller uses memalloc_nofs_save task context here, so * despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS * allocations. This is actually the only way to make vmalloc() do GFP_NOFS * allocations, so let's just all pretend this is a GFP_KERNEL context * operation.... */ static inline void * xlog_kvmalloc( size_t buf_size) { gfp_t flags = GFP_KERNEL; void *p; flags &= ~__GFP_DIRECT_RECLAIM; flags |= __GFP_NOWARN | __GFP_NORETRY; do { p = kmalloc(buf_size, flags); if (!p) p = vmalloc(buf_size); } while (!p); return p; } /* * Given a count of iovecs and space for a log item, compute the space we need * in the log to store that data plus the log headers. */ static inline unsigned int xlog_item_space( unsigned int niovecs, unsigned int nbytes) { nbytes += niovecs * (sizeof(uint64_t) + sizeof(struct xlog_op_header)); return round_up(nbytes, sizeof(uint64_t)); } /* * Cycles over XLOG_CYCLE_DATA_SIZE overflow into the extended header that was * added for v2 logs. Addressing for the cycles array there is off by one, * because the first batch of cycles is in the original header. */ static inline __be32 *xlog_cycle_data(struct xlog_rec_header *rhead, unsigned i) { if (i >= XLOG_CYCLE_DATA_SIZE) { unsigned j = i / XLOG_CYCLE_DATA_SIZE; unsigned k = i % XLOG_CYCLE_DATA_SIZE; return &rhead->h_ext[j - 1].xh_cycle_data[k]; } return &rhead->h_cycle_data[i]; } #endif /* __XFS_LOG_PRIV_H__ */
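To make the LSN helpers above concrete, here is a small worked sketch (illustrative only; demo_lsn_math() is not part of the header). It assumes the packing that xlog_assign_lsn(), CYCLE_LSN() and BLOCK_LSN() implement: the cycle number in the high 32 bits, the basic-block number in the low 32 bits, with BBTOB() converting 512-byte basic blocks to bytes.

static void demo_lsn_math(struct xlog *log)
{
	/* cycle 7, block 16 -> numerically (7ULL << 32) | 16 */
	xfs_lsn_t high = xlog_assign_lsn(7, 16);
	xfs_lsn_t low = xlog_assign_lsn(7, 4);

	/* Same cycle: the distance is just the block delta in bytes. */
	ASSERT(xlog_lsn_sub(log, high, low) == BBTOB(12)); /* 12 * 512 = 6144 */

	/*
	 * Across a cycle wrap (high exactly one cycle ahead), xlog_lsn_sub()
	 * measures through the physical end of the log instead: with low two
	 * blocks before the end and high two blocks past the wrap, the
	 * distance is four basic blocks, i.e. 2048 bytes.
	 */
	low = xlog_assign_lsn(7, log->l_logBBsize - 2);
	high = xlog_assign_lsn(8, 2);
	ASSERT(xlog_lsn_sub(log, high, low) == BBTOB(4));
}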
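xlog_valid_lsn() above is also an instance of a more general locking idiom worth naming: sample a hot value without the lock, and only take the lock to re-check apparent failures. A generic sketch of the same pattern follows (the demo_* names are hypothetical; only the locking primitives are kernel API):

struct demo_counter {
	spinlock_t lock;	/* protects updates to @cur */
	u64 cur;		/* monotonically advancing value */
};

/* Returns true if @sample is still at or behind the current value. */
static bool demo_sample_is_behind(struct demo_counter *d, u64 sample)
{
	/* Optimistic unlocked read: the common case takes no lock at all. */
	u64 cur = READ_ONCE(d->cur);

	if (sample <= cur)
		return true;

	/*
	 * An apparent failure may just be a race with a concurrent update;
	 * re-read under the lock before declaring it invalid, exactly as
	 * xlog_valid_lsn() re-reads l_curr_cycle/l_curr_block under
	 * l_icloglock.
	 */
	spin_lock(&d->lock);
	cur = d->cur;
	spin_unlock(&d->lock);
	return sample <= cur;
}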
// SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for assembler optimized version of Camellia * * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * Camellia parts based on code by: * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) */ #include <linux/unaligned.h> #include <linux/crypto.h> #include <linux/export.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> #include "camellia.h" #include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk); asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); } /* camellia sboxes */ __visible const u64 camellia_sp10011110[256] = { 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, 
0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, 0x9e00009e9e9e9e00ULL, }; __visible const u64 camellia_sp22000222[256] = { 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, 0x7171000000717171ULL, 
0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, 0xdede000000dededeULL, 0x9696000000969696ULL, 
0x2626000000262626ULL, 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, 0x3d3d0000003d3d3dULL, }; __visible const u64 camellia_sp03303033[256] = { 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, 
0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, 0x001c1c001c001c1cULL, 
0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, 0x004f4f004f004f4fULL, }; __visible const u64 camellia_sp00444404[256] = { 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, 0x0000858585850085ULL, 0x0000353535350035ULL, 
0x00000c0c0c0c000cULL, 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, 0x00009e9e9e9e009eULL, }; __visible const u64 camellia_sp02220222[256] = { 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, 
0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, 0x0076767600767676ULL, 
0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, 0x003d3d3d003d3d3dULL, }; __visible const u64 camellia_sp30333033[256] = { 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 
0xef00efefef00efefULL, 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, 
0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, 0x4f004f4f4f004f4fULL, }; __visible const u64 camellia_sp44044404[256] = { 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, 0xecec00ececec00ecULL, 
0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, 0x9e9e009e9e9e009eULL, }; __visible const u64 camellia_sp11101110[256] = { 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 
0x4f4f4f004f4f4f00ULL, 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, 
	0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL,
	0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL,
	0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL,
	0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL,
	0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL,
	0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL,
	0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL,
	0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL,
	0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL,
	0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL,
	0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL,
	0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL,
	0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL,
	0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL,
	0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL,
	0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL,
	0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL,
	0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL,
	0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL,
	0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL,
	0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL,
	0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL,
	0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL,
	0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL,
	0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL,
	0x9e9e9e009e9e9e00ULL,
};

/* key constants */
#define CAMELLIA_SIGMA1L (0xA09E667FL)
#define CAMELLIA_SIGMA1R (0x3BCC908BL)
#define CAMELLIA_SIGMA2L (0xB67AE858L)
#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
#define CAMELLIA_SIGMA3L (0xC6EF372FL)
#define CAMELLIA_SIGMA3R (0xE94F82BEL)
#define CAMELLIA_SIGMA4L (0x54FF53A5L)
#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
#define CAMELLIA_SIGMA5L (0x10E527FAL)
#define CAMELLIA_SIGMA5R (0xDE682D1DL)
#define CAMELLIA_SIGMA6L (0xB05688C2L)
#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)

/* macros */

/* Rotate the 128-bit pair (l, r) left by 'bits' (0 < bits < 64). */
#define ROLDQ(l, r, bits) ({					\
	u64 t = l;						\
	l = (l << bits) | (r >> (64 - bits));			\
	r = (r << bits) | (t >> (64 - bits));			\
})

/*
 * One F-function evaluation: XOR in the 64-bit round key, then combine
 * eight precomputed SP-table lookups (one per input byte).
 */
#define CAMELLIA_F(x, kl, kr, y) ({				\
	u64 ii = x ^ (((u64)kl << 32) | kr);			\
	y = camellia_sp11101110[(uint8_t)ii];			\
	y ^= camellia_sp44044404[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp30333033[(uint8_t)ii];			\
	y ^= camellia_sp02220222[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp00444404[(uint8_t)ii];			\
	y ^= camellia_sp03303033[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp22000222[(uint8_t)ii];			\
	y ^= camellia_sp10011110[(uint8_t)(ii >> 8)];		\
	y = ror64(y, 32);					\
})

#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))

/*
 * Common tail of the key schedule: absorb kw2/kw4 into the other subkeys
 * and emit the final subkey array (max is 24 for 128-bit keys, 32 for
 * 192/256-bit keys).
 */
static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
{
	u64 kw4, tt;
	u32 dw, tl, tr;

	/* absorb kw2 into the other subkeys */
	subRL[3] ^= subRL[1];				/* round 2 */
	subRL[5] ^= subRL[1];				/* round 4 */
	subRL[7] ^= subRL[1];				/* round 6 */

	subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
	/* modified for FLinv(kl2) */
	dw = (subRL[1] & subRL[9]) >> 32;
	subRL[1] ^= rol32(dw, 1);

	subRL[11] ^= subRL[1];				/* round 8 */
	subRL[13] ^= subRL[1];				/* round 10 */
	subRL[15] ^= subRL[1];				/* round 12 */

	subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
	/* modified for FLinv(kl4) */
	dw = (subRL[1] & subRL[17]) >> 32;
	subRL[1] ^= rol32(dw, 1);

	subRL[19] ^= subRL[1];				/* round 14 */
	subRL[21] ^= subRL[1];				/* round 16 */
	subRL[23] ^= subRL[1];				/* round 18 */

	if (max == 24) {
		subRL[24] ^= subRL[1];			/* kw3 */

		/* absorb kw4 into the other subkeys */
		kw4 = subRL[25];
	} else {
		subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
		/* modified for FLinv(kl6) */
		dw = (subRL[1] & subRL[25]) >> 32;
		subRL[1] ^= rol32(dw, 1);

		subRL[27] ^= subRL[1];			/* round 20 */
		subRL[29] ^= subRL[1];			/* round 22 */
		subRL[31] ^= subRL[1];			/* round 24 */
		subRL[32] ^= subRL[1];			/* kw3 */

		/* absorb kw4 into the other subkeys */
		kw4 = subRL[33];
		subRL[30] ^= kw4;			/* round 23 */
		subRL[28] ^= kw4;			/* round 21 */
		subRL[26] ^= kw4;			/* round 19 */

		kw4 ^= (kw4 & ~subRL[24]) << 32;
		/* modified for FL(kl5) */
		dw = (kw4 & subRL[24]) >> 32;
		kw4 ^= rol32(dw, 1);
	}

	subRL[22] ^= kw4;				/* round 17 */
	subRL[20] ^= kw4;				/* round 15 */
	subRL[18] ^= kw4;				/* round 13 */

	kw4 ^= (kw4 & ~subRL[16]) << 32;
	/* modified for FL(kl3) */
	dw = (kw4 & subRL[16]) >> 32;
	kw4 ^= rol32(dw, 1);

	subRL[14] ^= kw4;				/* round 11 */
	subRL[12] ^= kw4;				/* round 9 */
	subRL[10] ^= kw4;				/* round 7 */

	kw4 ^= (kw4 & ~subRL[8]) << 32;
	/* modified for FL(kl1) */
	dw = (kw4 & subRL[8]) >> 32;
	kw4 ^= rol32(dw, 1);

	subRL[6] ^= kw4;				/* round 5 */
	subRL[4] ^= kw4;				/* round 3 */
	subRL[2] ^= kw4;				/* round 1 */
	subRL[0] ^= kw4;				/* kw1 */

	/* key XOR is end of F-function */
	SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]);		/* kw1 */
	SET_SUBKEY_LR(2, subRL[3]);			/* round 1 */
	SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]);		/* round 2 */
	SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]);		/* round 3 */
	SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]);		/* round 4 */
	SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]);		/* round 5 */

	tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
	dw = tl & (subRL[8] >> 32);			/* FL(kl1) */
	tr = subRL[10] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(7, subRL[6] ^ tt);		/* round 6 */
	SET_SUBKEY_LR(8, subRL[8]);			/* FL(kl1) */
	SET_SUBKEY_LR(9, subRL[9]);			/* FLinv(kl2) */

	tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
	dw = tl & (subRL[9] >> 32);			/* FLinv(kl2) */
	tr = subRL[7] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(10, subRL[11] ^ tt);		/* round 7 */
	SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]);	/* round 8 */
	SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]);	/* round 9 */
	SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]);	/* round 10 */
	SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]);	/* round 11 */

	tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
	dw = tl & (subRL[16] >> 32);			/* FL(kl3) */
	tr = subRL[18] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(15, subRL[14] ^ tt);		/* round 12 */
	SET_SUBKEY_LR(16, subRL[16]);			/* FL(kl3) */
	SET_SUBKEY_LR(17, subRL[17]);			/* FLinv(kl4) */

	tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
	dw = tl & (subRL[17] >> 32);			/* FLinv(kl4) */
	tr = subRL[15] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(18, subRL[19] ^ tt);		/* round 13 */
	SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]);	/* round 14 */
	SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]);	/* round 15 */
	SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]);	/* round 16 */
	SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]);	/* round 17 */

	if (max == 24) {
		SET_SUBKEY_LR(23, subRL[22]);		/* round 18 */
		SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
	} else {
		tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
		dw = tl & (subRL[24] >> 32);		/* FL(kl5) */
		tr = subRL[26] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(23, subRL[22] ^ tt);	/* round 18 */
		SET_SUBKEY_LR(24, subRL[24]);		/* FL(kl5) */
		SET_SUBKEY_LR(25, subRL[25]);		/* FLinv(kl6) */

		tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
		dw = tl & (subRL[25] >> 32);		/* FLinv(kl6) */
		tr = subRL[23] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(26, subRL[27] ^ tt);	/* round 19 */
		SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
		SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
		SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
		SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
		SET_SUBKEY_LR(31, subRL[30]);		/* round 24 */
		SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
	}
}

static void camellia_setup128(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr, ww;
	u64 subRL[26];

	/**
	 * k == kl || kr (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);

	/* generate KL dependent subkeys */
	subRL[0] = kl;				/* kw1 */
	subRL[1] = kr;				/* kw2 */

	/* rotate left by 15 bits */
	ROLDQ(kl, kr, 15);
	subRL[4] = kl;				/* k3 */
	subRL[5] = kr;				/* k4 */

	/* rotate left by 15+30 bits */
	ROLDQ(kl, kr, 30);
	subRL[10] = kl;				/* k7 */
	subRL[11] = kr;				/* k8 */

	/* rotate left by 15+30+15 bits */
	ROLDQ(kl, kr, 15);
	subRL[13] = kr;				/* k10 */

	/* rotate left by 15+30+15+17 bits */
	ROLDQ(kl, kr, 17);
	subRL[16] = kl;				/* kl3 */
	subRL[17] = kr;				/* kl4 */

	/* rotate left by 15+30+15+17+17 bits */
	ROLDQ(kl, kr, 17);
	subRL[18] = kl;				/* k13 */
	subRL[19] = kr;				/* k14 */

	/* rotate left by 15+30+15+17+17+17 bits */
	ROLDQ(kl, kr, 17);
	subRL[22] = kl;				/* k17 */
	subRL[23] = kr;				/* k18 */

	/* generate KA */
	kl = subRL[0];
	kr = subRL[1];
	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);

	/* current status == (kll, klr, w0, w1) */
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KA dependent subkeys */
	subRL[2] = kl;				/* k1 */
	subRL[3] = kr;				/* k2 */
	ROLDQ(kl, kr, 15);
	subRL[6] = kl;				/* k5 */
	subRL[7] = kr;				/* k6 */

	ROLDQ(kl, kr, 15);
	subRL[8] = kl;				/* kl1 */
	subRL[9] = kr;				/* kl2 */

	ROLDQ(kl, kr, 15);
	subRL[12] = kl;				/* k9 */

	ROLDQ(kl, kr, 15);
	subRL[14] = kl;				/* k11 */
	subRL[15] = kr;				/* k12 */

	ROLDQ(kl, kr, 34);
	subRL[20] = kl;				/* k15 */
	subRL[21] = kr;				/* k16 */

	ROLDQ(kl, kr, 17);
	subRL[24] = kl;				/* kw3 */
	subRL[25] = kr;				/* kw4 */

	camellia_setup_tail(subkey, subRL, 24);
}

static void camellia_setup256(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr;			/* left half of key */
	u64 krl, krr;			/* right half of key */
	u64 ww;				/* temporary variable */
	u64 subRL[34];

	/**
	 * key = (kl || kr || krl || krr) (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);
	krl = get_unaligned_be64(key + 16);
	krr = get_unaligned_be64(key + 24);

	/* generate KL dependent subkeys */
	subRL[0] = kl;				/* kw1 */
	subRL[1] = kr;				/* kw2 */
	ROLDQ(kl, kr, 45);
	subRL[12] = kl;				/* k9 */
	subRL[13] = kr;				/* k10 */
	ROLDQ(kl, kr, 15);
	subRL[16] = kl;				/* kl3 */
	subRL[17] = kr;				/* kl4 */
	ROLDQ(kl, kr, 17);
	subRL[22] = kl;				/* k17 */
	subRL[23] = kr;				/* k18 */
	ROLDQ(kl, kr, 34);
	subRL[30] = kl;				/* k23 */
	subRL[31] = kr;				/* k24 */

	/* generate KR dependent subkeys */
	ROLDQ(krl, krr, 15);
	subRL[4] = krl;				/* k3 */
	subRL[5] = krr;				/* k4 */
	ROLDQ(krl, krr, 15);
	subRL[8] = krl;				/* kl1 */
	subRL[9] = krr;				/* kl2 */
	ROLDQ(krl, krr, 30);
	subRL[18] = krl;			/* k13 */
	subRL[19] = krr;			/* k14 */
	ROLDQ(krl, krr, 34);
	subRL[26] = krl;			/* k19 */
	subRL[27] = krr;			/* k20 */
	ROLDQ(krl, krr, 34);

	/* generate KA */
	kl = subRL[0] ^ krl;
	kr = subRL[1] ^ krr;

	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
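	/*
	 * Added note (not in the original source): KA is produced from
	 * KL/KR by four F-function rounds keyed with the SIGMA1..SIGMA4
	 * constants defined above; the KB half below uses SIGMA5/SIGMA6,
	 * as prescribed by the Camellia key schedule (see RFC 3713).
	 */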
	kl ^= krl;
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww ^ krr;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KB */
	krl ^= kl;
	krr ^= kr;
	CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
	krr ^= ww;
	CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
	krl ^= ww;

	/* generate KA dependent subkeys */
	ROLDQ(kl, kr, 15);
	subRL[6] = kl;				/* k5 */
	subRL[7] = kr;				/* k6 */
	ROLDQ(kl, kr, 30);
	subRL[14] = kl;				/* k11 */
	subRL[15] = kr;				/* k12 */
	/* rotate left by 32 bits */
	ROLDQ(kl, kr, 32);
	subRL[24] = kl;				/* kl5 */
	subRL[25] = kr;				/* kl6 */
	/* rotate left by 17 bits: k11,k12 -> k21,k22 */
	ROLDQ(kl, kr, 17);
	subRL[28] = kl;				/* k21 */
	subRL[29] = kr;				/* k22 */

	/* generate KB dependent subkeys */
	subRL[2] = krl;				/* k1 */
	subRL[3] = krr;				/* k2 */
	ROLDQ(krl, krr, 30);
	subRL[10] = krl;			/* k7 */
	subRL[11] = krr;			/* k8 */
	ROLDQ(krl, krr, 30);
	subRL[20] = krl;			/* k15 */
	subRL[21] = krr;			/* k16 */
	ROLDQ(krl, krr, 51);
	subRL[32] = krl;			/* kw3 */
	subRL[33] = krr;			/* kw4 */

	camellia_setup_tail(subkey, subRL, 32);
}

static void camellia_setup192(const unsigned char *key, u64 *subkey)
{
	unsigned char kk[32];
	u64 krl, krr;

	/* expand the 192-bit key to 256 bits: KRR = ~KRL */
	memcpy(kk, key, 24);
	memcpy((unsigned char *)&krl, key + 16, 8);
	krr = ~krl;
	memcpy(kk + 24, (unsigned char *)&krr, 8);
	camellia_setup256(kk, subkey);
}

int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
		      unsigned int key_len)
{
	if (key_len != 16 && key_len != 24 && key_len != 32)
		return -EINVAL;

	cctx->key_length = key_len;

	switch (key_len) {
	case 16:
		camellia_setup128(key, cctx->key_table);
		break;
	case 24:
		camellia_setup192(key, cctx->key_table);
		break;
	case 32:
		camellia_setup256(key, cctx->key_table);
		break;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(__camellia_setkey);

static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
			   unsigned int key_len)
{
	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len);
}

static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				    unsigned int key_len)
{
	return camellia_setkey(&tfm->base, key, key_len);
}

void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src)
{
	u8 buf[CAMELLIA_BLOCK_SIZE];
	const u8 *iv = src;

	/* for in-place decryption, save the IV before it is overwritten */
	if (dst == src)
		iv = memcpy(buf, iv, sizeof(buf));
	camellia_dec_blk_2way(ctx, dst, src);
	crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);

static int ecb_encrypt(struct skcipher_request *req)
{
	ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
	ECB_BLOCK(2, camellia_enc_blk_2way);
	ECB_BLOCK(1, camellia_enc_blk);
	ECB_WALK_END();
}

static int ecb_decrypt(struct skcipher_request *req)
{
	ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
	ECB_BLOCK(2, camellia_dec_blk_2way);
	ECB_BLOCK(1, camellia_dec_blk);
	ECB_WALK_END();
}

static int cbc_encrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
	CBC_ENC_BLOCK(camellia_enc_blk);
	CBC_WALK_END();
}

static int cbc_decrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
	CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way);
	CBC_DEC_BLOCK(1, camellia_dec_blk);
	CBC_WALK_END();
}

static struct crypto_alg camellia_cipher_alg = {
	.cra_name		= "camellia",
	.cra_driver_name	= "camellia-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_ctx),
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.cipher = {
			.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
			.cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
			.cia_setkey	 = camellia_setkey,
			.cia_encrypt	 = camellia_encrypt,
			.cia_decrypt	 = camellia_decrypt
		}
	}
};

static struct skcipher_alg camellia_skcipher_algs[] = {
	{
		.base.cra_name		= "ecb(camellia)",
		.base.cra_driver_name	= "ecb-camellia-asm",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
		.setkey			= camellia_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "cbc(camellia)",
		.base.cra_driver_name	= "cbc-camellia-asm",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
		.ivsize			= CAMELLIA_BLOCK_SIZE,
		.setkey			= camellia_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}
};

static bool is_blacklisted_cpu(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	if (boot_cpu_data.x86 == 0x0f) {
		/*
		 * On Pentium 4, camellia-asm is slower than the original
		 * assembler implementation because of the excessive use of
		 * 64-bit rotates and left-shifts (which are really slow on
		 * P4) needed to store and handle a 128-bit block in two
		 * 64-bit registers.
		 */
		return true;
	}

	return false;
}

static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

static int __init camellia_init(void)
{
	int err;

	if (!force && is_blacklisted_cpu()) {
		printk(KERN_INFO
		       "camellia-x86_64: performance on this CPU would be suboptimal: disabling camellia-x86_64.\n");
		return -ENODEV;
	}

	err = crypto_register_alg(&camellia_cipher_alg);
	if (err)
		return err;

	err = crypto_register_skciphers(camellia_skcipher_algs,
					ARRAY_SIZE(camellia_skcipher_algs));
	if (err)
		crypto_unregister_alg(&camellia_cipher_alg);

	return err;
}

static void __exit camellia_fini(void)
{
	crypto_unregister_alg(&camellia_cipher_alg);
	crypto_unregister_skciphers(camellia_skcipher_algs,
				    ARRAY_SIZE(camellia_skcipher_algs));
}

module_init(camellia_init);
module_exit(camellia_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("camellia");
MODULE_ALIAS_CRYPTO("camellia-asm");
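
/*
 * Illustrative sketch (added commentary, not part of the original module):
 * the eight camellia_sp* tables above precompute Camellia's s-box layer
 * combined with the P byte-diffusion. Each table entry replicates one s-box
 * output into the byte lanes flagged by the nonzero digits of the table name
 * (e.g. camellia_sp10011110[0] == 0x7000007070707000, with s1(0) == 0x70),
 * so one F-function evaluation is just eight table loads XORed together plus
 * a 32-bit rotate. CAMELLIA_GLUE_DOC_SKETCH is a hypothetical guard that is
 * never defined, so this sketch never builds into the module.
 */
#ifdef CAMELLIA_GLUE_DOC_SKETCH
static u64 camellia_f_sketch(u64 x, u32 kl, u32 kr)
{
	u64 y;

	/* Same computation every round performs via CAMELLIA_F(). */
	CAMELLIA_F(x, kl, kr, y);
	return y;
}
#endif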
2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 
2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) International Business Machines Corp., 2000-2004
 * Portions Copyright (C) Tino Reichardt, 2012
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_lock.h"
#include "jfs_metapage.h"
#include "jfs_debug.h"
#include "jfs_discard.h"

/*
 * SERIALIZATION of the Block Allocation Map.
 *
 * the working state of the block allocation map is accessed in
 * two directions:
 *
 * 1) allocation and free requests that start at the dmap
 *    level and move up through the dmap control pages (i.e.
 *    the vast majority of requests).
 *
 * 2) allocation requests that start at dmap control page
 *    level and work down towards the dmaps.
 *
 * the serialization scheme used here is as follows.
 *
 * requests which start at the bottom are serialized against each
 * other through buffers and each request holds onto its buffers
 * as it works its way up from a single dmap to the required level
 * of dmap control page.
 * requests that start at the top are serialized against each other
 * and against requests that start from the bottom by the multiple
 * read/single write inode lock of the bmap inode.  requests starting
 * at the top take this lock in write mode while requests starting at
 * the bottom take the lock in read mode.  a single top-down request
 * may proceed exclusively while multiple bottom-up requests may proceed
 * simultaneously (under the protection of busy buffers).
 *
 * in addition to information found in dmaps and dmap control pages,
 * the working state of the block allocation map also includes read/
 * write information maintained in the bmap descriptor (i.e. total
 * free block count, allocation group level free block counts).
 * a single exclusive lock (BMAP_LOCK) is used to guard this information
 * in the face of multiple bottom-up requests.
 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
 *
 * accesses to the persistent state of the block allocation map (limited
 * to the persistent bitmaps in dmaps) are guarded by (busy) buffers.
 */

#define BMAP_LOCK_INIT(bmp)	mutex_init(&bmp->db_bmaplock)
#define BMAP_LOCK(bmp)		mutex_lock(&bmp->db_bmaplock)
#define BMAP_UNLOCK(bmp)	mutex_unlock(&bmp->db_bmaplock)
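/*
 * The scheme above is a classic reader/writer pattern.  A minimal
 * user-space analogy (hypothetical, using POSIX rwlocks in place of
 * the bmap inode's multiple read/single write lock):
 *
 *	#include <pthread.h>
 *
 *	static pthread_rwlock_t bmap_inode_lock = PTHREAD_RWLOCK_INITIALIZER;
 *
 *	static void bottom_up_request(void)	// dmap level and up
 *	{
 *		pthread_rwlock_rdlock(&bmap_inode_lock);
 *		// many may run at once; each is serialized against the
 *		// others by the busy dmap buffers it holds on the way up
 *		pthread_rwlock_unlock(&bmap_inode_lock);
 *	}
 *
 *	static void top_down_request(void)	// dmap control page and down
 *	{
 *		pthread_rwlock_wrlock(&bmap_inode_lock);
 *		// runs exclusively
 *		pthread_rwlock_unlock(&bmap_inode_lock);
 *	}
 */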
/*
 * forward references
 */
static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
			int nblocks);
static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl);
static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl);
static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl);
static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl);
static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
		    int level);
static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks);
static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks, int l2nb, s64 * results);
static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks);
static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
			  int l2nb, s64 * results);
static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
		     s64 * results);
static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
		      s64 * results);
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
static int dbFindBits(u32 word, int l2nb);
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl);
static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
		      int nblocks);
static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
		      int nblocks);
static int dbMaxBud(u8 * cp);
static int blkstol2(s64 nb);
static int cntlz(u32 value);
static int cnttz(u32 word);
static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
			 int nblocks);
static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
static int dbInitDmapTree(struct dmap * dp);
static int dbInitTree(struct dmaptree * dtp);
static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
static int dbGetL2AGSize(s64 nblocks);

/*
 * buddy table
 *
 * table used for determining buddy sizes within characters of
 * dmap bitmap words.  the characters themselves serve as indexes
 * into the table, with the table elements yielding the maximum
 * binary buddy of free bits within the character.
 */
static const s8 budtab[256] = {
	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
};
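/*
 * The table above can be regenerated with a short user-space sketch
 * (hypothetical helper; assumes bit 7 of a bitmap character covers
 * its first block and that a set bit means "allocated"):
 *
 *	static int max_buddy(unsigned int c)
 *	{
 *		int l2, off;
 *
 *		for (l2 = 3; l2 >= 0; l2--) {	// buddies of 8, 4, 2, 1 bits
 *			int size = 1 << l2;
 *			for (off = 0; off + size <= 8; off += size) {
 *				unsigned int mask =
 *					((1u << size) - 1) << (8 - size - off);
 *				if ((c & mask) == 0)	// aligned free run
 *					return l2;
 *			}
 *		}
 *		return -1;	// every bit allocated (budtab[255])
 *	}
 */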
/*
 * NAME:	dbMount()
 *
 * FUNCTION:	initialize the block allocation map.
 *
 *		memory is allocated for the in-core bmap descriptor and
 *		the in-core descriptor is initialized from disk.
 *
 * PARAMETERS:
 *	ipbmap	- pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOMEM	- insufficient memory
 *	-EIO	- i/o error
 *	-EINVAL - wrong bmap data
 */
int dbMount(struct inode *ipbmap)
{
	struct bmap *bmp;
	struct dbmap_disk *dbmp_le;
	struct metapage *mp;
	int i, err;

	/*
	 * allocate/initialize the in-memory bmap descriptor
	 */
	/* allocate memory for the in-memory bmap descriptor */
	bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
	if (bmp == NULL)
		return -ENOMEM;

	/* read the on-disk bmap descriptor. */
	mp = read_metapage(ipbmap,
			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
			   PSIZE, 0);
	if (mp == NULL) {
		err = -EIO;
		goto err_kfree_bmp;
	}

	/* copy the on-disk bmap descriptor to its in-memory version. */
	dbmp_le = (struct dbmap_disk *) mp->data;
	bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
	bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
	bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
	bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
	bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
	bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);

	if ((bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) ||
	    (bmp->db_l2nbperpage < 0) ||
	    !bmp->db_numag || (bmp->db_numag > MAXAG) ||
	    (bmp->db_maxag >= MAXAG) || (bmp->db_maxag < 0) ||
	    (bmp->db_agpref >= MAXAG) || (bmp->db_agpref < 0) ||
	    (bmp->db_agheight < 0) || (bmp->db_agheight > (L2LPERCTL >> 1)) ||
	    (bmp->db_agwidth < 1) || (bmp->db_agwidth > (LPERCTL / MAXAG)) ||
	    (bmp->db_agwidth > (1 << (L2LPERCTL - (bmp->db_agheight << 1)))) ||
	    (bmp->db_agstart < 0) ||
	    (bmp->db_agstart > (CTLTREESIZE - 1 - bmp->db_agwidth * (MAXAG - 1))) ||
	    (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG) ||
	    (bmp->db_agl2size < 0) ||
	    ((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) {
		err = -EINVAL;
		goto err_release_metapage;
	}

	for (i = 0; i < MAXAG; i++)
		bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
	bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
	bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;

	/* release the buffer. */
	release_metapage(mp);

	/* bind the bmap inode and the bmap descriptor to each other. */
	bmp->db_ipbmap = ipbmap;
	JFS_SBI(ipbmap->i_sb)->bmap = bmp;

	memset(bmp->db_active, 0, sizeof(bmp->db_active));

	/*
	 * allocate/initialize the bmap lock
	 */
	BMAP_LOCK_INIT(bmp);

	return (0);

err_release_metapage:
	release_metapage(mp);
err_kfree_bmp:
	kfree(bmp);
	return err;
}
/*
 * NAME:	dbUnmount()
 *
 * FUNCTION:	terminate the block allocation map in preparation for
 *		file system unmount.
 *
 *		the in-core bmap descriptor is written to disk and
 *		the memory for this descriptor is freed.
 *
 * PARAMETERS:
 *	ipbmap	- pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error
 */
int dbUnmount(struct inode *ipbmap, int mounterror)
{
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;

	if (!(mounterror || isReadOnly(ipbmap)))
		dbSync(ipbmap);

	/*
	 * Invalidate the page cache buffers
	 */
	truncate_inode_pages(ipbmap->i_mapping, 0);

	/* free the memory for the in-memory bmap. */
	kfree(bmp);
	JFS_SBI(ipbmap->i_sb)->bmap = NULL;

	return (0);
}

/*
 *	dbSync()
 */
int dbSync(struct inode *ipbmap)
{
	struct dbmap_disk *dbmp_le;
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
	struct metapage *mp;
	int i;

	/*
	 * write bmap global control page
	 */
	/* get the buffer for the on-disk bmap descriptor. */
	mp = read_metapage(ipbmap,
			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
			   PSIZE, 0);
	if (mp == NULL) {
		jfs_err("dbSync: read_metapage failed!");
		return -EIO;
	}
	/* copy the in-memory version of the bmap to the on-disk version */
	dbmp_le = (struct dbmap_disk *) mp->data;
	dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
	dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
	dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
	dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
	dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
	dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
	for (i = 0; i < MAXAG; i++)
		dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
	dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
	dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;

	/* write the buffer */
	write_metapage(mp);

	/*
	 * write out dirty pages of bmap
	 */
	filemap_write_and_wait(ipbmap->i_mapping);

	diWriteSpecial(ipbmap, 0);

	return (0);
}
/*
 * NAME:	dbFree()
 *
 * FUNCTION:	free the specified block range from the working block
 *		allocation map.
 *
 *		the blocks will be freed from the working map one dmap
 *		at a time.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode;
 *	blkno	- starting block number to be freed.
 *	nblocks	- number of blocks to be freed.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error
 */
int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
{
	struct metapage *mp;
	struct dmap *dp;
	int nb, rc;
	s64 lblkno, rem;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
	struct super_block *sb = ipbmap->i_sb;

	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* block to be freed better be within the mapsize. */
	if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
		IREAD_UNLOCK(ipbmap);
		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
		       (unsigned long long) blkno,
		       (unsigned long long) nblocks);
		jfs_error(ip->i_sb, "block to be freed is outside the map\n");
		return -EIO;
	}

	/*
	 * TRIM the blocks, when mounted with discard option
	 */
	if (JFS_SBI(sb)->flag & JFS_DISCARD)
		if (JFS_SBI(sb)->minblks_trim <= nblocks)
			jfs_issue_discard(ipbmap, blkno, nblocks);

	/*
	 * free the blocks a dmap at a time.
	 */
	mp = NULL;
	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
		/* release previous dmap if any */
		if (mp)
			write_metapage(mp);

		/* get the buffer for the current dmap. */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL) {
			IREAD_UNLOCK(ipbmap);
			return -EIO;
		}
		dp = (struct dmap *) mp->data;

		/* determine the number of blocks to be freed from
		 * this dmap.
		 */
		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));

		/* free the blocks. */
		if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
			jfs_error(ip->i_sb, "error in block map\n");
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			return (rc);
		}
	}

	/* write the last buffer. */
	if (mp)
		write_metapage(mp);

	IREAD_UNLOCK(ipbmap);

	return (0);
}
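/*
 * A worked example of the per-dmap chunking above, assuming
 * BPERDMAP == 8192 and a request to free 10000 blocks at block 8000:
 *
 *	long long blkno = 8000, rem = 10000, nb;
 *
 *	for (; rem > 0; rem -= nb, blkno += nb) {
 *		// blocks from blkno to the end of its dmap, capped by rem
 *		nb = 8192 - (blkno & (8192 - 1));
 *		if (nb > rem)
 *			nb = rem;
 *		// frees 192 @ 8000, then 8192 @ 8192, then 1616 @ 16384
 *	}
 */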
/*
 * NAME:	dbUpdatePMap()
 *
 * FUNCTION:	update the allocation state (free or allocate) of the
 *		specified block range in the persistent block allocation map.
 *
 *		the blocks will be updated in the persistent map one
 *		dmap at a time.
 *
 * PARAMETERS:
 *	ipbmap	- pointer to in-core inode for the block map.
 *	free	- 'true' if block range is to be freed from the persistent
 *		  map; 'false' if it is to be allocated.
 *	blkno	- starting block number of the range.
 *	nblocks	- number of contiguous blocks in the range.
 *	tblk	- transaction block;
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error
 */
int
dbUpdatePMap(struct inode *ipbmap,
	     int free, s64 blkno, s64 nblocks, struct tblock * tblk)
{
	int nblks, dbitno, wbitno, rbits;
	int word, nbits, nwords;
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
	s64 lblkno, rem, lastlblkno;
	u32 mask;
	struct dmap *dp;
	struct metapage *mp;
	struct jfs_log *log;
	int lsn, difft, diffp;
	unsigned long flags;

	/* the blocks better be within the mapsize. */
	if (blkno + nblocks > bmp->db_mapsize) {
		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
		       (unsigned long long) blkno,
		       (unsigned long long) nblocks);
		jfs_error(ipbmap->i_sb, "blocks are outside the map\n");
		return -EIO;
	}

	/* compute delta of transaction lsn from log syncpt */
	lsn = tblk->lsn;
	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
	logdiff(difft, lsn, log);

	/*
	 * update the block state a dmap at a time.
	 */
	mp = NULL;
	lastlblkno = 0;
	for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
		/* get the buffer for the current dmap. */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		if (lblkno != lastlblkno) {
			if (mp)
				write_metapage(mp);

			mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
			if (mp == NULL)
				return -EIO;
			metapage_wait_for_io(mp);
		}
		dp = (struct dmap *) mp->data;

		/* determine the bit number and word within the dmap of
		 * the starting block.  also determine how many blocks
		 * are to be updated within this dmap.
		 */
		dbitno = blkno & (BPERDMAP - 1);
		word = dbitno >> L2DBWORD;
		nblks = min(rem, (s64)BPERDMAP - dbitno);

		/* update the bits of the dmap words.  the first and last
		 * words may only have a subset of their bits updated.  if
		 * this is the case, we'll work against that word (i.e.
		 * partial first and/or last) only in a single pass.  a
		 * single pass will also be used to update all words that
		 * are to have all their bits updated.
		 */
		for (rbits = nblks; rbits > 0;
		     rbits -= nbits, dbitno += nbits) {
			/* determine the bit number within the word and
			 * the number of bits within the word.
			 */
			wbitno = dbitno & (DBWORD - 1);
			nbits = min(rbits, DBWORD - wbitno);

			/* check if only part of the word is to be updated. */
			if (nbits < DBWORD) {
				/* update (free or allocate) the bits
				 * in this word.
				 */
				mask = (ONES << (DBWORD - nbits) >> wbitno);
				if (free)
					dp->pmap[word] &= cpu_to_le32(~mask);
				else
					dp->pmap[word] |= cpu_to_le32(mask);

				word += 1;
			} else {
				/* one or more words are to have all
				 * their bits updated.  determine how
				 * many words and how many bits.
				 */
				nwords = rbits >> L2DBWORD;
				nbits = nwords << L2DBWORD;

				/* update (free or allocate) the bits
				 * in these words.
				 */
				if (free)
					memset(&dp->pmap[word], 0,
					       nwords * 4);
				else
					memset(&dp->pmap[word], (int) ONES,
					       nwords * 4);

				word += nwords;
			}
		}

		/*
		 * update dmap lsn
		 */
		if (lblkno == lastlblkno)
			continue;

		lastlblkno = lblkno;

		LOGSYNC_LOCK(log, flags);
		if (mp->lsn != 0) {
			/* inherit older/smaller lsn */
			logdiff(diffp, mp->lsn, log);
			if (difft < diffp) {
				mp->lsn = lsn;

				/* move bp after tblock in logsync list */
				list_move(&mp->synclist, &tblk->synclist);
			}

			/* inherit younger/larger clsn */
			logdiff(difft, tblk->clsn, log);
			logdiff(diffp, mp->clsn, log);
			if (difft > diffp)
				mp->clsn = tblk->clsn;
		} else {
			mp->log = log;
			mp->lsn = lsn;

			/* insert bp after tblock in logsync list */
			log->count++;
			list_add(&mp->synclist, &tblk->synclist);

			mp->clsn = tblk->clsn;
		}
		LOGSYNC_UNLOCK(log, flags);
	}

	/* write the last buffer. */
	if (mp)
		write_metapage(mp);

	return (0);
}
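/*
 * For the partial-word case above, the mask selects nbits bits
 * beginning wbitno bits from the most significant end of a 32-bit
 * dmap word (DBWORD == 32, ONES == 0xffffffff).  A small example with
 * assumed values:
 *
 *	unsigned int mask;
 *	int wbitno = 5, nbits = 7;
 *
 *	mask = (0xffffffffu << (32 - nbits)) >> wbitno;
 *	// mask == 0x07f00000: bits 5..11, counted from the MSB
 */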
/*
 * NAME:	dbNextAG()
 *
 * FUNCTION:	find the preferred allocation group for new allocations.
 *
 *		Within the allocation groups, we maintain a preferred
 *		allocation group which consists of a group with at least
 *		average free space.  It is the preferred group that we target
 *		new inode allocation towards.  The tie-in between inode
 *		allocation and block allocation occurs as we allocate the
 *		first (data) block of an inode and specify the inode (block)
 *		as the allocation hint for this block.
 *
 *		We try to avoid having more than one open file growing in
 *		an allocation group, as this will lead to fragmentation.
 *		This differs from the old OS/2 method of trying to keep
 *		empty ags around for large allocations.
 *
 * PARAMETERS:
 *	ipbmap	- pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
 *	the preferred allocation group number.
 */
int dbNextAG(struct inode *ipbmap)
{
	s64 avgfree;
	int agpref;
	s64 hwm = 0;
	int i;
	int next_best = -1;
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;

	BMAP_LOCK(bmp);

	/* determine the average number of free blocks within the ags. */
	avgfree = (u32)bmp->db_nfree / bmp->db_numag;

	/*
	 * if the current preferred ag does not have an active allocator
	 * and has at least average freespace, return it
	 */
	agpref = bmp->db_agpref;
	if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
	    (bmp->db_agfree[agpref] >= avgfree))
		goto unlock;

	/* From the last preferred ag, find the next one with at least
	 * average free space.
	 */
	for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
		if (agpref >= bmp->db_numag)
			agpref = 0;

		if (atomic_read(&bmp->db_active[agpref]))
			/* open file is currently growing in this ag */
			continue;
		if (bmp->db_agfree[agpref] >= avgfree) {
			/* Return this one */
			bmp->db_agpref = agpref;
			goto unlock;
		} else if (bmp->db_agfree[agpref] > hwm) {
			/* Less than avg. freespace, but best so far */
			hwm = bmp->db_agfree[agpref];
			next_best = agpref;
		}
	}

	/*
	 * If no inactive ag was found with average freespace, use the
	 * next best
	 */
	if (next_best != -1)
		bmp->db_agpref = next_best;
	/* else leave db_agpref unchanged */
unlock:
	BMAP_UNLOCK(bmp);

	/* return the preferred group. */
	return (bmp->db_agpref);
}
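/*
 * A minimal user-space simulation of the policy above, with
 * hypothetical numbers (four ags, none with an active writer,
 * preferred ag currently 2):
 *
 *	long long agfree[4] = { 10, 400, 90, 500 };
 *	long long avgfree = (10 + 400 + 90 + 500) / 4;	// 250
 *	long long hwm = 0;
 *	int i, agpref = 2, next_best = -1;
 *
 *	for (i = 0; i < 4; i++, agpref++) {
 *		if (agpref >= 4)
 *			agpref = 0;
 *		if (agfree[agpref] >= avgfree)
 *			break;			// ag 3 is picked here
 *		if (agfree[agpref] > hwm) {
 *			hwm = agfree[agpref];
 *			next_best = agpref;	// fallback if none reach avg
 *		}
 *	}
 */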
/*
 * NAME:	dbAlloc()
 *
 * FUNCTION:	attempt to allocate a specified number of contiguous free
 *		blocks from the working allocation block map.
 *
 *		the block allocation policy uses hints and a multi-step
 *		approach.
 *
 *		for allocation requests smaller than the number of blocks
 *		per dmap, we first try to allocate the new blocks
 *		immediately following the hint.  if these blocks are not
 *		available, we try to allocate blocks near the hint.  if
 *		no blocks near the hint are available, we next try to
 *		allocate within the same dmap as contains the hint.
 *
 *		if no blocks are available in the dmap or the allocation
 *		request is larger than the dmap size, we try to allocate
 *		within the same allocation group as contains the hint.  if
 *		this does not succeed, we finally try to allocate anywhere
 *		within the aggregate.
 *
 *		we also try to allocate anywhere within the aggregate
 *		for allocation requests larger than the allocation group
 *		size or requests that specify no hint value.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode;
 *	hint	- allocation hint.
 *	nblocks	- number of contiguous blocks in the range.
 *	results	- on successful return, set to the starting block number
 *		  of the newly allocated contiguous range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 */
int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
{
	int rc, agno;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp;
	struct metapage *mp;
	s64 lblkno, blkno;
	struct dmap *dp;
	int l2nb;
	s64 mapSize;
	int writers;

	/* assert that nblocks is valid */
	assert(nblocks > 0);

	/* get the log2 number of blocks to be allocated.
	 * if the number of blocks is not a log2 multiple,
	 * it will be rounded up to the next log2 multiple.
	 */
	l2nb = BLKSTOL2(nblocks);

	bmp = JFS_SBI(ip->i_sb)->bmap;

	mapSize = bmp->db_mapsize;

	/* the hint should be within the map */
	if (hint >= mapSize) {
		jfs_error(ip->i_sb, "the hint is outside the map\n");
		return -EIO;
	}

	/* if the number of blocks to be allocated is greater than the
	 * allocation group size, try to allocate anywhere.
	 */
	if (l2nb > bmp->db_agl2size) {
		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

		rc = dbAllocAny(bmp, nblocks, l2nb, results);

		goto write_unlock;
	}

	/*
	 * If no hint, let dbNextAG recommend an allocation group
	 */
	if (hint == 0)
		goto pref_ag;

	/* we would like to allocate close to the hint.  adjust the
	 * hint to the block following the hint since the allocators
	 * will start looking for free space starting at this point.
	 */
	blkno = hint + 1;

	if (blkno >= bmp->db_mapsize)
		goto pref_ag;

	agno = blkno >> bmp->db_agl2size;

	/* check if blkno crosses over into a new allocation group.
	 * if so, check if we should allow allocations within this
	 * allocation group.
	 */
	if ((blkno & (bmp->db_agsize - 1)) == 0)
		/* check if the AG is currently being written to.
		 * if so, call dbNextAG() to find a non-busy
		 * AG with sufficient free space.
		 */
		if (atomic_read(&bmp->db_active[agno]))
			goto pref_ag;

	/* check if the allocation request size can be satisfied from a
	 * single dmap.  if so, try to allocate from the dmap containing
	 * the hint using a tiered strategy.
	 */
	if (nblocks <= BPERDMAP) {
		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

		/* get the buffer for the dmap containing the hint. */
		rc = -EIO;
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			goto read_unlock;

		dp = (struct dmap *) mp->data;

		/* first, try to satisfy the allocation request with the
		 * blocks beginning at the hint.
		 */
		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
		    != -ENOSPC) {
			if (rc == 0) {
				*results = blkno;
				mark_metapage_dirty(mp);
			}

			release_metapage(mp);
			goto read_unlock;
		}

		writers = atomic_read(&bmp->db_active[agno]);
		if ((writers > 1) ||
		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
			/*
			 * Someone else is writing in this allocation
			 * group.  To avoid fragmenting, try another ag
			 */
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			goto pref_ag;
		}

		/* next, try to satisfy the allocation request with blocks
		 * near the hint.
		 */
		if ((rc =
		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		/* try to satisfy the allocation request with blocks within
		 * the same dmap as the hint.
		 */
		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		release_metapage(mp);
		IREAD_UNLOCK(ipbmap);
	}

	/* try to satisfy the allocation request with blocks within
	 * the same allocation group as the hint.
	 */
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
		goto write_unlock;

	IWRITE_UNLOCK(ipbmap);

pref_ag:
	/*
	 * Let dbNextAG recommend a preferred allocation group
	 */
	agno = dbNextAG(ipbmap);
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* Try to allocate within this allocation group.  if that fails, try
	 * to allocate anywhere in the map.
	 */
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
		rc = dbAllocAny(bmp, nblocks, l2nb, results);

write_unlock:
	IWRITE_UNLOCK(ipbmap);

	return (rc);

read_unlock:
	IREAD_UNLOCK(ipbmap);

	return (rc);
}
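/*
 * A minimal sketch of the rounding BLKSTOL2() performs for dbAlloc()
 * above (hypothetical user-space helper, not the kernel macro): the
 * request is rounded up to the next power of two and its log2 is used
 * to search the dmap trees:
 *
 *	static int blks_to_l2(long long nblocks)
 *	{
 *		int l2nb = 0;
 *
 *		while ((1LL << l2nb) < nblocks)
 *			l2nb++;
 *		return l2nb;	// e.g. 5 -> 3, 8 -> 3, 9 -> 4
 *	}
 */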
/*
 * NAME:	dbReAlloc()
 *
 * FUNCTION:	attempt to extend a current allocation by a specified
 *		number of blocks.
 *
 *		this routine attempts to satisfy the allocation request
 *		by first trying to extend the existing allocation in
 *		place by allocating the additional blocks as the blocks
 *		immediately following the current allocation.  if these
 *		blocks are not available, this routine will attempt to
 *		allocate a new set of contiguous blocks large enough
 *		to cover the existing allocation plus the additional
 *		number of blocks required.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode requiring allocation.
 *	blkno	- starting block of the current allocation.
 *	nblocks	- number of contiguous blocks within the current
 *		  allocation.
 *	addnblocks - number of blocks to add to the allocation.
 *	results	- on successful return, set to the starting block number
 *		  of the existing allocation if the existing allocation
 *		  was extended in place or to a newly allocated contiguous
 *		  range if the existing allocation could not be extended
 *		  in place.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 */
int
dbReAlloc(struct inode *ip,
	  s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
{
	int rc;

	/* try to extend the allocation in place. */
	if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
		*results = blkno;
		return (0);
	} else {
		if (rc != -ENOSPC)
			return (rc);
	}

	/* could not extend the allocation in place, so allocate a
	 * new set of blocks for the entire request (i.e. try to get
	 * a range of contiguous blocks large enough to cover the
	 * existing allocation plus the additional blocks.)
	 */
	return (dbAlloc
		(ip, blkno + nblocks - 1, addnblocks + nblocks, results));
}

/*
 * NAME:	dbExtend()
 *
 * FUNCTION:	attempt to extend a current allocation by a specified
 *		number of blocks.
 *
 *		this routine attempts to satisfy the allocation request
 *		by first trying to extend the existing allocation in
 *		place by allocating the additional blocks as the blocks
 *		immediately following the current allocation.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode requiring allocation.
 *	blkno	- starting block of the current allocation.
 *	nblocks	- number of contiguous blocks within the current
 *		  allocation.
 *	addnblocks - number of blocks to add to the allocation.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 */
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	s64 lblkno, lastblkno, extblkno;
	uint rel_block;
	struct metapage *mp;
	struct dmap *dp;
	int rc;
	struct inode *ipbmap = sbi->ipbmap;
	struct bmap *bmp;

	/*
	 * We don't want a non-aligned extent to cross a page boundary
	 */
	if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
	    (rel_block + nblocks + addnblocks > sbi->nbperpage))
		return -ENOSPC;

	/* get the last block of the current allocation */
	lastblkno = blkno + nblocks - 1;

	/* determine the block number of the block following
	 * the existing allocation.
	 */
	extblkno = lastblkno + 1;

	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* better be within the file system */
	bmp = sbi->bmap;
	if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
		IREAD_UNLOCK(ipbmap);
		jfs_error(ip->i_sb, "the block is outside the filesystem\n");
		return -EIO;
	}

	/* we'll attempt to extend the current allocation in place by
	 * allocating the additional blocks as the blocks immediately
	 * following the current allocation.  we only try to extend the
	 * current allocation in place if the number of additional blocks
	 * can fit into a dmap, the last block of the current allocation
	 * is not the last block of the file system, and the start of the
	 * in-place extension is not on an allocation group boundary.
	 */
	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
	    (extblkno & (bmp->db_agsize - 1)) == 0) {
		IREAD_UNLOCK(ipbmap);
		return -ENOSPC;
	}

	/* get the buffer for the dmap containing the first block
	 * of the extension.
	 */
	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
	mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
	if (mp == NULL) {
		IREAD_UNLOCK(ipbmap);
		return -EIO;
	}

	dp = (struct dmap *) mp->data;

	/* try to allocate the blocks immediately following the
	 * current allocation.
	 */
	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);

	IREAD_UNLOCK(ipbmap);

	/* were we successful ? */
	if (rc == 0)
		write_metapage(mp);
	else
		/* we were not successful */
		release_metapage(mp);

	return (rc);
}
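/*
 * A worked example of the page-boundary test in dbExtend() above,
 * with assumed numbers (sbi->nbperpage = 8): an extent starting at
 * block 13 sits at offset 5 within its page, so it can only be
 * extended in place while 5 + nblocks + addnblocks <= 8:
 *
 *	unsigned int rel_block = 13 & (8 - 1);	// 5
 *	int ok      = (rel_block + 2 + 1 <= 8);	// 2 blocks grown by 1: fits
 *	int refused = (rel_block + 2 + 2 >  8);	// grown by 2: -ENOSPC
 */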
/*
 * NAME:	dbAllocNext()
 *
 * FUNCTION:	attempt to allocate the blocks of the specified block
 *		range within a dmap.
 *
 * PARAMETERS:
 *	bmp	- pointer to bmap descriptor
 *	dp	- pointer to dmap.
 *	blkno	- starting block number of the range.
 *	nblocks	- number of contiguous free blocks of the range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks)
{
	int dbitno, word, rembits, nb, nwords, wbitno, nw;
	int l2size;
	s8 *leaf;
	u32 mask;

	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
		return -EIO;
	}

	/* pick up a pointer to the leaves of the dmap tree. */
	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);

	/* determine the bit number and word within the dmap of the
	 * starting block.
	 */
	dbitno = blkno & (BPERDMAP - 1);
	word = dbitno >> L2DBWORD;

	/* check if the specified block range is contained within
	 * this dmap.
	 */
	if (dbitno + nblocks > BPERDMAP)
		return -ENOSPC;

	/* check if the starting leaf indicates that anything
	 * is free.
	 */
	if (leaf[word] == NOFREE)
		return -ENOSPC;

	/* check the dmaps words corresponding to block range to see
	 * if the block range is free.  not all bits of the first and
	 * last words may be contained within the block range.  if this
	 * is the case, we'll work against those words (i.e. partial first
	 * and/or last) on an individual basis (a single pass) and examine
	 * the actual bits to determine if they are free.  a single pass
	 * will be used for all dmap words fully contained within the
	 * specified range.  within this pass, the leaves of the dmap
	 * tree will be examined to determine if the blocks are free.  a
	 * single leaf may describe the free space of multiple dmap
	 * words, so we may visit only a subset of the actual leaves
	 * corresponding to the dmap words of the block range.
	 */
	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
		/* determine the bit number within the word and
		 * the number of bits within the word.
		 */
		wbitno = dbitno & (DBWORD - 1);
		nb = min(rembits, DBWORD - wbitno);

		/* check if only part of the word is to be examined. */
		if (nb < DBWORD) {
			/* check if the bits are free. */
			mask = (ONES << (DBWORD - nb) >> wbitno);
			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
				return -ENOSPC;

			word += 1;
		} else {
			/* one or more dmap words are fully contained
			 * within the block range.  determine how many
			 * words and how many bits.
			 */
			nwords = rembits >> L2DBWORD;
			nb = nwords << L2DBWORD;

			/* now examine the appropriate leaves to determine
			 * if the blocks are free.
			 */
			while (nwords > 0) {
				/* does the leaf describe any free space ? */
				if (leaf[word] < BUDMIN)
					return -ENOSPC;

				/* determine the l2 number of bits provided
				 * by this leaf.
				 */
				l2size =
				    min_t(int, leaf[word], NLSTOL2BSZ(nwords));

				/* determine how many words were handled. */
				nw = BUDSIZE(l2size, BUDMIN);

				nwords -= nw;
				word += nw;
			}
		}
	}

	/* allocate the blocks. */
	return (dbAllocDmap(bmp, dp, blkno, nblocks));
}
/*
 * NAME:	dbAllocNear()
 *
 * FUNCTION:	attempt to allocate a number of contiguous free blocks near
 *		a specified block (hint) within a dmap.
 *
 *		starting with the dmap leaf that covers the hint, we'll
 *		check the next four contiguous leaves for sufficient free
 *		space.  if sufficient free space is found, we'll allocate
 *		the desired free space.
 *
 * PARAMETERS:
 *	bmp	- pointer to bmap descriptor
 *	dp	- pointer to dmap.
 *	blkno	- block number to allocate near.
 *	nblocks	- actual number of contiguous free blocks desired.
 *	l2nb	- log2 number of contiguous free blocks desired.
 *	results	- on successful return, set to the starting block number
 *		  of the newly allocated range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
static int
dbAllocNear(struct bmap * bmp,
	    struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
{
	int word, lword, rc;
	s8 *leaf;

	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
		return -EIO;
	}

	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);

	/* determine the word within the dmap that holds the hint
	 * (i.e. blkno).  also, determine the last word in the dmap
	 * that we'll include in our examination.
	 */
	word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
	lword = min(word + 4, LPERDMAP);

	/* examine the leaves for sufficient free space. */
	for (; word < lword; word++) {
		/* does the leaf describe sufficient free space ? */
		if (leaf[word] < l2nb)
			continue;

		/* determine the block number within the file system
		 * of the first block described by this dmap word.
		 */
		blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);

		/* if not all bits of the dmap word are free, get the
		 * starting bit number within the dmap word of the required
		 * string of free bits and adjust the block number with the
		 * value.
		 */
		if (leaf[word] < BUDMIN)
			blkno +=
			    dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);

		/* allocate the blocks. */
		if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
			*results = blkno;

		return (rc);
	}

	return -ENOSPC;
}
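/*
 * A minimal user-space sketch of what dbFindBits() (declared above,
 * defined later in this file) computes: the offset, counted from the
 * most significant bit, of a buddy-aligned run of 2^l2nb free (zero)
 * bits within a 32-bit dmap word.  Hypothetical helper, not the
 * kernel routine itself:
 *
 *	static int find_bits(unsigned int word, int l2nb)
 *	{
 *		int bitno, nb = 1 << l2nb;
 *		unsigned int mask = 0xffffffffu << (32 - nb);
 *
 *		// buddy allocation keeps free runs aligned to their size,
 *		// so only offsets 0, nb, 2*nb, ... need to be probed
 *		for (bitno = 0; bitno < 32; bitno += nb, mask >>= nb)
 *			if ((mask & ~word) == mask)
 *				return bitno;
 *		return -1;	// caller guarantees a run exists
 *	}
 */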
/*
 * NAME:	dbAllocAG()
 *
 * FUNCTION:	attempt to allocate the specified number of contiguous
 *		free blocks within the specified allocation group.
 *
 *		unless the allocation group size is equal to the number
 *		of blocks per dmap, the dmap control pages will be used to
 *		find the required free space, if available.  we start the
 *		search at the highest dmap control page level which
 *		distinctly describes the allocation group's free space
 *		(i.e. the highest level at which the allocation group's
 *		free space is not mixed in with that of any other group).
 *		in addition, we start the search within this level at a
 *		height of the dmapctl dmtree at which the nodes distinctly
 *		describe the allocation group's free space.  at this height,
 *		the allocation group's free space may be represented by one
 *		or two subtrees, depending on the allocation group size.
 *		we search the top nodes of these subtrees left to right for
 *		sufficient free space.  if sufficient free space is found,
 *		the subtree is searched to find the leftmost leaf that
 *		has free space.  once we have made it to the leaf, we
 *		move the search to the next lower level dmap control page
 *		corresponding to this leaf.  we continue down the dmap control
 *		pages until we find the dmap that contains or starts the
 *		sufficient free space and we allocate at this dmap.
 *
 *		if the allocation group size is equal to the dmap size,
 *		we'll start at the dmap corresponding to the allocation
 *		group and attempt the allocation at this level.
 *
 *		the dmap control page search is also not performed if the
 *		allocation group is completely free and we go to the first
 *		dmap of the allocation group to do the allocation.  this is
 *		done because the allocation group may be part (not the first
 *		part) of a larger binary buddy system, causing the dmap
 *		control pages to indicate no free space (NOFREE) within
 *		the allocation group.
 *
 * PARAMETERS:
 *	bmp	- pointer to bmap descriptor
 *	agno	- allocation group number.
 *	nblocks	- actual number of contiguous free blocks desired.
 *	l2nb	- log2 number of contiguous free blocks desired.
 *	results	- on successful return, set to the starting block number
 *		  of the newly allocated range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * note: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
static int
dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
{
	struct metapage *mp;
	struct dmapctl *dcp;
	int rc, ti, i, k, m, n, agperlev;
	s64 blkno, lblkno;
	int budmin;

	/* allocation request should not be for more than the
	 * allocation group size.
	 */
	if (l2nb > bmp->db_agl2size) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "allocation request is larger than the allocation group size\n");
		return -EIO;
	}

	/* determine the starting block number of the allocation
	 * group.
	 */
	blkno = (s64) agno << bmp->db_agl2size;

	/* check if the allocation group size is the minimum allocation
	 * group size or if the allocation group is completely free. if
	 * the allocation group size is the minimum size of BPERDMAP (i.e.
	 * 1 dmap), there is no need to search the dmap control page (below)
	 * that fully describes the allocation group since the allocation
	 * group is already fully described by a dmap.  in this case, we
	 * just call dbAllocCtl() to search the dmap tree and allocate the
	 * required space if available.
	 *
	 * if the allocation group is completely free, dbAllocCtl() is
	 * also called to allocate the required space.  this is done for
	 * two reasons.  first, it makes no sense searching the dmap control
	 * pages for free space when we know that free space exists.  second,
	 * the dmap control pages may indicate that the allocation group
	 * has no free space if the allocation group is part (not the first
	 * part) of a larger binary buddy system.
	 */
	if (bmp->db_agsize == BPERDMAP
	    || bmp->db_agfree[agno] == bmp->db_agsize) {
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if ((rc == -ENOSPC) &&
		    (bmp->db_agfree[agno] == bmp->db_agsize)) {
			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
			       (unsigned long long) blkno,
			       (unsigned long long) nblocks);
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocCtl failed in free AG\n");
		}
		return (rc);
	}

	/* the buffer for the dmap control page that fully describes the
	 * allocation group.
	 */
	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
	if (mp == NULL)
		return -EIO;
	dcp = (struct dmapctl *) mp->data;
	budmin = dcp->budmin;

	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
		release_metapage(mp);
		return -EIO;
	}

	/* search the subtree(s) of the dmap control page that describes
	 * the allocation group, looking for sufficient free space.  to begin,
	 * determine how many allocation groups are represented in a dmap
	 * control page at the control page level (i.e. L0, L1, L2) that
	 * fully describes an allocation group.  next, determine the starting
	 * tree index of this allocation group within the control page.
	 */
	agperlev =
	    (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));

	if (ti < 0 || ti >= le32_to_cpu(dcp->nleafs)) {
		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
		release_metapage(mp);
		return -EIO;
	}

	/* dmap control page trees fan out by 4 and a single allocation
	 * group may be described by one or two subtrees within the ag level
	 * dmap control page, depending upon the ag size.  examine the ag's
	 * subtrees for sufficient free space, starting with the leftmost
	 * subtree.
	 */
	for (i = 0; i < bmp->db_agwidth; i++, ti++) {
		/* is there sufficient free space ? */
		if (l2nb > dcp->stree[ti])
			continue;

		/* sufficient free space found in a subtree.  now search down
		 * the subtree to find the leftmost leaf that describes this
		 * free space.
		 */
		for (k = bmp->db_agheight; k > 0; k--) {
			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
				if (l2nb <= dcp->stree[m + n]) {
					ti = m + n;
					break;
				}
			}
			if (n == 4) {
				jfs_error(bmp->db_ipbmap->i_sb,
					  "failed descending stree\n");
				release_metapage(mp);
				return -EIO;
			}
		}

		/* determine the block number within the file system
		 * that corresponds to this leaf.
		 */
		if (bmp->db_aglevel == 2)
			blkno = 0;
		else if (bmp->db_aglevel == 1)
			blkno &= ~(MAXL1SIZE - 1);
		else		/* bmp->db_aglevel == 0 */
			blkno &= ~(MAXL0SIZE - 1);

		blkno +=
		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;

		/* release the buffer in preparation for going down
		 * the next level of dmap control pages.
		 */
		release_metapage(mp);

		/* check if we need to continue to search down the lower
		 * level dmap control pages.  we need to if the number of
		 * blocks required is less than maximum number of blocks
		 * described at the next lower level.
		 */
		if (l2nb < budmin) {
			/* search the lower level dmap control pages to get
			 * the starting block number of the dmap that
			 * contains or starts off the free space.
			 */
			if ((rc =
			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
				       &blkno))) {
				if (rc == -ENOSPC) {
					jfs_error(bmp->db_ipbmap->i_sb,
						  "control page inconsistent\n");
					return -EIO;
				}
				return (rc);
			}
		}

		/* allocate the blocks. */
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if (rc == -ENOSPC) {
			jfs_error(bmp->db_ipbmap->i_sb,
				  "unable to allocate blocks\n");
			rc = -EIO;
		}
		return (rc);
	}

	/* no space in the allocation group.  release the buffer and
	 * return -ENOSPC.
	 */
	release_metapage(mp);

	return -ENOSPC;
}
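/*
 * A minimal sketch of the 4-ary descent used by dbAllocAG() above:
 * in a dmapctl's stree[], the children of node ti are the four
 * entries starting at (ti << 2) + 1 (hypothetical stree values;
 * l2nb = 3 follows the leftmost child that still fits):
 *
 *	int ti = 0, l2nb = 3, n;
 *	signed char stree[] = { 4, 2, 4, 3, 1 };	// node 0, children 1..4
 *
 *	for (n = 0; n < 4; n++) {
 *		if (l2nb <= stree[(ti << 2) + 1 + n]) {
 *			ti = (ti << 2) + 1 + n;	// descends to node 2 here
 *			break;
 *		}
 *	}
 */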
/*
 * NAME:	dbAllocAny()
 *
 * FUNCTION:	attempt to allocate the specified number of contiguous
 *		free blocks anywhere in the file system.
 *
 *		dbAllocAny() attempts to find the sufficient free space by
 *		searching down the dmap control pages, starting with the
 *		highest level (i.e. L0, L1, L2) control page.  if free space
 *		large enough to satisfy the desired free space is found, the
 *		desired free space is allocated.
 *
 * PARAMETERS:
 *	bmp	- pointer to bmap descriptor
 *	nblocks	- actual number of contiguous free blocks desired.
 *	l2nb	- log2 number of contiguous free blocks desired.
 *	results	- on successful return, set to the starting block number
 *		  of the newly allocated range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
{
	int rc;
	s64 blkno = 0;

	/* starting with the top level dmap control page, search
	 * down the dmap control levels for sufficient free space.
	 * if free space is found, dbFindCtl() returns the starting
	 * block number of the dmap that contains or starts off the
	 * range of free space.
	 */
	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
		return (rc);

	/* allocate the blocks. */
	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
	if (rc == -ENOSPC) {
		jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n");
		return -EIO;
	}

	return (rc);
}
*/ rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); return -EIO; } return (rc); } /* * NAME: dbDiscardAG() * * FUNCTION: attempt to discard (TRIM) all free blocks of a specific AG * * algorithm: * 1) allocate blocks, as large as possible and save them * while holding IWRITE_LOCK on ipbmap * 2) trim all these saved block/length values * 3) mark the blocks free again * * benefit: * - we work only on one ag at a time, minimizing how long we * need to lock ipbmap * - reading / writing the fs is possible most of the time, even on * trimming * * downside: * - we write two times to the dmapctl and dmap pages * - but for me, this seems the best way, better ideas? * /TR 2012 * * PARAMETERS: * ip - pointer to in-core inode * agno - ag to trim * minlen - minimum value of contiguous blocks * * RETURN VALUES: * s64 - actual number of blocks trimmed */ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) { struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; s64 nblocks, blkno; u64 trimmed = 0; int rc, l2nb; struct super_block *sb = ipbmap->i_sb; struct range2trim { u64 blkno; u64 nblocks; } *totrim, *tt; /* max blkno / nblocks pairs to trim */ int count = 0, range_cnt; u64 max_ranges; /* prevent others from writing new stuff here, while trimming */ IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); nblocks = bmp->db_agfree[agno]; max_ranges = nblocks; do_div(max_ranges, minlen); range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); totrim = kmalloc_array(range_cnt, sizeof(struct range2trim), GFP_NOFS); if (totrim == NULL) { jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); IWRITE_UNLOCK(ipbmap); return 0; } tt = totrim; while (nblocks >= minlen) { l2nb = BLKSTOL2(nblocks); /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); if (rc == 0) { tt->blkno = blkno; tt->nblocks = nblocks; tt++; count++; /* the whole ag is free, trim now */ if (bmp->db_agfree[agno] == 0) break; /* give a hint for the next while */ nblocks = bmp->db_agfree[agno]; continue; } else if (rc == -ENOSPC) { /* search for next smaller log2 block */ l2nb = BLKSTOL2(nblocks) - 1; if (unlikely(l2nb < 0)) break; nblocks = 1LL << l2nb; } else { /* Trim any already allocated blocks */ jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); break; } /* check if our trim array is full */ if (unlikely(count >= range_cnt - 1)) break; } IWRITE_UNLOCK(ipbmap); tt->nblocks = 0; /* mark the current end */ for (tt = totrim; tt->nblocks != 0; tt++) { /* when mounted with online discard, dbFree() will * call jfs_issue_discard() itself */ if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) jfs_issue_discard(ip, tt->blkno, tt->nblocks); dbFree(ip, tt->blkno, tt->nblocks); trimmed += tt->nblocks; } kfree(totrim); return trimmed; } /* * NAME: dbFindCtl() * * FUNCTION: starting at a specified dmap control page level and block * number, search down the dmap control levels for a range of * contiguous free blocks large enough to satisfy an allocation * request for the specified number of free blocks. * * if sufficient contiguous free blocks are found, this routine * returns the starting block number within a dmap page that * contains or starts a range of contiguous free blocks that * is sufficient in size. * * PARAMETERS: * bmp - pointer to bmap descriptor * level - starting dmap control page level. * l2nb - log2 number of contiguous free blocks desired.
* *blkno - on entry, starting block number for conducting the search. * on successful return, the first block within a dmap page * that contains or starts a range of contiguous free blocks. * * RETURN VALUES: * 0 - success * -ENOSPC - insufficient disk resources * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) { int rc, leafidx, lev; s64 b, lblkno; struct dmapctl *dcp; int budmin; struct metapage *mp; /* starting at the specified dmap control page level and block * number, search down the dmap control levels for the starting * block number of a dmap page that contains or starts off * sufficient free blocks. */ for (lev = level, b = *blkno; lev >= 0; lev--) { /* get the buffer of the dmap control page for the block * number and level (i.e. L0, L1, L2). */ lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev); mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); if (mp == NULL) return -EIO; dcp = (struct dmapctl *) mp->data; budmin = dcp->budmin; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } /* search the tree within the dmap control page for * sufficient free space. if sufficient free space is found, * dbFindLeaf() returns the index of the leaf at which * free space was found. */ rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true); /* release the buffer. */ release_metapage(mp); /* space found ? */ if (rc) { if (lev != level) { jfs_error(bmp->db_ipbmap->i_sb, "dmap inconsistent\n"); return -EIO; } return -ENOSPC; } /* adjust the block number to reflect the location within * the dmap control page (i.e. the leaf) at which free * space was found. */ b += (((s64) leafidx) << budmin); /* we stop the search at this dmap control page level if * the number of blocks required is greater than or equal * to the maximum number of blocks described at the next * (lower) level. */ if (l2nb >= budmin) break; } *blkno = b; return (0); } /* * NAME: dbAllocCtl() * * FUNCTION: attempt to allocate a specified number of contiguous * blocks starting within a specific dmap. * * this routine is called by higher level routines that search * the dmap control pages above the actual dmaps for contiguous * free space. the results of successful searches by these * routines are the starting block numbers within dmaps, with * the dmaps themselves containing the desired contiguous free * space or starting a contiguous free space of desired size * that is made up of the blocks of one or more dmaps. these * calls should not fail due to insufficient resources. * * this routine is called in some cases where it is not known * whether it will fail due to insufficient resources. more * specifically, this occurs when allocating from an allocation * group whose size is equal to the number of blocks per dmap. * in this case, the dmap control pages are not examined prior * to calling this routine (to save pathlength) and the call * might fail. * * for a request size that fits within a dmap, this routine relies * upon the dmap's dmtree to find the requested contiguous free * space. for request sizes that are larger than a dmap, the * requested free space will start at the first block of the * first dmap (i.e. blkno). * * PARAMETERS: * bmp - pointer to bmap descriptor * nblocks - actual number of contiguous free blocks to allocate. * l2nb - log2 number of contiguous free blocks to allocate.
* blkno - starting block number of the dmap to start the allocation * from. * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: * 0 - success * -ENOSPC - insufficient disk resources * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) { int rc, nb; s64 b, lblkno, n; struct metapage *mp; struct dmap *dp; /* check if the allocation request is confined to a single dmap. */ if (l2nb <= L2BPERDMAP) { /* get the buffer for the dmap. */ lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); if (mp == NULL) return -EIO; dp = (struct dmap *) mp->data; if (dp->tree.budmin < 0) { release_metapage(mp); return -EIO; } /* try to allocate the blocks. */ rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); if (rc == 0) mark_metapage_dirty(mp); release_metapage(mp); return (rc); } /* allocation request involving multiple dmaps. it must start on * a dmap boundary. */ assert((blkno & (BPERDMAP - 1)) == 0); /* allocate the blocks dmap by dmap. */ for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { /* get the buffer for the dmap. */ lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); if (mp == NULL) { rc = -EIO; goto backout; } dp = (struct dmap *) mp->data; /* the dmap better be all free. */ if (dp->tree.stree[ROOT] != L2BPERDMAP) { release_metapage(mp); jfs_error(bmp->db_ipbmap->i_sb, "the dmap is not all free\n"); rc = -EIO; goto backout; } /* determine how many blocks to allocate from this dmap. */ nb = min_t(s64, n, BPERDMAP); /* allocate the blocks from the dmap. */ if ((rc = dbAllocDmap(bmp, dp, b, nb))) { release_metapage(mp); goto backout; } /* write the buffer. */ write_metapage(mp); } /* set the results (starting block number) and return. */ *results = blkno; return (0); /* something failed in handling an allocation request involving * multiple dmaps. we'll try to clean up by backing out any * allocation that has already happened for this request. if * we fail in backing out the allocation, we'll mark the file * system to indicate that blocks have been leaked. */ backout: /* try to backout the allocations dmap by dmap. */ for (n = nblocks - n, b = blkno; n > 0; n -= BPERDMAP, b += BPERDMAP) { /* get the buffer for this dmap. */ lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); if (mp == NULL) { /* could not back out. mark the file system * to indicate that we have leaked blocks. */ jfs_error(bmp->db_ipbmap->i_sb, "I/O Error: Block Leakage\n"); continue; } dp = (struct dmap *) mp->data; /* free the blocks in this dmap. */ if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { /* could not back out. mark the file system * to indicate that we have leaked blocks. */ release_metapage(mp); jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); continue; } /* write the buffer. */ write_metapage(mp); } return (rc); } /* * NAME: dbAllocDmapLev() * * FUNCTION: attempt to allocate a specified number of contiguous blocks * from a specified dmap. * * this routine checks if the contiguous blocks are available. * if so, nblocks blocks are allocated; otherwise, -ENOSPC is * returned. * * PARAMETERS: * bmp - pointer to bmap descriptor * dp - pointer to dmap to attempt to allocate blocks from. * l2nb - log2 number of contiguous blocks desired. * nblocks - actual number of contiguous blocks desired.
* results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: * 0 - success * -ENOSPC - insufficient disk resources * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; */ static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, int l2nb, s64 * results) { s64 blkno; int leafidx, rc; /* can't be more than a dmap's worth of blocks */ assert(l2nb <= L2BPERDMAP); /* search the tree within the dmap page for sufficient * free space. if sufficient free space is found, dbFindLeaf() * returns the index of the leaf at which free space was found. */ if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false)) return -ENOSPC; if (leafidx < 0) return -EIO; /* determine the block number within the file system corresponding * to the leaf at which free space was found. */ blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); /* if not all bits of the dmap word are free, get the starting * bit number within the dmap word of the required string of free * bits and adjust the block number with this value. */ if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); /* allocate the blocks */ if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) *results = blkno; return (rc); } /* * NAME: dbAllocDmap() * * FUNCTION: adjust the disk allocation map to reflect the allocation * of a specified block range within a dmap. * * this routine allocates the specified blocks from the dmap * through a call to dbAllocBits(). if the allocation of the * block range causes the maximum string of free blocks within * the dmap to change (i.e. the value of the root of the dmap's * dmtree), this routine will cause this change to be reflected * up through the appropriate levels of the dmap control pages * by a call to dbAdjCtl() for the L0 dmap control page that * covers this dmap. * * PARAMETERS: * bmp - pointer to bmap descriptor * dp - pointer to dmap to allocate the block range from. * blkno - starting block number of the block range to be allocated. * nblocks - number of blocks to be allocated. * * RETURN VALUES: * 0 - success * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks) { s8 oldroot; int rc; /* save the current value of the root (i.e. maximum free string) * of the dmap tree. */ oldroot = dp->tree.stree[ROOT]; /* allocate the specified (blocks) bits */ dbAllocBits(bmp, dp, blkno, nblocks); /* if the root has not changed, done. */ if (dp->tree.stree[ROOT] == oldroot) return (0); /* root changed. bubble the change up to the dmap control pages. * if the adjustment of the upper level control pages fails, * backout the bit allocation (thus making everything consistent). */ if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) dbFreeBits(bmp, dp, blkno, nblocks); return (rc); } /* * NAME: dbFreeDmap() * * FUNCTION: adjust the disk allocation map to reflect the deallocation * of a specified block range within a dmap. * * this routine frees the specified blocks from the dmap through * a call to dbFreeBits(). if the deallocation of the block range * causes the maximum string of free blocks within the dmap to * change (i.e.
the value of the root of the dmap's dmtree), this * routine will cause this change to be reflected up through the * appropriate levels of the dmap control pages by a call to * dbAdjCtl() for the L0 dmap control page that covers this dmap. * * PARAMETERS: * bmp - pointer to bmap descriptor * dp - pointer to dmap to free the block range from. * blkno - starting block number of the block range to be freed. * nblocks - number of blocks to be freed. * * RETURN VALUES: * 0 - success * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks) { s8 oldroot; int rc = 0, word; /* save the current value of the root (i.e. maximum free string) * of the dmap tree. */ oldroot = dp->tree.stree[ROOT]; /* free the specified (blocks) bits */ rc = dbFreeBits(bmp, dp, blkno, nblocks); /* if error or the root has not changed, done. */ if (rc || (dp->tree.stree[ROOT] == oldroot)) return (rc); /* root changed. bubble the change up to the dmap control pages. * if the adjustment of the upper level control pages fails, * backout the deallocation. */ if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; /* as part of backing out the deallocation, we will have * to back split the dmap tree if the deallocation caused * the freed blocks to become part of a larger binary buddy * system. */ if (dp->tree.stree[word] == NOFREE) dbBackSplit((dmtree_t *)&dp->tree, word, false); dbAllocBits(bmp, dp, blkno, nblocks); } return (rc); } /* * NAME: dbAllocBits() * * FUNCTION: allocate a specified block range from a dmap. * * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly * updates the bits of the working map and causes the adjustment * of the binary buddy system described by the dmap's dmtree * leaves to reflect the bits allocated. it also causes the * dmap's dmtree, as a whole, to reflect the allocated range. * * PARAMETERS: * bmp - pointer to bmap descriptor * dp - pointer to dmap to allocate bits from. * blkno - starting block number of the bits to be allocated. * nblocks - number of bits to be allocated. * * RETURN VALUES: none * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks) { int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; dmtree_t *tp = (dmtree_t *) & dp->tree; int size; s8 *leaf; /* pick up a pointer to the leaves of the dmap tree */ leaf = dp->tree.stree + LEAFIND; /* determine the bit number and word within the dmap of the * starting block. */ dbitno = blkno & (BPERDMAP - 1); word = dbitno >> L2DBWORD; /* block range better be within the dmap */ assert(dbitno + nblocks <= BPERDMAP); /* allocate the bits of the dmap's words corresponding to the block * range. not all bits of the first and last words may be contained * within the block range. if this is the case, we'll work against * those words (i.e. partial first and/or last) on an individual basis * (a single pass), allocating the bits of interest by hand and * updating the leaf corresponding to the dmap word. a single pass * will be used for all dmap words fully contained within the * specified range. within this pass, the bits of all fully contained * dmap words will be marked as allocated in a single shot and the leaves * will be updated.
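* as a worked example, with DBWORD == 32, allocating nb == 5 bits starting at * wbitno == 3 within a word ORs in the mask ONES << (32 - 5) >> 3 == 0x1f000000, * i.e. bits 3..7 counting from the most significant bit of the word.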
a single leaf may describe the free space of * multiple dmap words, so we may update only a subset of the actual * leaves corresponding to the dmap words of the block range. */ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { /* determine the bit number within the word and * the number of bits within the word. */ wbitno = dbitno & (DBWORD - 1); nb = min(rembits, DBWORD - wbitno); /* check if only part of a word is to be allocated. */ if (nb < DBWORD) { /* allocate (set to 1) the appropriate bits within * this dmap word. */ dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) >> wbitno); /* update the leaf for this dmap word. in addition * to setting the leaf value to the binary buddy max * of the updated dmap word, dbSplit() will split * the binary system of the leaves if need be. */ dbSplit(tp, word, BUDMIN, dbMaxBud((u8 *)&dp->wmap[word]), false); word += 1; } else { /* one or more dmap words are fully contained * within the block range. determine how many * words and allocate (set to 1) the bits of these * words. */ nwords = rembits >> L2DBWORD; memset(&dp->wmap[word], (int) ONES, nwords * 4); /* determine how many bits. */ nb = nwords << L2DBWORD; /* now update the appropriate leaves to reflect * the allocated words. */ for (; nwords > 0; nwords -= nw) { if (leaf[word] < BUDMIN) { jfs_error(bmp->db_ipbmap->i_sb, "leaf page corrupt\n"); break; } /* determine what the leaf value should be * updated to as the minimum of the l2 number * of bits being allocated and the l2 number * of bits currently described by this leaf. */ size = min_t(int, leaf[word], NLSTOL2BSZ(nwords)); /* update the leaf to reflect the allocation. * in addition to setting the leaf value to * NOFREE, dbSplit() will split the binary * system of the leaves to reflect the current * allocation (size). */ dbSplit(tp, word, size, NOFREE, false); /* get the number of dmap words handled */ nw = BUDSIZE(size, BUDMIN); word += nw; } } } /* update the free count for this dmap */ le32_add_cpu(&dp->nfree, -nblocks); BMAP_LOCK(bmp); /* if this allocation group is completely free, * update the maximum allocation group number if this allocation * group is the new max. */ agno = blkno >> bmp->db_agl2size; if (agno > bmp->db_maxag) bmp->db_maxag = agno; /* update the free count for the allocation group and map */ bmp->db_agfree[agno] -= nblocks; bmp->db_nfree -= nblocks; BMAP_UNLOCK(bmp); } /* * NAME: dbFreeBits() * * FUNCTION: free a specified block range from a dmap. * * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly * updates the bits of the working map and causes the adjustment * of the binary buddy system described by the dmap's dmtree * leaves to reflect the bits freed. it also causes the dmap's * dmtree, as a whole, to reflect the deallocated range. * * PARAMETERS: * bmp - pointer to bmap descriptor * dp - pointer to dmap to free bits from. * blkno - starting block number of the bits to be freed. * nblocks - number of bits to be freed. * * RETURN VALUES: 0 for success * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks) { int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; dmtree_t *tp = (dmtree_t *) & dp->tree; int rc = 0; int size; /* determine the bit number and word within the dmap of the * starting block. */ dbitno = blkno & (BPERDMAP - 1); word = dbitno >> L2DBWORD; /* block range better be within the dmap. 
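* (with the usual 4K metapage geometry, BPERDMAP == LPERDMAP * DBWORD == * 256 * 32 == 8192 blocks per dmap.)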
*/ assert(dbitno + nblocks <= BPERDMAP); /* free the bits of the dmaps words corresponding to the block range. * not all bits of the first and last words may be contained within * the block range. if this is the case, we'll work against those * words (i.e. partial first and/or last) on an individual basis * (a single pass), freeing the bits of interest by hand and updating * the leaf corresponding to the dmap word. a single pass will be used * for all dmap words fully contained within the specified range. * within this pass, the bits of all fully contained dmap words will * be marked as free in a single shot and the leaves will be updated. a * single leaf may describe the free space of multiple dmap words, * so we may update only a subset of the actual leaves corresponding * to the dmap words of the block range. * * dbJoin() is used to update leaf values and will join the binary * buddy system of the leaves if the new leaf values indicate this * should be done. */ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { /* determine the bit number within the word and * the number of bits within the word. */ wbitno = dbitno & (DBWORD - 1); nb = min(rembits, DBWORD - wbitno); /* check if only part of a word is to be freed. */ if (nb < DBWORD) { /* free (zero) the appropriate bits within this * dmap word. */ dp->wmap[word] &= cpu_to_le32(~(ONES << (DBWORD - nb) >> wbitno)); /* update the leaf for this dmap word. */ rc = dbJoin(tp, word, dbMaxBud((u8 *)&dp->wmap[word]), false); if (rc) return rc; word += 1; } else { /* one or more dmap words are fully contained * within the block range. determine how many * words and free (zero) the bits of these words. */ nwords = rembits >> L2DBWORD; memset(&dp->wmap[word], 0, nwords * 4); /* determine how many bits. */ nb = nwords << L2DBWORD; /* now update the appropriate leaves to reflect * the freed words. */ for (; nwords > 0; nwords -= nw) { /* determine what the leaf value should be * updated to as the minimum of the l2 number * of bits being freed and the l2 (max) number * of bits that can be described by this leaf. */ size = min(LITOL2BSZ (word, L2LPERDMAP, BUDMIN), NLSTOL2BSZ(nwords)); /* update the leaf. */ rc = dbJoin(tp, word, size, false); if (rc) return rc; /* get the number of dmap words handled. */ nw = BUDSIZE(size, BUDMIN); word += nw; } } } /* update the free count for this dmap. */ le32_add_cpu(&dp->nfree, nblocks); BMAP_LOCK(bmp); /* update the free count for the allocation group and * map. */ agno = blkno >> bmp->db_agl2size; bmp->db_nfree += nblocks; bmp->db_agfree[agno] += nblocks; /* check if this allocation group is not completely free and * if it is currently the maximum (rightmost) allocation group. * if so, establish the new maximum allocation group number by * searching left for the first allocation group with allocation. */ if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || (agno == bmp->db_numag - 1 && bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { while (bmp->db_maxag > 0) { bmp->db_maxag -= 1; if (bmp->db_agfree[bmp->db_maxag] != bmp->db_agsize) break; } /* re-establish the allocation group preference if the * current preference is right of the maximum allocation * group. */ if (bmp->db_agpref > bmp->db_maxag) bmp->db_agpref = bmp->db_maxag; } BMAP_UNLOCK(bmp); return 0; } /* * NAME: dbAdjCtl() * * FUNCTION: adjust a dmap control page at a specified level to reflect * the change in a lower level dmap or dmap control page's * maximum string of free blocks (i.e. 
a change in the root * of the lower level object's dmtree) due to the allocation * or deallocation of a range of blocks within a single dmap. * * on entry, this routine is provided with the new value of * the lower level dmap or dmap control page root and the * starting block number of the block range whose allocation * or deallocation resulted in the root change. this range * is represented by a single leaf of the current dmapctl * and the leaf will be updated with this value, possibly * causing a binary buddy system within the leaves to be * split or joined. the update may also cause the dmapctl's * dmtree to be updated. * * if the adjustment of the dmap control page, itself, causes its * root to change, this change will be bubbled up to the next dmap * control level by a recursive call to this routine, specifying * the new root value and the next dmap control page level to * be adjusted. * * PARAMETERS: * bmp - pointer to bmap descriptor * blkno - the first block of a block range within a dmap. it is * the allocation or deallocation of this block range that * requires the dmap control page to be adjusted. * newval - the new value of the lower level dmap or dmap control * page root. * alloc - 'true' if adjustment is due to an allocation. * level - current level of dmap control page (i.e. L0, L1, L2) to * be adjusted. * * RETURN VALUES: * 0 - success * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) { struct metapage *mp; s8 oldroot; int oldval; s64 lblkno; struct dmapctl *dcp; int rc, leafno, ti; /* get the buffer for the dmap control page for the specified * block number and control page level. */ lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); if (mp == NULL) return -EIO; dcp = (struct dmapctl *) mp->data; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } /* determine the leaf number corresponding to the block and * the index within the dmap control tree. */ leafno = BLKTOCTLLEAF(blkno, dcp->budmin); ti = leafno + le32_to_cpu(dcp->leafidx); /* save the current leaf value and the current root level (i.e. * maximum l2 free string described by this dmapctl). */ oldval = dcp->stree[ti]; oldroot = dcp->stree[ROOT]; /* check if this is a control page update for an allocation. * if so, update the leaf to reflect the new leaf value using * dbSplit(); otherwise (deallocation), use dbJoin() to update * the leaf with the new value. in addition to updating the * leaf, dbSplit() will also split the binary buddy system of * the leaves, if required, and bubble new values within the * dmapctl tree, if required. similarly, dbJoin() will join * the binary buddy system of leaves and bubble new values up * the dmapctl tree as required by the new leaf value. */ if (alloc) { /* check if we are in the middle of a binary buddy * system. this happens when we are performing the * first allocation out of an allocation group that * is part (not the first part) of a larger binary * buddy system. if we are in the middle, back split * the system prior to calling dbSplit() which assumes * that it is at the front of a binary buddy system.
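* for example, if leaves 0..3 form one buddy system headed by leaf 0, leaves * 1..3 hold NOFREE; the first allocation against leaf 2's range must back * split the system so that leaf 2 heads a buddy system of its own.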
*/ if (oldval == NOFREE) { rc = dbBackSplit((dmtree_t *)dcp, leafno, true); if (rc) { release_metapage(mp); return rc; } oldval = dcp->stree[ti]; } dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true); } else { rc = dbJoin((dmtree_t *) dcp, leafno, newval, true); if (rc) { release_metapage(mp); return rc; } } /* check if the root of the current dmap control page changed due * to the update and if the current dmap control page is not at * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. * root changed and this is not the top level), call this routine * again (recursion) for the next higher level of the mapping to * reflect the change in root for the current dmap control page. */ if (dcp->stree[ROOT] != oldroot) { /* are we below the top level of the map. if so, * bubble the root up to the next higher level. */ if (level < bmp->db_maxlevel) { /* bubble up the new root of this dmap control page to * the next level. */ if ((rc = dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, level + 1))) { /* something went wrong in bubbling up the new * root value, so backout the changes to the * current dmap control page. */ if (alloc) { dbJoin((dmtree_t *) dcp, leafno, oldval, true); } else { /* the dbJoin() above might have * caused a larger binary buddy system * to form and we may now be in the * middle of it. if this is the case, * back split the buddies. */ if (dcp->stree[ti] == NOFREE) dbBackSplit((dmtree_t *) dcp, leafno, true); dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, oldval, true); } /* release the buffer and return the error. */ release_metapage(mp); return (rc); } } else { /* we're at the top level of the map. update * the bmap control page to reflect the size * of the maximum free buddy system. */ assert(level == bmp->db_maxlevel); if (bmp->db_maxfreebud != oldroot) { jfs_error(bmp->db_ipbmap->i_sb, "the maximum free buddy is not the old root\n"); } bmp->db_maxfreebud = dcp->stree[ROOT]; } } /* write the buffer. */ write_metapage(mp); return (0); } /* * NAME: dbSplit() * * FUNCTION: update the leaf of a dmtree with a new value, splitting * the leaf from the binary buddy system of the dmtree's * leaves, as required. * * PARAMETERS: * tp - pointer to the tree containing the leaf. * leafno - the number of the leaf to be updated. * splitsz - the size the binary buddy system starting at the leaf * must be split to, specified as the log2 number of blocks. * newval - the new value for the leaf. * * RETURN VALUES: none * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl) { int budsz; int cursz; s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); /* check if the leaf needs to be split. */ if (leaf[leafno] > tp->dmt_budmin) { /* the split occurs by cutting the buddy system in half * at the specified leaf until we reach the specified * size. pick up the starting split size (current size * - 1 in l2) and the corresponding buddy size. */ cursz = leaf[leafno] - 1; budsz = BUDSIZE(cursz, tp->dmt_budmin); /* split until we reach the specified size. */ while (cursz >= splitsz) { /* update the buddy's leaf with its new value. */ dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl); /* on to the next size and buddy. */ cursz -= 1; budsz >>= 1; } } /* adjust the dmap tree to reflect the specified leaf's new * value. 
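* for instance, splitting a leaf of value 8 (with budmin 5) down to splitsz 6 * assigns buddy (leafno ^ 4) the value 7, then buddy (leafno ^ 2) the value 6, * before the leaf itself is set to newval below.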
*/ dbAdjTree(tp, leafno, newval, is_ctl); } /* * NAME: dbBackSplit() * * FUNCTION: back split the binary buddy system of dmtree leaves * that hold a specified leaf until the specified leaf * starts its own binary buddy system. * * the allocators typically perform allocations at the start * of binary buddy systems and dbSplit() is used to accomplish * any required splits. in some cases, however, allocation * may occur in the middle of a binary buddy system and requires a * back split, with the split proceeding out from the middle of * the system (less efficient) rather than the start of the * system (more efficient). the cases in which a back split * is required are rare and are limited to the first allocation * within an allocation group which is a part (not the first part) * of a larger binary buddy system and a few exception cases * in which a previous join operation must be backed out. * * PARAMETERS: * tp - pointer to the tree containing the leaf. * leafno - the number of the leaf to be updated. * * RETURN VALUES: * 0 - success * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) { int budsz, bud, w, bsz, size; int cursz; s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); /* leaf should be part (not the first part) of a binary * buddy system. */ assert(leaf[leafno] == NOFREE); /* the back split is accomplished by iteratively finding the leaf * that starts the buddy system that contains the specified leaf and * splitting that system in two. this iteration continues until * the specified leaf becomes the start of a buddy system. * * determine maximum possible l2 size for the specified leaf. */ size = LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), tp->dmt_budmin); /* determine the number of leaves covered by this size. this * is the buddy size that we will start with as we search for * the buddy system that contains the specified leaf. */ budsz = BUDSIZE(size, tp->dmt_budmin); /* back split. */ while (leaf[leafno] == NOFREE) { /* find the leftmost buddy leaf. */ for (w = leafno, bsz = budsz;; bsz <<= 1, w = (w < bud) ? w : bud) { if (bsz >= le32_to_cpu(tp->dmt_nleafs)) { jfs_err("JFS: block map error in dbBackSplit"); return -EIO; } /* determine the buddy. */ bud = w ^ bsz; /* check if this buddy is the start of the system. */ if (leaf[bud] != NOFREE) { /* split the leaf at the start of the * system in two. */ cursz = leaf[bud] - 1; dbSplit(tp, bud, cursz, cursz, is_ctl); break; } } } if (leaf[leafno] != size) { jfs_err("JFS: wrong leaf value in dbBackSplit"); return -EIO; } return 0; } /* * NAME: dbJoin() * * FUNCTION: update the leaf of a dmtree with a new value, joining * the leaf with other leaves of the dmtree into a multi-leaf * binary buddy system, as required. * * PARAMETERS: * tp - pointer to the tree containing the leaf. * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * * RETURN VALUES: * 0 - success * -EIO - i/o error */ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int budsz, buddy; s8 *leaf; /* can the new leaf value require a join with other leaves ? */ if (newval >= tp->dmt_budmin) { /* pick up a pointer to the leaves of the tree. */ leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); /* try to join the specified leaf into a large binary * buddy system. the join proceeds by attempting to join * the specified leafno with its buddy (leaf) at new value. * if the join occurs, we attempt to join the left leaf * of the joined buddies with its buddy at new value + 1.
* we continue to join until we find a buddy that cannot be * joined (does not have a value equal to the size of the * last join) or until all leaves have been joined into a * single system. * * get the buddy size (number of words covered) of * the new value. */ budsz = BUDSIZE(newval, tp->dmt_budmin); /* try to join. */ while (budsz < le32_to_cpu(tp->dmt_nleafs)) { /* get the buddy leaf. */ buddy = leafno ^ budsz; /* if the leaf's new value is greater than its * buddy's value, we join no more. */ if (newval > leaf[buddy]) break; /* It shouldn't be less */ if (newval < leaf[buddy]) return -EIO; /* check which (leafno or buddy) is the left buddy. * the left buddy gets to claim the blocks resulting * from the join while the right gets to claim none. * the left buddy is also eligible to participate in * a join at the next higher level while the right * is not. * */ if (leafno < buddy) { /* leafno is the left buddy. */ dbAdjTree(tp, buddy, NOFREE, is_ctl); } else { /* buddy is the left buddy and becomes * leafno. */ dbAdjTree(tp, leafno, NOFREE, is_ctl); leafno = buddy; } /* on to try the next join. */ newval += 1; budsz <<= 1; } } /* update the leaf value. */ dbAdjTree(tp, leafno, newval, is_ctl); return 0; } /* * NAME: dbAdjTree() * * FUNCTION: update a leaf of a dmtree with a new value, adjusting * the dmtree, as required, to reflect the new leaf value. * the combination of any buddies must already be done before * this is called. * * PARAMETERS: * tp - pointer to the tree to be adjusted. * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * * RETURN VALUES: none */ static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int lp, pp, k; int max, size; size = is_ctl ? CTLTREESIZE : TREESIZE; /* pick up the index of the leaf for this leafno. */ lp = leafno + le32_to_cpu(tp->dmt_leafidx); if (WARN_ON_ONCE(lp >= size || lp < 0)) return; /* is the current value the same as the old value ? if so, * there is nothing to do. */ if (tp->dmt_stree[lp] == newval) return; /* set the new value. */ tp->dmt_stree[lp] = newval; /* bubble the new value up the tree as required. */ for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { if (lp == 0) break; /* get the index of the first leaf of the 4 leaf * group containing the specified leaf (leafno). */ lp = ((lp - 1) & ~0x03) + 1; /* get the index of the parent of this 4 leaf group. */ pp = (lp - 1) >> 2; /* determine the maximum of the 4 leaves. */ max = TREEMAX(&tp->dmt_stree[lp]); /* if the maximum of the 4 is the same as the * parent's value, we're done. */ if (tp->dmt_stree[pp] == max) break; /* parent gets new value. */ tp->dmt_stree[pp] = max; /* parent becomes leaf for next go-round. */ lp = pp; } } /* * NAME: dbFindLeaf() * * FUNCTION: search a dmtree_t for sufficient free blocks, returning * the index of a leaf describing the free blocks if * sufficient free blocks are found. * * the search starts at the top of the dmtree_t tree and * proceeds down the tree to the leftmost leaf with sufficient * free space. * * PARAMETERS: * tp - pointer to the tree to be searched. * l2nb - log2 number of free blocks to search for. * leafidx - return pointer to be set to the index of the leaf * describing at least l2nb free blocks if sufficient * free blocks are found. * is_ctl - determines if the tree is of type ctl * * RETURN VALUES: * 0 - success * -ENOSPC - insufficient free blocks. 
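* * example: in a dmap tree of height 4 the search starts at index ti == 1; if * the third node (n == 2) at that level describes sufficient space, the next * level starts at ti == ((1 + 2) << 2) + 1 == 13, and so on down to a leaf.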
*/ static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl) { int ti, n = 0, k, x = 0; int max_size, max_idx; max_size = is_ctl ? CTLTREESIZE : TREESIZE; max_idx = is_ctl ? LPERCTL : LPERDMAP; /* first check the root of the tree to see if there is * sufficient free space. */ if (l2nb > tp->dmt_stree[ROOT]) return -ENOSPC; /* sufficient free space available. now search down the tree * starting at the next level for the leftmost leaf that * describes sufficient free space. */ for (k = le32_to_cpu(tp->dmt_height), ti = 1; k > 0; k--, ti = ((ti + n) << 2) + 1) { /* search the four nodes at this level, starting from * the left. */ for (x = ti, n = 0; n < 4; n++) { /* sufficient free space found. move to the next * level (or quit if this is the last level). */ if (x + n > max_size) return -ENOSPC; if (l2nb <= tp->dmt_stree[x + n]) break; } /* better have found something since the higher * levels of the tree said it was here. */ assert(n < 4); } if (le32_to_cpu(tp->dmt_leafidx) >= max_idx) return -ENOSPC; /* set the return to the leftmost leaf describing sufficient * free space. */ *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); return (0); } /* * NAME: dbFindBits() * * FUNCTION: find a specified number of binary buddy free bits within a * dmap bitmap word value. * * this routine searches the bitmap value for (1 << l2nb) free * bits at (1 << l2nb) alignments within the value. * * PARAMETERS: * word - dmap bitmap word value. * l2nb - number of free bits specified as a log2 number. * * RETURN VALUES: * starting bit number of free bits. */ static int dbFindBits(u32 word, int l2nb) { int bitno, nb; u32 mask; /* get the number of bits. */ nb = 1 << l2nb; assert(nb <= DBWORD); /* complement the word (in the bitmap, 0s represent free bits, so * they become 1s) and compute the mask. */ word = ~word; mask = ONES << (DBWORD - nb); /* scan the word for nb free bits at nb alignments. */ for (bitno = 0; mask != 0; bitno += nb, mask = (mask >> nb)) { if ((mask & word) == mask) break; } ASSERT(bitno < 32); /* return the bit number. */ return (bitno); } /* * NAME: dbMaxBud(u8 *cp) * * FUNCTION: determine the largest binary buddy string of free * bits within 32-bits of the map. * * PARAMETERS: * cp - pointer to the 32-bit value. * * RETURN VALUES: * largest binary buddy of free bits within a dmap word. */ static int dbMaxBud(u8 * cp) { signed char tmp1, tmp2; /* check if the wmap word is all free. if so, the * free buddy size is BUDMIN. */ if (*((uint *) cp) == 0) return (BUDMIN); /* check if the wmap word is half free. if so, the * free buddy size is BUDMIN-1. */ if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) return (BUDMIN - 1); /* not all free or half free. determine the free buddy * size through table lookup using quarters of the wmap word. */ tmp1 = max(budtab[cp[2]], budtab[cp[3]]); tmp2 = max(budtab[cp[0]], budtab[cp[1]]); return (max(tmp1, tmp2)); } /* * NAME: cnttz(u32 word) * * FUNCTION: determine the number of trailing zeros within a 32-bit * value. * * PARAMETERS: * word - 32-bit value to be examined. * * RETURN VALUES: * count of trailing zeros */ static int cnttz(u32 word) { int n; for (n = 0; n < 32; n++, word >>= 1) { if (word & 0x01) break; } return (n); } /* * NAME: cntlz(u32 value) * * FUNCTION: determine the number of leading zeros within a 32-bit * value. * * PARAMETERS: * value - 32-bit value to be examined.
* * RETURN VALUES: * count of leading zeros */ static int cntlz(u32 value) { int n; for (n = 0; n < 32; n++, value <<= 1) { if (value & HIGHORDER) break; } return (n); } /* * NAME: blkstol2(s64 nb) * * FUNCTION: convert a block count to its log2 value. if the block * count is not an l2 multiple, it is rounded up to the next * larger l2 multiple. * * PARAMETERS: * nb - number of blocks * * RETURN VALUES: * log2 number of blocks */ static int blkstol2(s64 nb) { int l2nb; s64 mask; /* meant to be signed */ mask = (s64) 1 << (64 - 1); /* count the leading bits. */ for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) { /* leading bit found. */ if (nb & mask) { /* determine the l2 value. */ l2nb = (64 - 1) - l2nb; /* check if we need to round up. */ if (~mask & nb) l2nb++; return (l2nb); } } assert(0); return 0; /* fix compiler warning */ } /* * NAME: dbAllocBottomUp() * * FUNCTION: allocate the specified block range from the working block * allocation map. * * the blocks will be allocated from the working map one dmap * at a time. * * PARAMETERS: * ip - pointer to in-core inode; * blkno - starting block number to be allocated. * nblocks - number of blocks to be allocated. * * RETURN VALUES: * 0 - success * -EIO - i/o error */ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) { struct metapage *mp; struct dmap *dp; int nb, rc; s64 lblkno, rem; struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* block to be allocated better be within the mapsize. */ ASSERT(nblocks <= bmp->db_mapsize - blkno); /* * allocate the blocks a dmap at a time. */ mp = NULL; for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { /* release previous dmap if any */ if (mp) { write_metapage(mp); } /* get the buffer for the current dmap. */ lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); mp = read_metapage(ipbmap, lblkno, PSIZE, 0); if (mp == NULL) { IREAD_UNLOCK(ipbmap); return -EIO; } dp = (struct dmap *) mp->data; /* determine the number of blocks to be allocated from * this dmap. */ nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); /* allocate the blocks. */ if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { release_metapage(mp); IREAD_UNLOCK(ipbmap); return (rc); } } /* write the last buffer. */ write_metapage(mp); IREAD_UNLOCK(ipbmap); return (0); } static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks) { int rc; int dbitno, word, rembits, nb, nwords, wbitno, agno; s8 oldroot; struct dmaptree *tp = (struct dmaptree *) & dp->tree; /* save the current value of the root (i.e. maximum free string) * of the dmap tree. */ oldroot = tp->stree[ROOT]; /* determine the bit number and word within the dmap of the * starting block. */ dbitno = blkno & (BPERDMAP - 1); word = dbitno >> L2DBWORD; /* block range better be within the dmap */ assert(dbitno + nblocks <= BPERDMAP); /* allocate the bits of the dmap's words corresponding to the block * range. not all bits of the first and last words may be contained * within the block range. if this is the case, we'll work against * those words (i.e. partial first and/or last) on an individual basis * (a single pass), allocating the bits of interest by hand and * updating the leaf corresponding to the dmap word. a single pass * will be used for all dmap words fully contained within the * specified range. within this pass, the bits of all fully contained * dmap words will be marked as allocated in a single shot and the leaves * will be updated.
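* note that unlike dbAllocBits(), the leaves are not split here word by word; * the whole summary tree is rebuilt in one pass by dbInitDmapTree() once the * bitmap words have been updated.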
a single leaf may describe the free space of * multiple dmap words, so we may update only a subset of the actual * leaves corresponding to the dmap words of the block range. */ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { /* determine the bit number within the word and * the number of bits within the word. */ wbitno = dbitno & (DBWORD - 1); nb = min(rembits, DBWORD - wbitno); /* check if only part of a word is to be allocated. */ if (nb < DBWORD) { /* allocate (set to 1) the appropriate bits within * this dmap word. */ dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) >> wbitno); word++; } else { /* one or more dmap words are fully contained * within the block range. determine how many * words and allocate (set to 1) the bits of these * words. */ nwords = rembits >> L2DBWORD; memset(&dp->wmap[word], (int) ONES, nwords * 4); /* determine how many bits */ nb = nwords << L2DBWORD; word += nwords; } } /* update the free count for this dmap */ le32_add_cpu(&dp->nfree, -nblocks); /* reconstruct summary tree */ dbInitDmapTree(dp); BMAP_LOCK(bmp); /* if this allocation group is completely free, * update the highest active allocation group number * if this allocation group is the new max. */ agno = blkno >> bmp->db_agl2size; if (agno > bmp->db_maxag) bmp->db_maxag = agno; /* update the free count for the allocation group and map */ bmp->db_agfree[agno] -= nblocks; bmp->db_nfree -= nblocks; BMAP_UNLOCK(bmp); /* if the root has not changed, done. */ if (tp->stree[ROOT] == oldroot) return (0); /* root changed. bubble the change up to the dmap control pages. * if the adjustment of the upper level control pages fails, * backout the bit allocation (thus making everything consistent). */ if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) dbFreeBits(bmp, dp, blkno, nblocks); return (rc); } /* * NAME: dbExtendFS() * * FUNCTION: extend bmap from blkno for nblocks; * dbExtendFS() updates bmap ready for dbAllocBottomUp(); * * L2 * | * L1---------------------------------L1 * | | * L0---------L0---------L0 L0---------L0---------L0 * | | | | | | * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm * * <---old---><----------------------------extend-----------------------> */ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) { struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); int nbperpage = sbi->nbperpage; int i, i0 = true, j, j0 = true, k, n; s64 newsize; s64 p; struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL; struct dmapctl *l2dcp, *l1dcp, *l0dcp; struct dmap *dp; s8 *l0leaf, *l1leaf, *l2leaf; struct bmap *bmp = sbi->bmap; int agno, l2agsize, oldl2agsize; s64 ag_rem; newsize = blkno + nblocks; jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld", (long long) blkno, (long long) nblocks, (long long) newsize); /* * initialize bmap control page. * * all the data in bmap control page should exclude * the mkfs hidden dmap page. */ /* update mapsize */ bmp->db_mapsize = newsize; bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); /* compute new AG size */ l2agsize = dbGetL2AGSize(newsize); oldl2agsize = bmp->db_agl2size; bmp->db_agl2size = l2agsize; bmp->db_agsize = (s64)1 << l2agsize; /* compute new number of AG */ agno = bmp->db_numag; bmp->db_numag = newsize >> l2agsize; bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 
1 : 0; /* * reconfigure db_agfree[] * from old AG configuration to new AG configuration; * * coalesce contiguous k (newAGSize/oldAGSize) AGs; * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; * note: new AG size = old AG size * (2**x). */ if (l2agsize == oldl2agsize) goto extend; k = 1 << (l2agsize - oldl2agsize); ag_rem = bmp->db_agfree[0]; /* save agfree[0] */ for (i = 0, n = 0; i < agno; n++) { bmp->db_agfree[n] = 0; /* init collection point */ /* coalesce contiguous k AGs; */ for (j = 0; j < k && i < agno; j++, i++) { /* merge AGi to AGn */ bmp->db_agfree[n] += bmp->db_agfree[i]; } } bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */ for (; n < MAXAG; n++) bmp->db_agfree[n] = 0; /* * update highest active ag number */ bmp->db_maxag = bmp->db_maxag / k; /* * extend bmap * * update bit maps and corresponding level control pages; * global control page db_nfree, db_agfree[agno], db_maxfreebud; */ extend: /* get L2 page */ p = BMAPBLKNO + nbperpage; /* L2 page */ l2mp = read_metapage(ipbmap, p, PSIZE, 0); if (!l2mp) { jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); return -EIO; } l2dcp = (struct dmapctl *) l2mp->data; /* compute start L1 */ k = blkno >> L2MAXL1SIZE; l2leaf = l2dcp->stree + CTLLEAFIND + k; p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */ /* * extend each L1 in L2 */ for (; k < LPERCTL; k++, p += nbperpage) { /* get L1 page */ if (j0) { /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */ l1mp = read_metapage(ipbmap, p, PSIZE, 0); if (l1mp == NULL) goto errout; l1dcp = (struct dmapctl *) l1mp->data; /* compute start L0 */ j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; l1leaf = l1dcp->stree + CTLLEAFIND + j; p = BLKTOL0(blkno, sbi->l2nbperpage); j0 = false; } else { /* assign/init L1 page */ l1mp = get_metapage(ipbmap, p, PSIZE, 0); if (l1mp == NULL) goto errout; l1dcp = (struct dmapctl *) l1mp->data; /* compute start L0 */ j = 0; l1leaf = l1dcp->stree + CTLLEAFIND; p += nbperpage; /* 1st L0 of L1.k */ } /* * extend each L0 in L1 */ for (; j < LPERCTL; j++) { /* get L0 page */ if (i0) { /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */ l0mp = read_metapage(ipbmap, p, PSIZE, 0); if (l0mp == NULL) goto errout; l0dcp = (struct dmapctl *) l0mp->data; /* compute start dmap */ i = (blkno & (MAXL0SIZE - 1)) >> L2BPERDMAP; l0leaf = l0dcp->stree + CTLLEAFIND + i; p = BLKTODMAP(blkno, sbi->l2nbperpage); i0 = false; } else { /* assign/init L0 page */ l0mp = get_metapage(ipbmap, p, PSIZE, 0); if (l0mp == NULL) goto errout; l0dcp = (struct dmapctl *) l0mp->data; /* compute start dmap */ i = 0; l0leaf = l0dcp->stree + CTLLEAFIND; p += nbperpage; /* 1st dmap of L0.j */ } /* * extend each dmap in L0 */ for (; i < LPERCTL; i++) { /* * reconstruct the dmap page, and * initialize corresponding parent L0 leaf */ if ((n = blkno & (BPERDMAP - 1))) { /* read in dmap page: */ mp = read_metapage(ipbmap, p, PSIZE, 0); if (mp == NULL) goto errout; n = min(nblocks, (s64)BPERDMAP - n); } else { /* assign/init dmap page */ mp = read_metapage(ipbmap, p, PSIZE, 0); if (mp == NULL) goto errout; n = min_t(s64, nblocks, BPERDMAP); } dp = (struct dmap *) mp->data; *l0leaf = dbInitDmap(dp, blkno, n); bmp->db_nfree += n; agno = le64_to_cpu(dp->start) >> l2agsize; bmp->db_agfree[agno] += n; write_metapage(mp); l0leaf++; p += nbperpage; blkno += n; nblocks -= n; if (nblocks == 0) break; } /* for each dmap in a L0 */ /* * build current L0 page from its leaves, and * initialize corresponding parent L1 leaf */ *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); write_metapage(l0mp); l0mp = NULL; if (nblocks) 
l1leaf++; /* continue for next L0 */ else { /* more than 1 L0 ? */ if (j > 0) break; /* build L1 page */ else { /* summarize in global bmap page */ bmp->db_maxfreebud = *l1leaf; release_metapage(l1mp); release_metapage(l2mp); goto finalize; } } } /* for each L0 in a L1 */ /* * build current L1 page from its leaves, and * initialize corresponding parent L2 leaf */ *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); write_metapage(l1mp); l1mp = NULL; if (nblocks) l2leaf++; /* continue for next L1 */ else { /* more than 1 L1 ? */ if (k > 0) break; /* build L2 page */ else { /* summarize in global bmap page */ bmp->db_maxfreebud = *l2leaf; release_metapage(l2mp); goto finalize; } } } /* for each L1 in a L2 */ jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); errout: if (l0mp) release_metapage(l0mp); if (l1mp) release_metapage(l1mp); release_metapage(l2mp); return -EIO; /* * finalize bmap control page */ finalize: return 0; } /* * dbFinalizeBmap() */ void dbFinalizeBmap(struct inode *ipbmap) { struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; int actags, inactags, l2nl; s64 ag_rem, actfree, inactfree, avgfree; int i, n; /* * finalize bmap control page */ //finalize: /* * compute db_agpref: preferred ag to allocate from * (the leftmost ag with average free space in it); */ //agpref: /* get the number of active ags and inactive ags */ actags = bmp->db_maxag + 1; inactags = bmp->db_numag - actags; ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */ /* determine how many blocks are in the inactive allocation * groups. in doing this, we must account for the fact that * the rightmost group might be a partial group (i.e. file * system size is not a multiple of the group size). */ inactfree = (inactags && ag_rem) ? (((s64)inactags - 1) << bmp->db_agl2size) + ag_rem : ((s64)inactags << bmp->db_agl2size); /* determine how many free blocks are in the active * allocation groups plus the average number of free blocks * within the active ags. */ actfree = bmp->db_nfree - inactfree; avgfree = (u32) actfree / (u32) actags; /* if the preferred allocation group does not have average free space, * re-establish the preferred group as the leftmost * group with average free space. */ if (bmp->db_agfree[bmp->db_agpref] < avgfree) { for (bmp->db_agpref = 0; bmp->db_agpref < actags; bmp->db_agpref++) { if (bmp->db_agfree[bmp->db_agpref] >= avgfree) break; } if (bmp->db_agpref >= bmp->db_numag) { jfs_error(ipbmap->i_sb, "cannot find ag with average freespace\n"); } } /* * compute db_aglevel, db_agheight, db_agwidth, db_agstart: * an ag is covered in aglevel dmapctl summary tree, * at agheight level height (from leaf) with agwidth number of nodes * each, which starts at agstart index node of the summary tree node * array; */ bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); l2nl = bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); bmp->db_agheight = l2nl >> 1; bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; i--) { bmp->db_agstart += n; n <<= 2; } } /* * NAME: dbInitDmap()/ujfs_idmap_page() * * FUNCTION: initialize working/persistent bitmap of the dmap page * for the specified number of blocks: * * at entry, the bitmaps have been initialized as free (ZEROS); * The number of blocks will only account for the actually * existing blocks.
Blocks which don't actually exist in * the aggregate will be marked as allocated (ONES); * * PARAMETERS: * dp - pointer to page of map * blkno - starting block number of this dmap * nblocks - number of blocks this page * * RETURNS: max free string at the root of the dmap's summary tree */ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) { int blkno, w, b, r, nw, nb, i; /* starting block number within the dmap */ blkno = Blkno & (BPERDMAP - 1); if (blkno == 0) { dp->nblocks = dp->nfree = cpu_to_le32(nblocks); dp->start = cpu_to_le64(Blkno); if (nblocks == BPERDMAP) { memset(&dp->wmap[0], 0, LPERDMAP * 4); memset(&dp->pmap[0], 0, LPERDMAP * 4); goto initTree; } } else { le32_add_cpu(&dp->nblocks, nblocks); le32_add_cpu(&dp->nfree, nblocks); } /* word number containing start block number */ w = blkno >> L2DBWORD; /* * free the bits corresponding to the block range (ZEROS): * note: not all bits of the first and last words may be contained * within the block range. */ for (r = nblocks; r > 0; r -= nb, blkno += nb) { /* number of bits preceding range to be freed in the word */ b = blkno & (DBWORD - 1); /* number of bits to free in the word */ nb = min(r, DBWORD - b); /* is partial word to be freed ? */ if (nb < DBWORD) { /* free (set to 0) from the bitmap word */ dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) >> b)); dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) >> b)); /* skip the word freed */ w++; } else { /* free (set to 0) contiguous bitmap words */ nw = r >> L2DBWORD; memset(&dp->wmap[w], 0, nw * 4); memset(&dp->pmap[w], 0, nw * 4); /* skip the words freed */ nb = nw << L2DBWORD; w += nw; } } /* * mark bits following the range to be freed (non-existing * blocks) as allocated (ONES) */ if (blkno == BPERDMAP) goto initTree; /* the first word beyond the end of existing blocks */ w = blkno >> L2DBWORD; /* does nblocks fall on a 32-bit boundary ? */ b = blkno & (DBWORD - 1); if (b) { /* mark a partial word allocated */ dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); w++; } /* set the rest of the words in the page to allocated (ONES) */ for (i = w; i < LPERDMAP; i++) dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); /* * init tree */ initTree: return (dbInitDmapTree(dp)); } /* * NAME: dbInitDmapTree()/ujfs_complete_dmap() * * FUNCTION: initialize summary tree of the specified dmap: * * at entry, bitmap of the dmap has been initialized; * * PARAMETERS: * dp - dmap to complete * * RETURNS: max free string at the root of the tree */ static int dbInitDmapTree(struct dmap * dp) { struct dmaptree *tp; s8 *cp; int i; /* init fixed info of tree */ tp = &dp->tree; tp->nleafs = cpu_to_le32(LPERDMAP); tp->l2nleafs = cpu_to_le32(L2LPERDMAP); tp->leafidx = cpu_to_le32(LEAFIND); tp->height = cpu_to_le32(4); tp->budmin = BUDMIN; /* init each leaf from corresponding wmap word: * note: leaf is set to NOFREE(-1) if all blocks of corresponding * bitmap word are allocated. */ cp = tp->stree + le32_to_cpu(tp->leafidx); for (i = 0; i < LPERDMAP; i++) *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); /* build the dmap's binary buddy summary tree */ return (dbInitTree(tp)); } /* * NAME: dbInitTree()/ujfs_adjtree() * * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. * * at entry, the leaves of the tree have been initialized * from corresponding bitmap word or root of summary tree * of the child control page; * configure binary buddy system at the leaf level, then * bubble the values of the leaf nodes up the tree.
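* for a dmap tree, for example, l2max == L2LPERDMAP + BUDMIN == 8 + 5 == 13, * so a completely free dmap coalesces into a single buddy of 2^13 blocks * headed by leaf 0.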
* * PARAMETERS: * dtp - pointer to the summary tree (dmap or dmapctl) to initialize * * RETURNS: max free string at the root of the tree */ static int dbInitTree(struct dmaptree * dtp) { int l2max, l2free, bsize, nextb, i; int child, parent, nparent; s8 *tp, *cp, *cp1; tp = dtp->stree; /* Determine the maximum free string possible for the leaves */ l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; /* * configure the leaf level into binary buddy system * * Try to combine buddies starting with a buddy size of 1 * (i.e. two leaves). At a buddy size of 1 two buddy leaves * can be combined if both buddies have a maximum free of l2min; * the combination will result in the left-most buddy leaf having * a maximum free of l2min+1. * After processing all buddies for a given size, process buddies * at the next higher buddy size (i.e. current size * 2) and * the next maximum free (current free + 1). * This continues until the maximum possible buddy combination * yields maximum free. */ for (l2free = dtp->budmin, bsize = 1; l2free < l2max; l2free++, bsize = nextb) { /* get next buddy size == current buddy pair size */ nextb = bsize << 1; /* scan each adjacent buddy pair at current buddy size */ for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); i < le32_to_cpu(dtp->nleafs); i += nextb, cp += nextb) { /* coalesce if both adjacent buddies are max free */ if (*cp == l2free && *(cp + bsize) == l2free) { *cp = l2free + 1; /* left take right */ *(cp + bsize) = -1; /* right give left */ } } } /* * bubble summary information of leaves up the tree. * * Starting at the leaf node level, the four nodes described by * the higher level parent node are compared for a maximum free and * this maximum becomes the value of the parent node. * When all lower level nodes are processed in this fashion then * move up to the next level (parent becomes a lower level node) and * continue the process for that level. */ for (child = le32_to_cpu(dtp->leafidx), nparent = le32_to_cpu(dtp->nleafs) >> 2; nparent > 0; nparent >>= 2, child = parent) { /* get index of 1st node of parent level */ parent = (child - 1) >> 2; /* set the value of the parent node as the maximum * of the four nodes of the current level. */ for (i = 0, cp = tp + child, cp1 = tp + parent; i < nparent; i++, cp += 4, cp1++) *cp1 = TREEMAX(cp); } return (*tp); } /* * dbInitDmapCtl() * * function: initialize dmapctl page */ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) { /* start leaf index not covered by range */ s8 *cp; dcp->nleafs = cpu_to_le32(LPERCTL); dcp->l2nleafs = cpu_to_le32(L2LPERCTL); dcp->leafidx = cpu_to_le32(CTLLEAFIND); dcp->height = cpu_to_le32(5); dcp->budmin = L2BPERDMAP + L2LPERCTL * level; /* * initialize the leaves of current level that were not covered * by the specified input block range (i.e. the leaves have no * low level dmapctl or dmap).
*/ cp = &dcp->stree[CTLLEAFIND + i]; for (; i < LPERCTL; i++) *cp++ = NOFREE; /* build the dmap's binary buddy summary tree */ return (dbInitTree((struct dmaptree *) dcp)); } /* * NAME: dbGetL2AGSize()/ujfs_getagl2size() * * FUNCTION: Determine log2(allocation group size) from aggregate size * * PARAMETERS: * nblocks - Number of blocks in aggregate * * RETURNS: log2(allocation group size) in aggregate blocks */ static int dbGetL2AGSize(s64 nblocks) { s64 sz; s64 m; int l2sz; if (nblocks < BPERDMAP * MAXAG) return (L2BPERDMAP); /* round up aggregate size to power of 2 */ m = ((u64) 1 << (64 - 1)); for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { if (m & nblocks) break; } sz = (s64) 1 << l2sz; if (sz < nblocks) l2sz += 1; /* agsize = roundupSize/max_number_of_ag */ return (l2sz - L2MAXAG); } /* * NAME: dbMapFileSizeToMapSize() * * FUNCTION: compute number of blocks the block allocation map file * can cover from the map file size; * * RETURNS: Number of blocks which can be covered by this block map file; */ /* * maximum number of map pages at each level including control pages */ #define MAXL0PAGES (1 + LPERCTL) #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) /* * convert number of map pages to the zero origin top dmapctl level */ #define BMAPPGTOLEV(npages) \ (((npages) <= 3 + MAXL0PAGES) ? 0 : \ ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) s64 dbMapFileSizeToMapSize(struct inode * ipbmap) { struct super_block *sb = ipbmap->i_sb; s64 nblocks; s64 npages, ndmaps; int level, i; int complete, factor; nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; npages = nblocks >> JFS_SBI(sb)->l2nbperpage; level = BMAPPGTOLEV(npages); /* At each level, accumulate the number of dmap pages covered by * the number of full child levels below it; * repeat for the last incomplete child level. */ ndmaps = 0; npages--; /* skip the first global control page */ /* skip higher level control pages above top level covered by map */ npages -= (2 - level); npages--; /* skip top level's control page */ for (i = level; i >= 0; i--) { factor = (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); complete = (u32) npages / factor; ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : ((i == 1) ? LPERCTL : 1)); /* pages in last/incomplete child */ npages = (u32) npages % factor; /* skip incomplete child's level control page */ npages--; } /* convert the number of dmaps into the number of blocks * which can be covered by the dmaps; */ nblocks = ndmaps << L2BPERDMAP; return (nblocks); }
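/*
 * Editor's illustration: a self-contained userspace sketch (not part of
 * the kernel sources) of the two passes dbInitTree() above performs on
 * a summary tree: coalesce binary buddies at the leaf level, then
 * bubble the per-4-node maximum up to the root. Leaf values hold
 * log2(max free string); NOFREE (-1) means no free blocks. The tree
 * size and index constants below are simplified assumptions chosen for
 * illustration, not the kernel's LPERDMAP/LEAFIND values.
 */
#include <stdio.h>

#define NLEAFS	16	/* toy leaf count (power of 4) */
#define LEAFIDX	5	/* 1 root + 4 internal nodes precede the leaves */
#define NOFREE	(-1)
#define BUDMIN	0	/* each leaf covers 2^0 = 1 block */

static signed char max4(const signed char *cp)
{
	signed char max = cp[0];
	int i;

	for (i = 1; i < 4; i++)
		if (cp[i] > max)
			max = cp[i];
	return max;
}

int main(void)
{
	signed char tree[LEAFIDX + NLEAFS];
	int l2free, bsize, nextb, i;
	int child, parent, nparent;

	/* all 16 blocks free: every leaf starts at BUDMIN */
	for (i = 0; i < NLEAFS; i++)
		tree[LEAFIDX + i] = BUDMIN;

	/* pass 1: coalesce buddy pairs; left takes right, right goes NOFREE */
	for (l2free = BUDMIN, bsize = 1; l2free < BUDMIN + 4 /* log2(16) */;
	     l2free++, bsize = nextb) {
		nextb = bsize << 1;
		for (i = 0; i < NLEAFS; i += nextb) {
			signed char *cp = &tree[LEAFIDX + i];

			if (cp[0] == l2free && cp[bsize] == l2free) {
				cp[0] = l2free + 1;
				cp[bsize] = NOFREE;
			}
		}
	}

	/* pass 2: each parent becomes the maximum of its four children */
	for (child = LEAFIDX, nparent = NLEAFS >> 2; nparent > 0;
	     nparent >>= 2, child = parent) {
		parent = (child - 1) >> 2;
		for (i = 0; i < nparent; i++)
			tree[parent + i] = max4(&tree[child + i * 4]);
	}

	/* the root reports log2 of the largest free extent: 4 -> 16 blocks */
	printf("root = %d (largest free extent = %d blocks)\n",
	       tree[0], 1 << tree[0]);
	return 0;
}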
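/*
 * Editor's illustration: a standalone sketch (not kernel code) of the
 * round-up-to-a-power-of-two arithmetic dbGetL2AGSize() above uses to
 * derive log2(allocation group size). L2MAXAG and L2BPERDMAP below are
 * assumed values for demonstration, not necessarily the kernel's.
 */
#include <stdio.h>

#define L2MAXAG		7	/* log2(max allocation groups), assumed */
#define L2BPERDMAP	13	/* log2(blocks per dmap), assumed */
#define MAXAG		(1 << L2MAXAG)
#define BPERDMAP	(1 << L2BPERDMAP)

static int l2agsize(long long nblocks)
{
	unsigned long long m = 1ULL << 63;
	int l2sz;

	if (nblocks < (long long)BPERDMAP * MAXAG)
		return L2BPERDMAP;

	/* find the highest set bit: floor(log2(nblocks)) */
	for (l2sz = 63; l2sz >= 0; l2sz--, m >>= 1)
		if (m & (unsigned long long)nblocks)
			break;
	/* round up to the next power of two if not already exact */
	if ((1LL << l2sz) < nblocks)
		l2sz++;

	/* agsize = rounded-up aggregate size / max number of ags */
	return l2sz - L2MAXAG;
}

int main(void)
{
	/* 3,000,000 blocks rounds up to 2^22; 22 - 7 = 15 -> 32768-block ags */
	printf("l2agsize(3000000) = %d\n", l2agsize(3000000LL));
	return 0;
}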
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_health.h" static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, struct xfs_dir2_data_free **bf_ent); struct xfs_dir2_data_free * xfs_dir2_data_bestfree_p( struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) { if (xfs_has_crc(mp)) return ((struct xfs_dir3_data_hdr *)hdr)->best_free; return hdr->bestfree; } /* * Pointer to an entry's tag word. */ __be16 * xfs_dir2_data_entry_tag_p( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep) { return (__be16 *)((char *)dep + xfs_dir2_data_entsize(mp, dep->namelen) - sizeof(__be16)); } uint8_t xfs_dir2_data_get_ftype( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep) { if (xfs_has_ftype(mp)) { uint8_t ftype = dep->name[dep->namelen]; if (likely(ftype < XFS_DIR3_FT_MAX)) return ftype; } return XFS_DIR3_FT_UNKNOWN; } void xfs_dir2_data_put_ftype( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep, uint8_t ftype) { ASSERT(ftype < XFS_DIR3_FT_MAX); ASSERT(dep->namelen != 0); if (xfs_has_ftype(mp)) dep->name[dep->namelen] = ftype; } /* * The number of leaf entries is limited by the size of the block and the amount * of space used by the data entries.
We don't know how much space is used by * the data entries yet, so just ensure that the count falls somewhere inside * the block right now. */ static inline unsigned int xfs_dir2_data_max_leaf_entries( struct xfs_da_geometry *geo) { return (geo->blksize - sizeof(struct xfs_dir2_block_tail) - geo->data_entry_offset) / sizeof(struct xfs_dir2_leaf_entry); } /* * Check the consistency of the data block. * The input can also be a block-format directory. * Return NULL if the buffer is good, otherwise the address of the error. */ xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ { xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ int freeseen; /* mask of bestfrees seen */ xfs_dahash_t hash; /* hash of current name */ int i; /* leaf index */ int lastfree; /* last entry was unused */ xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ struct xfs_mount *mp = bp->b_mount; int stale; /* count of stale leaves */ struct xfs_name name; unsigned int offset; unsigned int end; struct xfs_da_geometry *geo = mp->m_dir_geo; /* * If this isn't a directory, something is seriously wrong. Bail out. */ if (dp && !S_ISDIR(VFS_I(dp)->i_mode)) return __this_address; hdr = bp->b_addr; offset = geo->data_entry_offset; switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); if (be32_to_cpu(btp->count) >= xfs_dir2_data_max_leaf_entries(geo)) return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): break; default: return __this_address; } end = xfs_dir3_data_end_offset(geo, hdr); if (!end) return __this_address; /* * Account for zero bestfree entries. */ bf = xfs_dir2_data_bestfree_p(mp, hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { if (bf[0].offset) return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { if (bf[1].offset) return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { if (bf[2].offset) return __this_address; freeseen |= 1 << 2; } if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) return __this_address; if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) return __this_address; /* * Loop over the data/unused entries. */ while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; unsigned int reclen; /* * Are the remaining bytes large enough to hold an * unused entry? */ if (offset > end - xfs_dir2_data_unusedsize(1)) return __this_address; /* * If it's unused, look for the space in the bestfree table. * If we find it, account for that, else make sure it * doesn't need to be there. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { xfs_failaddr_t fa; reclen = xfs_dir2_data_unusedsize( be16_to_cpu(dup->length)); if (lastfree != 0) return __this_address; if (be16_to_cpu(dup->length) != reclen) return __this_address; if (offset + reclen > end) return __this_address; if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != offset) return __this_address; fa = xfs_dir2_data_freefind_verify(hdr, bf, dup, &dfp); if (fa) return fa; if (dfp) { i = (int)(dfp - bf); if ((freeseen & (1 << i)) != 0) return __this_address; freeseen |= 1 << i; } else { if (be16_to_cpu(dup->length) > be16_to_cpu(bf[2].length)) return __this_address; } offset += reclen; lastfree = 1; continue; } /* * This is not an unused entry. Are the remaining bytes * large enough for a dirent with a single-byte name? */ if (offset > end - xfs_dir2_data_entsize(mp, 1)) return __this_address; /* * It's a real entry. Validate the fields. * If this is a block directory then make sure it's * in the leaf section of the block. * The linear search is crude but this is DEBUG code. */ if (dep->namelen == 0) return __this_address; reclen = xfs_dir2_data_entsize(mp, dep->namelen); if (offset + reclen > end) return __this_address; if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) return __this_address; if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) return __this_address; if (xfs_dir2_data_get_ftype(mp, dep) >= XFS_DIR3_FT_MAX) return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = xfs_dir2_hashname(mp, &name); for (i = 0; i < be32_to_cpu(btp->count); i++) { if (be32_to_cpu(lep[i].address) == addr && be32_to_cpu(lep[i].hashval) == hash) break; } if (i >= be32_to_cpu(btp->count)) return __this_address; } offset += reclen; } /* * Need to have seen all the entries and all the bestfree slots. */ if (freeseen != 7) return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; if (i > 0 && be32_to_cpu(lep[i].hashval) < be32_to_cpu(lep[i - 1].hashval)) return __this_address; } if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) return __this_address; if (stale != be32_to_cpu(btp->stale)) return __this_address; } return NULL; } #ifdef DEBUG void xfs_dir3_data_check( struct xfs_inode *dp, struct xfs_buf *bp) { xfs_failaddr_t fa; fa = __xfs_dir3_data_check(dp, bp); if (!fa) return; xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, bp->b_addr, BBTOB(bp->b_length), __FILE__, __LINE__, fa); ASSERT(0); } #endif static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) return __this_address; if (xfs_has_crc(mp)) { if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address; } return __xfs_dir3_data_check(NULL, bp); } /* * Readahead of the first block of the directory when it is opened is completely * oblivious to the format of the directory. 
Hence we can either get a block * format buffer or a data format buffer on readahead. */ static void xfs_dir3_data_reada_verify( struct xfs_buf *bp) { struct xfs_dir2_data_hdr *hdr = bp->b_addr; switch (hdr->magic) { case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): bp->b_ops = &xfs_dir3_block_buf_ops; bp->b_ops->verify_read(bp); return; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): bp->b_ops = &xfs_dir3_data_buf_ops; bp->b_ops->verify_read(bp); return; default: xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } static void xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_dir3_data_verify(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } } static void xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; fa = xfs_dir3_data_verify(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } if (!xfs_has_crc(mp)) return; if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC), cpu_to_be32(XFS_DIR3_DATA_MAGIC) }, .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { .name = "xfs_dir3_data_reada", .magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC), cpu_to_be32(XFS_DIR3_DATA_MAGIC) }, .verify_read = xfs_dir3_data_reada_verify, .verify_write = xfs_dir3_data_write_verify, }; xfs_failaddr_t xfs_dir3_data_header_check( struct xfs_buf *bp, xfs_ino_t owner) { struct xfs_mount *mp = bp->b_mount; if (xfs_has_crc(mp)) { struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; if (hdr3->hdr.magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) return __this_address; if (be64_to_cpu(hdr3->hdr.owner) != owner) return __this_address; } return NULL; } int xfs_dir3_data_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_ino_t owner, xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp) { xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); if (err || !*bpp) return err; /* Check things that we can't do in the verifier. */ fa = xfs_dir3_data_header_check(*bpp, owner); if (fa) { __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); *bpp = NULL; xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); return -EFSCORRUPTED; } xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } int xfs_dir3_data_readahead( struct xfs_inode *dp, xfs_dablk_t bno, unsigned int flags) { return xfs_da_reada_buf(dp, bno, flags, XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); } /* * Find the bestfree entry that exactly coincides with unused directory space * or a verifier error because the bestfree data are bad. 
*/ static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, struct xfs_dir2_data_free **bf_ent) { struct xfs_dir2_data_free *dfp; xfs_dir2_data_aoff_t off; bool matched = false; bool seenzero = false; *bf_ent = NULL; off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { if (dfp->length) return __this_address; seenzero = true; continue; } if (seenzero) return __this_address; if (be16_to_cpu(dfp->offset) == off) { matched = true; if (dfp->length != dup->length) return __this_address; } else if (be16_to_cpu(dfp->offset) > off) { if (off + be16_to_cpu(dup->length) > be16_to_cpu(dfp->offset)) return __this_address; } else { if (be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) > off) return __this_address; } if (!matched && be16_to_cpu(dfp->length) < be16_to_cpu(dup->length)) return __this_address; if (dfp > &bf[0] && be16_to_cpu(dfp[-1].length) < be16_to_cpu(dfp[0].length)) return __this_address; } /* Looks ok so far; now try to match up with a bestfree entry. */ *bf_ent = xfs_dir2_data_freefind(hdr, bf, dup); return NULL; } /* * Given a data block and an unused entry from that block, * return the bestfree entry if any that corresponds to it. */ xfs_dir2_data_free_t * xfs_dir2_data_freefind( struct xfs_dir2_data_hdr *hdr, /* data block header */ struct xfs_dir2_data_free *bf, /* bestfree table pointer */ struct xfs_dir2_data_unused *dup) /* unused space */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_aoff_t off; /* offset value needed */ off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); /* * If this is smaller than the smallest bestfree entry, * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. */ for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; if (be16_to_cpu(dfp->offset) == off) return dfp; } /* * Didn't find it. This only happens if there are duplicate lengths. */ return NULL; } /* * Insert an unused-space entry into the bestfree table. */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( struct xfs_dir2_data_hdr *hdr, /* data block pointer */ struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ struct xfs_dir2_data_unused *dup, /* unused space */ int *loghead) /* log the data header (out) */ { xfs_dir2_data_free_t new; /* new bestfree entry */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); new.length = dup->length; new.offset = cpu_to_be16((char *)dup - (char *)hdr); /* * Insert at position 0, 1, or 2; or not at all. */ if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { dfp[2] = dfp[1]; dfp[1] = dfp[0]; dfp[0] = new; *loghead = 1; return &dfp[0]; } if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { dfp[2] = dfp[1]; dfp[1] = new; *loghead = 1; return &dfp[1]; } if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { dfp[2] = new; *loghead = 1; return &dfp[2]; } return NULL; } /* * Remove a bestfree entry from the table. 
*/ STATIC void xfs_dir2_data_freeremove( struct xfs_dir2_data_hdr *hdr, /* data block header */ struct xfs_dir2_data_free *bf, /* bestfree table pointer */ struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * It's the first entry, slide the next 2 up. */ if (dfp == &bf[0]) { bf[0] = bf[1]; bf[1] = bf[2]; } /* * It's the second entry, slide the 3rd entry up. */ else if (dfp == &bf[1]) bf[1] = bf[2]; /* * Must be the last entry. */ else ASSERT(dfp == &bf[2]); /* * Clear the 3rd entry, must be zero now. */ bf[2].length = 0; bf[2].offset = 0; *loghead = 1; } /* * Given a data block, reconstruct its bestfree map. */ void xfs_dir2_data_freescan( struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr, int *loghead) { struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_dir2_data_free *bf = xfs_dir2_data_bestfree_p(mp, hdr); void *addr = hdr; unsigned int offset = geo->data_entry_offset; unsigned int end; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * Start by clearing the table. */ memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; end = xfs_dir3_data_end_offset(geo, addr); while (offset < end) { struct xfs_dir2_data_unused *dup = addr + offset; struct xfs_dir2_data_entry *dep = addr + offset; /* * If it's a free entry, insert it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(offset == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); offset += be16_to_cpu(dup->length); continue; } /* * For active entries, check their tags and skip them. */ ASSERT(offset == be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep))); offset += xfs_dir2_data_entsize(mp, dep->namelen); } } /* * Initialize a data block at the given block number in the directory. * Give back the buffer for the created block. */ int /* error */ xfs_dir3_data_init( struct xfs_da_args *args, /* directory operation args */ xfs_dir2_db_t blkno, /* logical dir block number */ struct xfs_buf **bpp) /* output block buffer */ { struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_da_geometry *geo = args->geo; struct xfs_buf *bp; struct xfs_dir2_data_hdr *hdr; struct xfs_dir2_data_unused *dup; struct xfs_dir2_data_free *bf; int error; int i; /* * Get the buffer set up for the block. */ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), &bp, XFS_DATA_FORK); if (error) return error; bp->b_ops = &xfs_dir3_data_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); /* * Initialize the header. 
*/ hdr = bp->b_addr; if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; memset(hdr3, 0, sizeof(*hdr3)); hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->owner = cpu_to_be64(args->owner); uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); bf = xfs_dir2_data_bestfree_p(mp, hdr); bf[0].offset = cpu_to_be16(geo->data_entry_offset); bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { bf[i].length = 0; bf[i].offset = 0; } /* * Set up an unused entry for the block's body. */ dup = bp->b_addr + geo->data_entry_offset; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = bf[0].length; *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ xfs_dir2_data_log_header(args, bp); xfs_dir2_data_log_unused(args, bp, dup); *bpp = bp; return 0; } /* * Log an active data entry from the block. */ void xfs_dir2_data_log_entry( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), (uint)((char *)(xfs_dir2_data_entry_tag_p(mp, dep) + 1) - (char *)hdr - 1)); } /* * Log a data block header. */ void xfs_dir2_data_log_header( struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); #endif xfs_trans_log_buf(args->trans, bp, 0, args->geo->data_entry_offset - 1); } /* * Log a data unused entry. */ void xfs_dir2_data_log_unused( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * Log the first part of the unused entry. */ xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ xfs_trans_log_buf(args->trans, bp, (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); } /* * Make a byte range in the data block unused. * Its current contents are unimportant. 
*/ void xfs_dir2_data_make_free( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int needscan; /* need to regen bestfree */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *postdup; /* unused entry after us */ xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ unsigned int end; struct xfs_dir2_data_free *bf; hdr = bp->b_addr; /* * Figure out where the end of the data area is. */ end = xfs_dir3_data_end_offset(args->geo, hdr); ASSERT(end != 0); /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ if (offset > args->geo->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else prevdup = NULL; /* * If this isn't the end of the block, see if the entry after * us is free. */ if (offset + len < end) { postdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else postdup = NULL; ASSERT(*needscanp == 0); needscan = 0; /* * Previous and following entries are both free, * merge everything into a single free entry. */ bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ /* * See if prevdup and/or postdup are in bestfree table. */ dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. */ needscan = (bf[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, prevdup); if (!needscan) { /* * Has to be the case that entries 0 and 1 are * dfp and dfp2 (don't know which is which), and * entry 2 is empty. * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); if (dfp == &bf[1]) { dfp = &bf[0]; ASSERT(dfp2 == dfp); dfp2 = &bf[1]; } xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); /* * Now insert the new entry. */ dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); ASSERT(dfp == &bf[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); } } /* * The entry before us is free, merge with it. */ else if (prevdup) { dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, prevdup); /* * If the previous entry was in the table, the new entry * is longer, so it will be in the table too. Remove * the old one and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. 
*/ else { needscan = be16_to_cpu(prevdup->length) > be16_to_cpu(bf[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { dfp = xfs_dir2_data_freefind(hdr, bf, postdup); newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If the following entry was in the table, the new entry * is longer, so it will be in the table too. Remove * the old one and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > be16_to_cpu(bf[2].length); } } /* * Neither neighbor is free. Make a new entry. */ else { newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } *needscanp = needscan; } /* Check our free data for obvious signs of corruption. */ static inline xfs_failaddr_t xfs_dir2_data_check_free( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len) { if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) return __this_address; if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG) return __this_address; if (offset < (char *)dup - (char *)hdr) return __this_address; if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr) return __this_address; if ((char *)dup - (char *)hdr != be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))) return __this_address; return NULL; } /* Sanity-check a new bestfree entry. */ static inline xfs_failaddr_t xfs_dir2_data_check_new_free( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *dfp, struct xfs_dir2_data_unused *newdup) { if (dfp == NULL) return __this_address; if (dfp->length != newdup->length) return __this_address; if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr) return __this_address; return NULL; } /* * Take a byte range out of an existing unused space and make it un-free. */ int xfs_dir2_data_use_free( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ xfs_dir2_data_aoff_t len, /* length to use */ int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ struct xfs_dir2_data_free *bf; xfs_failaddr_t fa; int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ int needscan; /* need to regen bestfree */ int oldlen; /* old unused entry's length */ hdr = bp->b_addr; fa = xfs_dir2_data_check_free(hdr, dup, offset, len); if (fa) goto corrupt; /* * Look up the entry in the bestfree table. 
*/ oldlen = be16_to_cpu(dup->length); bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* * Check for alignment with front and back of the entry. */ matchfront = (char *)dup - (char *)hdr == offset; matchback = (char *)dup + oldlen - (char *)hdr == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* * If we matched it exactly we just need to get rid of it from * the bestfree table. */ if (matchfront && matchback) { if (dfp) { needscan = (bf[2].offset != 0); if (!needscan) xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); } } /* * We match the first part of the entry. * Make a new entry with the remaining freespace. */ else if (matchfront) { newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); if (fa) goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ needscan = dfp == &bf[2]; } } /* * We match the last part of the entry. * Trim the allocated space off the tail of the entry. */ else if (matchback) { newdup = dup; newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); if (fa) goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ needscan = dfp == &bf[2]; } } /* * Poking out the middle of an entry. * Make two new entries. */ else { newdup = dup; newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = cpu_to_be16((char *)newdup2 - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup2); /* * If the old entry was in the table, we need to scan * if the 3rd entry was valid, since these entries * are smaller than the old one. * If we don't need to scan that means there were 1 or 2 * entries in the table, and removing the old and adding * the 2 new will work. 
*/ if (dfp) { needscan = (bf[2].length != 0); if (!needscan) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup2, needlogp); } } } *needscanp = needscan; return 0; corrupt: xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, hdr, sizeof(*hdr), __FILE__, __LINE__, fa); xfs_da_mark_sick(args); return -EFSCORRUPTED; } /* Find the end of the entry data in a data/block format dir block. */ unsigned int xfs_dir3_data_end_offset( struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) { void *p; switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): p = xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); return p - (void *)hdr; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): return geo->blksize; default: return 0; } }
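/*
 * Editor's illustration: a userspace sketch of the bestfree bookkeeping
 * that xfs_dir2_data_freeinsert() above implements: keep the three
 * largest free-space entries sorted by descending length, sliding
 * smaller entries down on insert. The struct and helper below are
 * simplified stand-ins, not the kernel's on-disk types.
 */
#include <stdio.h>
#include <string.h>

struct bestfree {
	unsigned short offset;	/* byte offset of the free region */
	unsigned short length;	/* length of the free region */
};

/* returns the slot used, or -1 if the entry is too small to track */
static int bestfree_insert(struct bestfree bf[3], struct bestfree new)
{
	int i;

	for (i = 0; i < 3; i++) {
		if (new.length > bf[i].length) {
			/* slide the smaller tail entries down one slot */
			memmove(&bf[i + 1], &bf[i], (2 - i) * sizeof(bf[0]));
			bf[i] = new;
			return i;
		}
	}
	return -1;
}

int main(void)
{
	struct bestfree bf[3] = { { 0, 0 } };
	struct bestfree spans[] = {
		{ 64, 48 }, { 512, 120 }, { 2048, 16 }, { 1024, 200 },
	};
	int i;

	for (i = 0; i < 4; i++)
		bestfree_insert(bf, spans[i]);

	/* expect lengths 200, 120, 48: descending, with { 2048, 16 } dropped */
	for (i = 0; i < 3; i++)
		printf("bf[%d] = { offset %u, length %u }\n",
		       i, bf[i].offset, bf[i].length);
	return 0;
}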
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SPECIAL_INSNS_H #define _ASM_X86_SPECIAL_INSNS_H #ifdef __KERNEL__ #include <asm/nops.h> #include <asm/processor-flags.h> #include <linux/errno.h> #include <linux/irqflags.h> #include <linux/jump_label.h> void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { unsigned long val; asm volatile("mov %%cr0,%0" : "=r" (val)); return val; } static __always_inline unsigned long native_read_cr2(void) { unsigned long val; asm volatile("mov %%cr2,%0" : "=r" (val)); return val; } static __always_inline void native_write_cr2(unsigned long val) { asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } static __always_inline unsigned long __native_read_cr3(void) { unsigned long val; asm volatile("mov %%cr3,%0" : "=r" (val)); return val; } static __always_inline void native_write_cr3(unsigned long val) { asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } static inline unsigned long native_read_cr4(void) { unsigned long val; #ifdef CONFIG_X86_32 /* * This could fault if CR4 does not exist. Non-existent CR4 * is functionally equivalent to CR4 == 0. Keep it simple and pretend * that CR4 == 0 on CPUs that don't have CR4. */ asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) : "=r" (val) : "0" (0)); #else /* CR4 always exists on x86_64. */ asm volatile("mov %%cr4,%0" : "=r" (val)); #endif return val; } void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; /* * "rdpkru" instruction. Places PKRU contents into EAX, * clears EDX and requires that ecx=0. */ asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; /* * "wrpkru" instruction. Loads the contents of EAX into PKRU, * requires that ecx = edx = 0. */ asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx)); } #else static inline u32 rdpkru(void) { return 0; } static inline void wrpkru(u32 pkru) { } #endif /* * Write back all modified lines in all levels of cache associated with this * logical processor to main memory, and then invalidate all caches. Depending * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect * lower level caches associated with another logical processor that shares any * level of this processor's cache hierarchy.
*/ static __always_inline void wbinvd(void) { asm volatile("wbinvd" : : : "memory"); } /* Instruction encoding provided for binutils backwards compatibility. */ #define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09) /* * Write back all modified lines in all levels of cache associated with this * logical processor to main memory, but do NOT explicitly invalidate caches, * i.e. leave all/most cache lines in the hierarchy in non-modified state. */ static __always_inline void wbnoinvd(void) { /* * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even * though WBNOINVD is backwards compatible (it's simply WBINVD with an * ignored REP prefix), to guarantee that WBNOINVD isn't used if it * needs to be avoided for any reason. For all supported usage in the * kernel, WBINVD is functionally a superset of WBNOINVD. */ alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD); } static inline unsigned long __read_cr4(void) { return native_read_cr4(); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else static inline unsigned long read_cr0(void) { return native_read_cr0(); } static inline void write_cr0(unsigned long x) { native_write_cr0(x); } static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } /* * Careful! CR3 contains more than just an address. You probably want * read_cr3_pa() instead. */ static inline unsigned long __read_cr3(void) { return __native_read_cr3(); } static inline void write_cr3(unsigned long x) { native_write_cr3(x); } static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } #endif /* CONFIG_PARAVIRT_XXL */ static __always_inline void clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } static inline void clflushopt(volatile void *__p) { alternative_io("ds clflush %0", "clflushopt %0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } static inline void clwb(volatile void *__p) { volatile struct { char x[64]; } *p = __p; asm_inline volatile(ALTERNATIVE_2( "ds clflush %0", "clflushopt %0", X86_FEATURE_CLFLUSHOPT, "clwb %0", X86_FEATURE_CLWB) : "+m" (*p)); } #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { asm goto("1: wrussq %[val], %[addr]\n" _ASM_EXTABLE(1b, %l[fail]) :: [addr] "m" (*addr), [val] "r" (val) :: fail); return 0; fail: return -EFAULT; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ #define nop() asm volatile ("nop") static __always_inline void serialize(void) { /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); } /* The dst parameter must be 64-bytes aligned */ static inline void movdir64b(void *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } *__dst = dst; /* * MOVDIR64B %(rdx), rax. * * Both __src and __dst must be memory constraints in order to tell the * compiler that no other memory accesses should be reordered around * this one. * * Also, both must be supplied as lvalues because this tells * the compiler what the object is (its size) the instruction accesses. * I.e., not the pointers but what they point to, thus the deref'ing '*'. 
*/ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" : "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); } static inline void movdir64b_io(void __iomem *dst, const void *src) { movdir64b((void __force *)dst, src); } /** * enqcmds - Enqueue a command in supervisor (CPL0) mode * @dst: destination, in MMIO space (must be 512-bit aligned) * @src: 512 bits memory operand * * The ENQCMDS instruction allows software to write a 512-bit command to * a 512-bit-aligned special MMIO region that supports the instruction. * A return status is loaded into the ZF flag in the RFLAGS register. * ZF = 0 equates to success, and ZF = 1 indicates retry or error. * * This function issues the ENQCMDS instruction to submit data from * kernel space to MMIO space, in a unit of 512 bits. Order of data access * is not guaranteed, nor is a memory barrier performed afterwards. It * returns 0 on success and -EAGAIN on failure. * * Warning: Do not use this helper unless your driver has checked that the * ENQCMDS instruction is supported on the platform and the device accepts * ENQCMDS. */ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; bool zf; /* * ENQCMDS %(rdx), rax * * See movdir64b()'s comment on operand specification. */ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" : "=@ccz" (zf), "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); /* Submission failure is indicated via EFLAGS.ZF=1 */ if (zf) return -EAGAIN; return 0; } static __always_inline void tile_release(void) { /* * Instruction opcode for TILERELEASE; supported in binutils * version >= 2.36. */ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0"); } #endif /* __KERNEL__ */ #endif /* _ASM_X86_SPECIAL_INSNS_H */
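/*
 * Editor's illustration: a userspace demo (x86-64, GCC/Clang) of the
 * "=@ccz" flag-output constraint that enqcmds() above uses to read
 * EFLAGS.ZF straight out of an asm statement, shown here with an
 * ordinary CMP instead of the privileged ENQCMDS instruction.
 */
#include <stdbool.h>
#include <stdio.h>

static bool is_equal(unsigned long a, unsigned long b)
{
	bool zf;

	/* CMP sets ZF=1 when the operands are equal; the compiler
	 * materializes ZF into 'zf' without an explicit SETZ or branch. */
	asm("cmp %2, %1" : "=@ccz" (zf) : "r" (a), "r" (b));
	return zf;
}

int main(void)
{
	printf("is_equal(3, 3) = %d\n", is_equal(3, 3));	/* prints 1 */
	printf("is_equal(3, 4) = %d\n", is_equal(3, 4));	/* prints 0 */
	return 0;
}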
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/svc.c * * This is the central lockd service. * * FIXME: Separate the lockd NFS server functionality from the lockd NFS * client functionality. Oh why didn't Sun create two separate * services in the first place?
* * Authors: Olaf Kirch (okir@monad.swb.de) * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/module.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/moduleparam.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/uio.h> #include <linux/smp.h> #include <linux/mutex.h> #include <linux/freezer.h> #include <linux/inetdevice.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svc_xprt.h> #include <net/ip.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <linux/lockd/lockd.h> #include <linux/nfs.h> #include "netns.h" #include "procfs.h" #include "netlink.h" #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) static struct svc_program nlmsvc_program; const struct nlmsvc_binding *nlmsvc_ops; EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct svc_serv *nlmsvc_serv; static void nlmsvc_request_retry(struct timer_list *tl) { svc_wake_up(nlmsvc_serv); } DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry); unsigned int lockd_net_id; /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 */ static unsigned long nlm_grace_period; unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; static int nlm_udpport, nlm_tcpport; /* * Constants needed for the sysctl interface. */ static const unsigned long nlm_grace_period_min = 0; static const unsigned long nlm_grace_period_max = 240; static const unsigned long nlm_timeout_min = 3; static const unsigned long nlm_timeout_max = 20; #ifdef CONFIG_SYSCTL static const int nlm_port_min = 0, nlm_port_max = 65535; static struct ctl_table_header * nlm_sysctl_table; #endif static unsigned long get_lockd_grace_period(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); /* Return the net-ns specific grace period, if there is one */ if (ln->gracetime) return ln->gracetime * HZ; /* Note: nlm_timeout should always be nonzero */ if (nlm_grace_period) return roundup(nlm_grace_period, nlm_timeout) * HZ; else return nlm_timeout * 5 * HZ; } static void grace_ender(struct work_struct *grace) { struct delayed_work *dwork = to_delayed_work(grace); struct lockd_net *ln = container_of(dwork, struct lockd_net, grace_period_end); locks_end_grace(&ln->lockd_manager); } static void set_grace_period(struct net *net) { unsigned long grace_period = get_lockd_grace_period(net); struct lockd_net *ln = net_generic(net, lockd_net_id); locks_start_grace(net, &ln->lockd_manager); cancel_delayed_work_sync(&ln->grace_period_end); schedule_delayed_work(&ln->grace_period_end, grace_period); } /* * This is the lockd kernel thread */ static int lockd(void *vrqstp) { struct svc_rqst *rqstp = vrqstp; struct net *net = &init_net; struct lockd_net *ln = net_generic(net, lockd_net_id); svc_thread_init_status(rqstp, 0); /* try_to_freeze() is called from svc_recv() */ set_freezable(); dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. 
	 */
	while (!svc_thread_should_stop(rqstp)) {
		nlmsvc_retry_blocked(rqstp);
		svc_recv(rqstp);
	}
	if (nlmsvc_ops)
		nlmsvc_invalidate_all();
	nlm_shutdown_hosts();
	cancel_delayed_work_sync(&ln->grace_period_end);
	locks_end_grace(&ln->lockd_manager);

	dprintk("lockd_down: service stopped\n");

	svc_exit_thread(rqstp);
	return 0;
}

static int create_lockd_listener(struct svc_serv *serv, const char *name,
				 struct net *net, const int family,
				 const unsigned short port,
				 const struct cred *cred)
{
	struct svc_xprt *xprt;

	xprt = svc_find_xprt(serv, name, net, family, 0);
	if (xprt == NULL)
		return svc_xprt_create(serv, name, net, family, port,
				       SVC_SOCK_DEFAULTS, cred);
	svc_xprt_put(xprt);
	return 0;
}

static int create_lockd_family(struct svc_serv *serv, struct net *net,
			       const int family, const struct cred *cred)
{
	struct lockd_net *ln = net_generic(net, lockd_net_id);
	int err;

	err = create_lockd_listener(serv, "udp", net, family,
				    ln->udp_port ? ln->udp_port : nlm_udpport,
				    cred);
	if (err < 0)
		return err;

	return create_lockd_listener(serv, "tcp", net, family,
				     ln->tcp_port ? ln->tcp_port : nlm_tcpport,
				     cred);
}

/*
 * Ensure there are active UDP and TCP listeners for lockd.
 *
 * Even if we have only TCP NFS mounts and/or TCP NFSDs, some
 * local services (such as rpc.statd) still require UDP, and
 * some NFS servers do not yet support NLM over TCP.
 *
 * Returns zero if all listeners are available; otherwise a
 * negative errno value is returned.
 */
static int make_socks(struct svc_serv *serv, struct net *net,
		      const struct cred *cred)
{
	static int warned;
	int err;

	err = create_lockd_family(serv, net, PF_INET, cred);
	if (err < 0)
		goto out_err;

	err = create_lockd_family(serv, net, PF_INET6, cred);
	if (err < 0 && err != -EAFNOSUPPORT)
		goto out_err;

	warned = 0;
	return 0;

out_err:
	if (warned++ == 0)
		printk(KERN_WARNING
		       "lockd_up: makesock failed, error=%d\n", err);
	svc_xprt_destroy_all(serv, net, true);
	return err;
}

static int lockd_up_net(struct svc_serv *serv, struct net *net,
			const struct cred *cred)
{
	struct lockd_net *ln = net_generic(net, lockd_net_id);
	int error;

	if (ln->nlmsvc_users++)
		return 0;

	error = svc_bind(serv, net);
	if (error)
		goto err_bind;

	error = make_socks(serv, net, cred);
	if (error < 0)
		goto err_bind;
	set_grace_period(net);
	dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum);
	return 0;

err_bind:
	ln->nlmsvc_users--;
	return error;
}

static void lockd_down_net(struct svc_serv *serv, struct net *net)
{
	struct lockd_net *ln = net_generic(net, lockd_net_id);

	if (ln->nlmsvc_users) {
		if (--ln->nlmsvc_users == 0) {
			nlm_shutdown_hosts_net(net);
			cancel_delayed_work_sync(&ln->grace_period_end);
			locks_end_grace(&ln->lockd_manager);
			svc_xprt_destroy_all(serv, net, true);
		}
	} else {
		pr_err("%s: no users! net=%x\n",
net=%x\n", __func__, net->ns.inum); BUG(); } } static int lockd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; if (event != NETDEV_DOWN) goto out; if (nlmsvc_serv) { dprintk("lockd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin); } out: return NOTIFY_DONE; } static struct notifier_block lockd_inetaddr_notifier = { .notifier_call = lockd_inetaddr_event, }; #if IS_ENABLED(CONFIG_IPV6) static int lockd_inet6addr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; if (event != NETDEV_DOWN) goto out; if (nlmsvc_serv) { dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6); } out: return NOTIFY_DONE; } static struct notifier_block lockd_inet6addr_notifier = { .notifier_call = lockd_inet6addr_event, }; #endif static int lockd_get(void) { struct svc_serv *serv; int error; if (nlmsvc_serv) { nlmsvc_users++; return 0; } /* * Sanity check: if there's no pid, * we should be the first user ... */ if (nlmsvc_users) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return -ENOMEM; } error = svc_set_num_threads(serv, NULL, 1); if (error < 0) { svc_destroy(&serv); return error; } nlmsvc_serv = serv; register_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&lockd_inet6addr_notifier); #endif dprintk("lockd_up: service created\n"); nlmsvc_users++; return 0; } static void lockd_put(void) { if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n")) return; if (--nlmsvc_users) return; unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif svc_set_num_threads(nlmsvc_serv, NULL, 0); timer_delete_sync(&nlmsvc_retry); svc_destroy(&nlmsvc_serv); dprintk("lockd_down: service destroyed\n"); } /* * Bring up the lockd process if it's not already up. */ int lockd_up(struct net *net, const struct cred *cred) { int error; mutex_lock(&nlmsvc_mutex); error = lockd_get(); if (error) goto err; error = lockd_up_net(nlmsvc_serv, net, cred); if (error < 0) { lockd_put(); goto err; } err: mutex_unlock(&nlmsvc_mutex); return error; } EXPORT_SYMBOL_GPL(lockd_up); /* * Decrement the user count and bring down lockd if we're the last. */ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); lockd_down_net(nlmsvc_serv, net); lockd_put(); mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL_GPL(lockd_down); #ifdef CONFIG_SYSCTL /* * Sysctl parameters (same as module parameters, different interface). 
 */
static const struct ctl_table nlm_sysctls[] = {
	{
		.procname	= "nlm_grace_period",
		.data		= &nlm_grace_period,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= (unsigned long *) &nlm_grace_period_min,
		.extra2		= (unsigned long *) &nlm_grace_period_max,
	},
	{
		.procname	= "nlm_timeout",
		.data		= &nlm_timeout,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= (unsigned long *) &nlm_timeout_min,
		.extra2		= (unsigned long *) &nlm_timeout_max,
	},
	{
		.procname	= "nlm_udpport",
		.data		= &nlm_udpport,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (int *) &nlm_port_min,
		.extra2		= (int *) &nlm_port_max,
	},
	{
		.procname	= "nlm_tcpport",
		.data		= &nlm_tcpport,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (int *) &nlm_port_min,
		.extra2		= (int *) &nlm_port_max,
	},
	{
		.procname	= "nsm_use_hostnames",
		.data		= &nsm_use_hostnames,
		.maxlen		= sizeof(bool),
		.mode		= 0644,
		.proc_handler	= proc_dobool,
	},
	{
		.procname	= "nsm_local_state",
		.data		= &nsm_local_state,
		.maxlen		= sizeof(nsm_local_state),
		.mode		= 0644,
		.proc_handler	= proc_douintvec,
		.extra1		= SYSCTL_ZERO,
	},
};

#endif	/* CONFIG_SYSCTL */

/*
 * Module (and sysfs) parameters.
 */

#define param_set_min_max(name, type, which_strtol, min, max)		\
static int param_set_##name(const char *val, const struct kernel_param *kp) \
{									\
	char *endp;							\
	__typeof__(type) num = which_strtol(val, &endp, 0);		\
	if (endp == val || *endp || num < (min) || num > (max))	\
		return -EINVAL;						\
	*((type *) kp->arg) = num;					\
	return 0;							\
}

static inline int is_callback(u32 proc)
{
	return proc == NLMPROC_GRANTED
		|| proc == NLMPROC_GRANTED_MSG
		|| proc == NLMPROC_TEST_RES
		|| proc == NLMPROC_LOCK_RES
		|| proc == NLMPROC_CANCEL_RES
		|| proc == NLMPROC_UNLOCK_RES
		|| proc == NLMPROC_NSM_NOTIFY;
}

static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp)
{
	rqstp->rq_client = NULL;
	switch (rqstp->rq_authop->flavour) {
	case RPC_AUTH_NULL:
	case RPC_AUTH_UNIX:
		rqstp->rq_auth_stat = rpc_auth_ok;
		if (rqstp->rq_proc == 0)
			return SVC_OK;
		if (is_callback(rqstp->rq_proc)) {
			/* Leave it to individual procedures to
			 * call nlmsvc_lookup_host(rqstp)
			 */
			return SVC_OK;
		}
		return svc_set_client(rqstp);
	}
	rqstp->rq_auth_stat = rpc_autherr_badcred;
	return SVC_DENIED;
}

param_set_min_max(port, int, simple_strtol, 0, 65535)
param_set_min_max(grace_period, unsigned long, simple_strtoul,
		  nlm_grace_period_min, nlm_grace_period_max)
param_set_min_max(timeout, unsigned long, simple_strtoul,
		  nlm_timeout_min, nlm_timeout_max)

MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
MODULE_DESCRIPTION("NFS file locking service version " LOCKD_VERSION ".");
MODULE_LICENSE("GPL");

module_param_call(nlm_grace_period, param_set_grace_period, param_get_ulong,
		  &nlm_grace_period, 0644);
module_param_call(nlm_timeout, param_set_timeout, param_get_ulong,
		  &nlm_timeout, 0644);
module_param_call(nlm_udpport, param_set_port, param_get_int,
		  &nlm_udpport, 0644);
module_param_call(nlm_tcpport, param_set_port, param_get_int,
		  &nlm_tcpport, 0644);
module_param(nsm_use_hostnames, bool, 0644);

static int lockd_init_net(struct net *net)
{
	struct lockd_net *ln = net_generic(net, lockd_net_id);

	INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
	INIT_LIST_HEAD(&ln->lockd_manager.list);
	ln->lockd_manager.block_opens = false;
	INIT_LIST_HEAD(&ln->nsm_handles);
	return 0;
}

static void lockd_exit_net(struct net *net)
{
	struct lockd_net *ln = net_generic(net, lockd_net_id);
	WARN_ONCE(!list_empty(&ln->lockd_manager.list),
		  "net %x %s: lockd_manager.list is not empty\n",
		  net->ns.inum, __func__);
	WARN_ONCE(!list_empty(&ln->nsm_handles),
		  "net %x %s: nsm_handles list is not empty\n",
		  net->ns.inum, __func__);
	WARN_ONCE(delayed_work_pending(&ln->grace_period_end),
		  "net %x %s: grace_period_end was not cancelled\n",
		  net->ns.inum, __func__);
}

static struct pernet_operations lockd_net_ops = {
	.init = lockd_init_net,
	.exit = lockd_exit_net,
	.id = &lockd_net_id,
	.size = sizeof(struct lockd_net),
};

/*
 * Initialising and terminating the module.
 */

static int __init init_nlm(void)
{
	int err;

#ifdef CONFIG_SYSCTL
	err = -ENOMEM;
	nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls);
	if (nlm_sysctl_table == NULL)
		goto err_sysctl;
#endif
	err = register_pernet_subsys(&lockd_net_ops);
	if (err)
		goto err_pernet;

	err = genl_register_family(&lockd_nl_family);
	if (err)
		goto err_netlink;

	err = lockd_create_procfs();
	if (err)
		goto err_procfs;

	return 0;

err_procfs:
	genl_unregister_family(&lockd_nl_family);
err_netlink:
	unregister_pernet_subsys(&lockd_net_ops);
err_pernet:
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nlm_sysctl_table);
err_sysctl:
#endif
	return err;
}

static void __exit exit_nlm(void)
{
	/* FIXME: delete all NLM clients */
	nlm_shutdown_hosts();
	genl_unregister_family(&lockd_nl_family);
	lockd_remove_procfs();
	unregister_pernet_subsys(&lockd_net_ops);
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nlm_sysctl_table);
#endif
}

module_init(init_nlm);
module_exit(exit_nlm);

/**
 * nlmsvc_dispatch - Process an NLM Request
 * @rqstp: incoming request
 *
 * Return values:
 *  %0: Processing complete; do not send a Reply
 *  %1: Processing complete; send Reply in rqstp->rq_res
 */
static int nlmsvc_dispatch(struct svc_rqst *rqstp)
{
	const struct svc_procedure *procp = rqstp->rq_procinfo;
	__be32 *statp = rqstp->rq_accept_statp;

	if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
		goto out_decode_err;

	*statp = procp->pc_func(rqstp);
	if (*statp == rpc_drop_reply)
		return 0;
	if (*statp != rpc_success)
		return 1;

	if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream))
		goto out_encode_err;

	return 1;

out_decode_err:
	*statp = rpc_garbage_args;
	return 1;

out_encode_err:
	*statp = rpc_system_err;
	return 1;
}

/*
 * Define NLM program and procedures
 */
static DEFINE_PER_CPU_ALIGNED(unsigned long, nlmsvc_version1_count[17]);
static const struct svc_version nlmsvc_version1 = {
	.vs_vers	= 1,
	.vs_nproc	= 17,
	.vs_proc	= nlmsvc_procedures,
	.vs_count	= nlmsvc_version1_count,
	.vs_dispatch	= nlmsvc_dispatch,
	.vs_xdrsize	= NLMSVC_XDRSIZE,
};

static DEFINE_PER_CPU_ALIGNED(unsigned long,
			      nlmsvc_version3_count[ARRAY_SIZE(nlmsvc_procedures)]);
static const struct svc_version nlmsvc_version3 = {
	.vs_vers	= 3,
	.vs_nproc	= ARRAY_SIZE(nlmsvc_procedures),
	.vs_proc	= nlmsvc_procedures,
	.vs_count	= nlmsvc_version3_count,
	.vs_dispatch	= nlmsvc_dispatch,
	.vs_xdrsize	= NLMSVC_XDRSIZE,
};

#ifdef CONFIG_LOCKD_V4
static DEFINE_PER_CPU_ALIGNED(unsigned long,
			      nlmsvc_version4_count[ARRAY_SIZE(nlmsvc_procedures4)]);
static const struct svc_version nlmsvc_version4 = {
	.vs_vers	= 4,
	.vs_nproc	= ARRAY_SIZE(nlmsvc_procedures4),
	.vs_proc	= nlmsvc_procedures4,
	.vs_count	= nlmsvc_version4_count,
	.vs_dispatch	= nlmsvc_dispatch,
	.vs_xdrsize	= NLMSVC_XDRSIZE,
};
#endif

static const struct svc_version *nlmsvc_version[] = {
	[1] = &nlmsvc_version1,
	[3] = &nlmsvc_version3,
#ifdef CONFIG_LOCKD_V4
	[4] = &nlmsvc_version4,
#endif
};

#define NLM_NRVERS	ARRAY_SIZE(nlmsvc_version)
static struct svc_program	nlmsvc_program = {
	.pg_prog		= NLM_PROGRAM,		/* program number */
	.pg_nvers		= NLM_NRVERS,		/* number of entries in nlmsvc_version */
	.pg_vers		= nlmsvc_version,	/* version table */
	.pg_name		= "lockd",		/* service name */
	.pg_class		= "nfsd",		/* share authentication with nfsd */
	.pg_authenticate	= &lockd_authenticate,	/* export authentication */
	.pg_init_request	= svc_generic_init_request,
	.pg_rpcbind_set		= svc_generic_rpcbind_set,
};

/**
 * lockd_nl_server_set_doit - set the lockd server parameters via netlink
 * @skb: reply buffer
 * @info: netlink metadata and command arguments
 *
 * This updates the per-net values. When updating the values in the init_net
 * namespace, also update the "legacy" global values.
 *
 * Return 0 on success or a negative errno.
 */
int lockd_nl_server_set_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
	struct lockd_net *ln = net_generic(net, lockd_net_id);
	const struct nlattr *attr;

	if (GENL_REQ_ATTR_CHECK(info, LOCKD_A_SERVER_GRACETIME))
		return -EINVAL;

	if (info->attrs[LOCKD_A_SERVER_GRACETIME] ||
	    info->attrs[LOCKD_A_SERVER_TCP_PORT] ||
	    info->attrs[LOCKD_A_SERVER_UDP_PORT]) {
		attr = info->attrs[LOCKD_A_SERVER_GRACETIME];
		if (attr) {
			u32 gracetime = nla_get_u32(attr);

			if (gracetime > nlm_grace_period_max)
				return -EINVAL;

			ln->gracetime = gracetime;

			if (net == &init_net)
				nlm_grace_period = gracetime;
		}

		attr = info->attrs[LOCKD_A_SERVER_TCP_PORT];
		if (attr) {
			ln->tcp_port = nla_get_u16(attr);
			if (net == &init_net)
				nlm_tcpport = ln->tcp_port;
		}

		attr = info->attrs[LOCKD_A_SERVER_UDP_PORT];
		if (attr) {
			ln->udp_port = nla_get_u16(attr);
			if (net == &init_net)
				nlm_udpport = ln->udp_port;
		}
	}
	return 0;
}

/**
 * lockd_nl_server_get_doit - get lockd server parameters via netlink
 * @skb: reply buffer
 * @info: netlink metadata and command arguments
 *
 * Return 0 on success or a negative errno.
 */
int lockd_nl_server_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
	struct lockd_net *ln = net_generic(net, lockd_net_id);
	void *hdr;
	int err;

	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	hdr = genlmsg_iput(skb, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_free_msg;
	}

	err = nla_put_u32(skb, LOCKD_A_SERVER_GRACETIME, ln->gracetime) ||
	      nla_put_u16(skb, LOCKD_A_SERVER_TCP_PORT, ln->tcp_port) ||
	      nla_put_u16(skb, LOCKD_A_SERVER_UDP_PORT, ln->udp_port);
	if (err)
		goto err_free_msg;

	genlmsg_end(skb, hdr);

	return genlmsg_reply(skb, info);

err_free_msg:
	nlmsg_free(skb);
	return err;
}
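The param_set_min_max() macro in lockd/svc.c above generates one bounds-checked module-parameter setter per parameter (param_set_port and friends). Below is a minimal userspace sketch of the same validation, assuming strtol in place of the kernel's simple_strtol; set_port() and main() are illustrative names, not part of the file.

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int set_port(const char *val, int *out)
{
	char *endp;
	long num = strtol(val, &endp, 0);

	/* Reject empty input, trailing junk, and out-of-range ports,
	 * mirroring the -EINVAL path in the generated setter. */
	if (endp == val || *endp || num < 0 || num > 65535)
		return -EINVAL;
	*out = (int)num;
	return 0;
}

int main(void)
{
	int port;

	printf("%d\n", set_port("2049", &port));  /* 0: accepted */
	printf("%d\n", set_port("70000", &port)); /* -EINVAL: out of range */
	printf("%d\n", set_port("20x", &port));   /* -EINVAL: trailing junk */
	return 0;
}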
// SPDX-License-Identifier: GPL-2.0-or-later
/* Service connection management
 *
 * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/slab.h>
#include "ar-internal.h"

/*
 * Find a service connection under RCU conditions.
 *
 * We could use a hash table, but that is subject to bucket stuffing by an
 * attacker as the client gets to pick the epoch and cid values and would know
 * the hash function.  So, instead, we use a hash table for the peer and from
 * that an rbtree to find the service connection.  Under ordinary circumstances
 * it might be slower than a large hash table, but it is at least limited in
 * depth.
 */
struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
						     struct sk_buff *skb)
{
	struct rxrpc_connection *conn = NULL;
	struct rxrpc_conn_proto k;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct rb_node *p;
	unsigned int seq = 1;

	k.epoch	= sp->hdr.epoch;
	k.cid	= sp->hdr.cid & RXRPC_CIDMASK;

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		seq++; /* 2 on the 1st/lockless path, otherwise odd */
		read_seqbegin_or_lock(&peer->service_conn_lock, &seq);

		p = rcu_dereference_raw(peer->service_conns.rb_node);
		while (p) {
			conn = rb_entry(p, struct rxrpc_connection, service_node);

			if (conn->proto.index_key < k.index_key)
				p = rcu_dereference_raw(p->rb_left);
			else if (conn->proto.index_key > k.index_key)
				p = rcu_dereference_raw(p->rb_right);
			else
				break;
			conn = NULL;
		}
	} while (need_seqretry(&peer->service_conn_lock, seq));

	done_seqretry(&peer->service_conn_lock, seq);
	_leave(" = %d", conn ? conn->debug_id : -1);
	return conn;
}

/*
 * Insert a service connection into a peer's tree, thereby making it a target
 * for incoming packets.
 */
static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
				       struct rxrpc_connection *conn)
{
	struct rxrpc_connection *cursor = NULL;
	struct rxrpc_conn_proto k = conn->proto;
	struct rb_node **pp, *parent;

	write_seqlock(&peer->service_conn_lock);

	pp = &peer->service_conns.rb_node;
	parent = NULL;
	while (*pp) {
		parent = *pp;
		cursor = rb_entry(parent,
				  struct rxrpc_connection, service_node);

		if (cursor->proto.index_key < k.index_key)
			pp = &(*pp)->rb_left;
		else if (cursor->proto.index_key > k.index_key)
			pp = &(*pp)->rb_right;
		else
			goto found_extant_conn;
	}

	rb_link_node_rcu(&conn->service_node, parent, pp);
	rb_insert_color(&conn->service_node, &peer->service_conns);
conn_published:
	set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
	write_sequnlock(&peer->service_conn_lock);
	_leave(" = %d [new]", conn->debug_id);
	return;

found_extant_conn:
	if (refcount_read(&cursor->ref) == 0)
		goto replace_old_connection;
	write_sequnlock(&peer->service_conn_lock);
	/* We should not be able to get here.  rxrpc_incoming_connection() is
	 * called in a non-reentrant context, so there can't be a race to
	 * insert a new connection.
	 */
	BUG();

replace_old_connection:
	/* The old connection is from an outdated epoch. */
	_debug("replace conn");
	rb_replace_node_rcu(&cursor->service_node,
			    &conn->service_node,
			    &peer->service_conns);
	clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
	goto conn_published;
}

/*
 * Preallocate a service connection.  The connection is placed on the proc and
 * reap lists so that we don't have to get the lock from BH context.
 */
struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
							   gfp_t gfp)
{
	struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp);

	if (conn) {
		/* We maintain an extra ref on the connection whilst it is on
		 * the rxrpc_connections list.
		 */
		conn->state = RXRPC_CONN_SERVICE_PREALLOC;
		refcount_set(&conn->ref, 2);

		atomic_inc(&rxnet->nr_conns);
		write_lock(&rxnet->conn_lock);
		list_add_tail(&conn->link, &rxnet->service_conns);
		list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
		write_unlock(&rxnet->conn_lock);

		rxrpc_see_connection(conn, rxrpc_conn_new_service);
	}

	return conn;
}

/*
 * Set up an incoming connection.  This is called in BH context with the RCU
 * read lock held.
 */
void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
				   struct rxrpc_connection *conn,
				   const struct rxrpc_security *sec,
				   struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

	_enter("");

	conn->proto.epoch	= sp->hdr.epoch;
	conn->proto.cid		= sp->hdr.cid & RXRPC_CIDMASK;
	conn->orig_service_id	= sp->hdr.serviceId;
	conn->service_id	= sp->hdr.serviceId;
	conn->security_ix	= sp->hdr.securityIndex;
	conn->out_clientflag	= 0;
	conn->security		= sec;
	if (conn->security_ix)
		conn->state	= RXRPC_CONN_SERVICE_UNSECURED;
	else
		conn->state	= RXRPC_CONN_SERVICE;

	/* See if we should upgrade the service.  This can only happen on the
	 * first packet on a new connection.  Once done, it applies to all
	 * subsequent calls on that connection.
	 */
	if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
	    conn->service_id == rx->service_upgrade.from)
		conn->service_id = rx->service_upgrade.to;

	atomic_set(&conn->active, 1);

	/* Make the connection a target for incoming packets. */
	rxrpc_publish_service_conn(conn->peer, conn);
}

/*
 * Remove the service connection from the peer's tree, thereby removing it as a
 * target for incoming packets.
 */
void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
{
	struct rxrpc_peer *peer = conn->peer;

	write_seqlock(&peer->service_conn_lock);
	if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
		rb_erase(&conn->service_node, &peer->service_conns);
	write_sequnlock(&peer->service_conn_lock);
}
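rxrpc_find_service_conn_rcu() above relies on read_seqbegin_or_lock(): an optimistic lockless pass (even sequence number) is retried under the lock (odd sequence number) if a writer raced with the tree walk. A minimal single-threaded userspace sketch of that retry control flow, where mock_read_begin()/mock_need_retry() and the fake write_gen counter are illustrative stand-ins for the kernel's seqlock primitives:

#include <stdbool.h>
#include <stdio.h>

static unsigned int write_gen;		/* bumped by "writers" */

static unsigned int mock_read_begin(unsigned int *seq)
{
	(*seq)++;			/* 2 on first pass: lockless */
	if (*seq & 1)
		puts("taking lock for a stable second pass");
	return write_gen;
}

static bool mock_need_retry(unsigned int seq, unsigned int gen_seen)
{
	/* A lockless pass is invalid if a writer moved underneath it. */
	return !(seq & 1) && gen_seen != write_gen;
}

int main(void)
{
	unsigned int seq = 1, gen;
	int passes = 0;

	do {
		gen = mock_read_begin(&seq);
		passes++;
		if (passes == 1)
			write_gen++;	/* simulate a concurrent writer */
		/* ... walk the data structure here ... */
	} while (mock_need_retry(seq, gen));

	printf("result valid after %d passes\n", passes);	/* 2 passes */
	return 0;
}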
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _CRYPTO_XTS_H
#define _CRYPTO_XTS_H

#include <crypto/b128ops.h>
#include <crypto/internal/skcipher.h>
#include <linux/fips.h>

#define XTS_BLOCK_SIZE 16

static inline int xts_verify_key(struct crypto_skcipher *tfm,
				 const u8 *key, unsigned int keylen)
{
	/*
	 * key consists of keys of equal size concatenated, therefore
	 * the length must be even.
	 */
	if (keylen % 2)
		return -EINVAL;

	/*
	 * In FIPS mode only a combined key length of either 256 or
	 * 512 bits is allowed, c.f. FIPS 140-3 IG C.I.
	 */
	if (fips_enabled && keylen != 32 && keylen != 64)
		return -EINVAL;

	/*
	 * Ensure that the AES and tweak key are not identical when
	 * in FIPS mode or the FORBID_WEAK_KEYS flag is set.
	 */
	if ((fips_enabled || (crypto_skcipher_get_flags(tfm) &
			      CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) &&
	    !crypto_memneq(key, key + (keylen / 2), keylen / 2))
		return -EINVAL;

	return 0;
}

#endif  /* _CRYPTO_XTS_H */
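An XTS key is two equal-size keys concatenated, which is what xts_verify_key() above polices. The userspace sketch below reproduces the same three checks with the crypto_skcipher/fips_enabled plumbing replaced by plain parameters; check_xts_key() is an illustrative name, not a kernel API, and memcmp stands in for the constant-time crypto_memneq() (fine for a demo, not for real key handling).

#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int check_xts_key(const unsigned char *key, unsigned int keylen,
			 bool fips, bool forbid_weak)
{
	if (keylen % 2)			/* two equal-size halves */
		return -EINVAL;
	if (fips && keylen != 32 && keylen != 64)
		return -EINVAL;		/* AES-128/AES-256 only in FIPS */
	/* Weak key: cipher half identical to tweak half. */
	if ((fips || forbid_weak) &&
	    memcmp(key, key + keylen / 2, keylen / 2) == 0)
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned char weak[32] = { 0 };		/* halves identical */
	unsigned char ok[32] = { [16] = 1 };	/* halves differ */

	printf("%d %d\n", check_xts_key(weak, 32, true, false),
	       check_xts_key(ok, 32, true, false));	/* -EINVAL 0 */
	return 0;
}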
// SPDX-License-Identifier: GPL-2.0-or-later
/* Local endpoint object management
 *
 * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/udp.h>
#include <linux/ip.h>
#include <linux/hashtable.h>
#include <net/sock.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

static void rxrpc_local_rcu(struct rcu_head *);

/*
 * Handle an ICMP/ICMP6 error turning up at the tunnel.  Push it through the
 * usual mechanism so that it gets parsed and presented through the UDP
 * socket's error_report().
 */
static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
				__be16 port, u32 info, u8 *payload)
{
	if (ip_hdr(skb)->version == IPVERSION)
		return ip_icmp_error(sk, skb, err, port, info, payload);
	if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6))
		return ipv6_icmp_error(sk, skb, err, port, info, payload);
}

/*
 * Set or clear the Don't Fragment flag on a socket.
 */
void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set)
{
	if (set)
		ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
	else
		ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT);
}

/*
 * Compare a local to an address.  Return -ve, 0 or +ve to indicate less than,
 * same or greater than.
 *
 * We explicitly don't compare the RxRPC service ID as we want to reject
 * conflicting uses by differing services.
 * Further, we don't want to share addresses with different options (IPv6),
 * so we don't compare those bits either.
 */
static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
				const struct sockaddr_rxrpc *srx)
{
	long diff;

	diff = ((local->srx.transport_type - srx->transport_type) ?:
		(local->srx.transport_len - srx->transport_len) ?:
		(local->srx.transport.family - srx->transport.family));
	if (diff != 0)
		return diff;

	switch (srx->transport.family) {
	case AF_INET:
		/* If the choice of UDP port is left up to the transport, then
		 * the endpoint record doesn't match.
		 */
		return ((u16 __force)local->srx.transport.sin.sin_port -
			(u16 __force)srx->transport.sin.sin_port) ?:
			memcmp(&local->srx.transport.sin.sin_addr,
			       &srx->transport.sin.sin_addr,
			       sizeof(struct in_addr));
#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		/* If the choice of UDP6 port is left up to the transport, then
		 * the endpoint record doesn't match.
		 */
		return ((u16 __force)local->srx.transport.sin6.sin6_port -
			(u16 __force)srx->transport.sin6.sin6_port) ?:
			memcmp(&local->srx.transport.sin6.sin6_addr,
			       &srx->transport.sin6.sin6_addr,
			       sizeof(struct in6_addr));
#endif
	default:
		BUG();
	}
}

static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
{
	struct rxrpc_local *local =
		container_of(timer, struct rxrpc_local, client_conn_reap_timer);

	if (!local->kill_all_client_conns &&
	    test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
			     &local->client_conn_flags))
		rxrpc_wake_up_io_thread(local);
}

/*
 * Allocate a new local endpoint.
 */
static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
					     const struct sockaddr_rxrpc *srx)
{
	struct rxrpc_local *local;
	u32 tmp;

	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
	if (local) {
		refcount_set(&local->ref, 1);
		atomic_set(&local->active_users, 1);
		local->net = net;
		local->rxnet = rxrpc_net(net);
		INIT_HLIST_NODE(&local->link);
		init_completion(&local->io_thread_ready);
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
		skb_queue_head_init(&local->rx_delay_queue);
#endif
		skb_queue_head_init(&local->rx_queue);
		INIT_LIST_HEAD(&local->conn_attend_q);
		INIT_LIST_HEAD(&local->call_attend_q);
		local->client_bundles = RB_ROOT;
		spin_lock_init(&local->client_bundles_lock);
		local->kill_all_client_conns = false;
		INIT_LIST_HEAD(&local->idle_client_conns);
		timer_setup(&local->client_conn_reap_timer,
			    rxrpc_client_conn_reap_timeout, 0);
		spin_lock_init(&local->lock);
		rwlock_init(&local->services_lock);
		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
		memcpy(&local->srx, srx, sizeof(*srx));
		local->srx.srx_service = 0;
		idr_init(&local->conn_ids);
		get_random_bytes(&tmp, sizeof(tmp));
		tmp &= 0x3fffffff;
		if (tmp == 0)
			tmp = 1;
		idr_set_cursor(&local->conn_ids, tmp);
		INIT_LIST_HEAD(&local->new_client_calls);
		spin_lock_init(&local->client_call_lock);

		trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
	}

	_leave(" = %p", local);
	return local;
}

/*
 * create the local socket
 * - must be called with rxrpc_local_mutex locked
 */
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
	struct udp_tunnel_sock_cfg tuncfg = {NULL};
	struct sockaddr_rxrpc *srx = &local->srx;
	struct udp_port_cfg udp_conf = {0};
	struct task_struct *io_thread;
	struct sock *usk;
	int ret;

	_enter("%p{%d,%d}", local, srx->transport_type, srx->transport.family);

	udp_conf.family = srx->transport.family;
	udp_conf.use_udp_checksums = true;
	if (udp_conf.family == AF_INET) {
		udp_conf.local_ip = srx->transport.sin.sin_addr;
		udp_conf.local_udp_port = srx->transport.sin.sin_port;
#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
	} else {
		udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
		udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
		udp_conf.use_udp6_tx_checksums = true;
		udp_conf.use_udp6_rx_checksums = true;
#endif
	}

	ret = udp_sock_create(net, &udp_conf, &local->socket);
	if (ret < 0) {
		_leave(" = %d [socket]", ret);
		return ret;
	}

	tuncfg.encap_type = UDP_ENCAP_RXRPC;
	tuncfg.encap_rcv = rxrpc_encap_rcv;
	tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
	tuncfg.sk_user_data = local;
	setup_udp_tunnel_sock(net, local->socket, &tuncfg);

	/* set the socket up */
	usk = local->socket->sk;
	usk->sk_error_report = rxrpc_error_report;

	switch (srx->transport.family) {
	case AF_INET6:
		/* we want to receive ICMPv6 errors */
		ip6_sock_set_recverr(usk);

		/* Fall through and set IPv4 options too otherwise we don't get
		 * errors from IPv4 packets sent through the IPv6 socket.
		 */
		fallthrough;
	case AF_INET:
		/* we want to receive ICMP errors */
		ip_sock_set_recverr(usk);

		/* we want to set the don't fragment bit */
		rxrpc_local_dont_fragment(local, true);
		break;

	default:
		BUG();
	}

	io_thread = kthread_run(rxrpc_io_thread, local,
				"krxrpcio/%u", ntohs(udp_conf.local_udp_port));
	if (IS_ERR(io_thread)) {
		ret = PTR_ERR(io_thread);
		goto error_sock;
	}

	wait_for_completion(&local->io_thread_ready);
	WRITE_ONCE(local->io_thread, io_thread);

	_leave(" = 0");
	return 0;

error_sock:
	kernel_sock_shutdown(local->socket, SHUT_RDWR);
	local->socket->sk->sk_user_data = NULL;
	sock_release(local->socket);
	local->socket = NULL;
	return ret;
}

/*
 * Look up or create a new local endpoint using the specified local address.
 */
struct rxrpc_local *rxrpc_lookup_local(struct net *net,
				       const struct sockaddr_rxrpc *srx)
{
	struct rxrpc_local *local;
	struct rxrpc_net *rxnet = rxrpc_net(net);
	struct hlist_node *cursor;
	long diff;
	int ret;

	_enter("{%d,%d,%pISp}",
	       srx->transport_type, srx->transport.family, &srx->transport);

	mutex_lock(&rxnet->local_mutex);

	hlist_for_each(cursor, &rxnet->local_endpoints) {
		local = hlist_entry(cursor, struct rxrpc_local, link);

		diff = rxrpc_local_cmp_key(local, srx);
		if (diff != 0)
			continue;

		/* Services aren't allowed to share transport sockets, so
		 * reject that here.  It is possible that the object is dying -
		 * but it may also still have the local transport address that
		 * we want bound.
		 */
		if (srx->srx_service) {
			local = NULL;
			goto addr_in_use;
		}

		/* Found a match.  We want to replace a dying object.
		 * Attempting to bind the transport socket may still fail if
		 * we're attempting to use a local address that the dying
		 * object is still using.
		 */
		if (!rxrpc_use_local(local, rxrpc_local_use_lookup))
			break;

		goto found;
	}

	local = rxrpc_alloc_local(net, srx);
	if (!local)
		goto nomem;

	ret = rxrpc_open_socket(local, net);
	if (ret < 0)
		goto sock_error;

	if (cursor) {
		hlist_replace_rcu(cursor, &local->link);
		cursor->pprev = NULL;
	} else {
		hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
	}

found:
	mutex_unlock(&rxnet->local_mutex);
	_leave(" = %p", local);
	return local;

nomem:
	ret = -ENOMEM;
sock_error:
	mutex_unlock(&rxnet->local_mutex);
	if (local)
		call_rcu(&local->rcu, rxrpc_local_rcu);
	_leave(" = %d", ret);
	return ERR_PTR(ret);

addr_in_use:
	mutex_unlock(&rxnet->local_mutex);
	_leave(" = -EADDRINUSE");
	return ERR_PTR(-EADDRINUSE);
}

/*
 * Get a ref on a local endpoint.
 */
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local,
				    enum rxrpc_local_trace why)
{
	int r, u;

	u = atomic_read(&local->active_users);
	__refcount_inc(&local->ref, &r);
	trace_rxrpc_local(local->debug_id, why, r + 1, u);
	return local;
}

/*
 * Get a ref on a local endpoint unless its usage has already reached 0.
 */
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local,
					  enum rxrpc_local_trace why)
{
	int r, u;

	if (local && __refcount_inc_not_zero(&local->ref, &r)) {
		u = atomic_read(&local->active_users);
		trace_rxrpc_local(local->debug_id, why, r + 1, u);
		return local;
	}

	return NULL;
}

/*
 * Drop a ref on a local endpoint.
 */
void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
	unsigned int debug_id;
	bool dead;
	int r, u;

	if (local) {
		debug_id = local->debug_id;

		u = atomic_read(&local->active_users);
		dead = __refcount_dec_and_test(&local->ref, &r);
		trace_rxrpc_local(debug_id, why, r, u);

		if (dead)
			call_rcu(&local->rcu, rxrpc_local_rcu);
	}
}

/*
 * Start using a local endpoint.
 */
struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local,
				    enum rxrpc_local_trace why)
{
	local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use);
	if (!local)
		return NULL;

	if (!__rxrpc_use_local(local, why)) {
		rxrpc_put_local(local, rxrpc_local_put_for_use);
		return NULL;
	}

	return local;
}

/*
 * Cease using a local endpoint.  Once the number of active users reaches 0,
 * we start the closure of the transport in the I/O thread.
 */
void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
	unsigned int debug_id;
	int r, u;

	if (local) {
		debug_id = local->debug_id;
		r = refcount_read(&local->ref);
		u = atomic_dec_return(&local->active_users);
		trace_rxrpc_local(debug_id, why, r, u);
		if (u == 0)
			kthread_stop(local->io_thread);
	}
}

/*
 * Destroy a local endpoint's socket and then hand the record to RCU to
 * dispose of.
 *
 * Closing the socket cannot be done from bottom half context or RCU callback
 * context because it might sleep.
 */
void rxrpc_destroy_local(struct rxrpc_local *local)
{
	struct socket *socket = local->socket;
	struct rxrpc_net *rxnet = local->rxnet;

	_enter("%d", local->debug_id);

	local->dead = true;

	mutex_lock(&rxnet->local_mutex);
	hlist_del_init_rcu(&local->link);
	mutex_unlock(&rxnet->local_mutex);

	rxrpc_clean_up_local_conns(local);
	rxrpc_service_connection_reaper(&rxnet->service_conn_reaper);
	ASSERT(!local->service);

	if (socket) {
		local->socket = NULL;
		kernel_sock_shutdown(socket, SHUT_RDWR);
		socket->sk->sk_user_data = NULL;
		sock_release(socket);
	}

	/* At this point, there should be no more packets coming in to the
	 * local endpoint.
	 */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
	rxrpc_purge_queue(&local->rx_delay_queue);
#endif
	rxrpc_purge_queue(&local->rx_queue);
	rxrpc_purge_client_connections(local);
	page_frag_cache_drain(&local->tx_alloc);
}

/*
 * Destroy a local endpoint after the RCU grace period expires.
 */
static void rxrpc_local_rcu(struct rcu_head *rcu)
{
	struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);

	rxrpc_see_local(local, rxrpc_local_free);
	kfree(local);
}

/*
 * Verify the local endpoint list is empty by this point.
 */
void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
{
	struct rxrpc_local *local;

	_enter("");

	flush_workqueue(rxrpc_workqueue);

	if (!hlist_empty(&rxnet->local_endpoints)) {
		mutex_lock(&rxnet->local_mutex);
		hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
			pr_err("AF_RXRPC: Leaked local %p {%d}\n",
			       local, refcount_read(&local->ref));
		}
		mutex_unlock(&rxnet->local_mutex);
		BUG();
	}
}
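rxrpc_local_cmp_key() above chains comparisons with GCC's "a ?: b" extension: the first nonzero difference wins, giving a lexicographic order over several fields. A small userspace sketch of the same idiom (requires GCC or Clang); cmp_key() and struct key are illustrative stand-ins, not rxrpc types.

#include <stdio.h>

struct key {
	long type;
	long len;
	long family;
};

static long cmp_key(const struct key *a, const struct key *b)
{
	/* First differing field decides; equal keys compare as 0. */
	return (a->type - b->type) ?:
	       (a->len - b->len) ?:
	       (a->family - b->family);
}

int main(void)
{
	struct key a = { 1, 16, 2 }, b = { 1, 16, 10 };

	printf("%ld\n", cmp_key(&a, &b));	/* -8: differs on family */
	printf("%ld\n", cmp_key(&a, &a));	/* 0: identical keys */
	return 0;
}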
/*
 * Module for handling utf8 just like any other charset.
 * By Urban Widmark 2000
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
#include <linux/errno.h>

static unsigned char identity[256];

static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
	int n;

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	n = utf32_to_utf8(uni, out, boundlen);
	if (n < 0) {
		*out = '?';
		return -EINVAL;
	}
	return n;
}

static int char2uni(const unsigned char *rawstring, int boundlen,
		    wchar_t *uni)
{
	int n;
	unicode_t u;

	n = utf8_to_utf32(rawstring, boundlen, &u);
	if (n < 0 || u > MAX_WCHAR_T) {
		*uni = 0x003f;	/* ? */
		return -EINVAL;
	}
	*uni = (wchar_t) u;
	return n;
}

static struct nls_table table = {
	.charset	= "utf8",
	.uni2char	= uni2char,
	.char2uni	= char2uni,
	.charset2lower	= identity,	/* no conversion */
	.charset2upper	= identity,
};

static int __init init_nls_utf8(void)
{
	int i;

	for (i = 0; i < 256; i++)
		identity[i] = i;

	return register_nls(&table);
}

static void __exit exit_nls_utf8(void)
{
	unregister_nls(&table);
}

module_init(init_nls_utf8)
module_exit(exit_nls_utf8)
MODULE_DESCRIPTION("NLS UTF-8");
MODULE_LICENSE("Dual BSD/GPL");
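uni2char() above follows the NLS contract: return the number of bytes written, or a negative errno with '?' substituted into the output. A userspace sketch of that contract, where demo_utf32_to_utf8() is an illustrative stand-in for the kernel's utf32_to_utf8() covering 1- and 2-byte sequences only:

#include <stdio.h>
#include <errno.h>

static int demo_utf32_to_utf8(unsigned int u, unsigned char *out, int max)
{
	if (u < 0x80) {
		if (max < 1)
			return -ENAMETOOLONG;
		out[0] = u;
		return 1;
	}
	if (u < 0x800) {
		if (max < 2)
			return -ENAMETOOLONG;
		out[0] = 0xc0 | (u >> 6);
		out[1] = 0x80 | (u & 0x3f);
		return 2;
	}
	return -EINVAL;	/* wider sequences omitted in this sketch */
}

int main(void)
{
	unsigned char buf[4];
	int n = demo_utf32_to_utf8(0xe9, buf, sizeof(buf)); /* e-acute */

	if (n < 0) {
		buf[0] = '?';	/* same fallback as uni2char() */
		n = 1;
	}
	printf("%d bytes: %02x %02x\n", n, buf[0], n > 1 ? buf[1] : 0);
	return 0;	/* prints "2 bytes: c3 a9" */
}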
/*
   RFCOMM implementation for Linux Bluetooth stack (BlueZ)
   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE
   IS DISCLAIMED.
 */

#include <linux/refcount.h>

#ifndef __RFCOMM_H
#define __RFCOMM_H

#define RFCOMM_CONN_TIMEOUT (HZ * 30)
#define RFCOMM_DISC_TIMEOUT (HZ * 20)
#define RFCOMM_AUTH_TIMEOUT (HZ * 25)
#define RFCOMM_IDLE_TIMEOUT (HZ * 2)

#define RFCOMM_DEFAULT_MTU	127
#define RFCOMM_DEFAULT_CREDITS	7

#define RFCOMM_MAX_CREDITS	40

#define RFCOMM_SKB_HEAD_RESERVE	8
#define RFCOMM_SKB_TAIL_RESERVE	2
#define RFCOMM_SKB_RESERVE	(RFCOMM_SKB_HEAD_RESERVE + RFCOMM_SKB_TAIL_RESERVE)

#define RFCOMM_SABM	0x2f
#define RFCOMM_DISC	0x43
#define RFCOMM_UA	0x63
#define RFCOMM_DM	0x0f
#define RFCOMM_UIH	0xef

#define RFCOMM_TEST	0x08
#define RFCOMM_FCON	0x28
#define RFCOMM_FCOFF	0x18
#define RFCOMM_MSC	0x38
#define RFCOMM_RPN	0x24
#define RFCOMM_RLS	0x14
#define RFCOMM_PN	0x20
#define RFCOMM_NSC	0x04

#define RFCOMM_V24_FC	0x02
#define RFCOMM_V24_RTC	0x04
#define RFCOMM_V24_RTR	0x08
#define RFCOMM_V24_IC	0x40
#define RFCOMM_V24_DV	0x80

#define RFCOMM_RPN_BR_2400	0x0
#define RFCOMM_RPN_BR_4800	0x1
#define RFCOMM_RPN_BR_7200	0x2
#define RFCOMM_RPN_BR_9600	0x3
#define RFCOMM_RPN_BR_19200	0x4
#define RFCOMM_RPN_BR_38400	0x5
#define RFCOMM_RPN_BR_57600	0x6
#define RFCOMM_RPN_BR_115200	0x7
#define RFCOMM_RPN_BR_230400	0x8

#define RFCOMM_RPN_DATA_5	0x0
#define RFCOMM_RPN_DATA_6	0x1
#define RFCOMM_RPN_DATA_7	0x2
#define RFCOMM_RPN_DATA_8	0x3

#define RFCOMM_RPN_STOP_1	0
#define RFCOMM_RPN_STOP_15	1

#define RFCOMM_RPN_PARITY_NONE	0x0
#define RFCOMM_RPN_PARITY_ODD	0x1
#define RFCOMM_RPN_PARITY_EVEN	0x3
#define RFCOMM_RPN_PARITY_MARK	0x5
#define RFCOMM_RPN_PARITY_SPACE	0x7

#define RFCOMM_RPN_FLOW_NONE	0x00

#define RFCOMM_RPN_XON_CHAR	0x11
#define RFCOMM_RPN_XOFF_CHAR	0x13

#define RFCOMM_RPN_PM_BITRATE		0x0001
#define RFCOMM_RPN_PM_DATA		0x0002
#define RFCOMM_RPN_PM_STOP		0x0004
#define RFCOMM_RPN_PM_PARITY		0x0008
#define RFCOMM_RPN_PM_PARITY_TYPE	0x0010
#define RFCOMM_RPN_PM_XON		0x0020
#define RFCOMM_RPN_PM_XOFF		0x0040
#define RFCOMM_RPN_PM_FLOW		0x3F00

#define RFCOMM_RPN_PM_ALL		0x3F7F

struct rfcomm_hdr {
	u8 addr;
	u8 ctrl;
	u8 len;	/* Actual size can be 2 bytes */
} __packed;

struct rfcomm_cmd {
	u8 addr;
	u8 ctrl;
	u8 len;
	u8 fcs;
} __packed;

struct rfcomm_mcc {
	u8 type;
	u8 len;
} __packed;

struct rfcomm_pn {
	u8  dlci;
	u8  flow_ctrl;
	u8  priority;
	u8  ack_timer;
	__le16 mtu;
	u8  max_retrans;
	u8  credits;
} __packed;

struct rfcomm_rpn {
	u8  dlci;
	u8  bit_rate;
	u8  line_settings;
	u8  flow_ctrl;
	u8  xon_char;
	u8  xoff_char;
	__le16 param_mask;
} __packed;

struct rfcomm_rls {
	u8  dlci;
	u8  status;
} __packed;

struct rfcomm_msc {
	u8  dlci;
	u8  v24_sig;
} __packed;

/* ---- Core structures, flags etc ---- */

struct rfcomm_session {
	struct list_head list;
	struct socket   *sock;
	struct timer_list timer;
	unsigned long    state;
	unsigned long    flags;
	int              initiator;

	/* Default DLC parameters */
	int    cfc;
	uint   mtu;

	struct list_head dlcs;
};

struct rfcomm_dlc {
	struct list_head      list;
	struct rfcomm_session *session;
	struct sk_buff_head   tx_queue;
	struct timer_list     timer;

	struct mutex  lock;
	unsigned long state;
	unsigned long flags;
	refcount_t    refcnt;
	u8            dlci;
	u8            addr;
	u8            priority;
	u8            v24_sig;
	u8            remote_v24_sig;
	u8            mscex;
	u8            out;
	u8            sec_level;
	u8            role_switch;
	u32           defer_setup;

	uint          mtu;
	uint          cfc;
	uint          rx_credits;
	uint          tx_credits;

	void          *owner;

	void (*data_ready)(struct rfcomm_dlc *d, struct sk_buff *skb);
	void (*state_change)(struct rfcomm_dlc *d, int err);
	void (*modem_status)(struct rfcomm_dlc *d, u8 v24_sig);
};

/* DLC and session flags */
#define RFCOMM_RX_THROTTLED 0
#define RFCOMM_TX_THROTTLED 1
#define RFCOMM_TIMED_OUT    2
#define RFCOMM_MSC_PENDING  3
#define RFCOMM_SEC_PENDING  4
#define RFCOMM_AUTH_PENDING 5
#define RFCOMM_AUTH_ACCEPT  6
#define RFCOMM_AUTH_REJECT  7
#define RFCOMM_DEFER_SETUP  8
#define RFCOMM_ENC_DROP     9

/* Scheduling flags and events */
#define RFCOMM_SCHED_WAKEUP 31

/* MSC exchange flags */
#define RFCOMM_MSCEX_TX     1
#define RFCOMM_MSCEX_RX     2
#define RFCOMM_MSCEX_OK     (RFCOMM_MSCEX_TX + RFCOMM_MSCEX_RX)

/* CFC states */
#define RFCOMM_CFC_UNKNOWN  -1
#define RFCOMM_CFC_DISABLED 0
#define RFCOMM_CFC_ENABLED  RFCOMM_MAX_CREDITS

/* ---- RFCOMM SEND RPN ---- */
int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
		    u8 bit_rate, u8 data_bits, u8 stop_bits,
		    u8 parity, u8 flow_ctrl_settings,
		    u8 xon_char, u8 xoff_char, u16 param_mask);

/* ---- RFCOMM DLCs (channels) ---- */
struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio);
void rfcomm_dlc_free(struct rfcomm_dlc *d);
int  rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
		     u8 channel);
int  rfcomm_dlc_close(struct rfcomm_dlc *d, int reason);
int  rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb);
void rfcomm_dlc_send_noerror(struct rfcomm_dlc *d, struct sk_buff *skb);
int  rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig);
int  rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig);
void rfcomm_dlc_accept(struct rfcomm_dlc *d);
struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel);

#define rfcomm_dlc_lock(d)	mutex_lock(&d->lock)
#define rfcomm_dlc_unlock(d)	mutex_unlock(&d->lock)

static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d)
{
	refcount_inc(&d->refcnt);
}

static inline void rfcomm_dlc_put(struct rfcomm_dlc *d)
{
	if (refcount_dec_and_test(&d->refcnt))
		rfcomm_dlc_free(d);
}

void __rfcomm_dlc_throttle(struct rfcomm_dlc *d);
void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d);

static inline void rfcomm_dlc_throttle(struct rfcomm_dlc *d)
{
	if (!test_and_set_bit(RFCOMM_RX_THROTTLED, &d->flags))
		__rfcomm_dlc_throttle(d);
}

static inline void rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
{
	if (test_and_clear_bit(RFCOMM_RX_THROTTLED, &d->flags))
		__rfcomm_dlc_unthrottle(d);
}

/* ---- RFCOMM sessions ---- */
void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src,
			    bdaddr_t *dst);

/* ---- RFCOMM sockets ---- */
struct sockaddr_rc {
	sa_family_t	rc_family;
	bdaddr_t	rc_bdaddr;
	u8		rc_channel;
};

#define RFCOMM_CONNINFO	0x02
struct rfcomm_conninfo {
	__u16 hci_handle;
	__u8  dev_class[3];
};

#define RFCOMM_LM	0x03
#define RFCOMM_LM_MASTER	0x0001
#define RFCOMM_LM_AUTH		0x0002
#define RFCOMM_LM_ENCRYPT	0x0004
#define RFCOMM_LM_TRUSTED	0x0008
#define RFCOMM_LM_RELIABLE	0x0010
#define RFCOMM_LM_SECURE	0x0020
#define RFCOMM_LM_FIPS		0x0040

#define rfcomm_pi(sk) ((struct rfcomm_pinfo *) sk)

struct rfcomm_pinfo {
	struct bt_sock bt;
	bdaddr_t src;
	bdaddr_t dst;
	struct rfcomm_dlc   *dlc;
	u8     channel;
	u8     sec_level;
	u8     role_switch;
};

int  rfcomm_init_sockets(void);
void rfcomm_cleanup_sockets(void);

int  rfcomm_connect_ind(struct rfcomm_session *s, u8 channel,
			struct rfcomm_dlc **d);

/* ---- RFCOMM TTY ---- */
#define RFCOMM_MAX_DEV	256

#define RFCOMMCREATEDEV		_IOW('R', 200, int)
#define RFCOMMRELEASEDEV	_IOW('R', 201, int)
#define RFCOMMGETDEVLIST	_IOR('R', 210, int)
#define RFCOMMGETDEVINFO	_IOR('R', 211, int)
#define RFCOMMSTEALDLC		_IOW('R', 220, int)

/* rfcomm_dev.flags bit definitions */
#define RFCOMM_REUSE_DLC      0
#define RFCOMM_RELEASE_ONHUP  1
#define RFCOMM_HANGUP_NOW     2
#define RFCOMM_TTY_ATTACHED   3
#define RFCOMM_DEFUNCT_BIT4   4	/* don't reuse this bit - userspace visible */

/* rfcomm_dev.status bit definitions */
#define RFCOMM_DEV_RELEASED   0
#define RFCOMM_TTY_OWNED      1

struct rfcomm_dev_req {
	s16      dev_id;
	u32      flags;
	bdaddr_t src;
	bdaddr_t dst;
	u8       channel;
};

struct rfcomm_dev_info {
	s16      id;
	u32      flags;
	u16      state;
	bdaddr_t src;
	bdaddr_t dst;
	u8       channel;
};

struct rfcomm_dev_list_req {
	u16      dev_num;
	struct   rfcomm_dev_info dev_info[] __counted_by(dev_num);
};

int  rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);

#ifdef CONFIG_BT_RFCOMM_TTY
int  rfcomm_init_ttys(void);
void rfcomm_cleanup_ttys(void);
#else
static inline int rfcomm_init_ttys(void)
{
	return 0;
}
static inline void rfcomm_cleanup_ttys(void)
{
}
#endif
#endif /* __RFCOMM_H */
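The RFCOMM_RPN_PM_* bits in the header above tell the peer which RPN fields of rfcomm_send_rpn() are being negotiated. A quick userspace check that the individual bits compose to RFCOMM_RPN_PM_ALL (0x3F7F); the #define values are copied from the header, while the program itself is only an illustrative demo.

#include <stdio.h>

#define RFCOMM_RPN_PM_BITRATE		0x0001
#define RFCOMM_RPN_PM_DATA		0x0002
#define RFCOMM_RPN_PM_STOP		0x0004
#define RFCOMM_RPN_PM_PARITY		0x0008
#define RFCOMM_RPN_PM_PARITY_TYPE	0x0010
#define RFCOMM_RPN_PM_XON		0x0020
#define RFCOMM_RPN_PM_XOFF		0x0040
#define RFCOMM_RPN_PM_FLOW		0x3F00

int main(void)
{
	unsigned int all = RFCOMM_RPN_PM_BITRATE | RFCOMM_RPN_PM_DATA |
			   RFCOMM_RPN_PM_STOP | RFCOMM_RPN_PM_PARITY |
			   RFCOMM_RPN_PM_PARITY_TYPE | RFCOMM_RPN_PM_XON |
			   RFCOMM_RPN_PM_XOFF | RFCOMM_RPN_PM_FLOW;

	printf("0x%04X\n", all);	/* 0x3F7F == RFCOMM_RPN_PM_ALL */
	return 0;
}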
// SPDX-License-Identifier: GPL-2.0+
/*
 * User-space Probes (UProbes)
 *
 * Copyright (C) IBM Corporation, 2008-2012
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
 */

#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>	/* read_mapping_page */
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/rmap.h>		/* anon_vma_prepare */
#include <linux/mmu_notifier.h>
#include <linux/swap.h>		/* folio_free_swap */
#include <linux/ptrace.h>	/* user_enable_single_step */
#include <linux/kdebug.h>	/* notifier mechanism */
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>
#include <linux/rcupdate_trace.h>
#include <linux/workqueue.h>
#include <linux/srcu.h>
#include <linux/oom.h>		/* check_stable_address_space */
#include <linux/pagewalk.h>

#include <linux/uprobes.h>

#define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
#define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE

static struct rb_root uprobes_tree = RB_ROOT;
/*
 * Allows us to skip uprobe_mmap() if there are no uprobe events active
 * at this time. A fine-grained per-inode count would probably be better?
 */
#define no_uprobe_events()	RB_EMPTY_ROOT(&uprobes_tree)

static DEFINE_RWLOCK(uprobes_treelock);	/* serialize rbtree access */
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount,
							       &uprobes_treelock);

#define UPROBES_HASH_SZ	13
/* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])

DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);

/* Covers return_instance's uprobe lifetime. */
DEFINE_STATIC_SRCU(uretprobes_srcu);

/* Have a copy of original instruction */
#define UPROBE_COPY_INSN	0

struct uprobe {
	struct rb_node		rb_node;	/* node in the rb tree */
	refcount_t		ref;
	struct rw_semaphore	register_rwsem;
	struct rw_semaphore	consumer_rwsem;
	struct list_head	pending_list;
	struct list_head	consumers;
	struct inode		*inode;		/* Also hold a ref to inode */
	union {
		struct rcu_head		rcu;
		struct work_struct	work;
	};
	loff_t			offset;
	loff_t			ref_ctr_offset;
	unsigned long		flags;		/* "unsigned long" so bitops work */

	/*
	 * The generic code assumes that it has two members of unknown type
	 * owned by the arch-specific code:
	 *
	 *	insn -	copy_insn() saves the original instruction here for
	 *		arch_uprobe_analyze_insn().
	 *
	 *	ixol -	potentially modified instruction to execute out of
	 *		line, copied to xol_area by xol_get_insn_slot().
	 */
	struct arch_uprobe	arch;
};

struct delayed_uprobe {
	struct list_head list;
	struct uprobe *uprobe;
	struct mm_struct *mm;
};

static DEFINE_MUTEX(delayed_uprobe_lock);
static LIST_HEAD(delayed_uprobe_list);

/*
 * Execute out of line area: anonymous executable mapping installed
 * by the probed task to execute the copy of the original instruction
 * mangled by set_swbp().
 *
 * On a breakpoint hit, the thread contends for a slot. It frees the
 * slot after singlestep. Currently a fixed number of slots are
 * allocated.
 */
struct xol_area {
	wait_queue_head_t		wq;		/* if all slots are busy */
	unsigned long			*bitmap;	/* 0 = free slot */
	struct page			*page;

	/*
	 * We keep the vma's vm_start rather than a pointer to the vma
	 * itself. The probed process or a naughty kernel module could make
	 * the vma go away, and we must handle that reasonably gracefully.
	 */
	unsigned long			vaddr;		/* Page(s) of instruction slots */
};
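/*
 * A minimal sketch (not part of the kernel source) of the slot arithmetic
 * implied by UINSNS_PER_PAGE and struct xol_area above, assuming a 4 KiB
 * page and 128-byte slots; both values are arch/config dependent and the
 * SKETCH_* names are hypothetical stand-ins:
 */
#if 0	/* illustrative only */
#define SKETCH_PAGE_SIZE	4096UL
#define SKETCH_SLOT_BYTES	128UL	/* stand-in for UPROBE_XOL_SLOT_BYTES */
#define SKETCH_SLOTS		(SKETCH_PAGE_SIZE / SKETCH_SLOT_BYTES)	/* 32 slots */

/* mirrors the vaddr math used by xol_get_insn_slot()/xol_free_insn_slot() */
static unsigned long sketch_slot_to_vaddr(unsigned long area_vaddr, unsigned long slot_nr)
{
	return area_vaddr + slot_nr * SKETCH_SLOT_BYTES;
}

static unsigned long sketch_vaddr_to_slot(unsigned long area_vaddr, unsigned long vaddr)
{
	return (vaddr - area_vaddr) / SKETCH_SLOT_BYTES;
}
#endif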
static void uprobe_warn(struct task_struct *t, const char *msg)
{
	pr_warn("uprobe: %s:%d failed to %s\n", t->comm, t->pid, msg);
}

/*
 * valid_vma: Verify if the specified vma is an executable vma
 * Relax restrictions while unregistering: vm_flags might have
 * changed after breakpoint was inserted.
 *	- is_register: indicates if we are in register context.
 *	- Return true if the specified virtual address is in an
 *	  executable vma.
 */
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;

	if (is_register)
		flags |= VM_WRITE;

	return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
}

static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
{
	return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
}

static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
{
	return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
}

/**
 * is_swbp_insn - check if instruction is breakpoint instruction.
 * @insn: instruction to be checked.
 * Default implementation of is_swbp_insn
 * Returns true if @insn is a breakpoint instruction.
 */
bool __weak is_swbp_insn(uprobe_opcode_t *insn)
{
	return *insn == UPROBE_SWBP_INSN;
}

/**
 * is_trap_insn - check if instruction is breakpoint instruction.
 * @insn: instruction to be checked.
 * Default implementation of is_trap_insn
 * Returns true if @insn is a breakpoint instruction.
 *
 * This function is needed for the case where an architecture has multiple
 * trap instructions (like powerpc).
 */
bool __weak is_trap_insn(uprobe_opcode_t *insn)
{
	return is_swbp_insn(insn);
}

void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
{
	void *kaddr = kmap_atomic(page);
	memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
	kunmap_atomic(kaddr);
}

static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
{
	void *kaddr = kmap_atomic(page);
	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
	kunmap_atomic(kaddr);
}

static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *insn,
			 int nbytes, void *data)
{
	uprobe_opcode_t old_opcode;
	bool is_swbp;

	/*
	 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
	 * We do not check if it is any other 'trap variant' which could
	 * be conditional trap instruction such as the one powerpc supports.
	 *
	 * The logic is that we do not care if the underlying instruction
	 * is a trap variant; uprobes always win over any other (gdb)
	 * breakpoint.
	 */
	uprobe_copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
	is_swbp = is_swbp_insn(&old_opcode);

	if (is_swbp_insn(insn)) {
		if (is_swbp)		/* register: already installed? */
			return 0;
	} else {
		if (!is_swbp)		/* unregister: was it changed by us?
*/ return 0; } return 1; } static struct delayed_uprobe * delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; list_for_each_entry(du, &delayed_uprobe_list, list) if (du->uprobe == uprobe && du->mm == mm) return du; return NULL; } static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; if (delayed_uprobe_check(uprobe, mm)) return 0; du = kzalloc(sizeof(*du), GFP_KERNEL); if (!du) return -ENOMEM; du->uprobe = uprobe; du->mm = mm; list_add(&du->list, &delayed_uprobe_list); return 0; } static void delayed_uprobe_delete(struct delayed_uprobe *du) { if (WARN_ON(!du)) return; list_del(&du->list); kfree(du); } static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) { struct list_head *pos, *q; struct delayed_uprobe *du; if (!uprobe && !mm) return; list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (uprobe && du->uprobe != uprobe) continue; if (mm && du->mm != mm) continue; delayed_uprobe_delete(du); } } static bool valid_ref_ctr_vma(struct uprobe *uprobe, struct vm_area_struct *vma) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); return uprobe->ref_ctr_offset && vma->vm_file && file_inode(vma->vm_file) == uprobe->inode && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && vma->vm_start <= vaddr && vma->vm_end > vaddr; } static struct vm_area_struct * find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *tmp; for_each_vma(vmi, tmp) if (valid_ref_ctr_vma(uprobe, tmp)) return tmp; return NULL; } static int __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) { void *kaddr; struct page *page; int ret; short *ptr; if (!vaddr || !d) return -EINVAL; ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, NULL); if (unlikely(ret <= 0)) { /* * We are asking for 1 page. If get_user_pages_remote() fails, * it may return 0, in that case we have to return error. */ return ret == 0 ? -EBUSY : ret; } kaddr = kmap_atomic(page); ptr = kaddr + (vaddr & ~PAGE_MASK); if (unlikely(*ptr + d < 0)) { pr_warn("ref_ctr going negative. vaddr: 0x%lx, " "curr val: %d, delta: %d\n", vaddr, *ptr, d); ret = -EINVAL; goto out; } *ptr += d; ret = 0; out: kunmap_atomic(kaddr); put_page(page); return ret; } static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n", d > 0 ? 
"increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); } static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, short d) { struct vm_area_struct *rc_vma; unsigned long rc_vaddr; int ret = 0; rc_vma = find_ref_ctr_vma(uprobe, mm); if (rc_vma) { rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); ret = __update_ref_ctr(mm, rc_vaddr, d); if (ret) update_ref_ctr_warn(uprobe, mm, d); if (d > 0) return ret; } mutex_lock(&delayed_uprobe_lock); if (d > 0) ret = delayed_uprobe_add(uprobe, mm); else delayed_uprobe_remove(uprobe, mm); mutex_unlock(&delayed_uprobe_lock); return ret; } static bool orig_page_is_identical(struct vm_area_struct *vma, unsigned long vaddr, struct page *page, bool *pmd_mappable) { const pgoff_t index = vaddr_to_offset(vma, vaddr) >> PAGE_SHIFT; struct folio *orig_folio = filemap_get_folio(vma->vm_file->f_mapping, index); struct page *orig_page; bool identical; if (IS_ERR(orig_folio)) return false; orig_page = folio_file_page(orig_folio, index); *pmd_mappable = folio_test_pmd_mappable(orig_folio); identical = folio_test_uptodate(orig_folio) && pages_identical(page, orig_page); folio_put(orig_folio); return identical; } static int __uprobe_write(struct vm_area_struct *vma, struct folio_walk *fw, struct folio *folio, unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, bool is_register) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; bool pmd_mappable; /* For now, we'll only handle PTE-mapped folios. */ if (fw->level != FW_LEVEL_PTE) return -EFAULT; /* * See can_follow_write_pte(): we'd actually prefer a writable PTE here, * but the VMA might not be writable. */ if (!pte_write(fw->pte)) { if (!PageAnonExclusive(fw->page)) return -EFAULT; if (unlikely(userfaultfd_pte_wp(vma, fw->pte))) return -EFAULT; /* SOFTDIRTY is handled via pte_mkdirty() below. */ } /* * We'll temporarily unmap the page and flush the TLB, such that we can * modify the page atomically. */ flush_cache_page(vma, vaddr, pte_pfn(fw->pte)); fw->pte = ptep_clear_flush(vma, vaddr, fw->ptep); copy_to_page(fw->page, insn_vaddr, insn, nbytes); /* * When unregistering, we may only zap a PTE if uffd is disabled and * there are no unexpected folio references ... */ if (is_register || userfaultfd_missing(vma) || (folio_ref_count(folio) != folio_expected_ref_count(folio) + 1)) goto remap; /* * ... and the mapped page is identical to the original page that * would get faulted in on next access. */ if (!orig_page_is_identical(vma, vaddr, fw->page, &pmd_mappable)) goto remap; dec_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_remove_rmap_pte(folio, fw->page, vma); if (!folio_mapped(folio) && folio_test_swapcache(folio) && folio_trylock(folio)) { folio_free_swap(folio); folio_unlock(folio); } folio_put(folio); return pmd_mappable; remap: /* * Make sure that our copy_to_page() changes become visible before the * set_pte_at() write. */ smp_wmb(); /* We modified the page. Make sure to mark the PTE dirty. */ set_pte_at(vma->vm_mm, vaddr, fw->ptep, pte_mkdirty(fw->pte)); return 0; } /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. 
* * uprobe_write_opcode - write the opcode at a given virtual address. * @auprobe: arch specific probepoint information. * @vma: the probed virtual memory area. * @opcode_vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @opcode_vaddr. * * Called with mm->mmap_lock held for write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, uprobe_opcode_t opcode, bool is_register) { return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, verify_opcode, is_register, true /* do_update_ref_ctr */, NULL); } int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr, void *data) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; struct uprobe *uprobe; int ret, ref_ctr_updated = 0; unsigned int gup_flags = FOLL_FORCE; struct mmu_notifier_range range; struct folio_walk fw; struct folio *folio; struct page *page; uprobe = container_of(auprobe, struct uprobe, arch); if (WARN_ON_ONCE(!is_cow_mapping(vma->vm_flags))) return -EINVAL; /* * When registering, we have to break COW to get an exclusive anonymous * page that we can safely modify. Use FOLL_WRITE to trigger a write * fault if required. When unregistering, we might be lucky and the * anon page is already gone. So defer write faults until really * required. Use FOLL_SPLIT_PMD, because __uprobe_write() * cannot deal with PMDs yet. */ if (is_register) gup_flags |= FOLL_WRITE | FOLL_SPLIT_PMD; retry: ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, &page, NULL); if (ret <= 0) goto out; folio = page_folio(page); ret = verify(page, insn_vaddr, insn, nbytes, data); if (ret <= 0) { folio_put(folio); goto out; } /* We are going to replace instruction, update ref_ctr. */ if (do_update_ref_ctr && !ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); if (ret) { folio_put(folio); goto out; } ref_ctr_updated = 1; } ret = 0; if (unlikely(!folio_test_anon(folio) || folio_is_zone_device(folio))) { VM_WARN_ON_ONCE(is_register); folio_put(folio); goto out; } if (!is_register) { /* * In the common case, we'll be able to zap the page when * unregistering. So trigger MMU notifiers now, as we won't * be able to do it under PTL. */ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vaddr, vaddr + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); } ret = -EAGAIN; /* Walk the page tables again, to perform the actual update. */ if (folio_walk_start(&fw, vma, vaddr, 0)) { if (fw.page == page) ret = __uprobe_write(vma, &fw, folio, insn_vaddr, insn, nbytes, is_register); folio_walk_end(&fw, vma); } if (!is_register) mmu_notifier_invalidate_range_end(&range); folio_put(folio); switch (ret) { case -EFAULT: gup_flags |= FOLL_WRITE | FOLL_SPLIT_PMD; fallthrough; case -EAGAIN: goto retry; default: break; } out: /* Revert back reference counter if instruction update failed. */ if (do_update_ref_ctr && ret < 0 && ref_ctr_updated) update_ref_ctr(uprobe, mm, is_register ? -1 : 1); /* try collapse pmd for compound page */ if (ret > 0) collapse_pte_mapped_thp(mm, vaddr, false); return ret < 0 ? ret : 0; } /** * set_swbp - store breakpoint at a given address. * @auprobe: arch specific probepoint information. * @vma: the probed virtual memory area. 
* @vaddr: the virtual address to insert the opcode. * * For mm @mm, store the breakpoint instruction at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_swbp(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr) { return uprobe_write_opcode(auprobe, vma, vaddr, UPROBE_SWBP_INSN, true); } /** * set_orig_insn - Restore the original instruction. * @vma: the probed virtual memory area. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_orig_insn(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr) { return uprobe_write_opcode(auprobe, vma, vaddr, *(uprobe_opcode_t *)&auprobe->insn, false); } /* uprobe should have guaranteed positive refcount */ static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } /* * uprobe should have guaranteed lifetime, which can be either of: * - caller already has refcount taken (and wants an extra one); * - uprobe is RCU protected and won't be freed until after grace period; * - we are holding uprobes_treelock (for read or write, doesn't matter). */ static struct uprobe *try_get_uprobe(struct uprobe *uprobe) { if (refcount_inc_not_zero(&uprobe->ref)) return uprobe; return NULL; } static inline bool uprobe_is_active(struct uprobe *uprobe) { return !RB_EMPTY_NODE(&uprobe->rb_node); } static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu) { struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); kfree(uprobe); } static void uprobe_free_srcu(struct rcu_head *rcu) { struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace); } static void uprobe_free_deferred(struct work_struct *work) { struct uprobe *uprobe = container_of(work, struct uprobe, work); write_lock(&uprobes_treelock); if (uprobe_is_active(uprobe)) { write_seqcount_begin(&uprobes_seqcount); rb_erase(&uprobe->rb_node, &uprobes_tree); write_seqcount_end(&uprobes_seqcount); } write_unlock(&uprobes_treelock); /* * If application munmap(exec_vma) before uprobe_unregister() * gets called, we don't get a chance to remove uprobe from * delayed_uprobe_list from remove_breakpoint(). Do it here. */ mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); /* start srcu -> rcu_tasks_trace -> kfree chain */ call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu); } static void put_uprobe(struct uprobe *uprobe) { if (!refcount_dec_and_test(&uprobe->ref)) return; INIT_WORK(&uprobe->work, uprobe_free_deferred); schedule_work(&uprobe->work); } /* Initialize hprobe as SRCU-protected "leased" uprobe */ static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx) { WARN_ON(!uprobe); hprobe->state = HPROBE_LEASED; hprobe->uprobe = uprobe; hprobe->srcu_idx = srcu_idx; } /* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */ static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe) { hprobe->state = uprobe ? HPROBE_STABLE : HPROBE_GONE; hprobe->uprobe = uprobe; hprobe->srcu_idx = -1; } /* * hprobe_consume() fetches hprobe's underlying uprobe and detects whether * uprobe is SRCU protected or is refcounted. hprobe_consume() can be * used only once for a given hprobe. 
* * Caller has to call hprobe_finalize() and pass previous hprobe_state, so * that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever * is appropriate. */ static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate) { *hstate = xchg(&hprobe->state, HPROBE_CONSUMED); switch (*hstate) { case HPROBE_LEASED: case HPROBE_STABLE: return hprobe->uprobe; case HPROBE_GONE: /* uprobe is NULL, no SRCU */ case HPROBE_CONSUMED: /* uprobe was finalized already, do nothing */ return NULL; default: WARN(1, "hprobe invalid state %d", *hstate); return NULL; } } /* * Reset hprobe state and, if hprobe was LEASED, release SRCU lock. * hprobe_finalize() can only be used from current context after * hprobe_consume() call (which determines uprobe and hstate value). */ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate) { switch (hstate) { case HPROBE_LEASED: __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); break; case HPROBE_STABLE: put_uprobe(hprobe->uprobe); break; case HPROBE_GONE: case HPROBE_CONSUMED: break; default: WARN(1, "hprobe invalid state %d", hstate); break; } } /* * Attempt to switch (atomically) uprobe from being SRCU protected (LEASED) * to refcounted (STABLE) state. Competes with hprobe_consume(); only one of * them can win the race to perform SRCU unlocking. Whoever wins must perform * SRCU unlock. * * Returns underlying valid uprobe or NULL, if there was no underlying uprobe * to begin with or we failed to bump its refcount and it's going away. * * Returned non-NULL uprobe can be still safely used within an ongoing SRCU * locked region. If `get` is true, it's guaranteed that non-NULL uprobe has * an extra refcount for caller to assume and use. Otherwise, it's not * guaranteed that returned uprobe has a positive refcount, so caller has to * attempt try_get_uprobe(), if it needs to preserve uprobe beyond current * SRCU lock region. See dup_utask(). */ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) { enum hprobe_state hstate; /* * Caller should guarantee that return_instance is not going to be * freed from under us. This can be achieved either through holding * rcu_read_lock() or by owning return_instance in the first place. * * Underlying uprobe is itself protected from reuse by SRCU, so ensure * SRCU lock is held properly. */ lockdep_assert(srcu_read_lock_held(&uretprobes_srcu)); hstate = READ_ONCE(hprobe->state); switch (hstate) { case HPROBE_STABLE: /* uprobe has positive refcount, bump refcount, if necessary */ return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe; case HPROBE_GONE: /* * SRCU was unlocked earlier and we didn't manage to take * uprobe refcnt, so it's effectively NULL */ return NULL; case HPROBE_CONSUMED: /* * uprobe was consumed, so it's effectively NULL as far as * uretprobe processing logic is concerned */ return NULL; case HPROBE_LEASED: { struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe); /* * Try to switch hprobe state, guarding against * hprobe_consume() or another hprobe_expire() racing with us. * Note, if we failed to get uprobe refcount, we use special * HPROBE_GONE state to signal that hprobe->uprobe shouldn't * be used as it will be freed after SRCU is unlocked. */ if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) { /* We won the race, we are the ones to unlock SRCU */ __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); return get ? 
get_uprobe(uprobe) : uprobe; } /* * We lost the race, undo refcount bump (if it ever happened), * unless caller would like an extra refcount anyways. */ if (uprobe && !get) put_uprobe(uprobe); /* * Even if hprobe_consume() or another hprobe_expire() wins * the state update race and unlocks SRCU from under us, we * still have a guarantee that underyling uprobe won't be * freed due to ongoing caller's SRCU lock region, so we can * return it regardless. Also, if `get` was true, we also have * an extra ref for the caller to own. This is used in dup_utask(). */ return uprobe; } default: WARN(1, "unknown hprobe state %d", hstate); return NULL; } } static __always_inline int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, const struct uprobe *r) { if (l_inode < r->inode) return -1; if (l_inode > r->inode) return 1; if (l_offset < r->offset) return -1; if (l_offset > r->offset) return 1; return 0; } #define __node_2_uprobe(node) \ rb_entry((node), struct uprobe, rb_node) struct __uprobe_key { struct inode *inode; loff_t offset; }; static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) { const struct __uprobe_key *a = key; return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); } static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) { struct uprobe *u = __node_2_uprobe(a); return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); } /* * Assumes being inside RCU protected region. * No refcount is taken on returned uprobe. */ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset) { struct __uprobe_key key = { .inode = inode, .offset = offset, }; struct rb_node *node; unsigned int seq; lockdep_assert(rcu_read_lock_trace_held()); do { seq = read_seqcount_begin(&uprobes_seqcount); node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key); /* * Lockless RB-tree lookups can result only in false negatives. * If the element is found, it is correct and can be returned * under RCU protection. If we find nothing, we need to * validate that seqcount didn't change. If it did, we have to * try again as we might have missed the element (false * negative). If seqcount is unchanged, search truly failed. */ if (node) return __node_2_uprobe(node); } while (read_seqcount_retry(&uprobes_seqcount, seq)); return NULL; } /* * Attempt to insert a new uprobe into uprobes_tree. * * If uprobe already exists (for given inode+offset), we just increment * refcount of previously existing uprobe. * * If not, a provided new instance of uprobe is inserted into the tree (with * assumed initial refcount == 1). * * In any case, we return a uprobe instance that ends up being in uprobes_tree. * Caller has to clean up new uprobe instance, if it ended up not being * inserted into the tree. * * We assume that uprobes_treelock is held for writing. */ static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; again: node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) { struct uprobe *u = __node_2_uprobe(node); if (!try_get_uprobe(u)) { rb_erase(node, &uprobes_tree); RB_CLEAR_NODE(&u->rb_node); goto again; } return u; } return uprobe; } /* * Acquire uprobes_treelock and insert uprobe into uprobes_tree * (or reuse existing one, see __insert_uprobe() comments above). 
*/ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { struct uprobe *u; write_lock(&uprobes_treelock); write_seqcount_begin(&uprobes_seqcount); u = __insert_uprobe(uprobe); write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); return u; } static void ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) { pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx " "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) cur_uprobe->ref_ctr_offset, (unsigned long long) uprobe->ref_ctr_offset); } static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); if (!uprobe) return ERR_PTR(-ENOMEM); uprobe->inode = inode; uprobe->offset = offset; uprobe->ref_ctr_offset = ref_ctr_offset; INIT_LIST_HEAD(&uprobe->consumers); init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); RB_CLEAR_NODE(&uprobe->rb_node); refcount_set(&uprobe->ref, 1); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe != uprobe) { if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { ref_ctr_mismatch_warn(cur_uprobe, uprobe); put_uprobe(cur_uprobe); kfree(uprobe); return ERR_PTR(-EINVAL); } kfree(uprobe); uprobe = cur_uprobe; } return uprobe; } static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { static atomic64_t id; down_write(&uprobe->consumer_rwsem); list_add_rcu(&uc->cons_node, &uprobe->consumers); uc->id = (__u64) atomic64_inc_return(&id); up_write(&uprobe->consumer_rwsem); } /* * For uprobe @uprobe, delete the consumer @uc. * Should never be called with consumer that's not part of @uprobe->consumers. */ static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); list_del_rcu(&uc->cons_node); up_write(&uprobe->consumer_rwsem); } static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; /* * Ensure that the page that has the original instruction is populated * and in page-cache. If ->read_folio == NULL it must be shmem_mapping(), * see uprobe_register(). */ if (mapping->a_ops->read_folio) page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp); else page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); uprobe_copy_from_page(page, offset, insn, nbytes); put_page(page); return 0; } static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; void *insn = &uprobe->arch.insn; int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ do { if (offs >= i_size_read(uprobe->inode)) break; len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); err = __copy_insn(mapping, filp, insn, len, offs); if (err) break; insn += len; offs += len; size -= len; } while (size); return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, struct mm_struct *mm, unsigned long vaddr) { int ret = 0; if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; /* TODO: move this into _register, until then we abuse this sem. 
*/ down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; ret = copy_insn(uprobe, file); if (ret) goto out; ret = -ENOTSUPP; if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); if (ret) goto out; smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: up_write(&uprobe->consumer_rwsem); return ret; } static inline bool consumer_filter(struct uprobe_consumer *uc, struct mm_struct *mm) { return !uc->filter || uc->filter(uc, mm); } static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm) { struct uprobe_consumer *uc; bool ret = false; down_read(&uprobe->consumer_rwsem); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { ret = consumer_filter(uc, mm); if (ret) break; } up_read(&uprobe->consumer_rwsem); return ret; } static int install_breakpoint(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long vaddr) { struct mm_struct *mm = vma->vm_mm; bool first_uprobe; int ret; ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; /* * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), * the task can hit this breakpoint right after __replace_page(). */ first_uprobe = !mm_flags_test(MMF_HAS_UPROBES, mm); if (first_uprobe) mm_flags_set(MMF_HAS_UPROBES, mm); ret = set_swbp(&uprobe->arch, vma, vaddr); if (!ret) mm_flags_clear(MMF_RECALC_UPROBES, mm); else if (first_uprobe) mm_flags_clear(MMF_HAS_UPROBES, mm); return ret; } static int remove_breakpoint(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long vaddr) { struct mm_struct *mm = vma->vm_mm; mm_flags_set(MMF_RECALC_UPROBES, mm); return set_orig_insn(&uprobe->arch, vma, vaddr); } struct map_info { struct map_info *next; struct mm_struct *mm; unsigned long vaddr; }; static inline struct map_info *free_map_info(struct map_info *info) { struct map_info *next = info->next; kfree(info); return next; } static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; struct map_info *info; int more = 0; again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; if (!prev && !more) { /* * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through * reclaim. This is optimistic, no harm done if it fails. 
 */
			prev = kmalloc(sizeof(struct map_info),
					GFP_NOWAIT | __GFP_NOMEMALLOC);
			if (prev)
				prev->next = NULL;
		}
		if (!prev) {
			more++;
			continue;
		}

		if (!mmget_not_zero(vma->vm_mm))
			continue;

		info = prev;
		prev = prev->next;
		info->next = curr;
		curr = info;

		info->mm = vma->vm_mm;
		info->vaddr = offset_to_vaddr(vma, offset);
	}
	i_mmap_unlock_read(mapping);

	if (!more)
		goto out;

	prev = curr;
	while (curr) {
		mmput(curr->mm);
		curr = curr->next;
	}

	do {
		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
		if (!info) {
			curr = ERR_PTR(-ENOMEM);
			goto out;
		}
		info->next = prev;
		prev = info;
	} while (--more);

	goto again;
 out:
	while (prev)
		prev = free_map_info(prev);
	return curr;
}

static int
register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
{
	bool is_register = !!new;
	struct map_info *info;
	int err = 0;

	percpu_down_write(&dup_mmap_sem);
	info = build_map_info(uprobe->inode->i_mapping,
					uprobe->offset, is_register);
	if (IS_ERR(info)) {
		err = PTR_ERR(info);
		goto out;
	}

	while (info) {
		struct mm_struct *mm = info->mm;
		struct vm_area_struct *vma;

		if (err && is_register)
			goto free;
		/*
		 * We take mmap_lock for writing to avoid the race with
		 * find_active_uprobe_rcu() which takes mmap_lock for reading.
		 * Thus this install_breakpoint() can not make
		 * is_trap_at_addr() true right after find_uprobe_rcu()
		 * returns NULL in find_active_uprobe_rcu().
		 */
		mmap_write_lock(mm);
		if (check_stable_address_space(mm))
			goto unlock;

		vma = find_vma(mm, info->vaddr);
		if (!vma || !valid_vma(vma, is_register) ||
		    file_inode(vma->vm_file) != uprobe->inode)
			goto unlock;

		if (vma->vm_start > info->vaddr ||
		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
			goto unlock;

		if (is_register) {
			/* consult only the "caller", new consumer. */
			if (consumer_filter(new, mm))
				err = install_breakpoint(uprobe, vma, info->vaddr);
		} else if (mm_flags_test(MMF_HAS_UPROBES, mm)) {
			if (!filter_chain(uprobe, mm))
				err |= remove_breakpoint(uprobe, vma, info->vaddr);
		}

 unlock:
		mmap_write_unlock(mm);
 free:
		mmput(mm);
		info = free_map_info(info);
	}
 out:
	percpu_up_write(&dup_mmap_sem);
	return err;
}

/**
 * uprobe_unregister_nosync - unregister an already registered probe.
 * @uprobe: uprobe to remove
 * @uc: identify which probe if multiple probes are colocated.
 */
void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
	int err;

	down_write(&uprobe->register_rwsem);
	consumer_del(uprobe, uc);
	err = register_for_each_vma(uprobe, NULL);
	up_write(&uprobe->register_rwsem);

	/* TODO: can't unregister? schedule a worker thread */
	if (unlikely(err)) {
		uprobe_warn(current, "unregister, leaking uprobe");
		return;
	}

	put_uprobe(uprobe);
}
EXPORT_SYMBOL_GPL(uprobe_unregister_nosync);

void uprobe_unregister_sync(void)
{
	/*
	 * Now that handler_chain() and handle_uretprobe_chain() iterate over
	 * uprobe->consumers list under RCU protection without holding
	 * uprobe->register_rwsem, we need to wait for RCU grace period to
	 * make sure that we can't call into just unregistered
	 * uprobe_consumer's callbacks anymore. If we don't do that, fast and
	 * unlucky enough caller can free consumer's memory and cause
	 * handler_chain() or handle_uretprobe_chain() to do a use-after-free.
	 */
	synchronize_rcu_tasks_trace();
	synchronize_srcu(&uretprobes_srcu);
}
EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
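/*
 * A minimal sketch (not part of the kernel source) of how a caller is
 * expected to pair the two functions above before freeing its consumer;
 * sketch_detach() and the kfree()'d consumer are hypothetical:
 */
#if 0	/* illustrative only */
static void sketch_detach(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
	uprobe_unregister_nosync(uprobe, uc);
	/*
	 * Wait out the RCU-tasks-trace and SRCU grace periods so that no
	 * handler_chain()/handle_uretprobe_chain() caller can still be
	 * running on *uc; only then is freeing the consumer safe.
	 */
	uprobe_unregister_sync();
	kfree(uc);
}
#endif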
/**
 * uprobe_register - register a probe
 * @inode: the file in which the probe has to be placed.
 * @offset: offset from the start of the file.
 * @ref_ctr_offset: offset of SDT marker / reference counter
 * @uc: information on how to handle the probe.
 *
 * Apart from the access refcount, uprobe_register() takes a creation
 * refcount (through alloc_uprobe) if and only if this @uprobe is getting
 * inserted into the rbtree (i.e. the first consumer for a @inode:@offset
 * tuple). Creation refcount stops uprobe_unregister from freeing the
 * @uprobe even before the register operation is complete. Creation
 * refcount is released when the last @uc for the @uprobe
 * unregisters. Caller of uprobe_register() is required to keep @inode
 * (and the containing mount) referenced.
 *
 * Return: pointer to the new uprobe on success or an ERR_PTR on failure.
 */
struct uprobe *uprobe_register(struct inode *inode,
				loff_t offset, loff_t ref_ctr_offset,
				struct uprobe_consumer *uc)
{
	struct uprobe *uprobe;
	int ret;

	/* Uprobe must have at least one set consumer */
	if (!uc->handler && !uc->ret_handler)
		return ERR_PTR(-EINVAL);

	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
	if (!inode->i_mapping->a_ops->read_folio &&
	    !shmem_mapping(inode->i_mapping))
		return ERR_PTR(-EIO);
	/* Racy, just to catch the obvious mistakes */
	if (offset > i_size_read(inode))
		return ERR_PTR(-EINVAL);

	/*
	 * This ensures that uprobe_copy_from_page(), copy_to_page() and
	 * __update_ref_ctr() can't cross page boundary.
	 */
	if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE))
		return ERR_PTR(-EINVAL);
	if (!IS_ALIGNED(ref_ctr_offset, sizeof(short)))
		return ERR_PTR(-EINVAL);

	uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
	if (IS_ERR(uprobe))
		return uprobe;

	down_write(&uprobe->register_rwsem);
	consumer_add(uprobe, uc);
	ret = register_for_each_vma(uprobe, uc);
	up_write(&uprobe->register_rwsem);

	if (ret) {
		uprobe_unregister_nosync(uprobe, uc);
		/*
		 * Registration might have partially succeeded, so we can have
		 * this consumer being called right at this time. We need to
		 * sync here. That's OK; it's an unlikely slow path.
		 */
		uprobe_unregister_sync();
		return ERR_PTR(ret);
	}

	return uprobe;
}
EXPORT_SYMBOL_GPL(uprobe_register);

/**
 * uprobe_apply - add or remove the breakpoints according to @uc->filter
 * @uprobe: uprobe which "owns" the breakpoint
 * @uc: consumer which wants to add more or remove some breakpoints
 * @add: add or remove the breakpoints
 * Return: 0 on success or negative error code.
 */
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{
	struct uprobe_consumer *con;
	int ret = -ENOENT;

	down_write(&uprobe->register_rwsem);

	rcu_read_lock_trace();
	list_for_each_entry_rcu(con, &uprobe->consumers, cons_node,
				rcu_read_lock_trace_held()) {
		if (con == uc) {
			ret = register_for_each_vma(uprobe, add ?
uc : NULL); break; } } rcu_read_unlock_trace(); up_write(&uprobe->register_rwsem); return ret; } static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; int err = 0; mmap_write_lock(mm); for_each_vma(vmi, vma) { unsigned long vaddr; loff_t offset; if (!valid_vma(vma, false) || file_inode(vma->vm_file) != uprobe->inode) continue; offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; if (uprobe->offset < offset || uprobe->offset >= offset + vma->vm_end - vma->vm_start) continue; vaddr = offset_to_vaddr(vma, uprobe->offset); err |= remove_breakpoint(uprobe, vma, vaddr); } mmap_write_unlock(mm); return err; } static struct rb_node * find_node_in_range(struct inode *inode, loff_t min, loff_t max) { struct rb_node *n = uprobes_tree.rb_node; while (n) { struct uprobe *u = rb_entry(n, struct uprobe, rb_node); if (inode < u->inode) { n = n->rb_left; } else if (inode > u->inode) { n = n->rb_right; } else { if (max < u->offset) n = n->rb_left; else if (min > u->offset) n = n->rb_right; else break; } } return n; } /* * For a given range in vma, build a list of probes that need to be inserted. */ static void build_probe_list(struct inode *inode, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *head) { loff_t min, max; struct rb_node *n, *t; struct uprobe *u; INIT_LIST_HEAD(head); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; read_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset < min) break; /* if uprobe went away, it's safe to ignore it */ if (try_get_uprobe(u)) list_add(&u->pending_list, head); } for (t = n; (t = rb_next(t)); ) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset > max) break; /* if uprobe went away, it's safe to ignore it */ if (try_get_uprobe(u)) list_add(&u->pending_list, head); } } read_unlock(&uprobes_treelock); } /* @vma contains reference counter, not the probed instruction. */ static int delayed_ref_ctr_inc(struct vm_area_struct *vma) { struct list_head *pos, *q; struct delayed_uprobe *du; unsigned long vaddr; int ret = 0, err = 0; mutex_lock(&delayed_uprobe_lock); list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (du->mm != vma->vm_mm || !valid_ref_ctr_vma(du->uprobe, vma)) continue; vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); if (ret) { update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); if (!err) err = ret; } delayed_uprobe_delete(du); } mutex_unlock(&delayed_uprobe_lock); return err; } /* * Called from mmap_region/vma_merge with mm->mmap_lock acquired. * * Currently we ignore all errors and always return 0, the callers * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; if (no_uprobe_events()) return 0; if (vma->vm_file && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && mm_flags_test(MMF_HAS_UPROBES, vma->vm_mm)) delayed_ref_ctr_inc(vma); if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); if (!inode) return 0; mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); /* * We can race with uprobe_unregister(), this uprobe can be already * removed. 
But in this case filter_chain() must return false, all * consumers have gone away. */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!fatal_signal_pending(current) && filter_chain(uprobe, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma, vaddr); } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); return 0; } static bool vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) { loff_t min, max; struct inode *inode; struct rb_node *n; inode = file_inode(vma->vm_file); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; read_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); read_unlock(&uprobes_treelock); return !!n; } /* * Called in context of a munmap of a vma. */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; if (!mm_flags_test(MMF_HAS_UPROBES, vma->vm_mm) || mm_flags_test(MMF_RECALC_UPROBES, vma->vm_mm)) return; if (vma_has_uprobes(vma, start, end)) mm_flags_set(MMF_RECALC_UPROBES, vma->vm_mm); } static vm_fault_t xol_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct xol_area *area = vma->vm_mm->uprobes_state.xol_area; vmf->page = area->page; get_page(vmf->page); return 0; } static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return -EPERM; } static const struct vm_special_mapping xol_mapping = { .name = "[uprobes]", .fault = xol_fault, .mremap = xol_mremap, }; /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { struct vm_area_struct *vma; int ret; if (mmap_write_lock_killable(mm)) return -EINTR; if (mm->uprobes_state.xol_area) { ret = -EALREADY; goto fail; } if (!area->vaddr) { /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(area->vaddr)) { ret = area->vaddr; goto fail; } } vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO| VM_SEALED_SYSMAP, &xol_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; } ret = 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: mmap_write_unlock(mm); return ret; } void * __weak arch_uretprobe_trampoline(unsigned long *psize) { static uprobe_opcode_t insn = UPROBE_SWBP_INSN; *psize = UPROBE_SWBP_INSN_SIZE; return &insn; } static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; unsigned long insns_size; struct xol_area *area; void *insns; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; area->bitmap = kcalloc(BITS_TO_LONGS(UINSNS_PER_PAGE), sizeof(long), GFP_KERNEL); if (!area->bitmap) goto free_area; area->page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); if (!area->page) goto free_bitmap; area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); insns = arch_uretprobe_trampoline(&insns_size); arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); if (!xol_add_vma(mm, area)) return area; __free_page(area->page); free_bitmap: kfree(area->bitmap); free_area: kfree(area); out: return NULL; } /* * get_xol_area - Allocate process's xol_area if necessary. 
 * This area will be used for storing instructions for execution out of
 * line.
 *
 * Returns the allocated area or NULL.
 */
static struct xol_area *get_xol_area(void)
{
	struct mm_struct *mm = current->mm;
	struct xol_area *area;

	if (!mm->uprobes_state.xol_area)
		__create_xol_area(0);

	/* Pairs with xol_add_vma() smp_store_release() */
	area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */
	return area;
}

void __weak arch_uprobe_clear_state(struct mm_struct *mm)
{
}

void __weak arch_uprobe_init_state(struct mm_struct *mm)
{
}

/*
 * uprobe_clear_state - Free the area allocated for slots.
 */
void uprobe_clear_state(struct mm_struct *mm)
{
	struct xol_area *area = mm->uprobes_state.xol_area;

	mutex_lock(&delayed_uprobe_lock);
	delayed_uprobe_remove(NULL, mm);
	mutex_unlock(&delayed_uprobe_lock);

	arch_uprobe_clear_state(mm);

	if (!area)
		return;

	put_page(area->page);
	kfree(area->bitmap);
	kfree(area);
}

void uprobe_start_dup_mmap(void)
{
	percpu_down_read(&dup_mmap_sem);
}

void uprobe_end_dup_mmap(void)
{
	percpu_up_read(&dup_mmap_sem);
}

void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
	if (mm_flags_test(MMF_HAS_UPROBES, oldmm)) {
		mm_flags_set(MMF_HAS_UPROBES, newmm);
		/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
		mm_flags_set(MMF_RECALC_UPROBES, newmm);
	}
}

static unsigned long xol_get_slot_nr(struct xol_area *area)
{
	unsigned long slot_nr;

	slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
	if (slot_nr < UINSNS_PER_PAGE) {
		if (!test_and_set_bit(slot_nr, area->bitmap))
			return slot_nr;
	}

	return UINSNS_PER_PAGE;
}

/*
 * xol_get_insn_slot - allocate a slot for xol.
 */
static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask)
{
	struct xol_area *area = get_xol_area();
	unsigned long slot_nr;

	if (!area)
		return false;

	wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE);

	utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES;
	arch_uprobe_copy_ixol(area->page, utask->xol_vaddr,
			      &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
	return true;
}

/*
 * xol_free_insn_slot - free the slot allocated by xol_get_insn_slot()
 */
static void xol_free_insn_slot(struct uprobe_task *utask)
{
	struct xol_area *area = current->mm->uprobes_state.xol_area;
	unsigned long offset = utask->xol_vaddr - area->vaddr;
	unsigned int slot_nr;

	utask->xol_vaddr = 0;
	/* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */
	if (WARN_ON_ONCE(offset >= PAGE_SIZE))
		return;

	slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
	clear_bit(slot_nr, area->bitmap);
	smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
	if (waitqueue_active(&area->wq))
		wake_up(&area->wq);
}

void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
				  void *src, unsigned long len)
{
	/* Initialize the slot */
	copy_to_page(page, vaddr, src, len);

	/*
	 * We probably need flush_icache_user_page() but it needs vma.
	 * This should work on most architectures by default. If
	 * architecture needs to do something different it can define
	 * its own version of the function.
	 */
	flush_dcache_page(page);
}
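/*
 * A hypothetical arch override of the __weak helper above (illustrative
 * only, not part of the kernel source): an architecture whose D- and
 * I-caches are not coherent could flush the instruction cache explicitly.
 * This sketch assumes a !HIGHMEM configuration (so page_address() is
 * valid) and that the arch provides flush_icache_range():
 */
#if 0	/* illustrative only */
void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
			   void *src, unsigned long len)
{
	unsigned long kaddr;

	/* fill the slot first, then make it visible to instruction fetch */
	copy_to_page(page, vaddr, src, len);
	kaddr = (unsigned long)page_address(page) + (vaddr & ~PAGE_MASK);
	flush_icache_range(kaddr, kaddr + len);	/* arch-provided primitive */
}
#endif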
/**
 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
 * @regs: Reflects the saved state of the task after it has hit a breakpoint
 * instruction.
 * Return the address of the breakpoint instruction.
 */
unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
{
	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
}

unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	if (unlikely(utask && utask->active_uprobe))
		return utask->vaddr;

	return instruction_pointer(regs);
}

static void ri_pool_push(struct uprobe_task *utask, struct return_instance *ri)
{
	ri->cons_cnt = 0;
	ri->next = utask->ri_pool;
	utask->ri_pool = ri;
}

static struct return_instance *ri_pool_pop(struct uprobe_task *utask)
{
	struct return_instance *ri = utask->ri_pool;

	if (likely(ri))
		utask->ri_pool = ri->next;

	return ri;
}

static void ri_free(struct return_instance *ri)
{
	kfree(ri->extra_consumers);
	kfree_rcu(ri, rcu);
}

static void free_ret_instance(struct uprobe_task *utask,
			      struct return_instance *ri, bool cleanup_hprobe)
{
	unsigned seq;

	if (cleanup_hprobe) {
		enum hprobe_state hstate;

		(void)hprobe_consume(&ri->hprobe, &hstate);
		hprobe_finalize(&ri->hprobe, hstate);
	}

	/*
	 * At this point return_instance is unlinked from utask's
	 * return_instances list and this has become visible to ri_timer().
	 * If seqcount now indicates that ri_timer's return instance
	 * processing loop isn't active, we can return ri into the pool of
	 * to-be-reused return instances for future uretprobes. If ri_timer()
	 * happens to be running right now, though, we fall back to safety and
	 * just perform RCU-delayed freeing of ri.
	 * Admittedly, this is a rather simple use of seqcount, but it nicely
	 * abstracts away all the necessary memory barriers, so we use
	 * a well-supported kernel primitive here.
	 */
	if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
		/* immediate reuse of ri without RCU GP is OK */
		ri_pool_push(utask, ri);
	} else {
		/* we might be racing with ri_timer(), so play it safe */
		ri_free(ri);
	}
}

/*
 * Called with no locks held.
 * Called in context of an exiting or an exec-ing thread.
 */
void uprobe_free_utask(struct task_struct *t)
{
	struct uprobe_task *utask = t->utask;
	struct return_instance *ri, *ri_next;

	if (!utask)
		return;

	t->utask = NULL;
	WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);

	timer_delete_sync(&utask->ri_timer);

	ri = utask->return_instances;
	while (ri) {
		ri_next = ri->next;
		free_ret_instance(utask, ri, true /* cleanup_hprobe */);
		ri = ri_next;
	}

	/* free_ret_instance() above might add to ri_pool, so this loop should come last */
	ri = utask->ri_pool;
	while (ri) {
		ri_next = ri->next;
		ri_free(ri);
		ri = ri_next;
	}

	kfree(utask);
}

#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */

#define for_each_ret_instance_rcu(pos, head) \
	for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next))

static void ri_timer(struct timer_list *timer)
{
	struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
	struct return_instance *ri;

	/* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */
	guard(srcu)(&uretprobes_srcu);
	/* RCU protects return_instance from freeing. */
	guard(rcu)();

	/*
	 * See free_ret_instance() for notes on seqcount use.
	 * We also employ raw API variants to avoid lockdep false-positive
	 * warning complaining about enabled preemption. The timer can only be
	 * invoked once for a uprobe_task. Therefore there can only be one
	 * writer. The reader does not require an even sequence count to make
	 * progress, so it is OK to remain preemptible on PREEMPT_RT.
	 */
	raw_write_seqcount_begin(&utask->ri_seqcount);

	for_each_ret_instance_rcu(ri, utask->return_instances)
		hprobe_expire(&ri->hprobe, false);

	raw_write_seqcount_end(&utask->ri_seqcount);
}

static struct uprobe_task *alloc_utask(void)
{
	struct uprobe_task *utask;

	utask = kzalloc(sizeof(*utask), GFP_KERNEL);
	if (!utask)
		return NULL;

	timer_setup(&utask->ri_timer, ri_timer, 0);
	seqcount_init(&utask->ri_seqcount);

	return utask;
}

/*
 * Allocate a uprobe_task object for the task if necessary.
 * Called when the thread hits a breakpoint.
 *
 * Returns:
 * - pointer to new uprobe_task on success
 * - NULL otherwise
 */
static struct uprobe_task *get_utask(void)
{
	if (!current->utask)
		current->utask = alloc_utask();
	return current->utask;
}

static struct return_instance *alloc_return_instance(struct uprobe_task *utask)
{
	struct return_instance *ri;

	ri = ri_pool_pop(utask);
	if (ri)
		return ri;

	ri = kzalloc(sizeof(*ri), GFP_KERNEL);
	if (!ri)
		return ZERO_SIZE_PTR;

	return ri;
}

static struct return_instance *dup_return_instance(struct return_instance *old)
{
	struct return_instance *ri;

	ri = kmemdup(old, sizeof(*ri), GFP_KERNEL);
	if (!ri)
		return NULL;

	if (unlikely(old->cons_cnt > 1)) {
		ri->extra_consumers = kmemdup(old->extra_consumers,
					      sizeof(ri->extra_consumers[0]) * (old->cons_cnt - 1),
					      GFP_KERNEL);
		if (!ri->extra_consumers) {
			kfree(ri);
			return NULL;
		}
	}

	return ri;
}

static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
{
	struct uprobe_task *n_utask;
	struct return_instance **p, *o, *n;
	struct uprobe *uprobe;

	n_utask = alloc_utask();
	if (!n_utask)
		return -ENOMEM;
	t->utask = n_utask;

	/* protect uprobes from freeing, we'll need try_get_uprobe() them */
	guard(srcu)(&uretprobes_srcu);

	p = &n_utask->return_instances;
	for (o = o_utask->return_instances; o; o = o->next) {
		n = dup_return_instance(o);
		if (!n)
			return -ENOMEM;

		/* if uprobe is non-NULL, we'll have an extra refcount for uprobe */
		uprobe = hprobe_expire(&o->hprobe, true);

		/*
		 * New utask will have stable properly refcounted uprobe or
		 * NULL. Even if we failed to get refcounted uprobe, we still
		 * need to preserve full set of return_instances for proper
		 * uretprobe handling and nesting in forked task.
		 */
		hprobe_init_stable(&n->hprobe, uprobe);

		n->next = NULL;
		rcu_assign_pointer(*p, n);
		p = &n->next;

		n_utask->depth++;
	}

	return 0;
}

static void dup_xol_work(struct callback_head *work)
{
	if (current->flags & PF_EXITING)
		return;

	if (!__create_xol_area(current->utask->dup_xol_addr) &&
			!fatal_signal_pending(current))
		uprobe_warn(current, "dup xol area");
}

/*
 * Called in context of a new clone/fork from copy_process.
 */
void uprobe_copy_process(struct task_struct *t, u64 flags)
{
	struct uprobe_task *utask = current->utask;
	struct mm_struct *mm = current->mm;
	struct xol_area *area;

	t->utask = NULL;

	if (!utask || !utask->return_instances)
		return;

	if (mm == t->mm && !(flags & CLONE_VFORK))
		return;

	if (dup_utask(t, utask))
		return uprobe_warn(t, "dup ret instances");

	/* The task can fork() after dup_xol_work() fails */
	area = mm->uprobes_state.xol_area;
	if (!area)
		return uprobe_warn(t, "dup xol area");

	if (mm == t->mm)
		return;

	t->utask->dup_xol_addr = area->vaddr;
	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
	task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
}
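/*
 * A sketch (illustrative only, not part of the kernel source) summarizing
 * the cases uprobe_copy_process() above distinguishes:
 *
 *   clone(CLONE_VM) without CLONE_VFORK -> shares current->mm, nothing to do
 *   fork() (new mm)                     -> dup return instances, then
 *                                          recreate the "[uprobes]" area via
 *                                          task_work (dup_xol_work)
 *   vfork() (CLONE_VM | CLONE_VFORK)    -> dup return instances only; the mm
 *                                          (and thus the xol_area) is shared
 */
#if 0	/* illustrative only; sketch_child_needs_xol_area() is hypothetical */
static bool sketch_child_needs_xol_area(struct task_struct *child)
{
	/* only a child with its own mm needs its own xol_area */
	return child->mm != current->mm;
}
#endif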
*/ unsigned long uprobe_get_trampoline_vaddr(void) { unsigned long trampoline_vaddr = UPROBE_NO_TRAMPOLINE_VADDR; struct xol_area *area; /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; return trampoline_vaddr; } static void cleanup_return_instances(struct uprobe_task *utask, bool chained, struct pt_regs *regs) { struct return_instance *ri = utask->return_instances, *ri_next; enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri_next = ri->next; rcu_assign_pointer(utask->return_instances, ri_next); utask->depth--; free_ret_instance(utask, ri, true /* cleanup_hprobe */); ri = ri_next; } } static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, struct return_instance *ri) { struct uprobe_task *utask = current->utask; unsigned long orig_ret_vaddr, trampoline_vaddr; bool chained; int srcu_idx; if (!get_xol_area()) goto free; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); goto free; } trampoline_vaddr = uprobe_get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) goto free; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); cleanup_return_instances(utask, chained, regs); /* * We don't want to keep trampoline address in stack, rather keep the * original return address of first caller thru all the consequent * instances. This also makes breakpoint unwrapping easier. */ if (chained) { if (!utask->return_instances) { /* * This situation is not possible. Likely we have an * attack from user-space. */ uprobe_warn(current, "handle tail call"); goto free; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } /* __srcu_read_lock() because SRCU lock survives switch to user space */ srcu_idx = __srcu_read_lock(&uretprobes_srcu); ri->func = instruction_pointer(regs); ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; utask->depth++; hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx); ri->next = utask->return_instances; rcu_assign_pointer(utask->return_instances, ri); mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD); return; free: ri_free(ri); } /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe_task *utask = current->utask; int err; if (!try_get_uprobe(uprobe)) return -EINVAL; if (!xol_get_insn_slot(uprobe, utask)) { err = -ENOMEM; goto err_out; } utask->vaddr = bp_vaddr; err = arch_uprobe_pre_xol(&uprobe->arch, regs); if (unlikely(err)) { xol_free_insn_slot(utask); goto err_out; } utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return 0; err_out: put_uprobe(uprobe); return err; } /* * If we are singlestepping, then ensure this thread is not connected to * non-fatal signals until completion of singlestep. When xol insn itself * triggers the signal, restart the original insn even if the task is * already SIGKILL'ed (since coredump should report the correct ip). This * is even more important if the task has a handler for SIGSEGV/etc, The * _same_ instruction should be repeated again after return from the signal * handler, and SSTEP can never finish in this case. 
*/ bool uprobe_deny_signal(void) { struct task_struct *t = current; struct uprobe_task *utask = t->utask; if (likely(!utask || !utask->active_uprobe)) return false; WARN_ON_ONCE(utask->state != UTASK_SSTEP); if (task_sigpending(t)) { utask->signal_denied = true; clear_tsk_thread_flag(t, TIF_SIGPENDING); if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); } } return true; } static void mmf_recalc_uprobes(struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; for_each_vma(vmi, vma) { if (!valid_vma(vma, false)) continue; /* * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; } mm_flags_clear(MMF_HAS_UPROBES, mm); } static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; int result; if (WARN_ON_ONCE(!IS_ALIGNED(vaddr, UPROBE_SWBP_INSN_SIZE))) return -EINVAL; pagefault_disable(); result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr); pagefault_enable(); if (likely(result == 0)) goto out; result = get_user_pages(vaddr, 1, FOLL_FORCE, &page); if (result < 0) return result; uprobe_copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe_speculative(unsigned long bp_vaddr) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; struct file *vm_file; loff_t offset; unsigned int seq; guard(rcu)(); if (!mmap_lock_speculate_try_begin(mm, &seq)) return NULL; vma = vma_lookup(mm, bp_vaddr); if (!vma) return NULL; /* * vm_file memory can be reused for another instance of struct file, * but can't be freed from under us, so it's safe to read fields from * it, even if the values are some garbage values; ultimately * find_uprobe_rcu() + mmap_lock_speculation_end() check will ensure * that whatever we speculatively found is correct */ vm_file = READ_ONCE(vma->vm_file); if (!vm_file) return NULL; offset = (loff_t)(vma->vm_pgoff << PAGE_SHIFT) + (bp_vaddr - vma->vm_start); uprobe = find_uprobe_rcu(vm_file->f_inode, offset); if (!uprobe) return NULL; /* now double check that nothing about MM changed */ if (mmap_lock_speculate_retry(mm, seq)) return NULL; return uprobe; } /* assumes being inside RCU protected region */ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; uprobe = find_active_uprobe_speculative(bp_vaddr); if (uprobe) return uprobe; mmap_read_lock(mm); vma = vma_lookup(mm, bp_vaddr); if (vma) { if (vma->vm_file) { struct inode *inode = file_inode(vma->vm_file); loff_t offset = vaddr_to_offset(vma, bp_vaddr); uprobe = find_uprobe_rcu(inode, offset); } if (!uprobe) *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } if (!uprobe && mm_flags_test_and_clear(MMF_RECALC_UPROBES, mm)) mmf_recalc_uprobes(mm); mmap_read_unlock(mm); return uprobe; } static struct return_instance *push_consumer(struct return_instance *ri, __u64 id, __u64 cookie) { struct return_consumer *ric; if (unlikely(ri == ZERO_SIZE_PTR)) return ri; if (unlikely(ri->cons_cnt > 0)) { ric = krealloc(ri->extra_consumers, 
sizeof(*ric) * ri->cons_cnt, GFP_KERNEL); if (!ric) { ri_free(ri); return ZERO_SIZE_PTR; } ri->extra_consumers = ric; } ric = likely(ri->cons_cnt == 0) ? &ri->consumer : &ri->extra_consumers[ri->cons_cnt - 1]; ric->id = id; ric->cookie = cookie; ri->cons_cnt++; return ri; } static struct return_consumer * return_consumer_find(struct return_instance *ri, int *iter, int id) { struct return_consumer *ric; int idx; for (idx = *iter; idx < ri->cons_cnt; idx++) { ric = likely(idx == 0) ? &ri->consumer : &ri->extra_consumers[idx - 1]; if (ric->id == id) { *iter = idx + 1; return ric; } } return NULL; } static bool ignore_ret_handler(int rc) { return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE; } static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; bool has_consumers = false, remove = true; struct return_instance *ri = NULL; struct uprobe_task *utask = current->utask; utask->auprobe = &uprobe->arch; list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { bool session = uc->handler && uc->ret_handler; __u64 cookie = 0; int rc = 0; if (uc->handler) { rc = uc->handler(uc, regs, &cookie); WARN(rc < 0 || rc > 2, "bad rc=0x%x from %ps()\n", rc, uc->handler); } remove &= rc == UPROBE_HANDLER_REMOVE; has_consumers = true; if (!uc->ret_handler || ignore_ret_handler(rc)) continue; if (!ri) ri = alloc_return_instance(utask); if (session) ri = push_consumer(ri, uc->id, cookie); } utask->auprobe = NULL; if (!ZERO_OR_NULL_PTR(ri)) prepare_uretprobe(uprobe, regs, ri); if (remove && has_consumers) { down_read(&uprobe->register_rwsem); /* re-check that removal is still required, this time under lock */ if (!filter_chain(uprobe, current->mm)) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } up_read(&uprobe->register_rwsem); } } static void handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs) { struct return_consumer *ric; struct uprobe_consumer *uc; int ric_idx = 0; /* all consumers unsubscribed meanwhile */ if (unlikely(!uprobe)) return; rcu_read_lock_trace(); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { bool session = uc->handler && uc->ret_handler; if (uc->ret_handler) { ric = return_consumer_find(ri, &ric_idx, uc->id); if (!session || ric) uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL); } } rcu_read_unlock_trace(); } static struct return_instance *find_next_ret_chain(struct return_instance *ri) { bool chained; do { chained = ri->chained; ri = ri->next; /* can't be NULL if chained */ } while (chained); return ri; } void uprobe_handle_trampoline(struct pt_regs *regs) { struct uprobe_task *utask; struct return_instance *ri, *ri_next, *next_chain; struct uprobe *uprobe; enum hprobe_state hstate; bool valid; utask = current->utask; if (!utask) goto sigill; ri = utask->return_instances; if (!ri) goto sigill; do { /* * We should throw out the frames invalidated by longjmp(). * If this chain is valid, then the next one should be alive * or NULL; the latter case means that nobody but ri->func * could hit this trampoline on return. TODO: sigaltstack(). 
*/ next_chain = find_next_ret_chain(ri); valid = !next_chain || arch_uretprobe_is_alive(next_chain, RP_CHECK_RET, regs); instruction_pointer_set(regs, ri->orig_ret_vaddr); do { /* pop current instance from the stack of pending return instances, * as it's not pending anymore: we just fixed up original * instruction pointer in regs and are about to call handlers; * this allows fixup_uretprobe_trampoline_entries() to properly fix up * captured stack traces from uretprobe handlers, in which pending * trampoline addresses on the stack are replaced with correct * original return addresses */ ri_next = ri->next; rcu_assign_pointer(utask->return_instances, ri_next); utask->depth--; uprobe = hprobe_consume(&ri->hprobe, &hstate); if (valid) handle_uretprobe_chain(ri, uprobe, regs); hprobe_finalize(&ri->hprobe, hstate); /* We already took care of hprobe, no need to waste more time on that. */ free_ret_instance(utask, ri, false /* !cleanup_hprobe */); ri = ri_next; } while (ri != next_chain); } while (!valid); return; sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); } bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) { return false; } bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs) { return true; } void __weak arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr) { } /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. */ static void handle_swbp(struct pt_regs *regs) { struct uprobe *uprobe; unsigned long bp_vaddr; int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); if (bp_vaddr == uprobe_get_trampoline_vaddr()) return uprobe_handle_trampoline(regs); rcu_read_lock_trace(); uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ force_sig(SIGTRAP); } else { /* * Either we raced with uprobe_unregister() or we can't * access this memory. The latter is only possible if * another thread plays with our ->mm. In both cases * we can simply restart. If this vma was unmapped we * can pretend this insn was not executed yet and get * the (correct) SIGSEGV after restart. */ instruction_pointer_set(regs, bp_vaddr); } goto out; } /* change it in advance for ->handler() and restart */ instruction_pointer_set(regs, bp_vaddr); /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; /* * Pairs with the smp_wmb() in prepare_uprobe(). * * Guarantees that if we see the UPROBE_COPY_INSN bit set, then * we must also see the stores to &uprobe->arch performed by the * prepare_uprobe() call. */ smp_rmb(); /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; if (arch_uprobe_ignore(&uprobe->arch, regs)) goto out; handler_chain(uprobe, regs); /* Try to optimize after first hit. */ arch_uprobe_optimize(&uprobe->arch, bp_vaddr); /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. 
*/ if (instruction_pointer(regs) != bp_vaddr) goto out; if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; if (pre_ssout(uprobe, regs, bp_vaddr)) goto out; out: /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ rcu_read_unlock_trace(); } void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe *uprobe; int is_swbp; guard(rcu_tasks_trace)(); uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); if (!uprobe) return; if (!get_utask()) return; if (arch_uprobe_ignore(&uprobe->arch, regs)) return; handler_chain(uprobe, regs); } /* * Perform required fix-ups and disable singlestep. * Allow pending signals to take effect. */ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else WARN_ON_ONCE(1); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; xol_free_insn_slot(utask); if (utask->signal_denied) { set_thread_flag(TIF_SIGPENDING); utask->signal_denied = false; } if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); force_sig(SIGILL); } } /* * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and * allows the thread to return from interrupt. After that handle_swbp() * sets utask->active_uprobe. * * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag * and allows the thread to return from interrupt. * * While returning to userspace, thread notices the TIF_UPROBE flag and calls * uprobe_notify_resume(). */ void uprobe_notify_resume(struct pt_regs *regs) { struct uprobe_task *utask; clear_thread_flag(TIF_UPROBE); utask = current->utask; if (utask && utask->active_uprobe) handle_singlestep(utask, regs); else handle_swbp(regs); } /* * uprobe_pre_sstep_notifier gets called from interrupt context as part of * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit. */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { if (!current->mm) return 0; if (!mm_flags_test(MMF_HAS_UPROBES, current->mm) && (!current->utask || !current->utask->return_instances)) return 0; set_thread_flag(TIF_UPROBE); return 1; } /* * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep. */ int uprobe_post_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (!current->mm || !utask || !utask->active_uprobe) /* task is currently not uprobed */ return 0; utask->state = UTASK_SSTEP_ACK; set_thread_flag(TIF_UPROBE); return 1; } static struct notifier_block uprobe_exception_nb = { .notifier_call = arch_uprobe_exception_notify, .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); BUG_ON(register_die_notifier(&uprobe_exception_nb)); }
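/*
 * Editor's note: the free_ret_instance() / ri_timer() interplay above uses a
 * seqcount purely as an "is the timer's processing loop running right now?"
 * test. The following stand-alone sketch is not kernel code: C11 atomics
 * stand in for seqcount_t and all names here are illustrative. It shows the
 * one-shot decision that raw_seqcount_try_begin() provides to the freer.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint seq;	/* even: loop not running, odd: loop in progress */

static void loop_begin(void) { atomic_fetch_add(&seq, 1); }	/* count becomes odd */
static void loop_end(void)   { atomic_fetch_add(&seq, 1); }	/* count becomes even */

/* Mirrors the raw_seqcount_try_begin() check: true only if count is even. */
static bool can_reuse_immediately(void)
{
	return (atomic_load(&seq) & 1) == 0;
}

/*
 * A freer calls can_reuse_immediately(): on true it may recycle the object
 * at once (the ri_pool_push() fast path above); on false it must defer the
 * free until readers are done (the kernel defers via kfree_rcu() in ri_free()).
 */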
// SPDX-License-Identifier: GPL-2.0 #include <linux/static_call.h> #include <linux/memory.h> #include <linux/bug.h> #include <asm/text-patching.h> enum insn_type { CALL = 0, /* site call */ NOP = 1, /* site cond-call */ JMP = 2, /* tramp / site tail-call */ RET = 3, /* tramp / site cond-tail-call */ JCC = 4, }; /* * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such * that there is no false-positive trampoline identification while also being a * speculation stop. */ static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; /* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; /* * ud1 (%edx),%rdi -- see __WARN_trap() / decode_bug() */ static const u8 warninsn[] = { 0x67, 0x48, 0x0f, 0xb9, 0x3a }; static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */ { u8 ret = 0; if (insn[0] == 0x0f) { u8 tmp = insn[1]; if ((tmp & 0xf0) == 0x80) ret = tmp; } return ret; } extern void __static_call_return(void); asm (".global __static_call_return\n\t" ".type __static_call_return, @function\n\t" ASM_FUNC_ALIGN "\n\t" "__static_call_return:\n\t" ANNOTATE_NOENDBR "\n\t" ANNOTATE_RETPOLINE_SAFE "\n\t" "ret; int3\n\t" ".size __static_call_return, . 
- __static_call_return \n\t"); static void __ref __static_call_transform(void *insn, enum insn_type type, void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; const void *code; u8 op, buf[6]; if ((type == JMP || type == RET) && (op = __is_Jcc(insn))) type = JCC; switch (type) { case CALL: func = callthunks_translate_call_dest(func); code = text_gen_insn(CALL_INSN_OPCODE, insn, func); if (func == &__static_call_return0) { emulate = code; code = &xor5rax; } if (func == &__WARN_trap) { emulate = code; code = &warninsn; } break; case NOP: code = x86_nops[5]; break; case JMP: code = text_gen_insn(JMP32_INSN_OPCODE, insn, func); break; case RET: if (cpu_wants_rethunk_at(insn)) code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; case JCC: if (!func) { func = __static_call_return; if (cpu_wants_rethunk()) func = x86_return_thunk; } buf[0] = 0x0f; __text_gen_insn(buf+1, op, insn+1, func, 5); code = buf; size = 6; break; } if (memcmp(insn, code, size) == 0) return; if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); smp_text_poke_single(insn, code, size, emulate); } static void __static_call_validate(u8 *insn, bool tail, bool tramp) { u8 opcode = insn[0]; if (tramp && memcmp(insn+5, tramp_ud, 3)) { pr_err("trampoline signature fail"); BUG(); } if (tail) { if (opcode == JMP32_INSN_OPCODE || opcode == RET_INSN_OPCODE || __is_Jcc(insn)) return; } else { if (opcode == CALL_INSN_OPCODE || !memcmp(insn, x86_nops[5], 5) || !memcmp(insn, xor5rax, 5) || !memcmp(insn, warninsn, 5)) return; } /* * If we ever trigger this, our text is corrupt, we'll probably not live long. */ pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); BUG(); } static inline enum insn_type __sc_insn(bool null, bool tail) { /* * Encode the following table without branches: * * tail null insn * -----+-------+------ * 0 | 0 | CALL * 0 | 1 | NOP * 1 | 0 | JMP * 1 | 1 | RET */ return 2*tail + null; } void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); if (tramp && !site) { __static_call_validate(tramp, true, true); __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail, false); __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); noinstr void __static_call_update_early(void *tramp, void *func) { BUG_ON(system_state != SYSTEM_BOOTING); BUG_ON(static_call_initialized); __text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE); sync_core(); } #ifdef CONFIG_MITIGATION_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as * having a return trampoline. * * The problem is that static_call() is available before determining * X86_FEATURE_RETHUNK and, by implication, running alternatives. * * This means that __static_call_transform() above can have overwritten the * return trampoline and we now need to fix things up to be consistent. */ bool __static_call_fixup(void *tramp, u8 op, void *dest) { unsigned long addr = (unsigned long)tramp; /* * Not all .return_sites are a static_call trampoline (most are not). * Check if the 3 bytes after the return are still kernel text, if not, * then this definitely is not a trampoline and we need not worry * further. 
* * This avoids the memcmp() below tripping over pagefaults etc. */ if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) && !kernel_text_address(addr + 7)) return false; if (memcmp(tramp+5, tramp_ud, 3)) { /* Not a trampoline site, not our problem. */ return false; } mutex_lock(&text_mutex); if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) __static_call_transform(tramp, RET, NULL, true); mutex_unlock(&text_mutex); return true; } #endif
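/*
 * Editor's note: __sc_insn() above packs the (tail, null) pair into the
 * insn_type enum without branching. Below is a small self-checking sketch of
 * that arithmetic, compilable on its own; the enum values are copied from
 * this file, the SC_-prefixed names are illustrative.
 */
#include <assert.h>
#include <stdbool.h>

enum insn { SC_CALL = 0, SC_NOP = 1, SC_JMP = 2, SC_RET = 3 };

static enum insn sc_insn(bool null, bool tail)
{
	return (enum insn)(2 * tail + null);	/* same formula as __sc_insn() */
}

int main(void)
{
	assert(sc_insn(false, false) == SC_CALL);	/* call site, func != NULL */
	assert(sc_insn(true,  false) == SC_NOP);	/* call site, func == NULL */
	assert(sc_insn(false, true)  == SC_JMP);	/* tail-call site, func != NULL */
	assert(sc_insn(true,  true)  == SC_RET);	/* tail-call site, func == NULL */
	return 0;
}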
/* * * Author Karsten Keil <kkeil@novell.com> * * Copyright 2008 by Karsten Keil <kkeil@novell.com> * * This code is free software; you can redistribute it and/or modify * it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE * version 2.1 as published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU LESSER GENERAL PUBLIC LICENSE for more details. 
* */ #ifndef mISDNIF_H #define mISDNIF_H #include <linux/types.h> #include <linux/errno.h> #include <linux/socket.h> /* * ABI Version 32 bit * * <8 bit> Major version * - changed if any interface become backwards incompatible * * <8 bit> Minor version * - changed if any interface is extended but backwards compatible * * <16 bit> Release number * - should be incremented on every checkin */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 1 #define MISDN_RELEASE 29 /* primitives for information exchange * generell format * <16 bit 0 > * <8 bit command> * BIT 8 = 1 LAYER private * BIT 7 = 1 answer * BIT 6 = 1 DATA * <8 bit target layer mask> * * Layer = 00 is reserved for general commands Layer = 01 L2 -> HW Layer = 02 HW -> L2 Layer = 04 L3 -> L2 Layer = 08 L2 -> L3 * Layer = FF is reserved for broadcast commands */ #define MISDN_CMDMASK 0xff00 #define MISDN_LAYERMASK 0x00ff /* generell commands */ #define OPEN_CHANNEL 0x0100 #define CLOSE_CHANNEL 0x0200 #define CONTROL_CHANNEL 0x0300 #define CHECK_DATA 0x0400 /* layer 2 -> layer 1 */ #define PH_ACTIVATE_REQ 0x0101 #define PH_DEACTIVATE_REQ 0x0201 #define PH_DATA_REQ 0x2001 #define MPH_ACTIVATE_REQ 0x0501 #define MPH_DEACTIVATE_REQ 0x0601 #define MPH_INFORMATION_REQ 0x0701 #define PH_CONTROL_REQ 0x0801 /* layer 1 -> layer 2 */ #define PH_ACTIVATE_IND 0x0102 #define PH_ACTIVATE_CNF 0x4102 #define PH_DEACTIVATE_IND 0x0202 #define PH_DEACTIVATE_CNF 0x4202 #define PH_DATA_IND 0x2002 #define PH_DATA_E_IND 0x3002 #define MPH_ACTIVATE_IND 0x0502 #define MPH_DEACTIVATE_IND 0x0602 #define MPH_INFORMATION_IND 0x0702 #define PH_DATA_CNF 0x6002 #define PH_CONTROL_IND 0x0802 #define PH_CONTROL_CNF 0x4802 /* layer 3 -> layer 2 */ #define DL_ESTABLISH_REQ 0x1004 #define DL_RELEASE_REQ 0x1104 #define DL_DATA_REQ 0x3004 #define DL_UNITDATA_REQ 0x3104 #define DL_INFORMATION_REQ 0x0004 /* layer 2 -> layer 3 */ #define DL_ESTABLISH_IND 0x1008 #define DL_ESTABLISH_CNF 0x5008 #define DL_RELEASE_IND 0x1108 #define DL_RELEASE_CNF 0x5108 #define DL_DATA_IND 0x3008 #define DL_UNITDATA_IND 0x3108 #define DL_INFORMATION_IND 0x0008 /* intern layer 2 management */ #define MDL_ASSIGN_REQ 0x1804 #define MDL_ASSIGN_IND 0x1904 #define MDL_REMOVE_REQ 0x1A04 #define MDL_REMOVE_IND 0x1B04 #define MDL_STATUS_UP_IND 0x1C04 #define MDL_STATUS_DOWN_IND 0x1D04 #define MDL_STATUS_UI_IND 0x1E04 #define MDL_ERROR_IND 0x1F04 #define MDL_ERROR_RSP 0x5F04 /* intern layer 2 */ #define DL_TIMER200_IND 0x7004 #define DL_TIMER203_IND 0x7304 #define DL_INTERN_MSG 0x7804 /* DL_INFORMATION_IND types */ #define DL_INFO_L2_CONNECT 0x0001 #define DL_INFO_L2_REMOVED 0x0002 /* PH_CONTROL types */ /* TOUCH TONE IS 0x20XX XX "0"..."9", "A","B","C","D","*","#" */ #define DTMF_TONE_VAL 0x2000 #define DTMF_TONE_MASK 0x007F #define DTMF_TONE_START 0x2100 #define DTMF_TONE_STOP 0x2200 #define DTMF_HFC_COEF 0x4000 #define DSP_CONF_JOIN 0x2403 #define DSP_CONF_SPLIT 0x2404 #define DSP_RECEIVE_OFF 0x2405 #define DSP_RECEIVE_ON 0x2406 #define DSP_ECHO_ON 0x2407 #define DSP_ECHO_OFF 0x2408 #define DSP_MIX_ON 0x2409 #define DSP_MIX_OFF 0x240a #define DSP_DELAY 0x240b #define DSP_JITTER 0x240c #define DSP_TXDATA_ON 0x240d #define DSP_TXDATA_OFF 0x240e #define DSP_TX_DEJITTER 0x240f #define DSP_TX_DEJ_OFF 0x2410 #define DSP_TONE_PATT_ON 0x2411 #define DSP_TONE_PATT_OFF 0x2412 #define DSP_VOL_CHANGE_TX 0x2413 #define DSP_VOL_CHANGE_RX 0x2414 #define DSP_BF_ENABLE_KEY 0x2415 #define DSP_BF_DISABLE 0x2416 #define DSP_BF_ACCEPT 0x2416 #define DSP_BF_REJECT 0x2417 #define DSP_PIPELINE_CFG 0x2418 #define 
HFC_VOL_CHANGE_TX 0x2601 #define HFC_VOL_CHANGE_RX 0x2602 #define HFC_SPL_LOOP_ON 0x2603 #define HFC_SPL_LOOP_OFF 0x2604 /* for T30 FAX and analog modem */ #define HW_MOD_FRM 0x4000 #define HW_MOD_FRH 0x4001 #define HW_MOD_FTM 0x4002 #define HW_MOD_FTH 0x4003 #define HW_MOD_FTS 0x4004 #define HW_MOD_CONNECT 0x4010 #define HW_MOD_OK 0x4011 #define HW_MOD_NOCARR 0x4012 #define HW_MOD_FCERROR 0x4013 #define HW_MOD_READY 0x4014 #define HW_MOD_LASTDATA 0x4015 /* DSP_TONE_PATT_ON parameter */ #define TONE_OFF 0x0000 #define TONE_GERMAN_DIALTONE 0x0001 #define TONE_GERMAN_OLDDIALTONE 0x0002 #define TONE_AMERICAN_DIALTONE 0x0003 #define TONE_GERMAN_DIALPBX 0x0004 #define TONE_GERMAN_OLDDIALPBX 0x0005 #define TONE_AMERICAN_DIALPBX 0x0006 #define TONE_GERMAN_RINGING 0x0007 #define TONE_GERMAN_OLDRINGING 0x0008 #define TONE_AMERICAN_RINGPBX 0x000b #define TONE_GERMAN_RINGPBX 0x000c #define TONE_GERMAN_OLDRINGPBX 0x000d #define TONE_AMERICAN_RINGING 0x000e #define TONE_GERMAN_BUSY 0x000f #define TONE_GERMAN_OLDBUSY 0x0010 #define TONE_AMERICAN_BUSY 0x0011 #define TONE_GERMAN_HANGUP 0x0012 #define TONE_GERMAN_OLDHANGUP 0x0013 #define TONE_AMERICAN_HANGUP 0x0014 #define TONE_SPECIAL_INFO 0x0015 #define TONE_GERMAN_GASSENBESETZT 0x0016 #define TONE_GERMAN_AUFSCHALTTON 0x0016 /* MPH_INFORMATION_IND */ #define L1_SIGNAL_LOS_OFF 0x0010 #define L1_SIGNAL_LOS_ON 0x0011 #define L1_SIGNAL_AIS_OFF 0x0012 #define L1_SIGNAL_AIS_ON 0x0013 #define L1_SIGNAL_RDI_OFF 0x0014 #define L1_SIGNAL_RDI_ON 0x0015 #define L1_SIGNAL_SLIP_RX 0x0020 #define L1_SIGNAL_SLIP_TX 0x0021 /* * protocol ids * D channel 1-31 * B channel 33 - 63 */ #define ISDN_P_NONE 0 #define ISDN_P_BASE 0 #define ISDN_P_TE_S0 0x01 #define ISDN_P_NT_S0 0x02 #define ISDN_P_TE_E1 0x03 #define ISDN_P_NT_E1 0x04 #define ISDN_P_TE_UP0 0x05 #define ISDN_P_NT_UP0 0x06 #define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ (p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE)) #define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ (p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT)) #define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) #define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) #define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) #define ISDN_P_LAPD_TE 0x10 #define ISDN_P_LAPD_NT 0x11 #define ISDN_P_B_MASK 0x1f #define ISDN_P_B_START 0x20 #define ISDN_P_B_RAW 0x21 #define ISDN_P_B_HDLC 0x22 #define ISDN_P_B_X75SLP 0x23 #define ISDN_P_B_L2DTMF 0x24 #define ISDN_P_B_L2DSP 0x25 #define ISDN_P_B_L2DSPHDLC 0x26 #define ISDN_P_B_T30_FAX 0x27 #define ISDN_P_B_MODEM_ASYNC 0x28 #define OPTION_L2_PMX 1 #define OPTION_L2_PTP 2 #define OPTION_L2_FIXEDTEI 3 #define OPTION_L2_CLEANUP 4 #define OPTION_L1_HOLD 5 /* should be in sync with linux/kobject.h:KOBJ_NAME_LEN */ #define MISDN_MAX_IDLEN 20 struct mISDNhead { unsigned int prim; unsigned int id; } __packed; #define MISDN_HEADER_LEN sizeof(struct mISDNhead) #define MAX_DATA_SIZE 2048 #define MAX_DATA_MEM (MAX_DATA_SIZE + MISDN_HEADER_LEN) #define MAX_DFRAME_LEN 260 #define MISDN_ID_ADDR_MASK 0xFFFF #define MISDN_ID_TEI_MASK 0xFF00 #define MISDN_ID_SAPI_MASK 0x00FF #define MISDN_ID_TEI_ANY 0x7F00 #define MISDN_ID_ANY 0xFFFF #define MISDN_ID_NONE 0xFFFE #define GROUP_TEI 127 #define TEI_SAPI 63 #define CTRL_SAPI 0 #define MISDN_MAX_CHANNEL 127 #define MISDN_CHMAP_SIZE ((MISDN_MAX_CHANNEL + 1) >> 3) #define SOL_MISDN 0 struct sockaddr_mISDN { sa_family_t family; unsigned char dev; unsigned char channel; unsigned char sapi; unsigned char tei; }; 
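/*
 * Editor's note: a hedged illustration of how the MISDN_ID_* masks above
 * split the 16-bit address part of an id into TEI (high byte) and SAPI
 * (low byte). These helper names are hypothetical and not part of the
 * header; only the mask arithmetic comes from the definitions above.
 */
static inline unsigned char misdn_id_tei(unsigned int id)
{
	return (id & MISDN_ID_TEI_MASK) >> 8;	/* e.g. MISDN_ID_TEI_ANY >> 8 == GROUP_TEI (127) */
}

static inline unsigned char misdn_id_sapi(unsigned int id)
{
	return id & MISDN_ID_SAPI_MASK;		/* SAPI lives in bits 7..0 */
}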
struct mISDNversion { unsigned char major; unsigned char minor; unsigned short release; }; struct mISDN_devinfo { u_int id; u_int Dprotocols; u_int Bprotocols; u_int protocol; u_char channelmap[MISDN_CHMAP_SIZE]; u_int nrbchan; char name[MISDN_MAX_IDLEN]; }; struct mISDN_devrename { u_int id; char name[MISDN_MAX_IDLEN]; /* new name */ }; /* MPH_INFORMATION_REQ payload */ struct ph_info_ch { __u32 protocol; __u64 Flags; }; struct ph_info_dch { struct ph_info_ch ch; __u16 state; __u16 num_bch; }; struct ph_info { struct ph_info_dch dch; struct ph_info_ch bch[]; }; /* timer device ioctl */ #define IMADDTIMER _IOR('I', 64, int) #define IMDELTIMER _IOR('I', 65, int) /* socket ioctls */ #define IMGETVERSION _IOR('I', 66, int) #define IMGETCOUNT _IOR('I', 67, int) #define IMGETDEVINFO _IOR('I', 68, int) #define IMCTRLREQ _IOR('I', 69, int) #define IMCLEAR_L2 _IOR('I', 70, int) #define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) #define IMHOLD_L1 _IOR('I', 72, int) static inline int test_channelmap(u_int nr, u_char *map) { if (nr <= MISDN_MAX_CHANNEL) return map[nr >> 3] & (1 << (nr & 7)); else return 0; } static inline void set_channelmap(u_int nr, u_char *map) { map[nr >> 3] |= (1 << (nr & 7)); } static inline void clear_channelmap(u_int nr, u_char *map) { map[nr >> 3] &= ~(1 << (nr & 7)); } /* CONTROL_CHANNEL parameters */ #define MISDN_CTRL_GETOP 0x0000 #define MISDN_CTRL_LOOP 0x0001 #define MISDN_CTRL_CONNECT 0x0002 #define MISDN_CTRL_DISCONNECT 0x0004 #define MISDN_CTRL_RX_BUFFER 0x0008 #define MISDN_CTRL_PCMCONNECT 0x0010 #define MISDN_CTRL_PCMDISCONNECT 0x0020 #define MISDN_CTRL_SETPEER 0x0040 #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 #define MISDN_CTRL_FILL_EMPTY 0x0200 #define MISDN_CTRL_GETPEER 0x0400 #define MISDN_CTRL_L1_TIMER3 0x0800 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 #define MISDN_CTRL_HFC_PCM_CONN 0x4001 #define MISDN_CTRL_HFC_PCM_DISC 0x4002 #define MISDN_CTRL_HFC_CONF_JOIN 0x4003 #define MISDN_CTRL_HFC_CONF_SPLIT 0x4004 #define MISDN_CTRL_HFC_RECEIVE_OFF 0x4005 #define MISDN_CTRL_HFC_RECEIVE_ON 0x4006 #define MISDN_CTRL_HFC_ECHOCAN_ON 0x4007 #define MISDN_CTRL_HFC_ECHOCAN_OFF 0x4008 #define MISDN_CTRL_HFC_WD_INIT 0x4009 #define MISDN_CTRL_HFC_WD_RESET 0x400A /* special RX buffer value for MISDN_CTRL_RX_BUFFER: request.p1 is the minimum * buffer size, request.p2 the maximum. Using MISDN_CTRL_RX_SIZE_IGNORE will * not change the value, but still read back the actual setting. 
*/ #define MISDN_CTRL_RX_SIZE_IGNORE -1 /* socket options */ #define MISDN_TIME_STAMP 0x0001 struct mISDN_ctrl_req { int op; int channel; int p1; int p2; }; /* muxer options */ #define MISDN_OPT_ALL 1 #define MISDN_OPT_TEIMGR 2 #ifdef __KERNEL__ #include <linux/list.h> #include <linux/skbuff.h> #include <linux/net.h> #include <net/sock.h> #include <linux/completion.h> #define DEBUG_CORE 0x000000ff #define DEBUG_CORE_FUNC 0x00000002 #define DEBUG_SOCKET 0x00000004 #define DEBUG_MANAGER 0x00000008 #define DEBUG_SEND_ERR 0x00000010 #define DEBUG_MSG_THREAD 0x00000020 #define DEBUG_QUEUE_FUNC 0x00000040 #define DEBUG_L1 0x0000ff00 #define DEBUG_L1_FSM 0x00000200 #define DEBUG_L2 0x00ff0000 #define DEBUG_L2_FSM 0x00020000 #define DEBUG_L2_CTRL 0x00040000 #define DEBUG_L2_RECV 0x00080000 #define DEBUG_L2_TEI 0x00100000 #define DEBUG_L2_TEIFSM 0x00200000 #define DEBUG_TIMER 0x01000000 #define DEBUG_CLOCK 0x02000000 #define mISDN_HEAD_P(s) ((struct mISDNhead *)&s->cb[0]) #define mISDN_HEAD_PRIM(s) (((struct mISDNhead *)&s->cb[0])->prim) #define mISDN_HEAD_ID(s) (((struct mISDNhead *)&s->cb[0])->id) /* socket states */ #define MISDN_OPEN 1 #define MISDN_BOUND 2 #define MISDN_CLOSED 3 struct mISDNchannel; struct mISDNdevice; struct mISDNstack; struct mISDNclock; struct channel_req { u_int protocol; struct sockaddr_mISDN adr; struct mISDNchannel *ch; }; typedef int (ctrl_func_t)(struct mISDNchannel *, u_int, void *); typedef int (send_func_t)(struct mISDNchannel *, struct sk_buff *); typedef int (create_func_t)(struct channel_req *); struct Bprotocol { struct list_head list; char *name; u_int Bprotocols; create_func_t *create; }; struct mISDNchannel { struct list_head list; u_int protocol; u_int nr; u_long opt; u_int addr; struct mISDNstack *st; struct mISDNchannel *peer; send_func_t *send; send_func_t *recv; ctrl_func_t *ctrl; }; struct mISDN_sock_list { struct hlist_head head; rwlock_t lock; }; struct mISDN_sock { struct sock sk; struct mISDNchannel ch; u_int cmask; struct mISDNdevice *dev; }; struct mISDNdevice { struct mISDNchannel D; u_int id; u_int Dprotocols; u_int Bprotocols; u_int nrbchan; u_char channelmap[MISDN_CHMAP_SIZE]; struct list_head bchannels; struct mISDNchannel *teimgr; struct device dev; }; struct mISDNstack { u_long status; struct mISDNdevice *dev; struct task_struct *thread; struct completion *notify; wait_queue_head_t workq; struct sk_buff_head msgq; struct list_head layer2; struct mISDNchannel *layer1; struct mISDNchannel own; struct mutex lmutex; /* protect lists */ struct mISDN_sock_list l1sock; #ifdef MISDN_MSG_STATS u_int msg_cnt; u_int sleep_cnt; u_int stopped_cnt; #endif }; typedef int (clockctl_func_t)(void *, int); struct mISDNclock { struct list_head list; char name[64]; int pri; clockctl_func_t *ctl; void *priv; }; /* global alloc/queue functions */ static inline struct sk_buff * mI_alloc_skb(unsigned int len, gfp_t gfp_mask) { struct sk_buff *skb; skb = alloc_skb(len + MISDN_HEADER_LEN, gfp_mask); if (likely(skb)) skb_reserve(skb, MISDN_HEADER_LEN); return skb; } static inline struct sk_buff * _alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) { struct sk_buff *skb = mI_alloc_skb(len, gfp_mask); struct mISDNhead *hh; if (!skb) return NULL; if (len) skb_put_data(skb, dp, len); hh = mISDN_HEAD_P(skb); hh->prim = prim; hh->id = id; return skb; } static inline void _queue_data(struct mISDNchannel *ch, u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) { struct sk_buff *skb; if (!ch->peer) return; skb = _alloc_mISDN_skb(prim, id, len, 
dp, gfp_mask); if (!skb) return; if (ch->recv(ch->peer, skb)) dev_kfree_skb(skb); } /* global register/unregister functions */ extern int mISDN_register_device(struct mISDNdevice *, struct device *parent, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); static inline struct mISDNdevice *dev_to_mISDN(const struct device *dev) { if (dev) return dev_get_drvdata(dev); else return NULL; } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); #endif /* __KERNEL__ */ #endif /* mISDNIF_H */
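/*
 * Editor's note: a minimal user-space exercise of the channelmap bit
 * arithmetic used by test/set/clear_channelmap() above (byte index nr >> 3,
 * bit index nr & 7). The literal 16 matches MISDN_CHMAP_SIZE when
 * MISDN_MAX_CHANNEL is 127; everything else here is illustrative only.
 */
#include <assert.h>

int main(void)
{
	unsigned char map[16] = { 0 };

	/* set channel 10: byte 10 >> 3 == 1, bit 10 & 7 == 2 */
	map[10 >> 3] |= 1 << (10 & 7);
	assert(map[1] == 0x04);

	/* test, as test_channelmap() does for an in-range channel */
	assert(map[10 >> 3] & (1 << (10 & 7)));

	/* clear, as clear_channelmap() does */
	map[10 >> 3] &= ~(1 << (10 & 7));
	assert(map[1] == 0);
	return 0;
}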
// SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include "lru_cache.h" #include "messages.h" /* * Initialize a cache object. * * @cache: The cache. * @max_size: Maximum size (number of entries) for the cache. * Use 0 for unlimited size, it's the user's responsibility to * trim the cache in that case. */ void btrfs_lru_cache_init(struct btrfs_lru_cache *cache, unsigned int max_size) { INIT_LIST_HEAD(&cache->lru_list); mt_init(&cache->entries); cache->size = 0; cache->max_size = max_size; } static struct btrfs_lru_cache_entry *match_entry(struct list_head *head, u64 key, u64 gen) { struct btrfs_lru_cache_entry *entry; list_for_each_entry(entry, head, list) { if (entry->key == key && entry->gen == gen) return entry; } return NULL; } /* * Look up an entry in the cache. * * @cache: The cache. * @key: The key of the entry we are looking for. * @gen: Generation associated with the key. * * Returns the entry associated with the key or NULL if none found. */ struct btrfs_lru_cache_entry *btrfs_lru_cache_lookup(struct btrfs_lru_cache *cache, u64 key, u64 gen) { struct list_head *head; struct btrfs_lru_cache_entry *entry; head = mtree_load(&cache->entries, key); if (!head) return NULL; entry = match_entry(head, key, gen); if (entry) list_move_tail(&entry->lru_list, &cache->lru_list); return entry; } /* * Remove an entry from the cache. * * @cache: The cache to remove from. * @entry: The entry to remove from the cache. * * Note: this also frees the memory used by the entry. */ void btrfs_lru_cache_remove(struct btrfs_lru_cache *cache, struct btrfs_lru_cache_entry *entry) { struct list_head *prev = entry->list.prev; ASSERT(cache->size > 0); ASSERT(!mtree_empty(&cache->entries)); list_del(&entry->list); list_del(&entry->lru_list); if (list_empty(prev)) { struct list_head *head; /* * If the list that entry->list belonged to is now empty, it * means it's a head entry not pointing to any cached entries, * so remove it from the maple tree and free it. */ head = mtree_erase(&cache->entries, entry->key); ASSERT(head == prev); kfree(head); } kfree(entry); cache->size--; } /* * Store an entry in the cache. * * @cache: The cache. * @entry: The entry to store. * * Returns 0 on success and < 0 on error. 
*/ int btrfs_lru_cache_store(struct btrfs_lru_cache *cache, struct btrfs_lru_cache_entry *new_entry, gfp_t gfp) { const u64 key = new_entry->key; struct list_head *head; int ret; head = kmalloc(sizeof(*head), gfp); if (!head) return -ENOMEM; ret = mtree_insert(&cache->entries, key, head, gfp); if (ret == 0) { INIT_LIST_HEAD(head); list_add_tail(&new_entry->list, head); } else if (ret == -EEXIST) { kfree(head); head = mtree_load(&cache->entries, key); ASSERT(head != NULL); if (match_entry(head, key, new_entry->gen) != NULL) return -EEXIST; list_add_tail(&new_entry->list, head); } else if (ret < 0) { kfree(head); return ret; } if (cache->max_size > 0 && cache->size == cache->max_size) { struct btrfs_lru_cache_entry *lru_entry; lru_entry = list_first_entry(&cache->lru_list, struct btrfs_lru_cache_entry, lru_list); btrfs_lru_cache_remove(cache, lru_entry); } list_add_tail(&new_entry->lru_list, &cache->lru_list); cache->size++; return 0; } /* * Empty a cache. * * @cache: The cache to empty. * * Removes all entries from the cache. */ void btrfs_lru_cache_clear(struct btrfs_lru_cache *cache) { struct btrfs_lru_cache_entry *entry; struct btrfs_lru_cache_entry *tmp; list_for_each_entry_safe(entry, tmp, &cache->lru_list, lru_list) btrfs_lru_cache_remove(cache, entry); ASSERT(cache->size == 0); ASSERT(mtree_empty(&cache->entries)); }
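/*
 * Editor's note: a hedged sketch of how a caller would use the API above,
 * following the embedding pattern implied by lru_cache.h: the
 * btrfs_lru_cache_entry must be the *first* member of the wrapping struct,
 * because btrfs_lru_cache_remove() kfree()s the entry pointer directly.
 * "struct my_entry" and "my_cache_demo" are illustrative names only.
 */
struct my_entry {
	struct btrfs_lru_cache_entry common;	/* caller fills ->key and ->gen */
	u64 payload;
};

static int my_cache_demo(struct btrfs_lru_cache *cache)
{
	struct btrfs_lru_cache_entry *hit;
	struct my_entry *e;
	int ret;

	btrfs_lru_cache_init(cache, 64);	/* once 64 entries are cached, store evicts the LRU one */

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;
	e->common.key = 123;
	e->common.gen = 1;
	e->payload = 42;

	ret = btrfs_lru_cache_store(cache, &e->common, GFP_KERNEL);
	if (ret) {	/* -EEXIST or -ENOMEM; the cache did not take ownership */
		kfree(e);
		return ret;
	}

	hit = btrfs_lru_cache_lookup(cache, 123, 1);	/* also moves the entry to the MRU position */
	if (hit)
		e = container_of(hit, struct my_entry, common);

	btrfs_lru_cache_clear(cache);	/* removes and frees every entry */
	return 0;
}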
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * The filters are packed to hash tables of key nodes * with a set of 32bit key/mask pairs at every node. * Nodes reference next level hash tables etc. 
* * This scheme is the best universal classifier I managed to * invent; it is not super-fast, but it is not slow (provided you * program it correctly), and general enough. And its relative * speed grows as the number of rules becomes larger. * * It seems that it represents the best middle point between * speed and manageability both by human and by machine. * * It is especially useful for link sharing combined with QoS; * pure RSVP doesn't need such a general approach and can use * much simpler (and faster) schemes, sort of cls_rsvp.c. * * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/percpu.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/bitmap.h> #include <linux/netdevice.h> #include <linux/hash.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <linux/idr.h> #include <net/tc_wrapper.h> struct tc_u_knode { struct tc_u_knode __rcu *next; u32 handle; struct tc_u_hnode __rcu *ht_up; struct tcf_exts exts; int ifindex; u8 fshift; struct tcf_result res; struct tc_u_hnode __rcu *ht_down; #ifdef CONFIG_CLS_U32_PERF struct tc_u32_pcnt __percpu *pf; #endif u32 flags; unsigned int in_hw_count; #ifdef CONFIG_CLS_U32_MARK u32 val; u32 mask; u32 __percpu *pcpu_success; #endif struct rcu_work rwork; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ struct tc_u32_sel sel; }; struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; struct rcu_head rcu; u32 flags; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. */ struct tc_u_knode __rcu *ht[]; }; struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; }; static u32 handle2id(u32 h) { return ((h & 0x80000000) ? 
((h >> 20) & 0x7FF) : h); } static u32 id2handle(u32 id) { return (id | 0x800U) << 20; } static inline unsigned int u32_hash_fold(__be32 key, const struct tc_u32_sel *sel, u8 fshift) { unsigned int h = ntohl(key & sel->hmask) >> fshift; return h; } TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct { struct tc_u_knode *knode; unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; int sel = 0; #ifdef CONFIG_CLS_U32_PERF int j; #endif int i, r; next_ht: n = rcu_dereference_bh(ht->ht[sel]); next_knode: if (n) { struct tc_u32_key *key = n->sel.keys; #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->rcnt); j = 0; #endif if (tc_skip_sw(n->flags)) { n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_MARK if ((skb->mark & n->mask) != n->val) { n = rcu_dereference_bh(n->next); goto next_knode; } else { __this_cpu_inc(*n->pcpu_success); } #endif for (i = n->sel.nkeys; i > 0; i--, key++) { int toff = off + key->off + (off2 & key->offmask); __be32 *data, hdata; if (skb_headroom(skb) + toff > INT_MAX) goto out; data = skb_header_pointer(skb, toff, 4, &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->kcnts[j]); j++; #endif } ht = rcu_dereference_bh(n->ht_down); if (!ht) { check_terminal: if (n->sel.flags & TC_U32_TERMINAL) { *res = n->res; if (!tcf_match_indev(skb, n->ifindex)) { n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->rhit); #endif r = tcf_exts_exec(skb, &n->exts, res); if (r < 0) { n = rcu_dereference_bh(n->next); goto next_knode; } return r; } n = rcu_dereference_bh(n->next); goto next_knode; } /* PUSH */ if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; stack[sdepth].off = off; sdepth++; ht = rcu_dereference_bh(n->ht_down); sel = 0; if (ht->divisor) { __be32 *data, hdata; data = skb_header_pointer(skb, off + n->sel.hoff, 4, &hdata); if (!data) goto out; sel = ht->divisor & u32_hash_fold(*data, &n->sel, n->fshift); } if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) goto next_ht; if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; if (n->sel.flags & TC_U32_VAROFFSET) { __be16 *data, hdata; data = skb_header_pointer(skb, off + n->sel.offoff, 2, &hdata); if (!data) goto out; off2 += ntohs(n->sel.offmask & *data) >> n->sel.offshift; } off2 &= ~3; } if (n->sel.flags & TC_U32_EAT) { off += off2; off2 = 0; } if (off < skb->len) goto next_ht; } /* POP */ if (sdepth--) { n = stack[sdepth].knode; ht = rcu_dereference_bh(n->ht_up); off = stack[sdepth].off; goto check_terminal; } out: return -1; deadloop: net_warn_ratelimited("cls_u32: dead loop\n"); return -1; } static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) if (ht->handle == handle) break; return ht; } static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; sel = TC_U32_HASH(handle); if (sel > ht->divisor) goto out; for (n = rtnl_dereference(ht->ht[sel]); n; n = rtnl_dereference(n->next)) if (n->handle == handle) break; out: return n; } static void *u32_get(struct tcf_proto *tp, u32 handle) { 
struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; if (TC_U32_HTID(handle) == TC_U32_ROOT) ht = rtnl_dereference(tp->root); else ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) return NULL; if (TC_U32_KEY(handle) == 0) return ht; return u32_lookup_key(ht, handle); } /* Protected by rtnl lock */ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) { int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL); if (id < 0) return 0; return id2handle(id); } static struct hlist_head *tc_u_common_hash; #define U32_HASH_SHIFT 10 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT) static void *tc_u_common_ptr(const struct tcf_proto *tp) { struct tcf_block *block = tp->chain->block; /* The block sharing is currently supported only * for classless qdiscs. In that case we use block * for tc_u_common identification. In case the * block is not shared, block->q is a valid pointer * and we can use that. That works for classful qdiscs. */ if (tcf_block_shared(block)) return block; else return block->q; } static struct hlist_head *tc_u_hash(void *key) { return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT); } static struct tc_u_common *tc_u_common_find(void *key) { struct tc_u_common *tc; hlist_for_each_entry(tc, tc_u_hash(key), hnode) { if (tc->ptr == key) return tc; } return NULL; } static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; void *key = tc_u_common_ptr(tp); struct tc_u_common *tp_c = tc_u_common_find(key); root_ht = kzalloc(struct_size(root_ht, ht, 1), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0); root_ht->prio = tp->prio; root_ht->is_root = true; idr_init(&root_ht->handle_idr); if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; } refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); } else { refcount_inc(&tp_c->refcnt); } RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; } static void __u32_destroy_key(struct tc_u_knode *n) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } static void u32_destroy_key(struct tc_u_knode *n, bool free_pf) { tcf_exts_put_net(&n->exts); #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); #endif #ifdef CONFIG_CLS_U32_MARK if (free_pf) free_percpu(n->pcpu_success); #endif __u32_destroy_key(n); } /* u32_delete_key_rcu should be called when free'ing a copied * version of a tc_u_knode obtained from u32_init_knode(). When * copies are obtained from u32_init_knode() the statistics are * shared between the old and new copies to allow readers to * continue to update the statistics during the copy. To support * this the u32_delete_key_rcu variant does not free the percpu * statistics. */ static void u32_delete_key_work(struct work_struct *work) { struct tc_u_knode *key = container_of(to_rcu_work(work), struct tc_u_knode, rwork); rtnl_lock(); u32_destroy_key(key, false); rtnl_unlock(); } /* u32_delete_key_freepf_rcu is the rcu callback variant * that free's the entire structure including the statistics * percpu variables. 
Only use this if the key is not a copy * returned by u32_init_knode(). See u32_delete_key_rcu() * for the variant that should be used with keys returned from * u32_init_knode() */ static void u32_delete_key_freepf_work(struct work_struct *work) { struct tc_u_knode *key = container_of(to_rcu_work(work), struct tc_u_knode, rwork); rtnl_lock(); u32_destroy_key(key, true); rtnl_unlock(); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_common *tp_c = tp->data; struct tc_u_knode __rcu **kp; struct tc_u_knode *pkp; struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); if (ht) { kp = &ht->ht[TC_U32_HASH(key->handle)]; for (pkp = rtnl_dereference(*kp); pkp; kp = &pkp->next, pkp = rtnl_dereference(*kp)) { if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); tp_c->knodes--; tcf_unbind_filter(tp, &key->res); idr_remove(&ht->handle_idr, key->handle); tcf_exts_get_net(&key->exts); tcf_queue_work(&key->rwork, u32_delete_key_freepf_work); return 0; } } } WARN_ON(1); return 0; } static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); bool offloaded = false; int err; tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; } else if (err > 0) { offloaded = true; } if (skip_sw && !offloaded) return -EINVAL; return 0; } static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); int err; tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK cls_u32.knode.val = n->val; cls_u32.knode.mask = n->mask; #else cls_u32.knode.val = 0; cls_u32.knode.mask = 0; #endif cls_u32.knode.sel = &n->sel; cls_u32.knode.res = &n->res; cls_u32.knode.exts = &n->exts; if (n->ht_down) cls_u32.knode.link_handle = ht->handle; err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, &n->flags, &n->in_hw_count, true); if (err) { u32_remove_hw_knode(tp, n, NULL); return err;
} if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) return -EINVAL; return 0; } static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_knode *n; unsigned int h; for (h = 0; h <= ht->divisor; h++) { while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tp_c->knodes--; tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n, extack); idr_remove(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) tcf_queue_work(&n->rwork, u32_delete_key_freepf_work); else u32_destroy_key(n, true); } } } static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht, extack); idr_destroy(&ht->handle_idr); idr_remove(&tp_c->handle_idr, handle2id(ht->handle)); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; } } return -ENOENT; } static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { u32_clear_hnode(tp, ht, extack); RCU_INIT_POINTER(tp_c->hlist, ht->next); /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } idr_destroy(&tp_c->handle_idr); kfree(tp_c); } tp->data = NULL; } static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_common *tp_c = tp->data; int ret = 0; if (TC_U32_KEY(ht->handle)) { u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode *)ht); goto out; } if (ht->is_root) { NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); return -EINVAL; } if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); return -EBUSY; } out: *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) { u32 index = htid | 0x800; u32 max = htid | 0xFFF; if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) { index = htid + 1; if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) index = max; } return index; } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { [TCA_U32_CLASSID] = { .type = NLA_U32 }, [TCA_U32_HASH] = { .type = NLA_U32 }, [TCA_U32_LINK] = { .type = NLA_U32 }, [TCA_U32_DIVISOR] = { .type = NLA_U32 }, [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, [TCA_U32_FLAGS] = { .type = NLA_U32 }, }; static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb) { if (tb[TCA_U32_CLASSID]) tcf_unbind_filter(tp, &n->res); } static void 
u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n, unsigned long base, struct nlattr **tb) { if (tb[TCA_U32_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); tcf_bind_filter(tp, &n->res, base); } } static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { int err, ifindex = -1; err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, fl_flags, extack); if (err < 0) return err; if (tb[TCA_U32_INDEV]) { ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); if (ifindex < 0) return -EINVAL; } if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) { NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table"); return -EINVAL; } if (handle) { ht_down = u32_lookup_ht(tp->data, handle); if (!ht_down) { NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); return -EINVAL; } if (ht_down->is_root) { NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) n->ifindex = ifindex; return 0; } static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; struct tc_u_hnode *ht; if (TC_U32_HTID(n->handle) == TC_U32_ROOT) ht = rtnl_dereference(tp->root); else ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle)); ins = &ht->ht[TC_U32_HASH(n->handle)]; /* The node must always exist for it to be replaced; if this is not the * case then something went very wrong elsewhere. */ for (pins = rtnl_dereference(*ins); ; ins = &pins->next, pins = rtnl_dereference(*ins)) if (pins->handle == n->handle) break; idr_replace(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tc_u32_sel *s = &n->sel; struct tc_u_knode *new; new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL); if (!new) return NULL; RCU_INIT_POINTER(new->next, n->next); new->handle = n->handle; RCU_INIT_POINTER(new->ht_up, n->ht_up); new->ifindex = n->ifindex; new->fshift = n->fshift; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them intact. When the node is later destroyed * a special destroy call must be made to not free the pf memory.
*/ new->pf = n->pf; #endif #ifdef CONFIG_CLS_U32_MARK new->val = n->val; new->mask = n->mask; /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); return NULL; } /* bump reference count as long as we hold pointer to structure */ if (ht) refcount_inc(&ht->refcnt); return new; } static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; struct tc_u_knode *n; struct tc_u32_sel *s; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid, userflags = 0; size_t sel_size; int err; if (!opt) { if (handle) { NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options"); return -EINVAL; } else { return 0; } } err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy, extack); if (err < 0) return err; if (tb[TCA_U32_FLAGS]) { userflags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(userflags)) { NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; } } n = *arg; if (n) { struct tc_u_knode *new; if (TC_U32_KEY(n->handle) == 0) { NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero"); return -EINVAL; } if ((n->flags ^ userflags) & ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; } new = u32_init_knode(net, tp, n); if (!new) return -ENOMEM; err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE], flags, new->flags, extack); if (err) { __u32_destroy_key(new); return err; } u32_bind_filter(tp, new, base, tb); err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { u32_unbind_filter(tp, new, tb); if (tb[TCA_U32_LINK]) { struct tc_u_hnode *ht_old; ht_old = rtnl_dereference(n->ht_down); if (ht_old) refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; } if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; tcf_proto_update_usesw(tp, new->flags); u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); tcf_exts_get_net(&n->exts); tcf_queue_work(&n->rwork, u32_delete_key_work); return 0; } if (tb[TCA_U32_DIVISOR]) { unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); if (!is_power_of_2(divisor)) { NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2"); return -EINVAL; } if (divisor-- > 0x100) { NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); return -EINVAL; } if (TC_U32_KEY(handle)) { NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table"); return -EINVAL; } ht = kzalloc(struct_size(ht, ht, divisor + 1), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; if (handle == 0) { handle = gen_new_htid(tp->data, ht); if (handle == 0) { kfree(ht); return -ENOMEM; } } else { err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle, handle, GFP_KERNEL); if (err) { kfree(ht); return err; } } refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); ht->flags = userflags; err = u32_replace_hw_hnode(tp, ht, userflags, extack); if (err) { idr_remove(&tp_c->handle_idr, handle2id(handle)); kfree(ht); return err; } RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = ht; return 0; } if (tb[TCA_U32_HASH]) { htid = nla_get_u32(tb[TCA_U32_HASH]); if 
(TC_U32_HTID(htid) == TC_U32_ROOT) { ht = rtnl_dereference(tp->root); htid = ht->handle; } else { ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); if (!ht) { NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found"); return -EINVAL; } } } else { ht = rtnl_dereference(tp->root); htid = ht->handle; } if (ht->divisor < TC_U32_HASH(htid)) { NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value"); return -EINVAL; } /* At this point, we need to derive the new handle that will be used to * uniquely map the identity of this table match entry. The * identity of the entry that we need to construct is 32 bits made of: * htid(12b):bucketid(8b):node/entryid(12b) * * At this point _we have the table(ht)_ in which we will insert this * entry. We carry the table's id in variable "htid". * Note that earlier code picked the ht selection either by a) the user * providing the htid via the TCA_U32_HASH attribute or b) when no such * attribute is passed, defaulting to the root ht at ID * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. * If OTOH the user passed us the htid, they may also pass a bucketid of * choice. 0 is fine. For example, a user htid of 0x[600][01][000] * indicates hash bucketid 1. Rule: the entry/node ID _cannot_ be * passed via the htid, so even if it was non-zero it will be ignored. * * We may also have a handle, if the user passed one. The handle also * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). * Rule: the bucketid on the handle is ignored even if one was passed; * rather the value on "htid" is always assumed to be the bucketid. */ if (handle) { /* Rule: the htid from the handle and the tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; } /* Ok, so far we have a valid htid(12b):bucketid(8b) but we * need to finalize the table entry identification with the last * part - the node/entryid(12b). Rule: Nodeid _cannot be 0_ for * entries. Rule: nodeid of 0 is reserved only for tables (see * earlier code which processes the TC_U32_DIVISOR attribute). * Rule: The nodeid can only be derived from the handle (and not * the htid). * Rule: if the handle specified zero for the node id, for example * 0x60000000, then pick a new nodeid from the pool of IDs * this hash table has been allocating from. * If OTOH it is specified (e.g. the user passed a handle such as * 0x60000123), then we use it to generate our final handle, which * is used to uniquely identify the match entry. */ if (!TC_U32_NODE(handle)) { handle = gen_new_kid(ht, htid); } else { handle = htid | TC_U32_NODE(handle); err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, GFP_KERNEL); if (err) return err; } } else { /* The user did not give us a handle; let's just generate one * from the table's pool of nodeids.
*/ handle = gen_new_kid(ht, htid); } if (tb[TCA_U32_SEL] == NULL) { NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); err = -EINVAL; goto erridr; } s = nla_data(tb[TCA_U32_SEL]); sel_size = struct_size(s, keys, s->nkeys); if (nla_len(tb[TCA_U32_SEL]) < sel_size) { err = -EINVAL; goto erridr; } n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL); if (n == NULL) { err = -ENOBUFS; goto erridr; } #ifdef CONFIG_CLS_U32_PERF n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys), __alignof__(struct tc_u32_pcnt)); if (!n->pf) { err = -ENOBUFS; goto errfree; } #endif unsafe_memcpy(&n->sel, s, sel_size, /* A composite flex-array structure destination, * which was correctly sized with struct_size(), * bounds-checked against nla_len(), and allocated * above. */); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; n->flags = userflags; err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) goto errout; #ifdef CONFIG_CLS_U32_MARK n->pcpu_success = alloc_percpu(u32); if (!n->pcpu_success) { err = -ENOMEM; goto errout; } if (tb[TCA_U32_MARK]) { struct tc_u32_mark *mark; mark = nla_data(tb[TCA_U32_MARK]); n->val = mark->val; n->mask = mark->mask; } #endif err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE], flags, n->flags, extack); u32_bind_filter(tp, n, base, tb); if (err == 0) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; err = u32_replace_hw_knode(tp, n, flags, extack); if (err) goto errunbind; if (!tc_in_hw(n->flags)) n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; tcf_proto_update_usesw(tp, n->flags); ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) break; RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); tp_c->knodes++; *arg = n; return 0; } errunbind: u32_unbind_filter(tp, n, tb); #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); #endif errout: tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF errfree: free_percpu(n->pf); #endif kfree(n); erridr: idr_remove(&ht->handle_idr, handle); return err; } static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; struct tc_u_knode *n; unsigned int h; if (arg->stop) return; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; if (!tc_cls_stats_dump(tp, arg, ht)) return; for (h = 0; h <= ht->divisor; h++) { for (n = rtnl_dereference(ht->ht[h]); n; n = rtnl_dereference(n->next)) { if (!tc_cls_stats_dump(tp, arg, n)) return; } } } } static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_cls_u32_offload cls_u32 = {}; int err; tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack); cls_u32.command = add ? 
TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = ht->divisor; cls_u32.hnode.handle = ht->handle; cls_u32.hnode.prio = ht->prio; err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); if (err && add && tc_skip_sw(ht->flags)) return err; return 0; } static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = add ? TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; if (add) { cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK cls_u32.knode.val = n->val; cls_u32.knode.mask = n->mask; #else cls_u32.knode.val = 0; cls_u32.knode.mask = 0; #endif cls_u32.knode.sel = &n->sel; cls_u32.knode.res = &n->res; cls_u32.knode.exts = &n->exts; if (n->ht_down) cls_u32.knode.link_handle = ht->handle; } return tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, &cls_u32, cb_priv, &n->flags, &n->in_hw_count); } static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; struct tc_u_knode *n; unsigned int h; int err; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; /* When adding filters to a new dev, try to offload the * hashtable first. When removing, do the filters before the * hashtable. */ if (add && !tc_skip_hw(ht->flags)) { err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack); if (err) return err; } for (h = 0; h <= ht->divisor; h++) { for (n = rtnl_dereference(ht->ht[h]); n; n = rtnl_dereference(n->next)) { if (tc_skip_hw(n->flags)) continue; err = u32_reoffload_knode(tp, n, add, cb, cb_priv, extack); if (err) return err; } } if (!add && !tc_skip_hw(ht->flags)) u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack); } return 0; } static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, unsigned long base) { struct tc_u_knode *n = fh; tc_cls_bind_class(classid, cl, q, &n->res, base); } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; struct nlattr *nest; if (n == NULL) return skb->len; t->tcm_handle = n->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (TC_U32_KEY(n->handle) == 0) { struct tc_u_hnode *ht = fh; u32 divisor = ht->divisor + 1; if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) goto nla_put_failure; } else { #ifdef CONFIG_CLS_U32_PERF struct tc_u32_pcnt *gpf; int cpu; #endif if (nla_put(skb, TCA_U32_SEL, struct_size(&n->sel, keys, n->sel.nkeys), &n->sel)) goto nla_put_failure; ht_up = rtnl_dereference(n->ht_up); if (ht_up) { u32 htid = n->handle & 0xFFFFF000; if (nla_put_u32(skb, TCA_U32_HASH, htid)) goto nla_put_failure; } if (n->res.classid && nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid)) goto nla_put_failure; ht_down = rtnl_dereference(n->ht_down); if (ht_down && nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) goto nla_put_failure; if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags)) goto nla_put_failure; #ifdef CONFIG_CLS_U32_MARK if ((n->val || n->mask)) { struct tc_u32_mark mark = {.val = n->val, .mask = 
n->mask, .success = 0}; int cpum; for_each_possible_cpu(cpum) { __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum); mark.success += cnt; } if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark)) goto nla_put_failure; } #endif if (tcf_exts_dump(skb, &n->exts) < 0) goto nla_put_failure; if (n->ifindex) { struct net_device *dev; dev = __dev_get_by_index(net, n->ifindex); if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name)) goto nla_put_failure; } #ifdef CONFIG_CLS_U32_PERF gpf = kzalloc(struct_size(gpf, kcnts, n->sel.nkeys), GFP_KERNEL); if (!gpf) goto nla_put_failure; for_each_possible_cpu(cpu) { int i; struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu); gpf->rcnt += pf->rcnt; gpf->rhit += pf->rhit; for (i = 0; i < n->sel.nkeys; i++) gpf->kcnts[i] += pf->kcnts[i]; } if (nla_put_64bit(skb, TCA_U32_PCNT, struct_size(gpf, kcnts, n->sel.nkeys), gpf, TCA_U32_PAD)) { kfree(gpf); goto nla_put_failure; } kfree(gpf); #endif } nla_nest_end(skb, nest); if (TC_U32_KEY(n->handle)) if (tcf_exts_dump_stats(skb, &n->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_u32_ops __read_mostly = { .kind = "u32", .classify = u32_classify, .init = u32_init, .destroy = u32_destroy, .get = u32_get, .change = u32_change, .delete = u32_delete, .walk = u32_walk, .reoffload = u32_reoffload, .dump = u32_dump, .bind_class = u32_bind_class, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("u32"); static int __init init_u32(void) { int i, ret; pr_info("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF pr_info(" Performance counters on\n"); #endif pr_info(" input device check on\n"); #ifdef CONFIG_NET_CLS_ACT pr_info(" Actions configured\n"); #endif tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!tc_u_common_hash) return -ENOMEM; for (i = 0; i < U32_HASH_SIZE; i++) INIT_HLIST_HEAD(&tc_u_common_hash[i]); ret = register_tcf_proto_ops(&cls_u32_ops); if (ret) kvfree(tc_u_common_hash); return ret; } static void __exit exit_u32(void) { unregister_tcf_proto_ops(&cls_u32_ops); kvfree(tc_u_common_hash); } module_init(init_u32) module_exit(exit_u32) MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL");
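Editor's note: the long comment in u32_change() above describes the 32-bit u32 handle as htid(12b):bucketid(8b):node/entryid(12b). The following is a minimal userspace sketch of that decomposition; the TC_U32_* macros mirror include/uapi/linux/pkt_cls.h, while main() and the sample handle value are illustrative only.

/* u32_handle_demo.c - decompose a u32 filter handle.
 * Macros as in include/uapi/linux/pkt_cls.h; TC_U32_ROOT is 0xFFF00000.
 */
#include <stdio.h>
#include <stdint.h>

#define TC_U32_HTID(h)     ((h) & 0xFFF00000)
#define TC_U32_USERHTID(h) (TC_U32_HTID(h) >> 20)
#define TC_U32_HASH(h)     (((h) >> 12) & 0xFF)
#define TC_U32_NODE(h)     ((h) & 0xFFF)
#define TC_U32_KEY(h)      ((h) & 0xFFFFF)

int main(void)
{
	/* htid 0x600, bucket 0x01, node 0x123, i.e. tc's 600:1:123 */
	uint32_t handle = 0x60001123;

	printf("htid   %#x\n", TC_U32_USERHTID(handle)); /* 0x600 */
	printf("bucket %#x\n", TC_U32_HASH(handle));     /* 0x1 */
	printf("node   %#x\n", TC_U32_NODE(handle));     /* 0x123 */
	printf("key    %#x\n", TC_U32_KEY(handle));      /* 0x1123 */
	return 0;
}

A node id of 0 would make TC_U32_KEY() zero, which is why u32_get() and u32_change() treat such handles as hash tables rather than match entries.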
// SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports: ports are in the same place so we can treat them as equal. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/udp.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/netfilter/xt_multiport.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP"); MODULE_ALIAS("ipt_multiport"); MODULE_ALIAS("ip6t_multiport"); /* Returns true if the port is matched by the test, false otherwise. */ static inline bool ports_match_v1(const struct xt_multiport_v1 *minfo, u_int16_t src, u_int16_t dst) { unsigned int i; u_int16_t s, e; for (i = 0; i < minfo->count; i++) { s = minfo->ports[i]; if (minfo->pflags[i]) { /* range port matching */ e = minfo->ports[++i]; pr_debug("src or dst matches with %d-%d?\n", s, e); switch (minfo->flags) { case XT_MULTIPORT_SOURCE: if (src >= s && src <= e) return true ^ minfo->invert; break; case XT_MULTIPORT_DESTINATION: if (dst >= s && dst <= e) return true ^ minfo->invert; break; case XT_MULTIPORT_EITHER: if ((dst >= s && dst <= e) || (src >= s && src <= e)) return true ^ minfo->invert; break; default: break; } } else { /* exact port matching */ pr_debug("src or dst matches with %d?\n", s); switch (minfo->flags) { case XT_MULTIPORT_SOURCE: if (src == s) return true ^ minfo->invert; break; case XT_MULTIPORT_DESTINATION: if (dst == s) return true ^ minfo->invert; break; case XT_MULTIPORT_EITHER: if (src == s || dst == s) return true ^ minfo->invert; break; default: break; } } } return minfo->invert; } static bool multiport_mt(const struct sk_buff *skb, struct xt_action_param *par) { const __be16 *pptr; __be16 _ports[2]; const struct xt_multiport_v1 *multiinfo = par->matchinfo; if (par->fragoff != 0) return false; pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); if (pptr == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop.
*/ pr_debug("Dropping evil offset=0 tinygram.\n"); par->hotdrop = true; return false; } return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); } static inline bool check(u_int16_t proto, u_int8_t ip_invflags, u_int8_t match_flags, u_int8_t count) { /* Must specify supported protocol, no unknown flags or bad count */ return (proto == IPPROTO_TCP || proto == IPPROTO_UDP || proto == IPPROTO_UDPLITE || proto == IPPROTO_SCTP || proto == IPPROTO_DCCP) && !(ip_invflags & XT_INV_PROTO) && (match_flags == XT_MULTIPORT_SOURCE || match_flags == XT_MULTIPORT_DESTINATION || match_flags == XT_MULTIPORT_EITHER) && count <= XT_MULTI_PORTS; } static int multiport_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count) ? 0 : -EINVAL; } static int multiport_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ip6 *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count) ? 0 : -EINVAL; } static struct xt_match multiport_mt_reg[] __read_mostly = { { .name = "multiport", .family = NFPROTO_IPV4, .revision = 1, .checkentry = multiport_mt_check, .match = multiport_mt, .matchsize = sizeof(struct xt_multiport_v1), .me = THIS_MODULE, }, { .name = "multiport", .family = NFPROTO_IPV6, .revision = 1, .checkentry = multiport_mt6_check, .match = multiport_mt, .matchsize = sizeof(struct xt_multiport_v1), .me = THIS_MODULE, }, }; static int __init multiport_mt_init(void) { return xt_register_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg)); } static void __exit multiport_mt_exit(void) { xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg)); } module_init(multiport_mt_init); module_exit(multiport_mt_exit);
// SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * Deflate algorithm (RFC 1951), implemented here primarily for use * by IPCOMP (RFC 3173 & RFC 2394). * * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> * Copyright (c) 2023 Google, LLC. <ardb@kernel.org> * Copyright (c) 2025 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/acompress.h> #include <crypto/scatterwalk.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/overflow.h> #include <linux/percpu.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/zlib.h> #define DEFLATE_DEF_LEVEL Z_DEFAULT_COMPRESSION #define DEFLATE_DEF_WINBITS 11 #define DEFLATE_DEF_MEMLEVEL MAX_MEM_LEVEL struct deflate_stream { struct z_stream_s stream; u8 workspace[]; }; static DEFINE_MUTEX(deflate_stream_lock); static void *deflate_alloc_stream(void) { size_t size = max(zlib_inflate_workspacesize(), zlib_deflate_workspacesize(-DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL)); struct deflate_stream *ctx; ctx = kvmalloc(struct_size(ctx, workspace, size), GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); ctx->stream.workspace = ctx->workspace; return ctx; } static void deflate_free_stream(void *ctx) { kvfree(ctx); } static struct crypto_acomp_streams deflate_streams = { .alloc_ctx = deflate_alloc_stream, .free_ctx = deflate_free_stream, }; static int deflate_compress_one(struct acomp_req *req, struct deflate_stream *ds) { struct z_stream_s *stream = &ds->stream; struct acomp_walk walk; int ret; ret = acomp_walk_virt(&walk, req, true); if (ret) return ret; do { unsigned int dcur; dcur = acomp_walk_next_dst(&walk); if (!dcur) return -ENOSPC; stream->avail_out = dcur; stream->next_out = walk.dst.virt.addr; do { int flush = Z_FINISH; unsigned int scur; stream->avail_in = 0; stream->next_in = NULL; scur = acomp_walk_next_src(&walk); if (scur) { if (acomp_walk_more_src(&walk, scur)) flush = Z_NO_FLUSH; stream->avail_in = scur; stream->next_in = walk.src.virt.addr; } ret = zlib_deflate(stream, flush); if (scur) { scur -= stream->avail_in; acomp_walk_done_src(&walk, scur); } } while (ret == Z_OK && stream->avail_out); acomp_walk_done_dst(&walk, dcur); } while (ret == Z_OK); if (ret != Z_STREAM_END) return -EINVAL; req->dlen = stream->total_out; return 0; } static int deflate_compress(struct acomp_req *req) { struct crypto_acomp_stream *s; struct deflate_stream *ds; int err; s = crypto_acomp_lock_stream_bh(&deflate_streams); ds = s->ctx; err = zlib_deflateInit2(&ds->stream, DEFLATE_DEF_LEVEL,
Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); if (err != Z_OK) { err = -EINVAL; goto out; } err = deflate_compress_one(req, ds); out: crypto_acomp_unlock_stream_bh(s); return err; } static int deflate_decompress_one(struct acomp_req *req, struct deflate_stream *ds) { struct z_stream_s *stream = &ds->stream; bool out_of_space = false; struct acomp_walk walk; int ret; ret = acomp_walk_virt(&walk, req, true); if (ret) return ret; do { unsigned int scur; stream->avail_in = 0; stream->next_in = NULL; scur = acomp_walk_next_src(&walk); if (scur) { stream->avail_in = scur; stream->next_in = walk.src.virt.addr; } do { unsigned int dcur; dcur = acomp_walk_next_dst(&walk); if (!dcur) { out_of_space = true; break; } stream->avail_out = dcur; stream->next_out = walk.dst.virt.addr; ret = zlib_inflate(stream, Z_NO_FLUSH); dcur -= stream->avail_out; acomp_walk_done_dst(&walk, dcur); } while (ret == Z_OK && stream->avail_in); if (scur) acomp_walk_done_src(&walk, scur); if (out_of_space) return -ENOSPC; } while (ret == Z_OK); if (ret != Z_STREAM_END) return -EINVAL; req->dlen = stream->total_out; return 0; } static int deflate_decompress(struct acomp_req *req) { struct crypto_acomp_stream *s; struct deflate_stream *ds; int err; s = crypto_acomp_lock_stream_bh(&deflate_streams); ds = s->ctx; err = zlib_inflateInit2(&ds->stream, -DEFLATE_DEF_WINBITS); if (err != Z_OK) { err = -EINVAL; goto out; } err = deflate_decompress_one(req, ds); out: crypto_acomp_unlock_stream_bh(s); return err; } static int deflate_init(struct crypto_acomp *tfm) { int ret; mutex_lock(&deflate_stream_lock); ret = crypto_acomp_alloc_streams(&deflate_streams); mutex_unlock(&deflate_stream_lock); return ret; } static struct acomp_alg acomp = { .compress = deflate_compress, .decompress = deflate_decompress, .init = deflate_init, .base.cra_name = "deflate", .base.cra_driver_name = "deflate-generic", .base.cra_flags = CRYPTO_ALG_REQ_VIRT, .base.cra_module = THIS_MODULE, }; static int __init deflate_mod_init(void) { return crypto_register_acomp(&acomp); } static void __exit deflate_mod_fini(void) { crypto_unregister_acomp(&acomp); crypto_acomp_free_streams(&deflate_streams); } module_init(deflate_mod_init); module_exit(deflate_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Deflate Compression Algorithm for IPCOMP"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>"); MODULE_AUTHOR("Herbert Xu <herbert@gondor.apana.org.au>"); MODULE_ALIAS_CRYPTO("deflate"); MODULE_ALIAS_CRYPTO("deflate-generic");
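Editor's note: the compress path above passes a negative windowBits (-DEFLATE_DEF_WINBITS) so zlib produces a raw DEFLATE stream with no zlib header or checksum, which is what IPComp expects; the inflate side must use the matching negative value. A minimal userspace round-trip under the same parameters, using ordinary zlib rather than the kernel's zlib_* wrappers (a sketch; build with -lz):

/* deflate_raw_demo.c - raw DEFLATE round-trip with windowBits -11. */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	static char msg[] = "raw deflate round-trip, no zlib header";
	unsigned char comp[256], out[256];
	z_stream zs;
	uLong clen;

	/* compress: raw stream, window 2^11, default memLevel 8 */
	memset(&zs, 0, sizeof(zs));
	if (deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -11, 8,
			 Z_DEFAULT_STRATEGY) != Z_OK)
		return 1;
	zs.next_in = (unsigned char *)msg;
	zs.avail_in = sizeof(msg);            /* includes the NUL */
	zs.next_out = comp;
	zs.avail_out = sizeof(comp);
	if (deflate(&zs, Z_FINISH) != Z_STREAM_END)
		return 1;
	clen = zs.total_out;
	deflateEnd(&zs);

	/* decompress: windowBits must match the compress side */
	memset(&zs, 0, sizeof(zs));
	if (inflateInit2(&zs, -11) != Z_OK)
		return 1;
	zs.next_in = comp;
	zs.avail_in = (uInt)clen;
	zs.next_out = out;
	zs.avail_out = sizeof(out);
	if (inflate(&zs, Z_FINISH) != Z_STREAM_END)
		return 1;
	inflateEnd(&zs);

	printf("%lu -> %lu bytes: %s\n",
	       (unsigned long)sizeof(msg), clen, out);
	return 0;
}

The single-shot Z_FINISH here stands in for the walk loops above, which exist only to feed scatterlist fragments through the same stream state.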
// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "mesh-interface.h" #include "main.h" #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/cpumask.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/percpu.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "hard-interface.h" #include "multicast.h" #include "send.h" #include "translation-table.h" /** * batadv_skb_head_push() - Increase header size and move (push) head pointer * @skb: packet buffer which should be modified * @len: number of bytes to add * * Return: 0 on success or negative error number in case of failure */ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; /* TODO: We must check if we can release all references to non-payload * data using __skb_header_release in our skbs to allow skb_cow_header * to work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer * to write freely in that area.
*/ result = skb_cow_head(skb, len); if (result < 0) return result; skb_push(skb, len); return 0; } /** * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' * @bat_priv: the bat priv with all the mesh interface information * @idx: index of counter to sum up * * Return: sum of all cpu-local counters */ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) { u64 *counters, sum = 0; int cpu; for_each_possible_cpu(cpu) { counters = per_cpu_ptr(bat_priv->bat_counters, cpu); sum += counters[idx]; } return sum; } static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED); stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX); stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES); return stats; } static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; struct sockaddr *addr = p; u8 old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; ether_addr_copy(old_addr, dev->dev_addr); eth_hw_addr_set(dev, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) return 0; rcu_read_lock(); hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { batadv_tt_local_remove(bat_priv, old_addr, vlan->vid, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, vlan->vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); } rcu_read_unlock(); return 0; } static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { struct batadv_priv *bat_priv = netdev_priv(dev); /* check ranges */ if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; WRITE_ONCE(dev->mtu, new_mtu); bat_priv->mtu_set_by_user = new_mtu; return 0; } /** * batadv_interface_set_rx_mode() - set the rx mode of a device * @dev: registered network device to modify * * We do not actually need to set any rx filters for the virtual batman * mesh interface. However a dummy handler enables a user to set static * multicast listeners for instance. 
*/ static void batadv_interface_set_rx_mode(struct net_device *dev) { } static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, struct net_device *mesh_iface) { struct ethhdr *ethhdr; struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}; static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 0x00, 0x00}; enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO; u8 *dst_hint = NULL, chaddr[ETH_ALEN]; struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; unsigned long brd_delay = 0; bool do_bcast = false, client_added; unsigned short vid; u32 seqno; int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_BCAST; int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); netif_trans_update(mesh_iface); vid = batadv_get_vid(skb, 0); skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); proto = ethhdr->h_proto; switch (ntohs(proto)) { case ETH_P_8021Q: if (!pskb_may_pull(skb, sizeof(*vhdr))) goto dropped; vhdr = vlan_eth_hdr(skb); proto = vhdr->h_vlan_encapsulated_proto; /* drop batman-in-batman packets to prevent loops */ if (proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } fallthrough; case ETH_P_BATMAN: goto dropped; } skb_set_network_header(skb, network_offset); if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; /* skb->data might have been reallocated by batadv_bla_tx() */ ethhdr = eth_hdr(skb); /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source) && !batadv_bla_is_loopdetect_mac(ethhdr->h_source)) { client_added = batadv_tt_local_add(mesh_iface, ethhdr->h_source, vid, skb->skb_iif, skb->mark); if (!client_added) goto dropped; } /* Snoop address candidates from DHCPACKs for early DAT filling */ batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid); /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... * * The same goes for ECTP sent at least by some Cisco Switches, * it might confuse the mesh when used with bridge loop avoidance. */ if (batadv_compare_eth(ethhdr->h_dest, stp_addr)) goto dropped; if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; gw_mode = atomic_read(&bat_priv->gw.mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* if gw mode is off, broadcast every packet */ if (gw_mode == BATADV_GW_MODE_OFF) { do_bcast = true; goto send; } dhcp_rcp = batadv_gw_dhcp_recipient_get(skb, &header_len, chaddr); /* skb->data may have been modified by * batadv_gw_dhcp_recipient_get() */ ethhdr = eth_hdr(skb); /* if gw_mode is on, broadcast any non-DHCP message. 
* All the DHCP packets are going to be sent as unicast */ if (dhcp_rcp == BATADV_DHCP_NO) { do_bcast = true; goto send; } if (dhcp_rcp == BATADV_DHCP_TO_CLIENT) dst_hint = chaddr; else if ((gw_mode == BATADV_GW_MODE_SERVER) && (dhcp_rcp == BATADV_DHCP_TO_SERVER)) /* gateways should not forward any DHCP message if * directed to a DHCP server */ goto dropped; send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid, &mcast_is_routable); switch (forw_mode) { case BATADV_FORW_BCAST: break; case BATADV_FORW_UCASTS: case BATADV_FORW_MCAST: do_bcast = false; break; case BATADV_FORW_NONE: fallthrough; default: goto dropped; } } } batadv_skb_set_priority(skb, 0); /* ethernet packet should be broadcast */ if (do_bcast) { primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto dropped; /* in case of ARP request, we do not immediately broadcast the * packet, instead we first wait for DAT to try to retrieve the * correct ARP entry */ if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) brd_delay = msecs_to_jiffies(ARP_REQ_DELAY); if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; bcast_packet->version = BATADV_COMPAT_VERSION; bcast_packet->ttl = BATADV_TTL - 1; /* batman packet type: broadcast */ bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only * this mac is known throughout the mesh */ ether_addr_copy(bcast_packet->orig, primary_if->net_dev->dev_addr); /* set broadcast sequence number */ seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); batadv_send_bcast_packet(bat_priv, skb, brd_delay, true); /* unicast packet */ } else { /* DHCP packets going to a server will use the GW feature */ if (dhcp_rcp == BATADV_DHCP_TO_SERVER) { ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); } else if (forw_mode == BATADV_FORW_UCASTS) { ret = batadv_mcast_forw_send(bat_priv, skb, vid, mcast_is_routable); } else if (forw_mode == BATADV_FORW_MCAST) { ret = batadv_mcast_forw_mcsend(bat_priv, skb); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) goto dropped; batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, vid); } if (ret != NET_XMIT_SUCCESS) goto dropped_freed; } batadv_inc_counter(bat_priv, BATADV_CNT_TX); batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len); goto end; dropped: kfree_skb(skb); dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: batadv_hardif_put(primary_if); return NETDEV_TX_OK; } /** * batadv_interface_rx() - receive ethernet frame on local batman-adv interface * @mesh_iface: local interface which will receive the ethernet frame * @skb: ethernet frame for @mesh_iface * @hdr_size: size of already parsed batman-adv header * @orig_node: originator from which the batman-adv packet was sent * * Sends an ethernet frame to the receive path of the local @mesh_iface. * skb->data still points to the batman-adv header with the size @hdr_size. * The caller has to have parsed this header already and made sure that at least * @hdr_size bytes are still available for pull in @skb. * * The packet may still get dropped. This can happen when the encapsulated * ethernet frame is invalid or again contains a batman-adv packet.
Also, * unicast packets will be dropped directly when sent between two * isolated clients. */ void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; int packet_type; batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; packet_type = batadv_bcast_packet->packet_type; skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); /* clean the netfilter state now that the batman-adv header has been * removed */ nf_reset_ct(skb); if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) goto dropped; vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) goto dropped; vhdr = skb_vlan_eth_hdr(skb); /* drop batman-in-batman packets to prevent loops */ if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) break; fallthrough; case ETH_P_BATMAN: goto dropped; } /* skb->dev & skb->pkt_type are set here */ skb->protocol = eth_type_trans(skb, mesh_iface); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); /* Let the bridge loop avoidance check the packet. If it will * not handle it, we can safely push it up. */ if (batadv_bla_rx(bat_priv, skb, vid, packet_type)) goto out; if (orig_node) batadv_tt_add_temporary_global_entry(bat_priv, orig_node, ethhdr->h_source, vid); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* set the mark on broadcast packets if AP isolation is ON and * the packet is coming from an "isolated" client */ if (batadv_vlan_ap_isola_get(bat_priv, vid) && batadv_tt_global_is_isolated(bat_priv, ethhdr->h_source, vid)) { /* save bits in skb->mark not covered by the mask and * apply the mark on the rest */ skb->mark &= ~bat_priv->isolation_mark_mask; skb->mark |= bat_priv->isolation_mark; } } else if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest, vid)) { goto dropped; } netif_rx(skb); goto out; dropped: kfree_skb(skb); out: return; } /** * batadv_meshif_vlan_release() - release vlan from lists and queue for free * after rcu grace period * @ref: kref pointer of the vlan object */ void batadv_meshif_vlan_release(struct kref *ref) { struct batadv_meshif_vlan *vlan; vlan = container_of(ref, struct batadv_meshif_vlan, refcount); spin_lock_bh(&vlan->bat_priv->meshif_vlan_list_lock); hlist_del_rcu(&vlan->list); spin_unlock_bh(&vlan->bat_priv->meshif_vlan_list_lock); kfree_rcu(vlan, rcu); } /** * batadv_meshif_vlan_get() - get the vlan object for a specific vid * @bat_priv: the bat priv with all the mesh interface information * @vid: the identifier of the vlan object to retrieve * * Return: the private data of the vlan matching the vid passed as argument or * NULL otherwise. The refcounter of the returned object is incremented by 1.
*/ struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_meshif_vlan *vlan_tmp, *vlan = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->meshif_vlan_list, list) { if (vlan_tmp->vid != vid) continue; if (!kref_get_unless_zero(&vlan_tmp->refcount)) continue; vlan = vlan_tmp; break; } rcu_read_unlock(); return vlan; } /** * batadv_meshif_create_vlan() - allocate the needed resources for a new vlan * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier * * Return: 0 on success, a negative error otherwise. */ int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_meshif_vlan *vlan; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { batadv_meshif_vlan_put(vlan); spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) { spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -ENOMEM; } vlan->bat_priv = bat_priv; vlan->vid = vid; kref_init(&vlan->refcount); atomic_set(&vlan->ap_isolation, 0); kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->meshif_vlan_list); spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ batadv_tt_local_add(bat_priv->mesh_iface, bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); /* don't return reference to new meshif_vlan */ batadv_meshif_vlan_put(vlan); return 0; } /** * batadv_meshif_destroy_vlan() - remove and destroy a meshif_vlan object * @bat_priv: the bat priv with all the mesh interface information * @vlan: the object to remove */ static void batadv_meshif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_meshif_vlan *vlan) { /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->mesh_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); batadv_meshif_vlan_put(vlan); } /** * batadv_interface_add_vid() - ndo_add_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the new vlan * * Set up all the internal structures for handling the new vlan on top of the * mesh interface * * Return: 0 on success or a negative error code in case of failure. */ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* VID 0 is only used to indicate "priority tag" frames which only * contain priority information and no VID. No management structures * should be created for this VID and it should be handled like an * untagged frame. */ if (vid == 0) return 0; vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that * it was not deleted yet. batadv_meshif_vlan_get() increases the * refcount in order to revive the object. * * if it does not exist then create it. */ vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return batadv_meshif_create_vlan(bat_priv, vid); /* add a new TT local entry. This one will be marked with the NOPURGE * flag. 
This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() */ batadv_tt_local_add(bat_priv->mesh_iface, bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); return 0; } /** * batadv_interface_kill_vid() - ndo_kill_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the deleted vlan * * Destroy all the internal structures used to handle the vlan identified by vid * on top of the mesh interface * * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q * or -ENOENT if the specified vlan id wasn't registered. */ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. batman-adv does not know how to * handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* "priority tag" frames are handled like "untagged" frames * and no meshif_vlan needs to be destroyed */ if (vid == 0) return 0; vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; batadv_meshif_destroy_vlan(bat_priv, vlan); /* finally free the vlan object */ batadv_meshif_vlan_put(vlan); return 0; } /* batman-adv network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ static struct lock_class_key batadv_netdev_xmit_lock_key; static struct lock_class_key batadv_netdev_addr_lock_key; /** * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue * @dev: device which owns the tx queue * @txq: tx queue to modify * @_unused: always NULL */ static void batadv_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) { lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); } /** * batadv_set_lockdep_class() - Set txq and addr_list lockdep class * @dev: network device to modify */ static void batadv_set_lockdep_class(struct net_device *dev) { lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } /** * batadv_meshif_init_late() - late stage initialization of mesh interface * @dev: registered network device to modify * * Return: error code on failures */ static int batadv_meshif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; u32 random_seqno; int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; batadv_set_lockdep_class(dev); bat_priv = netdev_priv(dev); bat_priv->mesh_iface = dev; /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(u64)); if (!bat_priv->bat_counters) return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 1); #endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif #ifdef CONFIG_BATMAN_ADV_MCAST atomic_set(&bat_priv->multicast_mode, 1); atomic_set(&bat_priv->multicast_fanout, 16); atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif 
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->packet_size_max, BATADV_MAX_MTU); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); atomic_set(&bat_priv->tt.vn, 0); atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif atomic_set(&bat_priv->tp_num, 0); WRITE_ONCE(bat_priv->tt.local_changes, 0); bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; bat_priv->isolation_mark_mask = 0; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&bat_priv->frag_seqno, random_seqno); bat_priv->primary_if = NULL; if (!bat_priv->algo_ops) { ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) goto free_bat_counters; } ret = batadv_mesh_init(dev); if (ret < 0) goto free_bat_counters; return 0; free_bat_counters: free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; return ret; } /** * batadv_meshif_slave_add() - Add a slave interface to a batadv_mesh_interface * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_slave_add(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->mesh_iface) goto out; ret = batadv_hardif_enable_interface(hard_iface, dev); out: batadv_hardif_put(hard_iface); return ret; } /** * batadv_meshif_slave_del() - Delete a slave iface from a batadv_mesh_interface * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_slave_del(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->mesh_iface != dev) goto out; batadv_hardif_disable_interface(hard_iface); ret = 0; out: batadv_hardif_put(hard_iface); return ret; } static const struct net_device_ops batadv_netdev_ops = { .ndo_init = batadv_meshif_init_late, .ndo_get_stats = batadv_interface_stats, .ndo_vlan_rx_add_vid = batadv_interface_add_vid, .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, .ndo_set_mac_address = batadv_interface_set_mac_addr, .ndo_change_mtu = batadv_interface_change_mtu, .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, .ndo_add_slave = batadv_meshif_slave_add, .ndo_del_slave = batadv_meshif_slave_del, }; static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strscpy(info->driver, "B.A.T.M.A.N. 
advanced", sizeof(info->driver)); strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); strscpy(info->bus_info, "batman", sizeof(info->bus_info)); } /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 * Declare each description string in struct.name[] to get fixed sized buffer * and compile time checking for strings longer than ETH_GSTRING_LEN. */ static const struct { const char name[ETH_GSTRING_LEN]; } batadv_counters_strings[] = { { "tx" }, { "tx_bytes" }, { "tx_dropped" }, { "rx" }, { "rx_bytes" }, { "forward" }, { "forward_bytes" }, { "mgmt_tx" }, { "mgmt_tx_bytes" }, { "mgmt_rx" }, { "mgmt_rx_bytes" }, { "frag_tx" }, { "frag_tx_bytes" }, { "frag_rx" }, { "frag_rx_bytes" }, { "frag_fwd" }, { "frag_fwd_bytes" }, { "tt_request_tx" }, { "tt_request_rx" }, { "tt_response_tx" }, { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, #ifdef CONFIG_BATMAN_ADV_MCAST { "mcast_tx" }, { "mcast_tx_bytes" }, { "mcast_tx_local" }, { "mcast_tx_local_bytes" }, { "mcast_rx" }, { "mcast_rx_bytes" }, { "mcast_rx_local" }, { "mcast_rx_local_bytes" }, { "mcast_fwd" }, { "mcast_fwd_bytes" }, #endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, { "dat_put_tx" }, { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif }; static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) { if (stringset == ETH_SS_STATS) memcpy(data, batadv_counters_strings, sizeof(batadv_counters_strings)); } static void batadv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct batadv_priv *bat_priv = netdev_priv(dev); int i; for (i = 0; i < BATADV_CNT_NUM; i++) data[i] = batadv_sum_counter(bat_priv, i); } static int batadv_get_sset_count(struct net_device *dev, int stringset) { if (stringset == ETH_SS_STATS) return BATADV_CNT_NUM; return -EOPNOTSUPP; } static const struct ethtool_ops batadv_ethtool_ops = { .get_drvinfo = batadv_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = batadv_get_strings, .get_ethtool_stats = batadv_get_ethtool_stats, .get_sset_count = batadv_get_sset_count, }; /** * batadv_meshif_free() - Deconstructor of batadv_mesh_interface * @dev: Device to cleanup and remove */ static void batadv_meshif_free(struct net_device *dev) { batadv_mesh_free(dev); /* some scheduled RCU callbacks need the bat_priv struct to accomplish * their tasks. Wait for them all to be finished before freeing the * netdev and its private data (bat_priv) */ rcu_barrier(); } /** * batadv_meshif_init_early() - early stage initialization of mesh interface * @dev: registered network device to modify */ static void batadv_meshif_init_early(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = batadv_meshif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; dev->netns_immutable = true; /* can't call min_mtu, because the needed variables * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; dev->max_mtu = BATADV_MAX_MTU; /* generate random address */ eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; } /** * batadv_meshif_validate() - validate configuration of new batadv link * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. 
*/ static int batadv_meshif_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_algo_ops *algo_ops; if (!data) return 0; if (data[IFLA_BATADV_ALGO_NAME]) { algo_ops = batadv_algo_get(nla_data(data[IFLA_BATADV_ALGO_NAME])); if (!algo_ops) return -EINVAL; } return 0; } /** * batadv_meshif_newlink() - pre-initialize and register new batadv link * @dev: network device to register * @params: rtnl newlink parameters * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct batadv_priv *bat_priv = netdev_priv(dev); struct nlattr **data = params->data; const char *algo_name; int err; if (data && data[IFLA_BATADV_ALGO_NAME]) { algo_name = nla_data(data[IFLA_BATADV_ALGO_NAME]); err = batadv_algo_select(bat_priv, algo_name); if (err) return -EINVAL; } return register_netdevice(dev); } /** * batadv_meshif_destroy_netlink() - deletion of batadv_mesh_interface via * netlink * @mesh_iface: the to-be-removed batman-adv interface * @head: list pointer */ static void batadv_meshif_destroy_netlink(struct net_device *mesh_iface, struct list_head *head) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *hard_iface; struct batadv_meshif_vlan *vlan; while (!list_empty(&mesh_iface->adj_list.lower)) { hard_iface = netdev_adjacent_get_private(mesh_iface->adj_list.lower.next); batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (vlan) { batadv_meshif_destroy_vlan(bat_priv, vlan); batadv_meshif_vlan_put(vlan); } unregister_netdevice_queue(mesh_iface, head); } /** * batadv_meshif_is_valid() - Check whether device is a batadv mesh interface * @net_dev: device which should be checked * * Return: true when net_dev is a batman-adv interface, false otherwise */ bool batadv_meshif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) return true; return false; } static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { [IFLA_BATADV_ALGO_NAME] = { .type = NLA_NUL_STRING }, }; struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), .setup = batadv_meshif_init_early, .maxtype = IFLA_BATADV_MAX, .policy = batadv_ifla_policy, .validate = batadv_meshif_validate, .newlink = batadv_meshif_newlink, .dellink = batadv_meshif_destroy_netlink, };
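The AP-isolation branch in batadv_interface_rx() above merges the configured isolation mark into skb->mark while preserving every bit that falls outside the configured mask, so other users of skb->mark keep working. A minimal standalone sketch of that mask-and-merge arithmetic; merge_isolation_mark() and the constants are illustrative only, not batman-adv code:

#include <assert.h>
#include <stdint.h>

/* mirror of the two-step update done on skb->mark above */
static uint32_t merge_isolation_mark(uint32_t skb_mark, uint32_t mark,
				     uint32_t mask)
{
	skb_mark &= ~mask;	/* keep bits not covered by the mask */
	skb_mark |= mark;	/* apply the isolation mark on the rest */
	return skb_mark;
}

int main(void)
{
	/* illustrative: a one-byte mask only touches the low 8 bits */
	assert(merge_isolation_mark(0xdeadbeef, 0x00000001, 0x000000ff) ==
	       0xdeadbe01);
	return 0;
}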
/* SPDX-License-Identifier: GPL-2.0+ */ /* * the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi. * */ #ifndef _THE_NILFS_H #define _THE_NILFS_H #include <linux/types.h> #include <linux/buffer_head.h> #include <linux/rbtree.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/slab.h> #include <linux/refcount.h> struct nilfs_sc_info; struct nilfs_sysfs_dev_subgroups; /* the_nilfs struct */ enum { THE_NILFS_INIT = 0, /* Information from super_block is set */ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ THE_NILFS_PURGING, /* disposing dirty files for cleanup */ }; /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags * @ns_flushed_device: flag indicating if all volatile data was flushed * @ns_sb: back pointer to super block instance * @ns_bdev: block device * @ns_sem: semaphore for shared states * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block * @ns_mount_state: file system state * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds) * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next * @ns_pseg_offset: offset of next partial segment in the current full segment * @ns_cno: next checkpoint number * @ns_ctime: write time of the last segment * @ns_nongc_ctime: write time of the last segment not for cleaner operation * @ns_ndirtyblks: Number of dirty data blocks * @ns_last_segment_lock: lock protecting fields for the latest segment * @ns_last_pseg: start block number of the latest segment * @ns_last_seq: sequence value of the latest segment * @ns_last_cno: checkpoint number of the latest segment * @ns_prot_seq: least sequence number of segments which must not be reclaimed * @ns_prev_seq: base sequence number used to decide if advance log cursor * @ns_writer: log writer * @ns_segctor_sem: semaphore protecting log write * @ns_dat: DAT file inode * @ns_cpfile: checkpoint file inode * @ns_sufile: segusage file inode * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) * @ns_cptree_lock: lock protecting @ns_cptree * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks * @ns_interval: checkpoint creation interval * @ns_watermark: watermark for the number of dirty buffers * @ns_blocksize_bits: bit length of block size * @ns_blocksize: block size * @ns_nsegments: number of segments in filesystem * @ns_blocks_per_segment: number of blocks per segment * @ns_r_segments_percentage: reserved segments percentage * @ns_nrsvsegs: number of reserved segments * @ns_first_data_block: block number of first data block * @ns_inode_size: size of on-disk inode * @ns_first_ino: first not-special inode number * @ns_crc_seed: seed value of CRC32 calculation * @ns_dev_kobj: /sys/fs/<nilfs>/<device> * @ns_dev_kobj_unregister: completion state * @ns_dev_subgroups: <device> subgroups pointer */ struct the_nilfs { unsigned long ns_flags; int ns_flushed_device; struct super_block *ns_sb; struct block_device *ns_bdev; struct rw_semaphore ns_sem; struct mutex ns_snapshot_mount_mutex; /* * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; time64_t ns_sbwtime; unsigned int ns_sbwcount; unsigned int ns_sbsize; unsigned int ns_mount_state; unsigned int ns_sb_update_freq; /* * The following fields are updated by a writable FS-instance. * These fields are protected by ns_segctor_sem outside load_nilfs(). */ u64 ns_seg_seq; __u64 ns_segnum; __u64 ns_nextnum; unsigned long ns_pseg_offset; __u64 ns_cno; time64_t ns_ctime; time64_t ns_nongc_ctime; atomic_t ns_ndirtyblks; /* * The following fields hold information on the latest partial segment * written to disk with a super root. These fields are protected by * ns_last_segment_lock. */ spinlock_t ns_last_segment_lock; sector_t ns_last_pseg; u64 ns_last_seq; __u64 ns_last_cno; u64 ns_prot_seq; u64 ns_prev_seq; struct nilfs_sc_info *ns_writer; struct rw_semaphore ns_segctor_sem; /* * Following fields are lock free except for the period before * the_nilfs is initialized. 
*/ struct inode *ns_dat; struct inode *ns_cpfile; struct inode *ns_sufile; /* Checkpoint tree */ struct rb_root ns_cptree; spinlock_t ns_cptree_lock; /* Dirty inode list */ struct list_head ns_dirty_files; spinlock_t ns_inode_lock; /* GC inode list */ struct list_head ns_gc_inodes; /* Mount options */ unsigned long ns_mount_opt; uid_t ns_resuid; gid_t ns_resgid; unsigned long ns_interval; unsigned long ns_watermark; /* Disk layout information (static) */ unsigned int ns_blocksize_bits; unsigned int ns_blocksize; unsigned long ns_nsegments; unsigned long ns_blocks_per_segment; unsigned long ns_r_segments_percentage; unsigned long ns_nrsvsegs; unsigned long ns_first_data_block; int ns_inode_size; unsigned int ns_first_ino; u32 ns_crc_seed; /* /sys/fs/<nilfs>/<device> */ struct kobject ns_dev_kobj; struct completion ns_dev_kobj_unregister; struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups; }; #define THE_NILFS_FNS(bit, name) \ static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ { \ set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \ } \ static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \ { \ clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \ } \ static inline int nilfs_##name(struct the_nilfs *nilfs) \ { \ return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \ } THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) THE_NILFS_FNS(PURGING, purging) /* * Mount option operations */ #define nilfs_clear_opt(nilfs, opt) \ ((nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt) #define nilfs_set_opt(nilfs, opt) \ ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt) #define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) /** * struct nilfs_root - nilfs root object * @cno: checkpoint number * @rb_node: red-black tree node * @count: refcount of this structure * @nilfs: nilfs object * @ifile: inode file * @inodes_count: number of inodes * @blocks_count: number of blocks * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> * @snapshot_kobj_unregister: completion state for kernel object */ struct nilfs_root { __u64 cno; struct rb_node rb_node; refcount_t count; struct the_nilfs *nilfs; struct inode *ifile; atomic64_t inodes_count; atomic64_t blocks_count; /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */ struct kobject snapshot_kobj; struct completion snapshot_kobj_unregister; }; /* Special checkpoint number */ #define NILFS_CPTREE_CURRENT_CNO 0 /* Minimum interval of periodical update of superblocks (in seconds) */ #define NILFS_SB_FREQ 10 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) { u64 t = ktime_get_real_seconds(); return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq; } static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) { int flip_bits = nilfs->ns_sbwcount & 0x0FL; return (flip_bits != 0x08 && flip_bits != 0x0F); } void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct super_block *sb); void destroy_nilfs(struct the_nilfs *nilfs); int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); struct nilfs_root 
*nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno); void nilfs_put_root(struct nilfs_root *root); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); void nilfs_swap_super_block(struct the_nilfs *); static inline void nilfs_get_root(struct nilfs_root *root) { refcount_inc(&root->count); } static inline int nilfs_valid_fs(struct the_nilfs *nilfs) { unsigned int valid_fs; down_read(&nilfs->ns_sem); valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); up_read(&nilfs->ns_sem); return valid_fs; } static inline void nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, sector_t *seg_start, sector_t *seg_end) { *seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum; *seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1; if (segnum == 0) *seg_start = nilfs->ns_first_data_block; } static inline sector_t nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum) { return (segnum == 0) ? nilfs->ns_first_data_block : (sector_t)nilfs->ns_blocks_per_segment * segnum; } static inline __u64 nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr) { sector_t segnum = blocknr; sector_div(segnum, nilfs->ns_blocks_per_segment); return segnum; } static inline void nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start, sector_t seg_end) { /* terminate the current full segment (used in case of I/O-error) */ nilfs->ns_pseg_offset = seg_end - seg_start + 1; } static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs) { /* move forward with a full segment */ nilfs->ns_segnum = nilfs->ns_nextnum; nilfs->ns_pseg_offset = 0; nilfs->ns_seg_seq++; } static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs) { __u64 cno; spin_lock(&nilfs->ns_last_segment_lock); cno = nilfs->ns_last_cno; spin_unlock(&nilfs->ns_last_segment_lock); return cno; } static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) { return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; } static inline int nilfs_flush_device(struct the_nilfs *nilfs) { int err; if (!nilfs_test_opt(nilfs, BARRIER) || nilfs->ns_flushed_device) return 0; nilfs->ns_flushed_device = 1; /* * the store to ns_flushed_device must not be reordered after * blkdev_issue_flush(). */ smp_wmb(); err = blkdev_issue_flush(nilfs->ns_bdev); if (err != -EIO) err = 0; return err; } #endif /* _THE_NILFS_H */
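The inline helpers above reduce segment geometry to plain arithmetic on ns_blocks_per_segment, with segment 0 special-cased to begin at ns_first_data_block (the blocks in front of it hold the super block). A small userspace sketch of the same computation, under assumed illustrative geometry of 2048 blocks per segment and a first data block of 2:

#include <stdint.h>
#include <stdio.h>

#define BLOCKS_PER_SEGMENT 2048ULL	/* stands in for ns_blocks_per_segment */
#define FIRST_DATA_BLOCK   2ULL		/* stands in for ns_first_data_block */

/* same logic as nilfs_get_segment_range() above */
static void segment_range(uint64_t segnum, uint64_t *seg_start,
			  uint64_t *seg_end)
{
	*seg_start = BLOCKS_PER_SEGMENT * segnum;
	*seg_end = *seg_start + BLOCKS_PER_SEGMENT - 1;
	if (segnum == 0)
		*seg_start = FIRST_DATA_BLOCK;
}

int main(void)
{
	uint64_t s, e;

	segment_range(0, &s, &e);	/* blocks 2..2047 */
	printf("segment 0: blocks %llu..%llu\n",
	       (unsigned long long)s, (unsigned long long)e);
	segment_range(3, &s, &e);	/* blocks 6144..8191 */
	printf("segment 3: blocks %llu..%llu\n",
	       (unsigned long long)s, (unsigned long long)e);
	return 0;
}

Note that segment 0 comes out shorter than the others: its end block is unchanged, only its start is pushed forward to the first data block.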
// SPDX-License-Identifier: GPL-2.0 /* Multipath TCP token management * Copyright (c) 2017 - 2019, Intel Corporation.
* * Note: This code is based on mptcp_ctrl.c from multipath-tcp.org, * authored by: * * Sébastien Barré <sebastien.barre@uclouvain.be> * Christoph Paasch <christoph.paasch@uclouvain.be> * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> * Gregory Detal <gregory.detal@uclouvain.be> * Fabien Duchêne <fabien.duchene@uclouvain.be> * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> * Lavkesh Lahngir <lavkesh51@gmail.com> * Andreas Ripke <ripke@neclab.eu> * Vlad Dogaru <vlad.dogaru@intel.com> * Octavian Purdila <octavian.purdila@intel.com> * John Ronan <jronan@tssg.org> * Catalin Nicutar <catalin.nicutar@gmail.com> * Brandon Heller <brandonh@stanford.edu> */ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/memblock.h> #include <linux/ip.h> #include <linux/tcp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/protocol.h> #include <net/mptcp.h> #include "protocol.h" #define TOKEN_MAX_CHAIN_LEN 4 struct token_bucket { spinlock_t lock; int chain_len; struct hlist_nulls_head req_chain; struct hlist_nulls_head msk_chain; }; static struct token_bucket *token_hash __read_mostly; static unsigned int token_mask __read_mostly; static struct token_bucket *token_bucket(u32 token) { return &token_hash[token & token_mask]; } /* called with bucket lock held */ static struct mptcp_subflow_request_sock * __token_lookup_req(struct token_bucket *t, u32 token) { struct mptcp_subflow_request_sock *req; struct hlist_nulls_node *pos; hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node) if (req->token == token) return req; return NULL; } /* called with bucket lock held */ static struct mptcp_sock * __token_lookup_msk(struct token_bucket *t, u32 token) { struct hlist_nulls_node *pos; struct sock *sk; sk_nulls_for_each_rcu(sk, pos, &t->msk_chain) if (mptcp_sk(sk)->token == token) return mptcp_sk(sk); return NULL; } static bool __token_bucket_busy(struct token_bucket *t, u32 token) { return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN || __token_lookup_req(t, token) || __token_lookup_msk(t, token); } static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) { /* we might consider a faster version that computes the key as a * hash of some information available in the MPTCP socket. Use * random data at the moment, as it's probably the safest option * in case multiple sockets are opened in different namespaces at * the same time. */ get_random_bytes(key, sizeof(u64)); mptcp_crypto_key_sha(*key, token, idsn); } /** * mptcp_token_new_request - create new key/idsn/token for subflow_request * @req: the request socket * * This function is called when a new mptcp connection is coming in. * * It creates a unique token to identify the new mptcp connection, * a secret local key and the initial data sequence number (idsn). * * Returns 0 on success. 
*/ int mptcp_token_new_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct token_bucket *bucket; u32 token; mptcp_crypto_key_sha(subflow_req->local_key, &subflow_req->token, &subflow_req->idsn); pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", req, subflow_req->local_key, subflow_req->token, subflow_req->idsn); token = subflow_req->token; bucket = token_bucket(token); spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, token)) { spin_unlock_bh(&bucket->lock); return -EBUSY; } hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain); bucket->chain_len++; spin_unlock_bh(&bucket->lock); return 0; } /** * mptcp_token_new_connect - create new key/idsn/token for subflow * @ssk: the socket that will initiate a connection * * This function is called when a new outgoing mptcp connection is * initiated. * * It creates a unique token to identify the new mptcp connection, * a secret local key and the initial data sequence number (idsn). * * On success, the mptcp connection can be found again using * the computed token at a later time, this is needed to process * join requests. * * returns 0 on success. */ int mptcp_token_new_connect(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); int retries = MPTCP_TOKEN_MAX_RETRIES; struct sock *sk = subflow->conn; struct token_bucket *bucket; again: mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token, &subflow->idsn); bucket = token_bucket(subflow->token); spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, subflow->token)) { spin_unlock_bh(&bucket->lock); if (!--retries) return -EBUSY; goto again; } pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n", ssk, subflow->local_key, subflow->token, subflow->idsn); WRITE_ONCE(msk->token, subflow->token); __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); bucket->chain_len++; spin_unlock_bh(&bucket->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } /** * mptcp_token_accept - replace a req sk with full sock in token hash * @req: the request socket to be removed * @msk: the just cloned socket linked to the new connection * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. 
*/ void mptcp_token_accept(struct mptcp_subflow_request_sock *req, struct mptcp_sock *msk) { struct mptcp_subflow_request_sock *pos; struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); bucket = token_bucket(req->token); spin_lock_bh(&bucket->lock); /* pedantic lookup check for the moved token */ pos = __token_lookup_req(bucket, req->token); if (!WARN_ON_ONCE(pos != req)) hlist_nulls_del_init_rcu(&req->token_node); __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); spin_unlock_bh(&bucket->lock); } bool mptcp_token_exists(u32 token) { struct hlist_nulls_node *pos; struct token_bucket *bucket; struct mptcp_sock *msk; struct sock *sk; rcu_read_lock(); bucket = token_bucket(token); again: sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { msk = mptcp_sk(sk); if (READ_ONCE(msk->token) == token) goto found; } if (get_nulls_value(pos) != (token & token_mask)) goto again; rcu_read_unlock(); return false; found: rcu_read_unlock(); return true; } /** * mptcp_token_get_sock - retrieve mptcp connection sock using its token * @net: restrict to this namespace * @token: token of the mptcp connection to retrieve * * This function returns the mptcp connection structure with the given token. * A reference count on the mptcp socket returned is taken. * * returns NULL if no connection with the given token value exists. */ struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token) { struct hlist_nulls_node *pos; struct token_bucket *bucket; struct mptcp_sock *msk; struct sock *sk; rcu_read_lock(); bucket = token_bucket(token); again: sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { msk = mptcp_sk(sk); if (READ_ONCE(msk->token) != token || !net_eq(sock_net(sk), net)) continue; if (!refcount_inc_not_zero(&sk->sk_refcnt)) goto not_found; if (READ_ONCE(msk->token) != token || !net_eq(sock_net(sk), net)) { sock_put(sk); goto again; } goto found; } if (get_nulls_value(pos) != (token & token_mask)) goto again; not_found: msk = NULL; found: rcu_read_unlock(); return msk; } EXPORT_SYMBOL_GPL(mptcp_token_get_sock); /** * mptcp_token_iter_next - iterate over the token container from given pos * @net: namespace to be iterated * @s_slot: start slot number * @s_num: start number inside the given lock * * This function returns the first mptcp connection structure found inside the * token container starting from the specified position, or NULL. * * On successful iteration, the iterator is moved to the next position and * a reference to the returned socket is acquired. */ struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num) { struct mptcp_sock *ret = NULL; struct hlist_nulls_node *pos; int slot, num = 0; for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) { struct token_bucket *bucket = &token_hash[slot]; struct sock *sk; num = 0; if (hlist_nulls_empty(&bucket->msk_chain)) continue; rcu_read_lock(); sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { ++num; if (!net_eq(sock_net(sk), net)) continue; if (num <= *s_num) continue; if (!refcount_inc_not_zero(&sk->sk_refcnt)) continue; if (!net_eq(sock_net(sk), net)) { sock_put(sk); continue; } ret = mptcp_sk(sk); rcu_read_unlock(); goto out; } rcu_read_unlock(); } out: *s_slot = slot; *s_num = num; return ret; } EXPORT_SYMBOL_GPL(mptcp_token_iter_next); /** * mptcp_token_destroy_request - remove mptcp connection/token * @req: mptcp request socket dropping the token * * Remove the token associated to @req. 
*/ void mptcp_token_destroy_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_subflow_request_sock *pos; struct token_bucket *bucket; if (hlist_nulls_unhashed(&subflow_req->token_node)) return; bucket = token_bucket(subflow_req->token); spin_lock_bh(&bucket->lock); pos = __token_lookup_req(bucket, subflow_req->token); if (!WARN_ON_ONCE(pos != subflow_req)) { hlist_nulls_del_init_rcu(&pos->token_node); bucket->chain_len--; } spin_unlock_bh(&bucket->lock); } /** * mptcp_token_destroy - remove mptcp connection/token * @msk: mptcp connection dropping the token * * Remove the token associated to @msk */ void mptcp_token_destroy(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; struct mptcp_sock *pos; if (sk_unhashed((struct sock *)msk)) return; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); bucket = token_bucket(msk->token); spin_lock_bh(&bucket->lock); pos = __token_lookup_msk(bucket, msk->token); if (!WARN_ON_ONCE(pos != msk)) { __sk_nulls_del_node_init_rcu((struct sock *)pos); bucket->chain_len--; } spin_unlock_bh(&bucket->lock); WRITE_ONCE(msk->token, 0); } void __init mptcp_token_init(void) { int i; token_hash = alloc_large_system_hash("MPTCP token", sizeof(struct token_bucket), 0, 20,/* one slot per 1MB of memory */ HASH_ZERO, NULL, &token_mask, 0, 64 * 1024); for (i = 0; i < token_mask + 1; ++i) { INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i); INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i); spin_lock_init(&token_hash[i].lock); } } #if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST) EXPORT_SYMBOL_GPL(mptcp_token_new_request); EXPORT_SYMBOL_GPL(mptcp_token_new_connect); EXPORT_SYMBOL_GPL(mptcp_token_accept); EXPORT_SYMBOL_GPL(mptcp_token_destroy_request); EXPORT_SYMBOL_GPL(mptcp_token_destroy); #endif
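token_bucket() above works because alloc_large_system_hash() sizes the table to a power of two and reports token_mask as size - 1, making token & token_mask a cheap modulo. The same bucket index is used as the nulls value when each chain is initialized, which is why the lookup paths re-check get_nulls_value(pos) and retry: ending an RCU traversal on a foreign nulls marker means the walk drifted into another bucket. A toy sketch of just the masking (TABLE_SIZE is an assumed small value, not the kernel's boot-time sizing):

#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 16u			/* must be a power of two */
#define TABLE_MASK (TABLE_SIZE - 1u)	/* plays the role of token_mask */

/* same indexing as token_bucket() above */
static unsigned int bucket_index(uint32_t token)
{
	return token & TABLE_MASK;
}

int main(void)
{
	printf("token 0xdeadbeef -> bucket %u\n", bucket_index(0xdeadbeefu)); /* 15 */
	printf("token 0x00000010 -> bucket %u\n", bucket_index(0x00000010u)); /* 0 */
	return 0;
}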
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/hash.c * * Copyright (C) 2002 by Theodore Ts'o */ #include <linux/fs.h> #include <linux/unicode.h> #include <linux/compiler.h> #include <linux/bitops.h> #include "ext4.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[4], __u32 const in[]) { __u32 sum = 0; __u32 b0 = buf[0], b1 = buf[1]; __u32 a = in[0], b = in[1], c = in[2], d = in[3]; int n = 16; do { sum += DELTA; b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); } while (--n); buf[0] += b0; buf[1] += b1; } /* F, G and H are basic MD4 functions: selection, majority, parity */ #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) #define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) /* * The generic round function. The application is so specific that * we don't bother protecting all the arguments with parens, as is generally * good macro practice, in favor of extra legibility. * Rotation is separate from addition to prevent recomputation */ #define ROUND(f, a, b, c, d, x, s) \ (a += f(b, c, d) + x, a = rol32(a, s)) #define K1 0 #define K2 013240474631UL #define K3 015666365641UL /* * Basic cut-down MD4 transform. Returns only 32 bits of result.
*/ static __u32 half_md4_transform(__u32 buf[4], __u32 const in[8]) { __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; /* Round 1 */ ROUND(F, a, b, c, d, in[0] + K1, 3); ROUND(F, d, a, b, c, in[1] + K1, 7); ROUND(F, c, d, a, b, in[2] + K1, 11); ROUND(F, b, c, d, a, in[3] + K1, 19); ROUND(F, a, b, c, d, in[4] + K1, 3); ROUND(F, d, a, b, c, in[5] + K1, 7); ROUND(F, c, d, a, b, in[6] + K1, 11); ROUND(F, b, c, d, a, in[7] + K1, 19); /* Round 2 */ ROUND(G, a, b, c, d, in[1] + K2, 3); ROUND(G, d, a, b, c, in[3] + K2, 5); ROUND(G, c, d, a, b, in[5] + K2, 9); ROUND(G, b, c, d, a, in[7] + K2, 13); ROUND(G, a, b, c, d, in[0] + K2, 3); ROUND(G, d, a, b, c, in[2] + K2, 5); ROUND(G, c, d, a, b, in[4] + K2, 9); ROUND(G, b, c, d, a, in[6] + K2, 13); /* Round 3 */ ROUND(H, a, b, c, d, in[3] + K3, 3); ROUND(H, d, a, b, c, in[7] + K3, 9); ROUND(H, c, d, a, b, in[2] + K3, 11); ROUND(H, b, c, d, a, in[6] + K3, 15); ROUND(H, a, b, c, d, in[1] + K3, 3); ROUND(H, d, a, b, c, in[5] + K3, 9); ROUND(H, c, d, a, b, in[0] + K3, 11); ROUND(H, b, c, d, a, in[4] + K3, 15); buf[0] += a; buf[1] += b; buf[2] += c; buf[3] += d; return buf[1]; /* "most hashed" word */ } #undef ROUND #undef K1 #undef K2 #undef K3 #undef F #undef G #undef H /* The old legacy hash */ static __u32 dx_hack_hash_unsigned(const char *name, int len) { __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; const unsigned char *ucp = (const unsigned char *) name; while (len--) { hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); if (hash & 0x80000000) hash -= 0x7fffffff; hash1 = hash0; hash0 = hash; } return hash0 << 1; } static __u32 dx_hack_hash_signed(const char *name, int len) { __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; const signed char *scp = (const signed char *) name; while (len--) { hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); if (hash & 0x80000000) hash -= 0x7fffffff; hash1 = hash0; hash0 = hash; } return hash0 << 1; } static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num) { __u32 pad, val; int i; const signed char *scp = (const signed char *) msg; pad = (__u32)len | ((__u32)len << 8); pad |= pad << 16; val = pad; if (len > num*4) len = num * 4; for (i = 0; i < len; i++) { val = ((int) scp[i]) + (val << 8); if ((i % 4) == 3) { *buf++ = val; val = pad; num--; } } if (--num >= 0) *buf++ = val; while (--num >= 0) *buf++ = pad; } static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) { __u32 pad, val; int i; const unsigned char *ucp = (const unsigned char *) msg; pad = (__u32)len | ((__u32)len << 8); pad |= pad << 16; val = pad; if (len > num*4) len = num * 4; for (i = 0; i < len; i++) { val = ((int) ucp[i]) + (val << 8); if ((i % 4) == 3) { *buf++ = val; val = pad; num--; } } if (--num >= 0) *buf++ = val; while (--num >= 0) *buf++ = pad; } /* * Returns the hash of a filename. If len is 0 and name is NULL, then * this function can be used to test whether or not a hash version is * supported. * * The seed is an 4 longword (32 bits) "secret" which can be used to * uniquify a hash. If the seed is all zero's, then some default seed * may be used. * * A particular hash version specifies whether or not the seed is * represented, and whether or not the returned hash is 32 bits or 64 * bits. 32 bit hashes will return 0 for the minor hash. 
*/ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { __u32 hash; __u32 minor_hash = 0; const char *p; int i; __u32 in[8], buf[4]; void (*str2hashbuf)(const char *, int, __u32 *, int) = str2hashbuf_signed; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; buf[2] = 0x98badcfe; buf[3] = 0x10325476; /* Check to see if the seed is all zero's */ if (hinfo->seed) { for (i = 0; i < 4; i++) { if (hinfo->seed[i]) { memcpy(buf, hinfo->seed, sizeof(buf)); break; } } } switch (hinfo->hash_version) { case DX_HASH_LEGACY_UNSIGNED: hash = dx_hack_hash_unsigned(name, len); break; case DX_HASH_LEGACY: hash = dx_hack_hash_signed(name, len); break; case DX_HASH_HALF_MD4_UNSIGNED: str2hashbuf = str2hashbuf_unsigned; fallthrough; case DX_HASH_HALF_MD4: p = name; while (len > 0) { (*str2hashbuf)(p, len, in, 8); half_md4_transform(buf, in); len -= 32; p += 32; } minor_hash = buf[2]; hash = buf[1]; break; case DX_HASH_TEA_UNSIGNED: str2hashbuf = str2hashbuf_unsigned; fallthrough; case DX_HASH_TEA: p = name; while (len > 0) { (*str2hashbuf)(p, len, in, 4); TEA_transform(buf, in); len -= 16; p += 16; } hash = buf[0]; minor_hash = buf[1]; break; case DX_HASH_SIPHASH: { struct qstr qname = QSTR_INIT(name, len); __u64 combined_hash; if (fscrypt_has_encryption_key(dir)) { combined_hash = fscrypt_fname_siphash(dir, &qname); } else { ext4_warning_inode(dir, "Siphash requires key"); return -EINVAL; } hash = (__u32)(combined_hash >> 32); minor_hash = (__u32)combined_hash; break; } default: hinfo->hash = 0; hinfo->minor_hash = 0; ext4_warning(dir->i_sb, "invalid/unsupported hash tree version %u", hinfo->hash_version); return -EINVAL; } hash = hash & ~1; if (hash == (EXT4_HTREE_EOF_32BIT << 1)) hash = (EXT4_HTREE_EOF_32BIT - 1) << 1; hinfo->hash = hash; hinfo->minor_hash = minor_hash; return 0; } int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { #if IS_ENABLED(CONFIG_UNICODE) const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; if (len && IS_CASEFOLDED(dir) && (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) { buff = kzalloc(PATH_MAX, GFP_KERNEL); if (!buff) return -ENOMEM; dlen = utf8_casefold(um, &qstr, buff, PATH_MAX); if (dlen < 0) { kfree(buff); goto opaque_seq; } r = __ext4fs_dirhash(dir, buff, dlen, hinfo); kfree(buff); return r; } opaque_seq: #endif return __ext4fs_dirhash(dir, name, len, hinfo); }
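The legacy hash flavors above are self-contained and easy to experiment with outside the kernel. Below is a userspace copy of dx_hack_hash_unsigned() for exploration (a sketch only; the helper name and the sample file name are made up, and the printed value is simply whatever the rolling hash yields):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* userspace copy of dx_hack_hash_unsigned() above */
static uint32_t dx_hack_hash(const char *name, int len)
{
	uint32_t hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
	const unsigned char *ucp = (const unsigned char *)name;

	while (len--) {
		hash = hash1 + (hash0 ^ (((int)*ucp++) * 7152373));
		if (hash & 0x80000000)
			hash -= 0x7fffffff;	/* fold back into 31 bits */
		hash1 = hash0;
		hash0 = hash;
	}
	return hash0 << 1;	/* low bit left clear, cf. hash & ~1 above */
}

int main(void)
{
	const char *name = "example.txt";

	printf("dx_hack_hash(\"%s\") = 0x%08x\n",
	       name, dx_hack_hash(name, (int)strlen(name)));
	return 0;
}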
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/readdir.c * * Copyright (C) 1995 Linus Torvalds */ #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/dirent.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/compat.h> #include <linux/uaccess.h> /* * Some filesystems were never converted to '->iterate_shared()' * and their directory iterators want the inode lock held for * writing. This wrapper allows for converting from the shared * semantics to the exclusive inode use. */ int wrap_directory_iterator(struct file *file, struct dir_context *ctx, int (*iter)(struct file *, struct dir_context *)) { struct inode *inode = file_inode(file); int ret; /* * We'd love to have an 'inode_upgrade_trylock()' operation, * see the comment in mmap_upgrade_trylock() in mm/memory.c.
* * But considering this is for "filesystems that never got * converted", it really doesn't matter. * * Also note that since we have to return with the lock held * for reading, we can't use the "killable()" locking here, * since we do need to get the lock even if we're dying. * * We could do the write part killably and then get the read * lock unconditionally if it mattered, but see above on why * this does the very simplistic conversion. */ up_read(&inode->i_rwsem); down_write(&inode->i_rwsem); /* * Since we dropped the inode lock, we should do the * DEADDIR test again. See 'iterate_dir()' below. * * Note that we don't need to re-do the f_pos games, * since the file must be locked wrt f_pos anyway. */ ret = -ENOENT; if (!IS_DEADDIR(inode)) ret = iter(file, ctx); downgrade_write(&inode->i_rwsem); return ret; } EXPORT_SYMBOL(wrap_directory_iterator); /* * Note the "unsafe_put_user()" semantics: we goto a * label for errors. */ #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ char __user *dst = (_dst); \ const char *src = (_src); \ size_t len = (_len); \ unsafe_put_user(0, dst+len, label); \ unsafe_copy_to_user(dst, src, len, label); \ } while (0) int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; if (!file->f_op->iterate_shared) goto out; res = security_file_permission(file, MAY_READ); if (res) goto out; res = fsnotify_file_perm(file, MAY_READ); if (res) goto out; res = down_read_killable(&inode->i_rwsem); if (res) goto out; res = -ENOENT; if (!IS_DEADDIR(inode)) { ctx->pos = file->f_pos; res = file->f_op->iterate_shared(file, ctx); file->f_pos = ctx->pos; fsnotify_access(file); file_accessed(file); } inode_unlock_shared(inode); out: return res; } EXPORT_SYMBOL(iterate_dir); /* * POSIX says that a dirent name cannot contain NULL or a '/'. * * It's not 100% clear what we should really do in this case. * The filesystem is clearly corrupted, but returning a hard * error means that you now don't see any of the other names * either, so that isn't a perfect alternative. * * And if you return an error, what error do you use? Several * filesystems seem to have decided on EUCLEAN being the error * code for EFSCORRUPTED, and that may be the error to use. Or * just EIO, which is perhaps more obvious to users. * * In order to see the other file names in the directory, the * caller might want to make this a "soft" error: skip the * entry, and return the error at the end instead. * * Note that this should likely do a "memchr(name, 0, len)" * check too, since that would be filesystem corruption as * well. However, that case can't actually confuse user space, * which has to do a strlen() on the name anyway to find the * filename length, and the above "soft error" worry means * that it's probably better left alone until we have that * issue clarified. * * Note the PATH_MAX check - it's arbitrary but the real * kernel limit on a possible path component, not NAME_MAX, * which is the technical standard limit. */ static int verify_dirent_name(const char *name, int len) { if (len <= 0 || len >= PATH_MAX) return -EIO; if (memchr(name, '/', len)) return -EIO; return 0; } /* * Traditional linux readdir() handling.. * * "count=1" is a special case, meaning that the buffer is one * dirent-structure in size and that the code can't handle more * anyway. Thus the special "fillonedir()" function for that * case (the low-level handlers don't need to care about this). 
*/ #ifdef __ARCH_WANT_OLD_READDIR struct old_linux_dirent { unsigned long d_ino; unsigned long d_offset; unsigned short d_namlen; char d_name[]; }; struct readdir_callback { struct dir_context ctx; struct old_linux_dirent __user * dirent; int result; }; static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct readdir_callback *buf = container_of(ctx, struct readdir_callback, ctx); struct old_linux_dirent __user * dirent; unsigned long d_ino; if (buf->result) return false; buf->result = verify_dirent_name(name, namlen); if (buf->result) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; return false; } buf->result++; dirent = buf->dirent; if (!user_write_access_begin(dirent, (unsigned long)(dirent->d_name + namlen + 1) - (unsigned long)dirent)) goto efault; unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(offset, &dirent->d_offset, efault_end); unsafe_put_user(namlen, &dirent->d_namlen, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); return true; efault_end: user_write_access_end(); efault: buf->result = -EFAULT; return false; } SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; CLASS(fd_pos, f)(fd); struct readdir_callback buf = { .ctx.actor = fillonedir, .ctx.count = 1, /* Hint to fs: just one entry. */ .dirent = dirent }; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (buf.result) error = buf.result; return error; } #endif /* __ARCH_WANT_OLD_READDIR */ /* * New, all-improved, singing, dancing, iBCS2-compliant getdents() * interface. */ struct linux_dirent { unsigned long d_ino; unsigned long d_off; unsigned short d_reclen; char d_name[]; }; struct getdents_callback { struct dir_context ctx; struct linux_dirent __user * current_dir; int prev_reclen; int error; }; static bool filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent __user *dirent, *prev; struct getdents_callback *buf = container_of(ctx, struct getdents_callback, ctx); unsigned long d_ino; int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); int prev_reclen; unsigned int flags = d_type; BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); d_type &= S_DT_MASK; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > ctx->count) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->error = -EOVERFLOW; return false; } prev_reclen = buf->prev_reclen; if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *) dirent - prev_reclen; if (!user_write_access_begin(prev, reclen + prev_reclen)) goto efault; /* This might be 'dirent->d_off', but if so it will get overwritten */ unsafe_put_user(offset, &prev->d_off, efault_end); unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(reclen, &dirent->d_reclen, efault_end); unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); buf->current_dir = (void __user *)dirent + reclen; buf->prev_reclen = reclen; ctx->count -= reclen; return true; efault_end: user_write_access_end(); efault: buf->error = -EFAULT; return false; } SYSCALL_DEFINE3(getdents, unsigned int, fd, struct linux_dirent __user *, dirent, unsigned int, count) { CLASS(fd_pos, f)(fd); struct getdents_callback buf = { .ctx.actor = filldir, .ctx.count = count, .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, .current_dir = dirent }; int error; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (error >= 0) error = buf.error; if (buf.prev_reclen) { struct linux_dirent __user * lastdirent; lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.ctx.count; } return error; } struct getdents_callback64 { struct dir_context ctx; struct linux_dirent64 __user * current_dir; int prev_reclen; int error; }; static bool filldir64(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent64 __user *dirent, *prev; struct getdents_callback64 *buf = container_of(ctx, struct getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); int prev_reclen; unsigned int flags = d_type; BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); d_type &= S_DT_MASK; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. 
struct getdents_callback64 {
	struct dir_context ctx;
	struct linux_dirent64 __user * current_dir;
	int prev_reclen;
	int error;
};

static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
		      loff_t offset, u64 ino, unsigned int d_type)
{
	struct linux_dirent64 __user *dirent, *prev;
	struct getdents_callback64 *buf =
		container_of(ctx, struct getdents_callback64, ctx);
	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
		sizeof(u64));
	int prev_reclen;
	unsigned int flags = d_type;

	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
	d_type &= S_DT_MASK;

	buf->error = verify_dirent_name(name, namlen);
	if (unlikely(buf->error))
		return false;
	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > ctx->count)
		return false;
	prev_reclen = buf->prev_reclen;
	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
		return false;
	dirent = buf->current_dir;
	prev = (void __user *)dirent - prev_reclen;
	if (!user_write_access_begin(prev, reclen + prev_reclen))
		goto efault;

	/* This might be 'dirent->d_off', but if so it will get overwritten */
	unsafe_put_user(offset, &prev->d_off, efault_end);
	unsafe_put_user(ino, &dirent->d_ino, efault_end);
	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
	unsafe_put_user(d_type, &dirent->d_type, efault_end);
	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
	user_write_access_end();

	buf->prev_reclen = reclen;
	buf->current_dir = (void __user *)dirent + reclen;
	ctx->count -= reclen;
	return true;

efault_end:
	user_write_access_end();
efault:
	buf->error = -EFAULT;
	return false;
}

SYSCALL_DEFINE3(getdents64, unsigned int, fd,
		struct linux_dirent64 __user *, dirent, unsigned int, count)
{
	CLASS(fd_pos, f)(fd);
	struct getdents_callback64 buf = {
		.ctx.actor = filldir64,
		.ctx.count = count,
		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
		.current_dir = dirent
	};
	int error;

	if (fd_empty(f))
		return -EBADF;

	error = iterate_dir(fd_file(f), &buf.ctx);
	if (error >= 0)
		error = buf.error;
	if (buf.prev_reclen) {
		struct linux_dirent64 __user * lastdirent;
		typeof(lastdirent->d_off) d_off = buf.ctx.pos;

		lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
		if (put_user(d_off, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.ctx.count;
	}
	return error;
}
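/*
 * Illustrative sketch, not part of this file: a hypothetical userspace
 * consumer of getdents64() walks the returned buffer record by record,
 * stepping by d_reclen and stopping at a 0 return (end of directory).
 * This assumes <unistd.h> and <sys/syscall.h>, and that the caller
 * declares a matching struct linux_dirent64 by hand, as the getdents(2)
 * man page suggests:
 *
 *	char buf[16384];
 *
 *	for (;;) {
 *		long n = syscall(__NR_getdents64, fd, buf, sizeof(buf));
 *
 *		if (n <= 0)
 *			break;	// 0: end of directory, -1: error, see errno
 *		for (long off = 0; off < n; ) {
 *			struct linux_dirent64 *d = (void *)(buf + off);
 *
 *			printf("%llu %s\n",
 *			       (unsigned long long)d->d_ino, d->d_name);
 *			off += d->d_reclen;
 *		}
 *	}
 */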
#ifdef CONFIG_COMPAT
struct compat_old_linux_dirent {
	compat_ulong_t	d_ino;
	compat_ulong_t	d_offset;
	unsigned short	d_namlen;
	char		d_name[];
};

struct compat_readdir_callback {
	struct dir_context ctx;
	struct compat_old_linux_dirent __user *dirent;
	int result;
};

static bool compat_fillonedir(struct dir_context *ctx, const char *name,
			      int namlen, loff_t offset, u64 ino,
			      unsigned int d_type)
{
	struct compat_readdir_callback *buf =
		container_of(ctx, struct compat_readdir_callback, ctx);
	struct compat_old_linux_dirent __user *dirent;
	compat_ulong_t d_ino;

	if (buf->result)
		return false;
	buf->result = verify_dirent_name(name, namlen);
	if (buf->result)
		return false;
	d_ino = ino;
	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
		buf->result = -EOVERFLOW;
		return false;
	}
	buf->result++;
	dirent = buf->dirent;
	if (!user_write_access_begin(dirent,
			(unsigned long)(dirent->d_name + namlen + 1) -
				(unsigned long)dirent))
		goto efault;
	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
	unsafe_put_user(offset, &dirent->d_offset, efault_end);
	unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
	user_write_access_end();
	return true;
efault_end:
	user_write_access_end();
efault:
	buf->result = -EFAULT;
	return false;
}

COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
		struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
{
	int error;
	CLASS(fd_pos, f)(fd);
	struct compat_readdir_callback buf = {
		.ctx.actor = compat_fillonedir,
		.ctx.count = 1,		/* Hint to fs: just one entry. */
		.dirent = dirent
	};

	if (fd_empty(f))
		return -EBADF;

	error = iterate_dir(fd_file(f), &buf.ctx);
	if (buf.result)
		error = buf.result;

	return error;
}

struct compat_linux_dirent {
	compat_ulong_t	d_ino;
	compat_ulong_t	d_off;
	unsigned short	d_reclen;
	char		d_name[];
};

struct compat_getdents_callback {
	struct dir_context ctx;
	struct compat_linux_dirent __user *current_dir;
	int prev_reclen;
	int error;
};

static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
		loff_t offset, u64 ino, unsigned int d_type)
{
	struct compat_linux_dirent __user *dirent, *prev;
	struct compat_getdents_callback *buf =
		container_of(ctx, struct compat_getdents_callback, ctx);
	compat_ulong_t d_ino;
	int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
		namlen + 2, sizeof(compat_long_t));
	int prev_reclen;
	unsigned int flags = d_type;

	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
	d_type &= S_DT_MASK;

	buf->error = verify_dirent_name(name, namlen);
	if (unlikely(buf->error))
		return false;
	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > ctx->count)
		return false;
	d_ino = ino;
	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
		buf->error = -EOVERFLOW;
		return false;
	}
	prev_reclen = buf->prev_reclen;
	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
		return false;
	dirent = buf->current_dir;
	prev = (void __user *) dirent - prev_reclen;
	if (!user_write_access_begin(prev, reclen + prev_reclen))
		goto efault;

	unsafe_put_user(offset, &prev->d_off, efault_end);
	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
	user_write_access_end();

	buf->prev_reclen = reclen;
	buf->current_dir = (void __user *)dirent + reclen;
	ctx->count -= reclen;
	return true;
efault_end:
	user_write_access_end();
efault:
	buf->error = -EFAULT;
	return false;
}

COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
		struct compat_linux_dirent __user *, dirent, unsigned int, count)
{
	CLASS(fd_pos, f)(fd);
	struct compat_getdents_callback buf = {
		.ctx.actor = compat_filldir,
		.ctx.count = count,
		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
		.current_dir = dirent,
	};
	int error;

	if (fd_empty(f))
		return -EBADF;

	error = iterate_dir(fd_file(f), &buf.ctx);
	if (error >= 0)
		error = buf.error;
	if (buf.prev_reclen) {
		struct compat_linux_dirent __user * lastdirent;
		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;

		if (put_user(buf.ctx.pos, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.ctx.count;
	}
	return error;
}
#endif
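/*
 * Note: the compat entry points above exist so that 32-bit tasks on a
 * 64-bit kernel see records laid out with 32-bit compat_ulong_t fields.
 * One visible consequence is that an inode number which does not fit in
 * 32 bits fails the
 *
 *	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
 *
 * check in compat_fillonedir()/compat_filldir(), so the call fails with
 * -EOVERFLOW; such callers are expected to use getdents64() instead,
 * whose linux_dirent64 layout is the same for 32- and 64-bit userspace.
 */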