Total coverage: 354528 (20%)of 1821504
163 1 161 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TRACE_SYSCALL_H #define _TRACE_SYSCALL_H #include <linux/tracepoint.h> #include <linux/unistd.h> #include <linux/trace_events.h> #include <linux/thread_info.h> #include <asm/ptrace.h> /* * A syscall entry in the ftrace syscalls array. * * @name: name of the syscall * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) * @enter_fields: list of fields for syscall_enter trace event * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; int syscall_nr; int nb_args; const char **types; const char **args; struct list_head enter_fields; struct trace_event_call *enter_event; struct trace_event_call *exit_event; }; #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) static inline void syscall_tracepoint_update(struct task_struct *p) { if (test_syscall_work(SYSCALL_TRACEPOINT)) set_task_syscall_work(p, SYSCALL_TRACEPOINT); else clear_task_syscall_work(p, SYSCALL_TRACEPOINT); } #else static inline void syscall_tracepoint_update(struct task_struct *p) { } #endif #endif /* _TRACE_SYSCALL_H */
2 2 2 2 59 2 54 2 1 54 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 // SPDX-License-Identifier: GPL-2.0 /* * Implement the manual drop-all-pagecache function */ #include <linux/pagemap.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/writeback.h> #include <linux/sysctl.h> #include <linux/gfp.h> #include <linux/swap.h> #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); /* * We must skip inodes in unusual state. We may also skip * inodes without pages but we deliberately won't in case * we need to reschedule to avoid softlockups. */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || (mapping_empty(inode->i_mapping) && !need_resched())) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(toput_inode); } int drop_caches_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (ret) return ret; if (write) { static int stfu; if (sysctl_drop_caches & 1) { lru_add_drain_all(); iterate_supers(drop_pagecache_sb, NULL); count_vm_event(DROP_PAGECACHE); } if (sysctl_drop_caches & 2) { drop_slab(); count_vm_event(DROP_SLAB); } if (!stfu) { pr_info("%s (%d): drop_caches: %d\n", current->comm, task_pid_nr(current), sysctl_drop_caches); } stfu |= sysctl_drop_caches & 4; } return 0; }
2 10 2 50 49 43 1 14 126 30 2 28 27 1 1 28 28 6 12 12 4 11 12 36 3 19 3 13 9 1 1 19 19 1 18 4 19 27 33 8 32 35 32 32 5 5 32 17 1 17 32 23 23 23 23 35 34 5 11 11 6 11 6 11 10 8 11 6 11 68 67 48 13 58 54 50 42 67 68 7 7 64 5 66 66 67 6 6 30 35 41 36 24 12 12 17 23 103 37 68 79 60 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 // SPDX-License-Identifier: GPL-2.0-only /* * balloc.c * * PURPOSE * Block allocation handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT * (C) 1999-2001 Ben Fennema * (C) 1999 Stelias Computing Inc * * HISTORY * * 02/24/99 blf Created. * */ #include "udfdecl.h" #include <linux/bitops.h> #include <linux/overflow.h> #include "udf_i.h" #include "udf_sb.h" #define udf_clear_bit __test_and_clear_bit_le #define udf_set_bit __test_and_set_bit_le #define udf_test_bit test_bit_le #define udf_find_next_one_bit find_next_bit_le static int read_block_bitmap(struct super_block *sb, struct udf_bitmap *bitmap, unsigned int block, unsigned long bitmap_nr) { struct buffer_head *bh = NULL; int i; int max_bits, off, count; struct kernel_lb_addr loc; loc.logicalBlockNum = bitmap->s_extPosition; loc.partitionReferenceNum = UDF_SB(sb)->s_partition; bh = sb_bread(sb, udf_get_lb_pblock(sb, &loc, block)); bitmap->s_block_bitmap[bitmap_nr] = bh; if (!bh) return -EIO; /* Check consistency of Space Bitmap buffer. */ max_bits = sb->s_blocksize * 8; if (!bitmap_nr) { off = sizeof(struct spaceBitmapDesc) << 3; count = min(max_bits - off, bitmap->s_nr_groups); } else { /* * Rough check if bitmap number is too big to have any bitmap * blocks reserved. */ if (bitmap_nr > (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2) return 0; off = 0; count = bitmap->s_nr_groups - bitmap_nr * max_bits + (sizeof(struct spaceBitmapDesc) << 3); count = min(count, max_bits); } for (i = 0; i < count; i++) if (udf_test_bit(i + off, bh->b_data)) { bitmap->s_block_bitmap[bitmap_nr] = ERR_PTR(-EFSCORRUPTED); brelse(bh); return -EFSCORRUPTED; } return 0; } static int load_block_bitmap(struct super_block *sb, struct udf_bitmap *bitmap, unsigned int block_group) { int retval = 0; int nr_groups = bitmap->s_nr_groups; if (block_group >= nr_groups) { udf_debug("block_group (%u) > nr_groups (%d)\n", block_group, nr_groups); } if (bitmap->s_block_bitmap[block_group]) { /* * The bitmap failed verification in the past. No point in * trying again. */ if (IS_ERR(bitmap->s_block_bitmap[block_group])) return PTR_ERR(bitmap->s_block_bitmap[block_group]); return block_group; } retval = read_block_bitmap(sb, bitmap, block_group, block_group); if (retval < 0) return retval; return block_group; } static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt) { struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDesc *lvid; if (!sbi->s_lvid_bh) return; lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); udf_updated_lvid(sb); } static void udf_bitmap_free_blocks(struct super_block *sb, struct udf_bitmap *bitmap, struct kernel_lb_addr *bloc, uint32_t offset, uint32_t count) { struct udf_sb_info *sbi = UDF_SB(sb); struct buffer_head *bh = NULL; unsigned long block; unsigned long block_group; unsigned long bit; unsigned long i; int bitmap_nr; unsigned long overflow; mutex_lock(&sbi->s_alloc_mutex); /* We make sure this cannot overflow when mounting the filesystem */ block = bloc->logicalBlockNum + offset + (sizeof(struct spaceBitmapDesc) << 3); do { overflow = 0; block_group = block >> (sb->s_blocksize_bits + 3); bit = block % (sb->s_blocksize << 3); /* * Check to see if we are freeing blocks across a group boundary. */ if (bit + count > (sb->s_blocksize << 3)) { overflow = bit + count - (sb->s_blocksize << 3); count -= overflow; } bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; for (i = 0; i < count; i++) { if (udf_set_bit(bit + i, bh->b_data)) { udf_debug("bit %lu already set\n", bit + i); udf_debug("byte=%2x\n", ((__u8 *)bh->b_data)[(bit + i) >> 3]); } } udf_add_free_space(sb, sbi->s_partition, count); mark_buffer_dirty(bh); if (overflow) { block += count; count = overflow; } } while (overflow); error_return: mutex_unlock(&sbi->s_alloc_mutex); } static int udf_bitmap_prealloc_blocks(struct super_block *sb, struct udf_bitmap *bitmap, uint16_t partition, uint32_t first_block, uint32_t block_count) { struct udf_sb_info *sbi = UDF_SB(sb); int alloc_count = 0; int bit, block, block_group; int bitmap_nr; struct buffer_head *bh; __u32 part_len; mutex_lock(&sbi->s_alloc_mutex); part_len = sbi->s_partmaps[partition].s_partition_len; if (first_block >= part_len) goto out; if (first_block + block_count > part_len) block_count = part_len - first_block; do { block = first_block + (sizeof(struct spaceBitmapDesc) << 3); block_group = block >> (sb->s_blocksize_bits + 3); bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto out; bh = bitmap->s_block_bitmap[bitmap_nr]; bit = block % (sb->s_blocksize << 3); while (bit < (sb->s_blocksize << 3) && block_count > 0) { if (!udf_clear_bit(bit, bh->b_data)) goto out; block_count--; alloc_count++; bit++; block++; } mark_buffer_dirty(bh); } while (block_count > 0); out: udf_add_free_space(sb, partition, -alloc_count); mutex_unlock(&sbi->s_alloc_mutex); return alloc_count; } static udf_pblk_t udf_bitmap_new_block(struct super_block *sb, struct udf_bitmap *bitmap, uint16_t partition, uint32_t goal, int *err) { struct udf_sb_info *sbi = UDF_SB(sb); int newbit, bit = 0; udf_pblk_t block; int block_group, group_start; int end_goal, nr_groups, bitmap_nr, i; struct buffer_head *bh = NULL; char *ptr; udf_pblk_t newblock = 0; *err = -ENOSPC; mutex_lock(&sbi->s_alloc_mutex); repeat: if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; nr_groups = bitmap->s_nr_groups; block = goal + (sizeof(struct spaceBitmapDesc) << 3); block_group = block >> (sb->s_blocksize_bits + 3); group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc); bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; ptr = memscan((char *)bh->b_data + group_start, 0xFF, sb->s_blocksize - group_start); if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) { bit = block % (sb->s_blocksize << 3); if (udf_test_bit(bit, bh->b_data)) goto got_block; end_goal = (bit + 63) & ~63; bit = udf_find_next_one_bit(bh->b_data, end_goal, bit); if (bit < end_goal) goto got_block; ptr = memscan((char *)bh->b_data + (bit >> 3), 0xFF, sb->s_blocksize - ((bit + 7) >> 3)); newbit = (ptr - ((char *)bh->b_data)) << 3; if (newbit < sb->s_blocksize << 3) { bit = newbit; goto search_back; } newbit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, bit); if (newbit < sb->s_blocksize << 3) { bit = newbit; goto got_block; } } for (i = 0; i < (nr_groups * 2); i++) { block_group++; if (block_group >= nr_groups) block_group = 0; group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc); bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; if (i < nr_groups) { ptr = memscan((char *)bh->b_data + group_start, 0xFF, sb->s_blocksize - group_start); if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) { bit = (ptr - ((char *)bh->b_data)) << 3; break; } } else { bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3); if (bit < sb->s_blocksize << 3) break; } } if (i >= (nr_groups * 2)) { mutex_unlock(&sbi->s_alloc_mutex); return newblock; } if (bit < sb->s_blocksize << 3) goto search_back; else bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3); if (bit >= sb->s_blocksize << 3) { mutex_unlock(&sbi->s_alloc_mutex); return 0; } search_back: i = 0; while (i < 7 && bit > (group_start << 3) && udf_test_bit(bit - 1, bh->b_data)) { ++i; --bit; } got_block: newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - (sizeof(struct spaceBitmapDesc) << 3); if (newblock >= sbi->s_partmaps[partition].s_partition_len) { /* * Ran off the end of the bitmap, and bits following are * non-compliant (not all zero) */ udf_err(sb, "bitmap for partition %d corrupted (block %u marked" " as free, partition length is %u)\n", partition, newblock, sbi->s_partmaps[partition].s_partition_len); goto error_return; } if (!udf_clear_bit(bit, bh->b_data)) { udf_debug("bit already cleared for block %d\n", bit); goto repeat; } mark_buffer_dirty(bh); udf_add_free_space(sb, partition, -1); mutex_unlock(&sbi->s_alloc_mutex); *err = 0; return newblock; error_return: *err = -EIO; mutex_unlock(&sbi->s_alloc_mutex); return 0; } static void udf_table_free_blocks(struct super_block *sb, struct inode *table, struct kernel_lb_addr *bloc, uint32_t offset, uint32_t count) { struct udf_sb_info *sbi = UDF_SB(sb); uint32_t start, end; uint32_t elen; struct kernel_lb_addr eloc; struct extent_position oepos, epos; int8_t etype; struct udf_inode_info *iinfo; int ret = 0; mutex_lock(&sbi->s_alloc_mutex); iinfo = UDF_I(table); udf_add_free_space(sb, sbi->s_partition, count); start = bloc->logicalBlockNum + offset; end = bloc->logicalBlockNum + offset + count - 1; epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); elen = 0; epos.block = oepos.block = iinfo->i_location; epos.bh = oepos.bh = NULL; while (count) { ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); if (ret < 0) goto error_return; if (ret == 0) break; if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == start)) { if ((0x3FFFFFFF - elen) < (count << sb->s_blocksize_bits)) { uint32_t tmp = ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); count -= tmp; start += tmp; elen = (etype << 30) | (0x40000000 - sb->s_blocksize); } else { elen = (etype << 30) | (elen + (count << sb->s_blocksize_bits)); start += count; count = 0; } udf_write_aext(table, &oepos, &eloc, elen, 1); } else if (eloc.logicalBlockNum == (end + 1)) { if ((0x3FFFFFFF - elen) < (count << sb->s_blocksize_bits)) { uint32_t tmp = ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); count -= tmp; end -= tmp; eloc.logicalBlockNum -= tmp; elen = (etype << 30) | (0x40000000 - sb->s_blocksize); } else { eloc.logicalBlockNum = start; elen = (etype << 30) | (elen + (count << sb->s_blocksize_bits)); end -= count; count = 0; } udf_write_aext(table, &oepos, &eloc, elen, 1); } if (epos.bh != oepos.bh) { oepos.block = epos.block; brelse(oepos.bh); get_bh(epos.bh); oepos.bh = epos.bh; oepos.offset = 0; } else { oepos.offset = epos.offset; } } if (count) { /* * NOTE: we CANNOT use udf_add_aext here, as it can try to * allocate a new block, and since we hold the super block * lock already very bad things would happen :) * * We copy the behavior of udf_add_aext, but instead of * trying to allocate a new block close to the existing one, * we just steal a block from the extent we are trying to add. * * It would be nice if the blocks were close together, but it * isn't required. */ int adsize; eloc.logicalBlockNum = start; elen = EXT_RECORDED_ALLOCATED | (count << sb->s_blocksize_bits); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(struct long_ad); else goto error_return; if (epos.offset + (2 * adsize) > sb->s_blocksize) { /* Steal a block from the extent being free'd */ udf_setup_indirect_aext(table, eloc.logicalBlockNum, &epos); eloc.logicalBlockNum++; elen -= sb->s_blocksize; } /* It's possible that stealing the block emptied the extent */ if (elen) __udf_add_aext(table, &epos, &eloc, elen, 1); } error_return: brelse(epos.bh); brelse(oepos.bh); mutex_unlock(&sbi->s_alloc_mutex); return; } static int udf_table_prealloc_blocks(struct super_block *sb, struct inode *table, uint16_t partition, uint32_t first_block, uint32_t block_count) { struct udf_sb_info *sbi = UDF_SB(sb); int alloc_count = 0; uint32_t elen, adsize; struct kernel_lb_addr eloc; struct extent_position epos; int8_t etype = -1; struct udf_inode_info *iinfo; int ret = 0; if (first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; iinfo = UDF_I(table); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(struct long_ad); else return 0; mutex_lock(&sbi->s_alloc_mutex); epos.offset = sizeof(struct unallocSpaceEntry); epos.block = iinfo->i_location; epos.bh = NULL; eloc.logicalBlockNum = 0xFFFFFFFF; while (first_block != eloc.logicalBlockNum) { ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); if (ret < 0) goto err_out; if (ret == 0) break; udf_debug("eloc=%u, elen=%u, first_block=%u\n", eloc.logicalBlockNum, elen, first_block); } if (first_block == eloc.logicalBlockNum) { epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); if (alloc_count > block_count) { alloc_count = block_count; eloc.logicalBlockNum += alloc_count; elen -= (alloc_count << sb->s_blocksize_bits); udf_write_aext(table, &epos, &eloc, (etype << 30) | elen, 1); } else udf_delete_aext(table, epos); } else { alloc_count = 0; } err_out: brelse(epos.bh); if (alloc_count) udf_add_free_space(sb, partition, -alloc_count); mutex_unlock(&sbi->s_alloc_mutex); return alloc_count; } static udf_pblk_t udf_table_new_block(struct super_block *sb, struct inode *table, uint16_t partition, uint32_t goal, int *err) { struct udf_sb_info *sbi = UDF_SB(sb); uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; udf_pblk_t newblock = 0; uint32_t adsize; uint32_t elen, goal_elen = 0; struct kernel_lb_addr eloc, goal_eloc; struct extent_position epos, goal_epos; int8_t etype; struct udf_inode_info *iinfo = UDF_I(table); int ret = 0; *err = -ENOSPC; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(struct long_ad); else return newblock; mutex_lock(&sbi->s_alloc_mutex); if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; /* We search for the closest matching block to goal. If we find a exact hit, we stop. Otherwise we keep going till we run out of extents. We store the buffer_head, bloc, and extoffset of the current closest match and use that when we are done. */ epos.offset = sizeof(struct unallocSpaceEntry); epos.block = iinfo->i_location; epos.bh = goal_epos.bh = NULL; while (spread) { ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); if (ret <= 0) break; if (goal >= eloc.logicalBlockNum) { if (goal < eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) nspread = 0; else nspread = goal - eloc.logicalBlockNum - (elen >> sb->s_blocksize_bits); } else { nspread = eloc.logicalBlockNum - goal; } if (nspread < spread) { spread = nspread; if (goal_epos.bh != epos.bh) { brelse(goal_epos.bh); goal_epos.bh = epos.bh; get_bh(goal_epos.bh); } goal_epos.block = epos.block; goal_epos.offset = epos.offset - adsize; goal_eloc = eloc; goal_elen = (etype << 30) | elen; } } brelse(epos.bh); if (ret < 0 || spread == 0xFFFFFFFF) { brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); if (ret < 0) *err = ret; return 0; } /* Only allocate blocks from the beginning of the extent. That way, we only delete (empty) extents, never have to insert an extent because of splitting */ /* This works, but very poorly.... */ newblock = goal_eloc.logicalBlockNum; goal_eloc.logicalBlockNum++; goal_elen -= sb->s_blocksize; if (goal_elen) udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); else udf_delete_aext(table, goal_epos); brelse(goal_epos.bh); udf_add_free_space(sb, partition, -1); mutex_unlock(&sbi->s_alloc_mutex); *err = 0; return newblock; } void udf_free_blocks(struct super_block *sb, struct inode *inode, struct kernel_lb_addr *bloc, uint32_t offset, uint32_t count) { uint16_t partition = bloc->partitionReferenceNum; struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; uint32_t blk; if (check_add_overflow(bloc->logicalBlockNum, offset, &blk) || check_add_overflow(blk, count, &blk) || bloc->logicalBlockNum + count > map->s_partition_len) { udf_debug("Invalid request to free blocks: (%d, %u), off %u, " "len %u, partition len %u\n", partition, bloc->logicalBlockNum, offset, count, map->s_partition_len); return; } if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { udf_bitmap_free_blocks(sb, map->s_uspace.s_bitmap, bloc, offset, count); } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { udf_table_free_blocks(sb, map->s_uspace.s_table, bloc, offset, count); } if (inode) { inode_sub_bytes(inode, ((sector_t)count) << sb->s_blocksize_bits); } } inline int udf_prealloc_blocks(struct super_block *sb, struct inode *inode, uint16_t partition, uint32_t first_block, uint32_t block_count) { struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; int allocated; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) allocated = udf_bitmap_prealloc_blocks(sb, map->s_uspace.s_bitmap, partition, first_block, block_count); else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) allocated = udf_table_prealloc_blocks(sb, map->s_uspace.s_table, partition, first_block, block_count); else return 0; if (inode && allocated > 0) inode_add_bytes(inode, allocated << sb->s_blocksize_bits); return allocated; } inline udf_pblk_t udf_new_block(struct super_block *sb, struct inode *inode, uint16_t partition, uint32_t goal, int *err) { struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; udf_pblk_t block; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) block = udf_bitmap_new_block(sb, map->s_uspace.s_bitmap, partition, goal, err); else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) block = udf_table_new_block(sb, map->s_uspace.s_table, partition, goal, err); else { *err = -EIO; return 0; } if (inode && block) inode_add_bytes(inode, sb->s_blocksize); return block; }
57 57 163 164 128 2 128 128 128 127 2 2 127 21 21 20 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_alloc_btree.h" #include "xfs_rmap_btree.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_health.h" #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_defer.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_buf_item.h" #include "xfs_rtgroup.h" #include "xfs_rtbitmap.h" #include "xfs_metafile.h" #include "xfs_metadir.h" /* Find the first usable fsblock in this rtgroup. */ static inline uint32_t xfs_rtgroup_min_block( struct xfs_mount *mp, xfs_rgnumber_t rgno) { if (xfs_has_rtsb(mp) && rgno == 0) return mp->m_sb.sb_rextsize; return 0; } /* Precompute this group's geometry */ void xfs_rtgroup_calc_geometry( struct xfs_mount *mp, struct xfs_rtgroup *rtg, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents); rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno); } int xfs_rtgroup_alloc( struct xfs_mount *mp, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { struct xfs_rtgroup *rtg; int error; rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL); if (!rtg) return -ENOMEM; xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents); error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG); if (error) goto out_free_rtg; return 0; out_free_rtg: kfree(rtg); return error; } void xfs_rtgroup_free( struct xfs_mount *mp, xfs_rgnumber_t rgno) { xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL); } /* Free a range of incore rtgroup objects. */ void xfs_free_rtgroups( struct xfs_mount *mp, xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno) { xfs_rgnumber_t rgno; for (rgno = first_rgno; rgno < end_rgno; rgno++) xfs_rtgroup_free(mp, rgno); } /* Initialize some range of incore rtgroup objects. */ int xfs_initialize_rtgroups( struct xfs_mount *mp, xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno, xfs_rtbxlen_t rextents) { xfs_rgnumber_t index; int error; if (first_rgno >= end_rgno) return 0; for (index = first_rgno; index < end_rgno; index++) { error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents); if (error) goto out_unwind_new_rtgs; } return 0; out_unwind_new_rtgs: xfs_free_rtgroups(mp, first_rgno, index); return error; } /* Compute the number of rt extents in this realtime group. */ xfs_rtxnum_t __xfs_rtgroup_extents( struct xfs_mount *mp, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { ASSERT(rgno < rgcount); if (rgno == rgcount - 1) return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents); ASSERT(xfs_has_rtgroups(mp)); return mp->m_sb.sb_rgextents; } xfs_rtxnum_t xfs_rtgroup_extents( struct xfs_mount *mp, xfs_rgnumber_t rgno) { return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents); } /* * Update the rt extent count of the previous tail rtgroup if it changed during * recovery (i.e. recovery of a growfs). */ int xfs_update_last_rtgroup_size( struct xfs_mount *mp, xfs_rgnumber_t prev_rgcount) { struct xfs_rtgroup *rtg; ASSERT(prev_rgcount > 0); rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1); if (!rtg) return -EFSCORRUPTED; rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1, mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents); rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; xfs_rtgroup_rele(rtg); return 0; } /* Lock metadata inodes associated with this rt group. */ void xfs_rtgroup_lock( struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || !(rtglock_flags & XFS_RTGLOCK_BITMAP)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { /* * Lock both realtime free space metadata inodes for a freespace * update. */ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); } } /* Unlock metadata inodes associated with this rt group. */ void xfs_rtgroup_unlock( struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || !(rtglock_flags & XFS_RTGLOCK_BITMAP)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); } } /* * Join realtime group metadata inodes to the transaction. The ILOCKs will be * released on transaction commit. */ void xfs_rtgroup_trans_join( struct xfs_trans *tp, struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); } } /* Retrieve rt group geometry. */ int xfs_rtgroup_get_geometry( struct xfs_rtgroup *rtg, struct xfs_rtgroup_geometry *rgeo) { /* Fill out form. */ memset(rgeo, 0, sizeof(*rgeo)); rgeo->rg_number = rtg_rgno(rtg); rgeo->rg_length = rtg_group(rtg)->xg_block_count; xfs_rtgroup_geom_health(rtg, rgeo); return 0; } #ifdef CONFIG_PROVE_LOCKING static struct lock_class_key xfs_rtginode_lock_class; static int xfs_rtginode_ilock_cmp_fn( const struct lockdep_map *m1, const struct lockdep_map *m2) { const struct xfs_inode *ip1 = container_of(m1, struct xfs_inode, i_lock.dep_map); const struct xfs_inode *ip2 = container_of(m2, struct xfs_inode, i_lock.dep_map); if (ip1->i_projid < ip2->i_projid) return -1; if (ip1->i_projid > ip2->i_projid) return 1; return 0; } static inline void xfs_rtginode_ilock_print_fn( const struct lockdep_map *m) { const struct xfs_inode *ip = container_of(m, struct xfs_inode, i_lock.dep_map); printk(KERN_CONT " rgno=%u", ip->i_projid); } /* * Most of the time each of the RTG inode locks are only taken one at a time. * But when committing deferred ops, more than one of a kind can be taken. * However, deferred rt ops will be committed in rgno order so there is no * potential for deadlocks. The code here is needed to tell lockdep about this * order. */ static inline void xfs_rtginode_lockdep_setup( struct xfs_inode *ip, xfs_rgnumber_t rgno, enum xfs_rtg_inodes type) { lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class, type); lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn, xfs_rtginode_ilock_print_fn); } #else #define xfs_rtginode_lockdep_setup(ip, rgno, type) do { } while (0) #endif /* CONFIG_PROVE_LOCKING */ struct xfs_rtginode_ops { const char *name; /* short name */ enum xfs_metafile_type metafile_type; unsigned int sick; /* rtgroup sickness flag */ /* Does the fs have this feature? */ bool (*enabled)(struct xfs_mount *mp); /* Create this rtgroup metadata inode and initialize it. */ int (*create)(struct xfs_rtgroup *rtg, struct xfs_inode *ip, struct xfs_trans *tp, bool init); }; static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = { [XFS_RTGI_BITMAP] = { .name = "bitmap", .metafile_type = XFS_METAFILE_RTBITMAP, .sick = XFS_SICK_RG_BITMAP, .create = xfs_rtbitmap_create, }, [XFS_RTGI_SUMMARY] = { .name = "summary", .metafile_type = XFS_METAFILE_RTSUMMARY, .sick = XFS_SICK_RG_SUMMARY, .create = xfs_rtsummary_create, }, }; /* Return the shortname of this rtgroup inode. */ const char * xfs_rtginode_name( enum xfs_rtg_inodes type) { return xfs_rtginode_ops[type].name; } /* Return the metafile type of this rtgroup inode. */ enum xfs_metafile_type xfs_rtginode_metafile_type( enum xfs_rtg_inodes type) { return xfs_rtginode_ops[type].metafile_type; } /* Should this rtgroup inode be present? */ bool xfs_rtginode_enabled( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; if (!ops->enabled) return true; return ops->enabled(rtg_mount(rtg)); } /* Mark an rtgroup inode sick */ void xfs_rtginode_mark_sick( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; xfs_group_mark_sick(rtg_group(rtg), ops->sick); } /* Load and existing rtgroup inode into the rtgroup structure. */ int xfs_rtginode_load( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_inode *ip; const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; int error; if (!xfs_rtginode_enabled(rtg, type)) return 0; if (!xfs_has_rtgroups(mp)) { xfs_ino_t ino; switch (type) { case XFS_RTGI_BITMAP: ino = mp->m_sb.sb_rbmino; break; case XFS_RTGI_SUMMARY: ino = mp->m_sb.sb_rsumino; break; default: /* None of the other types exist on !rtgroups */ return 0; } error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type, &ip); } else { const char *path; if (!mp->m_rtdirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } path = xfs_rtginode_path(rtg_rgno(rtg), type); if (!path) return -ENOMEM; error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type, &ip); kfree(path); } if (error) { if (xfs_metadata_is_sick(error)) xfs_rtginode_mark_sick(rtg, type); return error; } if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) { xfs_irele(ip); xfs_rtginode_mark_sick(rtg, type); return -EFSCORRUPTED; } if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) { xfs_irele(ip); xfs_rtginode_mark_sick(rtg, type); return -EFSCORRUPTED; } xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type); rtg->rtg_inodes[type] = ip; return 0; } /* Release an rtgroup metadata inode. */ void xfs_rtginode_irele( struct xfs_inode **ipp) { if (*ipp) xfs_irele(*ipp); *ipp = NULL; } /* Add a metadata inode for a realtime rmap btree. */ int xfs_rtginode_create( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, bool init) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; struct xfs_mount *mp = rtg_mount(rtg); struct xfs_metadir_update upd = { .dp = mp->m_rtdirip, .metafile_type = ops->metafile_type, }; int error; if (!xfs_rtginode_enabled(rtg, type)) return 0; if (!mp->m_rtdirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } upd.path = xfs_rtginode_path(rtg_rgno(rtg), type); if (!upd.path) return -ENOMEM; error = xfs_metadir_start_create(&upd); if (error) goto out_path; error = xfs_metadir_create(&upd, S_IFREG); if (error) goto out_cancel; xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type); upd.ip->i_projid = rtg_rgno(rtg); error = ops->create(rtg, upd.ip, upd.tp, init); if (error) goto out_cancel; error = xfs_metadir_commit(&upd); if (error) goto out_path; kfree(upd.path); xfs_finish_inode_setup(upd.ip); rtg->rtg_inodes[type] = upd.ip; return 0; out_cancel: xfs_metadir_cancel(&upd, error); /* Have to finish setting up the inode to ensure it's deleted. */ if (upd.ip) { xfs_finish_inode_setup(upd.ip); xfs_irele(upd.ip); } out_path: kfree(upd.path); return error; } /* Create the parent directory for all rtgroup inodes and load it. */ int xfs_rtginode_mkdir_parent( struct xfs_mount *mp) { if (!mp->m_metadirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip); } /* Load the parent directory of all rtgroup inodes. */ int xfs_rtginode_load_parent( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; if (!mp->m_metadirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups", XFS_METAFILE_DIR, &mp->m_rtdirip); } /* Check superblock fields for a read or a write. */ static xfs_failaddr_t xfs_rtsb_verify_common( struct xfs_buf *bp) { struct xfs_rtsb *rsb = bp->b_addr; if (!xfs_verify_magic(bp, rsb->rsb_magicnum)) return __this_address; if (rsb->rsb_pad) return __this_address; /* Everything to the end of the fs block must be zero */ if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb))) return __this_address; return NULL; } /* Check superblock fields for a read or revalidation. */ static inline xfs_failaddr_t xfs_rtsb_verify_all( struct xfs_buf *bp) { struct xfs_rtsb *rsb = bp->b_addr; struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; fa = xfs_rtsb_verify_common(bp); if (fa) return fa; if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX)) return __this_address; if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid)) return __this_address; if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; return NULL; } static void xfs_rtsb_read_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) { xfs_verifier_error(bp, -EFSBADCRC, __this_address); return; } fa = xfs_rtsb_verify_all(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } static void xfs_rtsb_write_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; fa = xfs_rtsb_verify_common(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF); } const struct xfs_buf_ops xfs_rtsb_buf_ops = { .name = "xfs_rtsb", .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) }, .verify_read = xfs_rtsb_read_verify, .verify_write = xfs_rtsb_write_verify, .verify_struct = xfs_rtsb_verify_all, }; /* Update a realtime superblock from the primary fs super */ void xfs_update_rtsb( struct xfs_buf *rtsb_bp, const struct xfs_buf *sb_bp) { const struct xfs_dsb *dsb = sb_bp->b_addr; struct xfs_rtsb *rsb = rtsb_bp->b_addr; const uuid_t *meta_uuid; rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC); rsb->rsb_pad = 0; memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX); memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid)); /* * The metadata uuid is the fs uuid if the metauuid feature is not * enabled. */ if (dsb->sb_features_incompat & cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID)) meta_uuid = &dsb->sb_meta_uuid; else meta_uuid = &dsb->sb_uuid; memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid)); } /* * Update the realtime superblock from a filesystem superblock and log it to * the given transaction. */ struct xfs_buf * xfs_log_rtsb( struct xfs_trans *tp, const struct xfs_buf *sb_bp) { struct xfs_buf *rtsb_bp; if (!xfs_has_rtsb(tp->t_mountp)) return NULL; rtsb_bp = xfs_trans_getrtsb(tp); if (!rtsb_bp) { /* * It's possible for the rtgroups feature to be enabled but * there is no incore rt superblock buffer if the rt geometry * was specified at mkfs time but the rt section has not yet * been attached. In this case, rblocks must be zero. */ ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0); return NULL; } xfs_update_rtsb(rtsb_bp, sb_bp); xfs_trans_ordered_buf(tp, rtsb_bp); return rtsb_bp; }
128 172 63 199 130 8 405 11 232 3 1 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tcp #if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TCP_H #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/tracepoint.h> #include <net/ipv6.h> #include <net/tcp.h> #include <linux/sock_diag.h> #include <net/rstreason.h> /* * tcp event with arguments sk and skb * * Note: this class requires a valid sk pointer; while skb pointer could * be NULL. */ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); #undef FN #define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason); DEFINE_RST_REASON(FN, FN) #undef FN #undef FNe #define FN(reason) { SK_RST_REASON_##reason, #reason }, #define FNe(reason) { SK_RST_REASON_##reason, #reason } /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(enum sk_rst_reason, reason) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); if (sk && sk_fullsock(sk)) { const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); } else if (skb__nullable) { const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s", __entry->skbaddr, __entry->skaddr, __entry->saddr, __entry->daddr, __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN", __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe))) ); #undef FN #undef FNe /* * tcp event with arguments sk * * Note: this class requires a valid sk pointer. */ DECLARE_EVENT_CLASS(tcp_event_sk, TP_PROTO(struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), TP_ARGS(sk, req), TP_STRUCT__entry( __field(const void *, skaddr) __field(const void *, req) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); __be32 *p32; __entry->skaddr = sk; __entry->req = req; __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, TP_PROTO(struct sock *sk, struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) __field(__u32, ssthresh) __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; __entry->family = sk->sk_family; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); __entry->skbaddr = skb; __entry->skaddr = sk; ), TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p", show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->skbaddr, __entry->skaddr) ); /* * tcp event with only skb */ DECLARE_EVENT_CLASS(tcp_event_skb, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field(const void *, skbaddr) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->skbaddr = skb; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); TRACE_EVENT(tcp_cong_state_set, TP_PROTO(struct sock *sk, const u8 ca_state), TP_ARGS(sk, ca_state), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u8, cong_state) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->cong_state = ca_state; ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->cong_state) ); DECLARE_EVENT_CLASS(tcp_hash_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' : ' ') ); DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DECLARE_EVENT_CLASS(tcp_ao_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) __field(__u8, keyid) __field(__u8, rnext) __field(__u8, maclen) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; __entry->keyid = keyid; __entry->rnext = rnext; __entry->maclen = maclen; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' : ' ', __entry->keyid, __entry->rnext, __entry->maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DECLARE_EVENT_CLASS(tcp_ao_event_sk, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u8, keyid) __field(__u8, rnext) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->keyid = keyid; __entry->rnext = rnext; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->keyid, __entry->rnext) ); DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext) ); DECLARE_EVENT_CLASS(tcp_ao_event_sne, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, new_sne) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->new_sne = new_sne; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 6 2 2 2 4 2 2 51 1 50 2 1 40 8 36 2 2 32 4 23 8 1 17 4 2 11 4 2 5 6 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 // SPDX-License-Identifier: GPL-2.0-or-later #include <net/genetlink.h> #include "br_private.h" #include "br_private_cfm.h" static const struct nla_policy br_cfm_mep_create_policy[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_mep_delete_policy[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_mep_config_policy[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = NLA_POLICY_ETH_ADDR, [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL] = NLA_POLICY_MAX(NLA_U32, 7), [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), }; static const struct nla_policy br_cfm_cc_config_policy[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID] = { .type = NLA_BINARY, .len = CFM_MAID_LENGTH }, }; static const struct nla_policy br_cfm_cc_peer_mep_policy[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_PEER_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), }; static const struct nla_policy br_cfm_cc_rdi_policy[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_RDI_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_RDI_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_RDI_RDI] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_cc_ccm_tx_policy[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC] = NLA_POLICY_ETH_ADDR, [IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE] = { .type = NLA_U8 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = { .type = NLA_U8 }, }; static const struct nla_policy br_cfm_policy[IFLA_BRIDGE_CFM_MAX + 1] = { [IFLA_BRIDGE_CFM_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CREATE] = NLA_POLICY_NESTED(br_cfm_mep_create_policy), [IFLA_BRIDGE_CFM_MEP_DELETE] = NLA_POLICY_NESTED(br_cfm_mep_delete_policy), [IFLA_BRIDGE_CFM_MEP_CONFIG] = NLA_POLICY_NESTED(br_cfm_mep_config_policy), [IFLA_BRIDGE_CFM_CC_CONFIG] = NLA_POLICY_NESTED(br_cfm_cc_config_policy), [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD] = NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE] = NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), [IFLA_BRIDGE_CFM_CC_RDI] = NLA_POLICY_NESTED(br_cfm_cc_rdi_policy), [IFLA_BRIDGE_CFM_CC_CCM_TX] = NLA_POLICY_NESTED(br_cfm_cc_ccm_tx_policy), }; static int br_mep_create_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1]; struct br_cfm_mep_create create; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CREATE_MAX, attr, br_cfm_mep_create_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]) { NL_SET_ERR_MSG_MOD(extack, "Missing DOMAIN attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]) { NL_SET_ERR_MSG_MOD(extack, "Missing DIRECTION attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]) { NL_SET_ERR_MSG_MOD(extack, "Missing IFINDEX attribute"); return -EINVAL; } memset(&create, 0, sizeof(create)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]); create.domain = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]); create.direction = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]); create.ifindex = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]); return br_cfm_mep_create(br, instance, &create, extack); } static int br_mep_delete_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1]; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_DELETE_MAX, attr, br_cfm_mep_delete_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]); return br_cfm_mep_delete(br, instance, extack); } static int br_mep_config_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1]; struct br_cfm_mep_config config; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, attr, br_cfm_mep_config_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC]) { NL_SET_ERR_MSG_MOD(extack, "Missing UNICAST_MAC attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]) { NL_SET_ERR_MSG_MOD(extack, "Missing MDLEVEL attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing MEPID attribute"); return -EINVAL; } memset(&config, 0, sizeof(config)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]); nla_memcpy(&config.unicast_mac.addr, tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC], sizeof(config.unicast_mac.addr)); config.mdlevel = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]); config.mepid = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]); return br_cfm_mep_config_set(br, instance, &config, extack); } static int br_cc_config_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1]; struct br_cfm_cc_config config; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CONFIG_MAX, attr, br_cfm_cc_config_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]) { NL_SET_ERR_MSG_MOD(extack, "Missing ENABLE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]) { NL_SET_ERR_MSG_MOD(extack, "Missing INTERVAL attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]) { NL_SET_ERR_MSG_MOD(extack, "Missing MAID attribute"); return -EINVAL; } memset(&config, 0, sizeof(config)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]); config.enable = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]); config.exp_interval = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]); nla_memcpy(&config.exp_maid.data, tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID], sizeof(config.exp_maid.data)); return br_cfm_cc_config_set(br, instance, &config, extack); } static int br_cc_peer_mep_add_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; u32 instance, peer_mep_id; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, br_cfm_cc_peer_mep_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); return br_cfm_cc_peer_mep_add(br, instance, peer_mep_id, extack); } static int br_cc_peer_mep_remove_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; u32 instance, peer_mep_id; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, br_cfm_cc_peer_mep_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); return br_cfm_cc_peer_mep_remove(br, instance, peer_mep_id, extack); } static int br_cc_rdi_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1]; u32 instance, rdi; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_RDI_MAX, attr, br_cfm_cc_rdi_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]) { NL_SET_ERR_MSG_MOD(extack, "Missing RDI attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); rdi = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]); return br_cfm_cc_rdi_set(br, instance, rdi, extack); } static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1]; struct br_cfm_cc_ccm_tx_info tx_info; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, attr, br_cfm_cc_ccm_tx_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC]) { NL_SET_ERR_MSG_MOD(extack, "Missing DMAC attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]) { NL_SET_ERR_MSG_MOD(extack, "Missing SEQ_NO_UPDATE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]) { NL_SET_ERR_MSG_MOD(extack, "Missing PERIOD attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]) { NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]) { NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV_VALUE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]) { NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]) { NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV_VALUE attribute"); return -EINVAL; } memset(&tx_info, 0, sizeof(tx_info)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); tx_info.seq_no_update = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]); tx_info.period = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]); tx_info.if_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]); tx_info.if_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]); tx_info.port_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]); tx_info.port_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]); return br_cfm_cc_ccm_tx(br, instance, &tx_info, extack); } int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MAX + 1]; int err; /* When this function is called for a port then the br pointer is * invalid, therefor set the br to point correctly */ if (p) br = p->br; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MAX, attr, br_cfm_policy, extack); if (err) return err; if (tb[IFLA_BRIDGE_CFM_MEP_CREATE]) { err = br_mep_create_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CREATE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_MEP_DELETE]) { err = br_mep_delete_parse(br, tb[IFLA_BRIDGE_CFM_MEP_DELETE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_MEP_CONFIG]) { err = br_mep_config_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CONFIG], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_CONFIG]) { err = br_cc_config_parse(br, tb[IFLA_BRIDGE_CFM_CC_CONFIG], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD]) { err = br_cc_peer_mep_add_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE]) { err = br_cc_peer_mep_remove_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_RDI]) { err = br_cc_rdi_parse(br, tb[IFLA_BRIDGE_CFM_CC_RDI], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_CCM_TX]) { err = br_cc_ccm_tx_parse(br, tb[IFLA_BRIDGE_CFM_CC_CCM_TX], extack); if (err) return err; } return 0; } int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; struct nlattr *tb; hlist_for_each_entry_rcu(mep, &br->mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN, mep->create.domain)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION, mep->create.direction)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX, mep->create.ifindex)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, sizeof(mep->config.unicast_mac.addr), mep->config.unicast_mac.addr)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, mep->config.mdlevel)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, mep->config.mepid)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, mep->cc_config.enable)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, mep->cc_config.exp_interval)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, sizeof(mep->cc_config.exp_maid.data), mep->cc_config.exp_maid.data)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_RDI_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_RDI, mep->rdi)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, sizeof(mep->cc_ccm_tx_info.dmac), mep->cc_ccm_tx_info.dmac.addr)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, mep->cc_ccm_tx_info.seq_no_update)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, mep->cc_ccm_tx_info.period)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, mep->cc_ccm_tx_info.if_tlv)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, mep->cc_ccm_tx_info.if_tlv_value)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, mep->cc_ccm_tx_info.port_tlv)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, mep->cc_ccm_tx_info.port_tlv_value)) goto nla_put_failure; nla_nest_end(skb, tb); hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEPID, peer_mep->mepid)) goto nla_put_failure; nla_nest_end(skb, tb); } } return 0; nla_put_failure: nla_nest_cancel(skb, tb); nla_info_failure: return -EMSGSIZE; } int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br, bool getlink) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; struct nlattr *tb; hlist_for_each_entry_rcu(mep, &br->mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, mep->status.opcode_unexp_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, mep->status.version_unexp_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, mep->status.rx_level_low_seen)) goto nla_put_failure; /* Only clear if this is a GETLINK */ if (getlink) { /* Clear all 'seen' indications */ mep->status.opcode_unexp_seen = false; mep->status.version_unexp_seen = false; mep->status.rx_level_low_seen = false; } nla_nest_end(skb, tb); hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, peer_mep->mepid)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, peer_mep->cc_status.ccm_defect)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, peer_mep->cc_status.rdi)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, peer_mep->cc_status.port_tlv_value)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, peer_mep->cc_status.if_tlv_value)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, peer_mep->cc_status.seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, peer_mep->cc_status.tlv_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, peer_mep->cc_status.seq_unexp_seen)) goto nla_put_failure; if (getlink) { /* Only clear if this is a GETLINK */ /* Clear all 'seen' indications */ peer_mep->cc_status.seen = false; peer_mep->cc_status.tlv_seen = false; peer_mep->cc_status.seq_unexp_seen = false; } nla_nest_end(skb, tb); } } return 0; nla_put_failure: nla_nest_cancel(skb, tb); nla_info_failure: return -EMSGSIZE; }
2 19 3 10 5 15 18 19 18 19 1 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 #ifndef DEFUTIL_H #define DEFUTIL_H #include <linux/zutil.h> #define Assert(err, str) #define Trace(dummy) #define Tracev(dummy) #define Tracecv(err, dummy) #define Tracevv(dummy) #define LENGTH_CODES 29 /* number of length codes, not counting the special END_BLOCK code */ #define LITERALS 256 /* number of literal bytes 0..255 */ #define L_CODES (LITERALS+1+LENGTH_CODES) /* number of Literal or Length codes, including the END_BLOCK code */ #define D_CODES 30 /* number of distance codes */ #define BL_CODES 19 /* number of codes used to transfer the bit lengths */ #define HEAP_SIZE (2*L_CODES+1) /* maximum heap size */ #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */ #define INIT_STATE 42 #define BUSY_STATE 113 #define FINISH_STATE 666 /* Stream status */ /* Data structure describing a single value and its code string. */ typedef struct ct_data_s { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl; } ct_data; #define Freq fc.freq #define Code fc.code #define Dad dl.dad #define Len dl.len typedef struct static_tree_desc_s static_tree_desc; typedef struct tree_desc_s { ct_data *dyn_tree; /* the dynamic tree */ int max_code; /* largest code with non zero frequency */ static_tree_desc *stat_desc; /* the corresponding static tree */ } tree_desc; typedef ush Pos; typedef unsigned IPos; /* A Pos is an index in the character window. We use short instead of int to * save space in the various tables. IPos is used only for parameter passing. */ typedef struct deflate_state { z_streamp strm; /* pointer back to this zlib stream */ int status; /* as the name implies */ Byte *pending_buf; /* output still pending */ ulg pending_buf_size; /* size of pending_buf */ Byte *pending_out; /* next pending byte to output to the stream */ int pending; /* nb of bytes in the pending buffer */ int noheader; /* suppress zlib header and adler32 */ Byte data_type; /* UNKNOWN, BINARY or ASCII */ Byte method; /* STORED (for zip only) or DEFLATED */ int last_flush; /* value of flush param for previous deflate call */ /* used by deflate.c: */ uInt w_size; /* LZ77 window size (32K by default) */ uInt w_bits; /* log2(w_size) (8..16) */ uInt w_mask; /* w_size - 1 */ Byte *window; /* Sliding window. Input bytes are read into the second half of the window, * and move to the first half later to keep a dictionary of at least wSize * bytes. With this organization, matches are limited to a distance of * wSize-MAX_MATCH bytes, but this ensures that IO is always * performed with a length multiple of the block size. Also, it limits * the window size to 64K, which is quite useful on MSDOS. * To do: use the user input buffer as sliding window. */ ulg window_size; /* Actual size of window: 2*wSize, except when the user input buffer * is directly used as sliding window. */ Pos *prev; /* Link to older string with same hash index. To limit the size of this * array to 64K, this link is maintained only for the last 32K strings. * An index in this array is thus a window index modulo 32K. */ Pos *head; /* Heads of the hash chains or NIL. */ uInt ins_h; /* hash index of string to be inserted */ uInt hash_size; /* number of elements in hash table */ uInt hash_bits; /* log2(hash_size) */ uInt hash_mask; /* hash_size-1 */ uInt hash_shift; /* Number of bits by which ins_h must be shifted at each input * step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * hash_shift * MIN_MATCH >= hash_bits */ long block_start; /* Window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */ uInt match_length; /* length of best match */ IPos prev_match; /* previous match */ int match_available; /* set if previous match exists */ uInt strstart; /* start of string to insert */ uInt match_start; /* start of matching string */ uInt lookahead; /* number of valid bytes ahead in window */ uInt prev_length; /* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ uInt max_chain_length; /* To speed up deflation, hash chains are never searched beyond this * length. A higher limit improves compression ratio but degrades the * speed. */ uInt max_lazy_match; /* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */ # define max_insert_length max_lazy_match /* Insert new strings in the hash table only if the match length is not * greater than this length. This saves time but degrades compression. * max_insert_length is used only for compression levels <= 3. */ int level; /* compression level (1..9) */ int strategy; /* favor or force Huffman coding*/ uInt good_match; /* Use a faster search when the previous match is longer than this */ int nice_match; /* Stop searching when current match exceeds this */ /* used by trees.c: */ /* Didn't use ct_data typedef below to suppress compiler warning */ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ struct tree_desc_s l_desc; /* desc. for literal tree */ struct tree_desc_s d_desc; /* desc. for distance tree */ struct tree_desc_s bl_desc; /* desc. for bit length tree */ ush bl_count[MAX_BITS+1]; /* number of codes at each bit length for an optimal tree */ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ int heap_len; /* number of elements in the heap */ int heap_max; /* element of largest frequency */ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */ uch depth[2*L_CODES+1]; /* Depth of each subtree used as tie breaker for trees of equal frequency */ uch *l_buf; /* buffer for literals or lengths */ uInt lit_bufsize; /* Size of match buffer for literals/lengths. There are 4 reasons for * limiting lit_bufsize to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input * data is still in the window so we can still emit a stored block even * when input comes from standard input. (This can also be done for * all blocks if lit_bufsize is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * This is applicable only for zip (not gzip or zlib). * - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but have of course the overhead of transmitting * trees more frequently. * - I can't count above 4 */ uInt last_lit; /* running index in l_buf */ ush *d_buf; /* Buffer for distances. To simplify the code, d_buf and l_buf have * the same number of elements. To use different lengths, an extra flag * array would be necessary. */ ulg opt_len; /* bit length of current block with optimal trees */ ulg static_len; /* bit length of current block with static trees */ ulg compressed_len; /* total bit length of compressed file */ uInt matches; /* number of string matches in current block */ int last_eob_len; /* bit length of EOB code for last block */ #ifdef DEBUG_ZLIB ulg bits_sent; /* bit length of the compressed data */ #endif ush bi_buf; /* Output buffer. bits are inserted starting at the bottom (least * significant bits). */ int bi_valid; /* Number of valid bits in bi_buf. All bits above the last valid bit * are always zero. */ } deflate_state; #ifdef CONFIG_ZLIB_DFLTCC #define zlib_deflate_window_memsize(windowBits) \ (2 * (1 << (windowBits)) * sizeof(Byte) + PAGE_SIZE) #else #define zlib_deflate_window_memsize(windowBits) \ (2 * (1 << (windowBits)) * sizeof(Byte)) #endif #define zlib_deflate_prev_memsize(windowBits) \ ((1 << (windowBits)) * sizeof(Pos)) #define zlib_deflate_head_memsize(memLevel) \ ((1 << ((memLevel)+7)) * sizeof(Pos)) #define zlib_deflate_overlay_memsize(memLevel) \ ((1 << ((memLevel)+6)) * (sizeof(ush)+2)) /* Output a byte on the stream. * IN assertion: there is enough room in pending_buf. */ #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) /* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */ #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) /* In order to simplify the code, particularly on 16 bit machines, match * distances are limited to MAX_DIST instead of WSIZE. */ /* in trees.c */ void zlib_tr_init (deflate_state *s); int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc); ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len, int eof); void zlib_tr_align (deflate_state *s); void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len, int eof); void zlib_tr_stored_type_only (deflate_state *); /* =========================================================================== * Output a short LSB first on the stream. * IN assertion: there is enough room in pendingBuf. */ #define put_short(s, w) { \ put_byte(s, (uch)((w) & 0xff)); \ put_byte(s, (uch)((ush)(w) >> 8)); \ } /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */ static inline unsigned bi_reverse( unsigned code, /* the value to invert */ int len /* its bit length */ ) { register unsigned res = 0; do { res |= code & 1; code >>= 1, res <<= 1; } while (--len > 0); return res >> 1; } /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. */ static inline void bi_flush(deflate_state *s) { if (s->bi_valid == 16) { put_short(s, s->bi_buf); s->bi_buf = 0; s->bi_valid = 0; } else if (s->bi_valid >= 8) { put_byte(s, (Byte)s->bi_buf); s->bi_buf >>= 8; s->bi_valid -= 8; } } /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ static inline void bi_windup(deflate_state *s) { if (s->bi_valid > 8) { put_short(s, s->bi_buf); } else if (s->bi_valid > 0) { put_byte(s, (Byte)s->bi_buf); } s->bi_buf = 0; s->bi_valid = 0; #ifdef DEBUG_ZLIB s->bits_sent = (s->bits_sent+7) & ~7; #endif } typedef enum { need_more, /* block not completed, need more input or more output */ block_done, /* block flush performed */ finish_started, /* finish started, need only more output at next deflate */ finish_done /* finish done, accept no more input or output */ } block_state; #define Buf_size (8 * 2*sizeof(char)) /* Number of bits used within bi_buf. (bi_buf might be implemented on * more than 16 bits on some systems.) */ /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ #ifdef DEBUG_ZLIB static void send_bits (deflate_state *s, int value, int length); static void send_bits( deflate_state *s, int value, /* value to send */ int length /* number of bits */ ) { Tracevv((stderr," l %2d v %4x ", length, value)); Assert(length > 0 && length <= 15, "invalid length"); s->bits_sent += (ulg)length; /* If not enough room in bi_buf, use (valid) bits from bi_buf and * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) * unused bits in value. */ if (s->bi_valid > (int)Buf_size - length) { s->bi_buf |= (value << s->bi_valid); put_short(s, s->bi_buf); s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); s->bi_valid += length - Buf_size; } else { s->bi_buf |= value << s->bi_valid; s->bi_valid += length; } } #else /* !DEBUG_ZLIB */ #define send_bits(s, value, length) \ { int len = length;\ if (s->bi_valid > (int)Buf_size - len) {\ int val = value;\ s->bi_buf |= (val << s->bi_valid);\ put_short(s, s->bi_buf);\ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ s->bi_valid += len - Buf_size;\ } else {\ s->bi_buf |= (value) << s->bi_valid;\ s->bi_valid += len;\ }\ } #endif /* DEBUG_ZLIB */ static inline void zlib_tr_send_bits( deflate_state *s, int value, int length ) { send_bits(s, value, length); } /* ========================================================================= * Flush as much pending output as possible. All deflate() output goes * through this function so some applications may wish to modify it * to avoid allocating a large strm->next_out buffer and copying into it. * (See also read_buf()). */ static inline void flush_pending( z_streamp strm ) { unsigned len; deflate_state *s = (deflate_state *) strm->state; bi_flush(s); len = s->pending; if (len > strm->avail_out) len = strm->avail_out; if (len == 0) return; if (strm->next_out != NULL) { memcpy(strm->next_out, s->pending_out, len); strm->next_out += len; } s->pending_out += len; strm->total_out += len; strm->avail_out -= len; s->pending -= len; if (s->pending == 0) { s->pending_out = s->pending_buf; } } #endif /* DEFUTIL_H */
19 3 16 52 34 10 1 5 1 1 1 2 1 2 2 2 181 184 14 65 65 65 65 64 65 65 65 68 1 67 4 1 1 1 1 34 11 1 1 1 1 1 1 1 16 4 4 43 30 13 43 2 2 1 5 2 1 1 22 2 2 2 2 1 4 16 18 19 65 64 62 2 66 1 65 65 34 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 /* * Copyright (c) 2006-2009 Red Hat Inc. * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * * DRM framebuffer helper functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Authors: * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/console.h> #include <linux/pci.h> #include <linux/sysrq.h> #include <linux/vga_switcheroo.h> #include <drm/drm_atomic.h> #include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_internal.h" #include "drm_crtc_internal.h" static bool drm_fbdev_emulation = true; module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600); MODULE_PARM_DESC(fbdev_emulation, "Enable legacy fbdev emulation [default=true]"); static int drm_fbdev_overalloc = CONFIG_DRM_FBDEV_OVERALLOC; module_param(drm_fbdev_overalloc, int, 0444); MODULE_PARM_DESC(drm_fbdev_overalloc, "Overallocation of the fbdev buffer (%) [default=" __MODULE_STRING(CONFIG_DRM_FBDEV_OVERALLOC) "]"); /* * In order to keep user-space compatibility, we want in certain use-cases * to keep leaking the fbdev physical address to the user-space program * handling the fbdev buffer. * * This is a bad habit, essentially kept to support closed-source OpenGL * drivers that should really be moved into open-source upstream projects * instead of using legacy physical addresses in user space to communicate * with other out-of-tree kernel modules. * * This module_param *should* be removed as soon as possible and be * considered as a broken and legacy behaviour from a modern fbdev device. */ static bool drm_leak_fbdev_smem; #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) module_param_unsafe(drm_leak_fbdev_smem, bool, 0600); MODULE_PARM_DESC(drm_leak_fbdev_smem, "Allow unsafe leaking fbdev physical smem address [default=false]"); #endif static LIST_HEAD(kernel_fb_helper_list); static DEFINE_MUTEX(kernel_fb_helper_lock); /** * DOC: fbdev helpers * * The fb helper functions are useful to provide an fbdev on top of a drm kernel * mode setting driver. They can be used mostly independently from the crtc * helper functions used by many drivers to implement the kernel mode setting * interfaces. Drivers that use one of the shared memory managers, TTM, SHMEM, * DMA, should instead use the corresponding fbdev emulation. * * For suspend/resume consider using drm_mode_config_helper_suspend() and * drm_mode_config_helper_resume() which takes care of fbdev as well. * * All other functions exported by the fb helper library can be used to * implement the fbdev driver interface by the driver. * * It is possible, though perhaps somewhat tricky, to implement race-free * hotplug detection using the fbdev helpers. The drm_fb_helper_prepare() * helper must be called first to initialize the minimum required to make * hotplug detection work. Drivers also need to make sure to properly set up * the &drm_mode_config.funcs member. After calling drm_kms_helper_poll_init() * it is safe to enable interrupts and start processing hotplug events. At the * same time, drivers should initialize all modeset objects such as CRTCs, * encoders and connectors. To finish up the fbdev helper initialization, the * drm_fb_helper_init() function is called. To probe for all attached displays * and set up an initial configuration using the detected hardware, drivers * should call drm_fb_helper_initial_config(). * * If &drm_framebuffer_funcs.dirty is set, the * drm_fb_helper_{cfb,sys}_{write,fillrect,copyarea,imageblit} functions will * accumulate changes and schedule &drm_fb_helper.dirty_work to run right * away. This worker then calls the dirty() function ensuring that it will * always run in process context since the fb_*() function could be running in * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io * callback it will also schedule dirty_work with the damage collected from the * mmap page writes. */ static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc) { uint16_t *r_base, *g_base, *b_base; if (crtc->funcs->gamma_set == NULL) return; r_base = crtc->gamma_store; g_base = r_base + crtc->gamma_size; b_base = g_base + crtc->gamma_size; crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size, NULL); } /** * drm_fb_helper_debug_enter - implementation for &fb_ops.fb_debug_enter * @info: fbdev registered by the helper */ int drm_fb_helper_debug_enter(struct fb_info *info) { struct drm_fb_helper *helper = info->par; const struct drm_crtc_helper_funcs *funcs; struct drm_mode_set *mode_set; list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { mutex_lock(&helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &helper->client) { if (!mode_set->crtc->enabled) continue; funcs = mode_set->crtc->helper_private; if (funcs->mode_set_base_atomic == NULL) continue; if (drm_drv_uses_atomic_modeset(mode_set->crtc->dev)) continue; funcs->mode_set_base_atomic(mode_set->crtc, mode_set->fb, mode_set->x, mode_set->y, ENTER_ATOMIC_MODE_SET); } mutex_unlock(&helper->client.modeset_mutex); } return 0; } EXPORT_SYMBOL(drm_fb_helper_debug_enter); /** * drm_fb_helper_debug_leave - implementation for &fb_ops.fb_debug_leave * @info: fbdev registered by the helper */ int drm_fb_helper_debug_leave(struct fb_info *info) { struct drm_fb_helper *helper = info->par; struct drm_client_dev *client = &helper->client; struct drm_device *dev = helper->dev; struct drm_crtc *crtc; const struct drm_crtc_helper_funcs *funcs; struct drm_mode_set *mode_set; struct drm_framebuffer *fb; mutex_lock(&client->modeset_mutex); drm_client_for_each_modeset(mode_set, client) { crtc = mode_set->crtc; if (drm_drv_uses_atomic_modeset(crtc->dev)) continue; funcs = crtc->helper_private; fb = crtc->primary->fb; if (!crtc->enabled) continue; if (!fb) { drm_err(dev, "no fb to restore?\n"); continue; } if (funcs->mode_set_base_atomic == NULL) continue; drm_fb_helper_restore_lut_atomic(mode_set->crtc); funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x, crtc->y, LEAVE_ATOMIC_MODE_SET); } mutex_unlock(&client->modeset_mutex); return 0; } EXPORT_SYMBOL(drm_fb_helper_debug_leave); static int __drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper, bool force) { bool do_delayed; int ret; if (!drm_fbdev_emulation || !fb_helper) return -ENODEV; if (READ_ONCE(fb_helper->deferred_setup)) return 0; mutex_lock(&fb_helper->lock); if (force) { /* * Yes this is the _locked version which expects the master lock * to be held. But for forced restores we're intentionally * racing here, see drm_fb_helper_set_par(). */ ret = drm_client_modeset_commit_locked(&fb_helper->client); } else { ret = drm_client_modeset_commit(&fb_helper->client); } do_delayed = fb_helper->delayed_hotplug; if (do_delayed) fb_helper->delayed_hotplug = false; mutex_unlock(&fb_helper->lock); if (do_delayed) drm_fb_helper_hotplug_event(fb_helper); return ret; } /** * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration * @fb_helper: driver-allocated fbdev helper, can be NULL * * This helper should be called from fbdev emulation's &drm_client_funcs.restore * callback. It ensures that the user isn't greeted with a black screen when the * userspace compositor releases the display device. * * Returns: * 0 on success, or a negative errno code otherwise. */ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) { return __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, false); } EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked); #ifdef CONFIG_MAGIC_SYSRQ /* emergency restore, don't bother with error reporting */ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) { struct drm_fb_helper *helper; mutex_lock(&kernel_fb_helper_lock); list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { struct drm_device *dev = helper->dev; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) continue; mutex_lock(&helper->lock); drm_client_modeset_commit_locked(&helper->client); mutex_unlock(&helper->lock); } mutex_unlock(&kernel_fb_helper_lock); } static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); static void drm_fb_helper_sysrq(u8 dummy1) { schedule_work(&drm_fb_helper_restore_work); } static const struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { .handler = drm_fb_helper_sysrq, .help_msg = "force-fb(v)", .action_msg = "Restore framebuffer console", }; #else static const struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { }; #endif static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) { struct drm_fb_helper *fb_helper = info->par; mutex_lock(&fb_helper->lock); drm_client_modeset_dpms(&fb_helper->client, dpms_mode); mutex_unlock(&fb_helper->lock); } /** * drm_fb_helper_blank - implementation for &fb_ops.fb_blank * @blank: desired blanking state * @info: fbdev registered by the helper */ int drm_fb_helper_blank(int blank, struct fb_info *info) { if (oops_in_progress) return -EBUSY; switch (blank) { /* Display: On; HSync: On, VSync: On */ case FB_BLANK_UNBLANK: drm_fb_helper_dpms(info, DRM_MODE_DPMS_ON); break; /* Display: Off; HSync: On, VSync: On */ case FB_BLANK_NORMAL: drm_fb_helper_dpms(info, DRM_MODE_DPMS_STANDBY); break; /* Display: Off; HSync: Off, VSync: On */ case FB_BLANK_HSYNC_SUSPEND: drm_fb_helper_dpms(info, DRM_MODE_DPMS_STANDBY); break; /* Display: Off; HSync: On, VSync: Off */ case FB_BLANK_VSYNC_SUSPEND: drm_fb_helper_dpms(info, DRM_MODE_DPMS_SUSPEND); break; /* Display: Off; HSync: Off, VSync: Off */ case FB_BLANK_POWERDOWN: drm_fb_helper_dpms(info, DRM_MODE_DPMS_OFF); break; } return 0; } EXPORT_SYMBOL(drm_fb_helper_blank); static void drm_fb_helper_resume_worker(struct work_struct *work) { struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper, resume_work); console_lock(); fb_set_suspend(helper->info, 0); console_unlock(); } static void drm_fb_helper_fb_dirty(struct drm_fb_helper *helper) { struct drm_device *dev = helper->dev; struct drm_clip_rect *clip = &helper->damage_clip; struct drm_clip_rect clip_copy; unsigned long flags; int ret; if (drm_WARN_ON_ONCE(dev, !helper->funcs->fb_dirty)) return; spin_lock_irqsave(&helper->damage_lock, flags); clip_copy = *clip; clip->x1 = clip->y1 = ~0; clip->x2 = clip->y2 = 0; spin_unlock_irqrestore(&helper->damage_lock, flags); ret = helper->funcs->fb_dirty(helper, &clip_copy); if (ret) goto err; return; err: /* * Restore damage clip rectangle on errors. The next run * of the damage worker will perform the update. */ spin_lock_irqsave(&helper->damage_lock, flags); clip->x1 = min_t(u32, clip->x1, clip_copy.x1); clip->y1 = min_t(u32, clip->y1, clip_copy.y1); clip->x2 = max_t(u32, clip->x2, clip_copy.x2); clip->y2 = max_t(u32, clip->y2, clip_copy.y2); spin_unlock_irqrestore(&helper->damage_lock, flags); } static void drm_fb_helper_damage_work(struct work_struct *work) { struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper, damage_work); drm_fb_helper_fb_dirty(helper); } /** * drm_fb_helper_prepare - setup a drm_fb_helper structure * @dev: DRM device * @helper: driver-allocated fbdev helper structure to set up * @preferred_bpp: Preferred bits per pixel for the device. * @funcs: pointer to structure of functions associate with this helper * * Sets up the bare minimum to make the framebuffer helper usable. This is * useful to implement race-free initialization of the polling helpers. */ void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, unsigned int preferred_bpp, const struct drm_fb_helper_funcs *funcs) { /* * Pick a preferred bpp of 32 if no value has been given. This * will select XRGB8888 for the framebuffer formats. All drivers * have to support XRGB8888 for backwards compatibility with legacy * userspace, so it's the safe choice here. * * TODO: Replace struct drm_mode_config.preferred_depth and this * bpp value with a preferred format that is given as struct * drm_format_info. Then derive all other values from the * format. */ if (!preferred_bpp) preferred_bpp = 32; INIT_LIST_HEAD(&helper->kernel_fb_list); spin_lock_init(&helper->damage_lock); INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker); INIT_WORK(&helper->damage_work, drm_fb_helper_damage_work); helper->damage_clip.x1 = helper->damage_clip.y1 = ~0; mutex_init(&helper->lock); helper->funcs = funcs; helper->dev = dev; helper->preferred_bpp = preferred_bpp; } EXPORT_SYMBOL(drm_fb_helper_prepare); /** * drm_fb_helper_unprepare - clean up a drm_fb_helper structure * @fb_helper: driver-allocated fbdev helper structure to set up * * Cleans up the framebuffer helper. Inverse of drm_fb_helper_prepare(). */ void drm_fb_helper_unprepare(struct drm_fb_helper *fb_helper) { mutex_destroy(&fb_helper->lock); } EXPORT_SYMBOL(drm_fb_helper_unprepare); /** * drm_fb_helper_init - initialize a &struct drm_fb_helper * @dev: drm device * @fb_helper: driver-allocated fbdev helper structure to initialize * * This allocates the structures for the fbdev helper with the given limits. * Note that this won't yet touch the hardware (through the driver interfaces) * nor register the fbdev. This is only done in drm_fb_helper_initial_config() * to allow driver writes more control over the exact init sequence. * * Drivers must call drm_fb_helper_prepare() before calling this function. * * RETURNS: * Zero if everything went ok, nonzero otherwise. */ int drm_fb_helper_init(struct drm_device *dev, struct drm_fb_helper *fb_helper) { int ret; /* * If this is not the generic fbdev client, initialize a drm_client * without callbacks so we can use the modesets. */ if (!fb_helper->client.funcs) { ret = drm_client_init(dev, &fb_helper->client, "drm_fb_helper", NULL); if (ret) return ret; } dev->fb_helper = fb_helper; return 0; } EXPORT_SYMBOL(drm_fb_helper_init); /** * drm_fb_helper_alloc_info - allocate fb_info and some of its members * @fb_helper: driver-allocated fbdev helper * * A helper to alloc fb_info and the member cmap. Called by the driver * within the struct &drm_driver.fbdev_probe callback function. Drivers do * not need to release the allocated fb_info structure themselves, this is * automatically done when calling drm_fb_helper_fini(). * * RETURNS: * fb_info pointer if things went okay, pointer containing error code * otherwise */ struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper) { struct device *dev = fb_helper->dev->dev; struct fb_info *info; int ret; info = framebuffer_alloc(0, dev); if (!info) return ERR_PTR(-ENOMEM); if (!drm_leak_fbdev_smem) info->flags |= FBINFO_HIDE_SMEM_START; ret = fb_alloc_cmap(&info->cmap, 256, 0); if (ret) goto err_release; fb_helper->info = info; info->skip_vt_switch = true; info->skip_panic = drm_panic_is_enabled(fb_helper->dev); return info; err_release: framebuffer_release(info); return ERR_PTR(ret); } EXPORT_SYMBOL(drm_fb_helper_alloc_info); /** * drm_fb_helper_release_info - release fb_info and its members * @fb_helper: driver-allocated fbdev helper * * A helper to release fb_info and the member cmap. Drivers do not * need to release the allocated fb_info structure themselves, this is * automatically done when calling drm_fb_helper_fini(). */ void drm_fb_helper_release_info(struct drm_fb_helper *fb_helper) { struct fb_info *info = fb_helper->info; if (!info) return; fb_helper->info = NULL; if (info->cmap.len) fb_dealloc_cmap(&info->cmap); framebuffer_release(info); } EXPORT_SYMBOL(drm_fb_helper_release_info); /** * drm_fb_helper_unregister_info - unregister fb_info framebuffer device * @fb_helper: driver-allocated fbdev helper, must not be NULL * * A wrapper around unregister_framebuffer, to release the fb_info * framebuffer device. This must be called before releasing all resources for * @fb_helper by calling drm_fb_helper_fini(). */ void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper) { struct fb_info *info = fb_helper->info; struct device *dev = info->device; if (dev_is_pci(dev)) vga_switcheroo_client_fb_set(to_pci_dev(dev), NULL); unregister_framebuffer(fb_helper->info); } EXPORT_SYMBOL(drm_fb_helper_unregister_info); /** * drm_fb_helper_fini - finialize a &struct drm_fb_helper * @fb_helper: driver-allocated fbdev helper, can be NULL * * This cleans up all remaining resources associated with @fb_helper. */ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) { if (!fb_helper) return; fb_helper->dev->fb_helper = NULL; if (!drm_fbdev_emulation) return; cancel_work_sync(&fb_helper->resume_work); cancel_work_sync(&fb_helper->damage_work); drm_fb_helper_release_info(fb_helper); mutex_lock(&kernel_fb_helper_lock); if (!list_empty(&fb_helper->kernel_fb_list)) { list_del(&fb_helper->kernel_fb_list); if (list_empty(&kernel_fb_helper_list)) unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); } mutex_unlock(&kernel_fb_helper_lock); if (!fb_helper->client.funcs) drm_client_release(&fb_helper->client); } EXPORT_SYMBOL(drm_fb_helper_fini); static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { struct drm_clip_rect *clip = &helper->damage_clip; unsigned long flags; spin_lock_irqsave(&helper->damage_lock, flags); clip->x1 = min_t(u32, clip->x1, x); clip->y1 = min_t(u32, clip->y1, y); clip->x2 = max_t(u32, clip->x2, x + width); clip->y2 = max_t(u32, clip->y2, y + height); spin_unlock_irqrestore(&helper->damage_lock, flags); } static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { /* * This function may be invoked by panic() to flush the frame * buffer, where all CPUs except the panic CPU are stopped. * During the following schedule_work(), the panic CPU needs * the worker_pool lock, which might be held by a stopped CPU, * causing schedule_work() and panic() to block. Return early on * oops_in_progress to prevent this blocking. */ if (oops_in_progress) return; drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); } /* * Convert memory region into area of scanlines and pixels per * scanline. The parameters off and len must not reach beyond * the end of the framebuffer. */ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, size_t len, struct drm_rect *clip) { u32 line_length = info->fix.line_length; u32 fb_height = info->var.yres; off_t end = off + len; u32 x1 = 0; u32 y1 = off / line_length; u32 x2 = info->var.xres; u32 y2 = DIV_ROUND_UP(end, line_length); /* Don't allow any of them beyond the bottom bound of display area */ if (y1 > fb_height) y1 = fb_height; if (y2 > fb_height) y2 = fb_height; if ((y2 - y1) == 1) { /* * We've only written to a single scanline. Try to reduce * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; off_t bit_end = (end % line_length) * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel); } drm_rect_init(clip, x1, y1, x2 - x1, y2 - y1); } /* Don't use in new code. */ void drm_fb_helper_damage_range(struct fb_info *info, off_t off, size_t len) { struct drm_fb_helper *fb_helper = info->par; struct drm_rect damage_area; drm_fb_helper_memory_range_to_clip(info, off, len, &damage_area); drm_fb_helper_damage(fb_helper, damage_area.x1, damage_area.y1, drm_rect_width(&damage_area), drm_rect_height(&damage_area)); } EXPORT_SYMBOL(drm_fb_helper_damage_range); /* Don't use in new code. */ void drm_fb_helper_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { struct drm_fb_helper *fb_helper = info->par; drm_fb_helper_damage(fb_helper, x, y, width, height); } EXPORT_SYMBOL(drm_fb_helper_damage_area); #ifdef CONFIG_FB_DEFERRED_IO /** * drm_fb_helper_deferred_io() - fbdev deferred_io callback function * @info: fb_info struct pointer * @pagereflist: list of mmap framebuffer pages that have to be flushed * * This function is used as the &fb_deferred_io.deferred_io * callback function for flushing the fbdev mmap writes. */ void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist) { struct drm_fb_helper *helper = info->par; unsigned long start, end, min_off, max_off, total_size; struct fb_deferred_io_pageref *pageref; struct drm_rect damage_area; min_off = ULONG_MAX; max_off = 0; list_for_each_entry(pageref, pagereflist, list) { start = pageref->offset; end = start + PAGE_SIZE; min_off = min(min_off, start); max_off = max(max_off, end); } /* * As we can only track pages, we might reach beyond the end * of the screen and account for non-existing scanlines. Hence, * keep the covered memory area within the screen buffer. */ if (info->screen_size) total_size = info->screen_size; else total_size = info->fix.smem_len; max_off = min(max_off, total_size); if (min_off < max_off) { drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off, &damage_area); drm_fb_helper_damage(helper, damage_area.x1, damage_area.y1, drm_rect_width(&damage_area), drm_rect_height(&damage_area)); } } EXPORT_SYMBOL(drm_fb_helper_deferred_io); #endif /** * drm_fb_helper_set_suspend - wrapper around fb_set_suspend * @fb_helper: driver-allocated fbdev helper, can be NULL * @suspend: whether to suspend or resume * * A wrapper around fb_set_suspend implemented by fbdev core. * Use drm_fb_helper_set_suspend_unlocked() if you don't need to take * the lock yourself */ void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend) { if (fb_helper && fb_helper->info) fb_set_suspend(fb_helper->info, suspend); } EXPORT_SYMBOL(drm_fb_helper_set_suspend); /** * drm_fb_helper_set_suspend_unlocked - wrapper around fb_set_suspend that also * takes the console lock * @fb_helper: driver-allocated fbdev helper, can be NULL * @suspend: whether to suspend or resume * * A wrapper around fb_set_suspend() that takes the console lock. If the lock * isn't available on resume, a worker is tasked with waiting for the lock * to become available. The console lock can be pretty contented on resume * due to all the printk activity. * * This function can be called multiple times with the same state since * &fb_info.state is checked to see if fbdev is running or not before locking. * * Use drm_fb_helper_set_suspend() if you need to take the lock yourself. */ void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper, bool suspend) { if (!fb_helper || !fb_helper->info) return; /* make sure there's no pending/ongoing resume */ flush_work(&fb_helper->resume_work); if (suspend) { if (fb_helper->info->state != FBINFO_STATE_RUNNING) return; console_lock(); } else { if (fb_helper->info->state == FBINFO_STATE_RUNNING) return; if (!console_trylock()) { schedule_work(&fb_helper->resume_work); return; } } fb_set_suspend(fb_helper->info, suspend); console_unlock(); } EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked); static int setcmap_pseudo_palette(struct fb_cmap *cmap, struct fb_info *info) { u32 *palette = (u32 *)info->pseudo_palette; int i; if (cmap->start + cmap->len > 16) return -EINVAL; for (i = 0; i < cmap->len; ++i) { u16 red = cmap->red[i]; u16 green = cmap->green[i]; u16 blue = cmap->blue[i]; u32 value; red >>= 16 - info->var.red.length; green >>= 16 - info->var.green.length; blue >>= 16 - info->var.blue.length; value = (red << info->var.red.offset) | (green << info->var.green.offset) | (blue << info->var.blue.offset); if (info->var.transp.length > 0) { u32 mask = (1 << info->var.transp.length) - 1; mask <<= info->var.transp.offset; value |= mask; } palette[cmap->start + i] = value; } return 0; } static int setcmap_legacy(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_mode_set *modeset; struct drm_crtc *crtc; u16 *r, *g, *b; int ret = 0; drm_modeset_lock_all(fb_helper->dev); drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; if (!crtc->funcs->gamma_set || !crtc->gamma_size) { ret = -EINVAL; goto out; } if (cmap->start + cmap->len > crtc->gamma_size) { ret = -EINVAL; goto out; } r = crtc->gamma_store; g = r + crtc->gamma_size; b = g + crtc->gamma_size; memcpy(r + cmap->start, cmap->red, cmap->len * sizeof(*r)); memcpy(g + cmap->start, cmap->green, cmap->len * sizeof(*g)); memcpy(b + cmap->start, cmap->blue, cmap->len * sizeof(*b)); ret = crtc->funcs->gamma_set(crtc, r, g, b, crtc->gamma_size, NULL); if (ret) goto out; } out: drm_modeset_unlock_all(fb_helper->dev); return ret; } static struct drm_property_blob *setcmap_new_gamma_lut(struct drm_crtc *crtc, struct fb_cmap *cmap) { struct drm_device *dev = crtc->dev; struct drm_property_blob *gamma_lut; struct drm_color_lut *lut; int size = crtc->gamma_size; int i; if (!size || cmap->start + cmap->len > size) return ERR_PTR(-EINVAL); gamma_lut = drm_property_create_blob(dev, sizeof(*lut) * size, NULL); if (IS_ERR(gamma_lut)) return gamma_lut; lut = gamma_lut->data; if (cmap->start || cmap->len != size) { u16 *r = crtc->gamma_store; u16 *g = r + crtc->gamma_size; u16 *b = g + crtc->gamma_size; for (i = 0; i < cmap->start; i++) { lut[i].red = r[i]; lut[i].green = g[i]; lut[i].blue = b[i]; } for (i = cmap->start + cmap->len; i < size; i++) { lut[i].red = r[i]; lut[i].green = g[i]; lut[i].blue = b[i]; } } for (i = 0; i < cmap->len; i++) { lut[cmap->start + i].red = cmap->red[i]; lut[cmap->start + i].green = cmap->green[i]; lut[cmap->start + i].blue = cmap->blue[i]; } return gamma_lut; } static int setcmap_atomic(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; struct drm_property_blob *gamma_lut = NULL; struct drm_modeset_acquire_ctx ctx; struct drm_crtc_state *crtc_state; struct drm_atomic_state *state; struct drm_mode_set *modeset; struct drm_crtc *crtc; u16 *r, *g, *b; bool replaced; int ret = 0; drm_modeset_acquire_init(&ctx, 0); state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; goto out_ctx; } state->acquire_ctx = &ctx; retry: drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; if (!gamma_lut) gamma_lut = setcmap_new_gamma_lut(crtc, cmap); if (IS_ERR(gamma_lut)) { ret = PTR_ERR(gamma_lut); gamma_lut = NULL; goto out_state; } crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto out_state; } /* * FIXME: This always uses gamma_lut. Some HW have only * degamma_lut, in which case we should reset gamma_lut and set * degamma_lut. See drm_crtc_legacy_gamma_set(). */ replaced = drm_property_replace_blob(&crtc_state->degamma_lut, NULL); replaced |= drm_property_replace_blob(&crtc_state->ctm, NULL); replaced |= drm_property_replace_blob(&crtc_state->gamma_lut, gamma_lut); crtc_state->color_mgmt_changed |= replaced; } ret = drm_atomic_commit(state); if (ret) goto out_state; drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; r = crtc->gamma_store; g = r + crtc->gamma_size; b = g + crtc->gamma_size; memcpy(r + cmap->start, cmap->red, cmap->len * sizeof(*r)); memcpy(g + cmap->start, cmap->green, cmap->len * sizeof(*g)); memcpy(b + cmap->start, cmap->blue, cmap->len * sizeof(*b)); } out_state: if (ret == -EDEADLK) goto backoff; drm_property_blob_put(gamma_lut); drm_atomic_state_put(state); out_ctx: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; backoff: drm_atomic_state_clear(state); drm_modeset_backoff(&ctx); goto retry; } /** * drm_fb_helper_setcmap - implementation for &fb_ops.fb_setcmap * @cmap: cmap to set * @info: fbdev registered by the helper */ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; int ret; if (oops_in_progress) return -EBUSY; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } mutex_lock(&fb_helper->client.modeset_mutex); if (info->fix.visual == FB_VISUAL_TRUECOLOR) ret = setcmap_pseudo_palette(cmap, info); else if (drm_drv_uses_atomic_modeset(fb_helper->dev)) ret = setcmap_atomic(cmap, info); else ret = setcmap_legacy(cmap, info); mutex_unlock(&fb_helper->client.modeset_mutex); drm_master_internal_release(dev); unlock: mutex_unlock(&fb_helper->lock); return ret; } EXPORT_SYMBOL(drm_fb_helper_setcmap); /** * drm_fb_helper_ioctl - legacy ioctl implementation * @info: fbdev registered by the helper * @cmd: ioctl command * @arg: ioctl argument * * A helper to implement the standard fbdev ioctl. Only * FBIO_WAITFORVSYNC is implemented for now. */ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; struct drm_crtc *crtc; int ret = 0; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } switch (cmd) { case FBIO_WAITFORVSYNC: /* * Only consider the first CRTC. * * This ioctl is supposed to take the CRTC number as * an argument, but in fbdev times, what that number * was supposed to be was quite unclear, different * drivers were passing that argument differently * (some by reference, some by value), and most of the * userspace applications were just hardcoding 0 as an * argument. * * The first CRTC should be the integrated panel on * most drivers, so this is the best choice we can * make. If we're not smart enough here, one should * just consider switch the userspace to KMS. */ crtc = fb_helper->client.modesets[0].crtc; /* * Only wait for a vblank event if the CRTC is * enabled, otherwise just don't do anythintg, * not even report an error. */ ret = drm_crtc_vblank_get(crtc); if (!ret) { drm_crtc_wait_one_vblank(crtc); drm_crtc_vblank_put(crtc); } ret = 0; break; default: ret = -ENOTTY; } drm_master_internal_release(dev); unlock: mutex_unlock(&fb_helper->lock); return ret; } EXPORT_SYMBOL(drm_fb_helper_ioctl); static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, const struct fb_var_screeninfo *var_2) { return var_1->bits_per_pixel == var_2->bits_per_pixel && var_1->grayscale == var_2->grayscale && var_1->red.offset == var_2->red.offset && var_1->red.length == var_2->red.length && var_1->red.msb_right == var_2->red.msb_right && var_1->green.offset == var_2->green.offset && var_1->green.length == var_2->green.length && var_1->green.msb_right == var_2->green.msb_right && var_1->blue.offset == var_2->blue.offset && var_1->blue.length == var_2->blue.length && var_1->blue.msb_right == var_2->blue.msb_right && var_1->transp.offset == var_2->transp.offset && var_1->transp.length == var_2->transp.length && var_1->transp.msb_right == var_2->transp.msb_right; } static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, const struct drm_format_info *format) { u8 depth = format->depth; if (format->is_color_indexed) { var->red.offset = 0; var->green.offset = 0; var->blue.offset = 0; var->red.length = depth; var->green.length = depth; var->blue.length = depth; var->transp.offset = 0; var->transp.length = 0; return; } switch (depth) { case 15: var->red.offset = 10; var->green.offset = 5; var->blue.offset = 0; var->red.length = 5; var->green.length = 5; var->blue.length = 5; var->transp.offset = 15; var->transp.length = 1; break; case 16: var->red.offset = 11; var->green.offset = 5; var->blue.offset = 0; var->red.length = 5; var->green.length = 6; var->blue.length = 5; var->transp.offset = 0; break; case 24: var->red.offset = 16; var->green.offset = 8; var->blue.offset = 0; var->red.length = 8; var->green.length = 8; var->blue.length = 8; var->transp.offset = 0; var->transp.length = 0; break; case 32: var->red.offset = 16; var->green.offset = 8; var->blue.offset = 0; var->red.length = 8; var->green.length = 8; var->blue.length = 8; var->transp.offset = 24; var->transp.length = 8; break; default: break; } } static void __fill_var(struct fb_var_screeninfo *var, struct fb_info *info, struct drm_framebuffer *fb) { int i; var->xres_virtual = fb->width; var->yres_virtual = fb->height; var->accel_flags = 0; var->bits_per_pixel = drm_format_info_bpp(fb->format, 0); var->height = info->var.height; var->width = info->var.width; var->left_margin = var->right_margin = 0; var->upper_margin = var->lower_margin = 0; var->hsync_len = var->vsync_len = 0; var->sync = var->vmode = 0; var->rotate = 0; var->colorspace = 0; for (i = 0; i < 4; i++) var->reserved[i] = 0; } /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check * @info: fbdev registered by the helper */ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; const struct drm_format_info *format = fb->format; struct drm_device *dev = fb_helper->dev; unsigned int bpp; if (in_dbg_master()) return -EINVAL; if (var->pixclock != 0) { drm_dbg_kms(dev, "fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n"); var->pixclock = 0; } switch (format->format) { case DRM_FORMAT_C1: case DRM_FORMAT_C2: case DRM_FORMAT_C4: /* supported format with sub-byte pixels */ break; default: if ((drm_format_info_block_width(format, 0) > 1) || (drm_format_info_block_height(format, 0) > 1)) return -EINVAL; break; } /* * Changes struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. */ bpp = drm_format_info_bpp(format, 0); if (var->bits_per_pixel > bpp || var->xres > fb->width || var->yres > fb->height || var->xres_virtual > fb->width || var->yres_virtual > fb->height) { drm_dbg_kms(dev, "fb requested width/height/bpp can't fit in current fb " "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n", var->xres, var->yres, var->bits_per_pixel, var->xres_virtual, var->yres_virtual, fb->width, fb->height, bpp); return -EINVAL; } __fill_var(var, info, fb); /* * fb_pan_display() validates this, but fb_set_par() doesn't and just * falls over. Note that __fill_var above adjusts y/res_virtual. */ if (var->yoffset > var->yres_virtual - var->yres || var->xoffset > var->xres_virtual - var->xres) return -EINVAL; /* We neither support grayscale nor FOURCC (also stored in here). */ if (var->grayscale > 0) return -EINVAL; if (var->nonstd) return -EINVAL; /* * Workaround for SDL 1.2, which is known to be setting all pixel format * fields values to zero in some cases. We treat this situation as a * kind of "use some reasonable autodetected values". */ if (!var->red.offset && !var->green.offset && !var->blue.offset && !var->transp.offset && !var->red.length && !var->green.length && !var->blue.length && !var->transp.length && !var->red.msb_right && !var->green.msb_right && !var->blue.msb_right && !var->transp.msb_right) { drm_fb_helper_fill_pixel_fmt(var, format); } /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. */ if (!drm_fb_pixel_format_equal(var, &info->var)) { drm_dbg_kms(dev, "fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); /** * drm_fb_helper_set_par - implementation for &fb_ops.fb_set_par * @info: fbdev registered by the helper * * This will let fbcon do the mode init and is called at initialization time by * the fbdev core when registering the driver, and later on through the hotplug * callback. */ int drm_fb_helper_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct fb_var_screeninfo *var = &info->var; bool force; if (oops_in_progress) return -EBUSY; /* * Normally we want to make sure that a kms master takes precedence over * fbdev, to avoid fbdev flickering and occasionally stealing the * display status. But Xorg first sets the vt back to text mode using * the KDSET IOCTL with KD_TEXT, and only after that drops the master * status when exiting. * * In the past this was caught by drm_fb_helper_lastclose(), but on * modern systems where logind always keeps a drm fd open to orchestrate * the vt switching, this doesn't work. * * To not break the userspace ABI we have this special case here, which * is only used for the above case. Everything else uses the normal * commit function, which ensures that we never steal the display from * an active drm master. */ force = var->activate & FB_ACTIVATE_KD_TEXT; __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, force); return 0; } EXPORT_SYMBOL(drm_fb_helper_set_par); static void pan_set(struct drm_fb_helper *fb_helper, int x, int y) { struct drm_mode_set *mode_set; mutex_lock(&fb_helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &fb_helper->client) { mode_set->x = x; mode_set->y = y; } mutex_unlock(&fb_helper->client.modeset_mutex); } static int pan_display_atomic(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; int ret; pan_set(fb_helper, var->xoffset, var->yoffset); ret = drm_client_modeset_commit_locked(&fb_helper->client); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } else pan_set(fb_helper, info->var.xoffset, info->var.yoffset); return ret; } static int pan_display_legacy(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_client_dev *client = &fb_helper->client; struct drm_mode_set *modeset; int ret = 0; mutex_lock(&client->modeset_mutex); drm_modeset_lock_all(fb_helper->dev); drm_client_for_each_modeset(modeset, client) { modeset->x = var->xoffset; modeset->y = var->yoffset; if (modeset->num_connectors) { ret = drm_mode_set_config_internal(modeset); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } } } drm_modeset_unlock_all(fb_helper->dev); mutex_unlock(&client->modeset_mutex); return ret; } /** * drm_fb_helper_pan_display - implementation for &fb_ops.fb_pan_display * @var: updated screen information * @info: fbdev registered by the helper */ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; int ret; if (oops_in_progress) return -EBUSY; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } if (drm_drv_uses_atomic_modeset(dev)) ret = pan_display_atomic(var, info); else ret = pan_display_legacy(var, info); drm_master_internal_release(dev); unlock: mutex_unlock(&fb_helper->lock); return ret; } EXPORT_SYMBOL(drm_fb_helper_pan_display); static uint32_t drm_fb_helper_find_format(struct drm_fb_helper *fb_helper, const uint32_t *formats, size_t format_count, unsigned int color_mode) { struct drm_device *dev = fb_helper->dev; uint32_t format; size_t i; format = drm_driver_color_mode_format(dev, color_mode); if (!format) { drm_info(dev, "unsupported color mode of %d\n", color_mode); return DRM_FORMAT_INVALID; } for (i = 0; i < format_count; ++i) { if (formats[i] == format) return format; } drm_warn(dev, "format %p4cc not supported\n", &format); return DRM_FORMAT_INVALID; } static int __drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; int crtc_count = 0; struct drm_connector_list_iter conn_iter; struct drm_connector *connector; struct drm_mode_set *mode_set; uint32_t surface_format = DRM_FORMAT_INVALID; const struct drm_format_info *info; memset(sizes, 0, sizeof(*sizes)); sizes->fb_width = (u32)-1; sizes->fb_height = (u32)-1; drm_client_for_each_modeset(mode_set, client) { struct drm_crtc *crtc = mode_set->crtc; struct drm_plane *plane = crtc->primary; drm_dbg_kms(dev, "test CRTC %u primary plane\n", drm_crtc_index(crtc)); drm_connector_list_iter_begin(fb_helper->dev, &conn_iter); drm_client_for_each_connector_iter(connector, &conn_iter) { struct drm_cmdline_mode *cmdline_mode = &connector->cmdline_mode; if (!cmdline_mode->bpp_specified) continue; surface_format = drm_fb_helper_find_format(fb_helper, plane->format_types, plane->format_count, cmdline_mode->bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } drm_connector_list_iter_end(&conn_iter); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ /* try preferred color mode */ surface_format = drm_fb_helper_find_format(fb_helper, plane->format_types, plane->format_count, fb_helper->preferred_bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } if (surface_format == DRM_FORMAT_INVALID) { /* * If none of the given color modes works, fall back * to XRGB8888. Drivers are expected to provide this * format for compatibility with legacy applications. */ drm_warn(dev, "No compatible format found\n"); surface_format = drm_driver_legacy_fb_format(dev, 32, 24); } info = drm_format_info(surface_format); sizes->surface_bpp = drm_format_info_bpp(info, 0); sizes->surface_depth = info->depth; /* first up get a count of crtcs now in use and new min/maxes width/heights */ crtc_count = 0; drm_client_for_each_modeset(mode_set, client) { struct drm_display_mode *desired_mode; int x, y, j; /* in case of tile group, are we the last tile vert or horiz? * If no tile group you are always the last one both vertically * and horizontally */ bool lastv = true, lasth = true; desired_mode = mode_set->mode; if (!desired_mode) continue; crtc_count++; x = mode_set->x; y = mode_set->y; sizes->surface_width = max_t(u32, desired_mode->hdisplay + x, sizes->surface_width); sizes->surface_height = max_t(u32, desired_mode->vdisplay + y, sizes->surface_height); for (j = 0; j < mode_set->num_connectors; j++) { struct drm_connector *connector = mode_set->connectors[j]; if (connector->has_tile && desired_mode->hdisplay == connector->tile_h_size && desired_mode->vdisplay == connector->tile_v_size) { lasth = (connector->tile_h_loc == (connector->num_h_tile - 1)); lastv = (connector->tile_v_loc == (connector->num_v_tile - 1)); /* cloning to multiple tiles is just crazy-talk, so: */ break; } } if (lasth) sizes->fb_width = min_t(u32, desired_mode->hdisplay + x, sizes->fb_width); if (lastv) sizes->fb_height = min_t(u32, desired_mode->vdisplay + y, sizes->fb_height); } if (crtc_count == 0 || sizes->fb_width == -1 || sizes->fb_height == -1) { drm_info(dev, "Cannot find any crtc or sizes\n"); return -EAGAIN; } return 0; } static int drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_mode_config *config = &dev->mode_config; int ret; mutex_lock(&client->modeset_mutex); ret = __drm_fb_helper_find_sizes(fb_helper, sizes); mutex_unlock(&client->modeset_mutex); if (ret) return ret; /* Handle our overallocation */ sizes->surface_height *= drm_fbdev_overalloc; sizes->surface_height /= 100; if (sizes->surface_height > config->max_height) { drm_dbg_kms(dev, "Fbdev over-allocation too large; clamping height to %d\n", config->max_height); sizes->surface_height = config->max_height; } return 0; } /* * Allocates the backing storage and sets up the fbdev info structure through * the ->fbdev_probe callback. */ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_fb_helper_surface_size sizes; struct fb_info *info; int ret; ret = drm_fb_helper_find_sizes(fb_helper, &sizes); if (ret) { /* First time: disable all crtc's.. */ if (!fb_helper->deferred_setup) drm_client_modeset_commit(client); return ret; } /* push down into drivers */ if (dev->driver->fbdev_probe) ret = dev->driver->fbdev_probe(fb_helper, &sizes); else if (fb_helper->funcs) ret = fb_helper->funcs->fb_probe(fb_helper, &sizes); if (ret < 0) return ret; strcpy(fb_helper->fb->comm, "[fbcon]"); info = fb_helper->info; /* Set the fb info for vgaswitcheroo clients. Does nothing otherwise. */ if (dev_is_pci(info->device)) vga_switcheroo_client_fb_set(to_pci_dev(info->device), info); return 0; } static void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch, bool is_color_indexed) { info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = is_color_indexed ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; info->fix.mmio_start = 0; info->fix.mmio_len = 0; info->fix.type_aux = 0; info->fix.xpanstep = 1; /* doing it in hw */ info->fix.ypanstep = 1; /* doing it in hw */ info->fix.ywrapstep = 0; info->fix.accel = FB_ACCEL_NONE; info->fix.line_length = pitch; } static void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper, uint32_t fb_width, uint32_t fb_height) { struct drm_framebuffer *fb = fb_helper->fb; const struct drm_format_info *format = fb->format; switch (format->format) { case DRM_FORMAT_C1: case DRM_FORMAT_C2: case DRM_FORMAT_C4: /* supported format with sub-byte pixels */ break; default: WARN_ON((drm_format_info_block_width(format, 0) > 1) || (drm_format_info_block_height(format, 0) > 1)); break; } info->pseudo_palette = fb_helper->pseudo_palette; info->var.xoffset = 0; info->var.yoffset = 0; __fill_var(&info->var, info, fb); info->var.activate = FB_ACTIVATE_NOW; drm_fb_helper_fill_pixel_fmt(&info->var, format); info->var.xres = fb_width; info->var.yres = fb_height; } /** * drm_fb_helper_fill_info - initializes fbdev information * @info: fbdev instance to set up * @fb_helper: fb helper instance to use as template * @sizes: describes fbdev size and scanout surface size * * Sets up the variable and fixed fbdev metainformation from the given fb helper * instance and the drm framebuffer allocated in &drm_fb_helper.fb. * * Drivers should call this (or their equivalent setup code) from their * &drm_driver.fbdev_probe callback after having allocated the fbdev * backing storage framebuffer. */ void drm_fb_helper_fill_info(struct fb_info *info, struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_framebuffer *fb = fb_helper->fb; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->is_color_indexed); drm_fb_helper_fill_var(info, fb_helper, sizes->fb_width, sizes->fb_height); info->par = fb_helper; /* * The DRM drivers fbdev emulation device name can be confusing if the * driver name also has a "drm" suffix on it. Leading to names such as * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't * be changed due user-space tools (e.g: pm-utils) matching against it. */ snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", fb_helper->dev->driver->name); } EXPORT_SYMBOL(drm_fb_helper_fill_info); /* * This is a continuation of drm_setup_crtcs() that sets up anything related * to the framebuffer. During initialization, drm_setup_crtcs() is called before * the framebuffer has been allocated (fb_helper->fb and fb_helper->info). * So, any setup that touches those fields needs to be done here instead of in * drm_setup_crtcs(). */ static void drm_setup_crtcs_fb(struct drm_fb_helper *fb_helper) { struct drm_client_dev *client = &fb_helper->client; struct drm_connector_list_iter conn_iter; struct fb_info *info = fb_helper->info; unsigned int rotation, sw_rotations = 0; struct drm_connector *connector; struct drm_mode_set *modeset; mutex_lock(&client->modeset_mutex); drm_client_for_each_modeset(modeset, client) { if (!modeset->num_connectors) continue; modeset->fb = fb_helper->fb; if (drm_client_rotation(modeset, &rotation)) /* Rotating in hardware, fbcon should not rotate */ sw_rotations |= DRM_MODE_ROTATE_0; else sw_rotations |= rotation; } mutex_unlock(&client->modeset_mutex); drm_connector_list_iter_begin(fb_helper->dev, &conn_iter); drm_client_for_each_connector_iter(connector, &conn_iter) { /* use first connected connector for the physical dimensions */ if (connector->status == connector_status_connected) { info->var.width = connector->display_info.width_mm; info->var.height = connector->display_info.height_mm; break; } } drm_connector_list_iter_end(&conn_iter); switch (sw_rotations) { case DRM_MODE_ROTATE_0: info->fbcon_rotate_hint = FB_ROTATE_UR; break; case DRM_MODE_ROTATE_90: info->fbcon_rotate_hint = FB_ROTATE_CCW; break; case DRM_MODE_ROTATE_180: info->fbcon_rotate_hint = FB_ROTATE_UD; break; case DRM_MODE_ROTATE_270: info->fbcon_rotate_hint = FB_ROTATE_CW; break; default: /* * Multiple bits are set / multiple rotations requested * fbcon cannot handle separate rotation settings per * output, so fallback to unrotated. */ info->fbcon_rotate_hint = FB_ROTATE_UR; } } /* Note: Drops fb_helper->lock before returning. */ static int __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) { struct drm_device *dev = fb_helper->dev; struct fb_info *info; unsigned int width, height; int ret; width = dev->mode_config.max_width; height = dev->mode_config.max_height; drm_client_modeset_probe(&fb_helper->client, width, height); ret = drm_fb_helper_single_fb_probe(fb_helper); if (ret < 0) { if (ret == -EAGAIN) { fb_helper->deferred_setup = true; ret = 0; } mutex_unlock(&fb_helper->lock); return ret; } drm_setup_crtcs_fb(fb_helper); fb_helper->deferred_setup = false; info = fb_helper->info; info->var.pixclock = 0; /* Need to drop locks to avoid recursive deadlock in * register_framebuffer. This is ok because the only thing left to do is * register the fbdev emulation instance in kernel_fb_helper_list. */ mutex_unlock(&fb_helper->lock); ret = register_framebuffer(info); if (ret < 0) return ret; drm_info(dev, "fb%d: %s frame buffer device\n", info->node, info->fix.id); mutex_lock(&kernel_fb_helper_lock); if (list_empty(&kernel_fb_helper_list)) register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); mutex_unlock(&kernel_fb_helper_lock); return 0; } /** * drm_fb_helper_initial_config - setup a sane initial connector configuration * @fb_helper: fb_helper device struct * * Scans the CRTCs and connectors and tries to put together an initial setup. * At the moment, this is a cloned configuration across all heads with * a new framebuffer object as the backing store. * * Note that this also registers the fbdev and so allows userspace to call into * the driver through the fbdev interfaces. * * This function will call down into the &drm_driver.fbdev_probe callback * to let the driver allocate and initialize the fbdev info structure and the * drm framebuffer used to back the fbdev. drm_fb_helper_fill_info() is provided * as a helper to setup simple default values for the fbdev info structure. * * HANG DEBUGGING: * * When you have fbcon support built-in or already loaded, this function will do * a full modeset to setup the fbdev console. Due to locking misdesign in the * VT/fbdev subsystem that entire modeset sequence has to be done while holding * console_lock. Until console_unlock is called no dmesg lines will be sent out * to consoles, not even serial console. This means when your driver crashes, * you will see absolutely nothing else but a system stuck in this function, * with no further output. Any kind of printk() you place within your own driver * or in the drm core modeset code will also never show up. * * Standard debug practice is to run the fbcon setup without taking the * console_lock as a hack, to be able to see backtraces and crashes on the * serial line. This can be done by setting the fb.lockless_register_fb=1 kernel * cmdline option. * * The other option is to just disable fbdev emulation since very likely the * first modeset from userspace will crash in the same way, and is even easier * to debug. This can be done by setting the drm_kms_helper.fbdev_emulation=0 * kernel cmdline option. * * RETURNS: * Zero if everything went ok, nonzero otherwise. */ int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper) { int ret; if (!drm_fbdev_emulation) return 0; mutex_lock(&fb_helper->lock); ret = __drm_fb_helper_initial_config_and_unlock(fb_helper); return ret; } EXPORT_SYMBOL(drm_fb_helper_initial_config); /** * drm_fb_helper_hotplug_event - respond to a hotplug notification by * probing all the outputs attached to the fb * @fb_helper: driver-allocated fbdev helper, can be NULL * * Scan the connectors attached to the fb_helper and try to put together a * setup after notification of a change in output configuration. * * Called at runtime, takes the mode config locks to be able to check/change the * modeset configuration. Must be run from process context (which usually means * either the output polling work or a work item launched from the driver's * hotplug interrupt). * * Note that drivers may call this even before calling * drm_fb_helper_initial_config but only after drm_fb_helper_init. This allows * for a race-free fbcon setup and will make sure that the fbdev emulation will * not miss any hotplug events. * * RETURNS: * 0 on success and a non-zero error code otherwise. */ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) { int err = 0; if (!drm_fbdev_emulation || !fb_helper) return 0; mutex_lock(&fb_helper->lock); if (fb_helper->deferred_setup) { err = __drm_fb_helper_initial_config_and_unlock(fb_helper); return err; } if (!fb_helper->fb || !drm_master_internal_acquire(fb_helper->dev)) { fb_helper->delayed_hotplug = true; mutex_unlock(&fb_helper->lock); return err; } drm_master_internal_release(fb_helper->dev); drm_dbg_kms(fb_helper->dev, "\n"); drm_client_modeset_probe(&fb_helper->client, fb_helper->fb->width, fb_helper->fb->height); drm_setup_crtcs_fb(fb_helper); mutex_unlock(&fb_helper->lock); drm_fb_helper_set_par(fb_helper->info); return 0; } EXPORT_SYMBOL(drm_fb_helper_hotplug_event); /** * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation * @dev: DRM device * * This function is obsolete. Call drm_fb_helper_restore_fbdev_mode_unlocked() * instead. */ void drm_fb_helper_lastclose(struct drm_device *dev) { drm_fb_helper_restore_fbdev_mode_unlocked(dev->fb_helper); } EXPORT_SYMBOL(drm_fb_helper_lastclose);
2 168 170 171 34 15 15 59 21 12 12 92 2 1 2 59 54 4 29 17 17 30 59 12 23 86 1352 1079 1 1 40 330 26 510 13 497 1 496 496 2 238 127 136 209 1 495 3 3 1 1 1 1 4 4 4 1 172 173 171 172 172 172 171 171 172 173 1 171 172 171 172 172 3 168 172 2 2 1 1 1 1 165 165 166 165 148 28 166 6 2 4 4 2 4 173 28 148 173 10 64 14 14 3 2 9 1 4 2 4 1 3 3 2 2 2 2 1 2 3 3 2 15 15 14 1 2 12 11 1 3 7 1 1 8 6 2 4 1 1 1 1 21 21 21 58 59 3 55 1 51 2 55 55 53 6 42 14 3 1 47 5 4 51 49 4 1 54 30 51 4 54 53 22 30 31 31 31 13 23 36 17 45 1 7 3 1 4 33 12 21 21 41 14 55 55 21 21 2 2 14 3 3 120 115 3 120 120 116 116 117 6 1 14 15 1 59 21 1562 1405 192 22 408 410 49 401 402 342 84 347 79 465 32 32 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 // SPDX-License-Identifier: GPL-2.0-or-later /* * Neighbour Discovery for IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Mike Shaver <shaver@ingenia.com> */ /* * Changes: * * Alexey I. Froloff : RFC6106 (DNSSL) support * Pierre Ynard : export userland ND options * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt * of an RA. * Janos Farkas : kmalloc failure checks * Alexey Kuznetsov : state machine reworked * and moved to net/core. * Pekka Savola : RFC2461 validation * YOSHIFUJI Hideaki @USAGI : Verify ND options properly */ #define pr_fmt(fmt) "ICMPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/sched.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/route.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, }; static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, }; static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = neigh_direct_output, .connected_output = neigh_direct_output, }; struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, .reachable_time = ND_REACHABLE_TIME, .data = { [NEIGH_VAR_MCAST_PROBES] = 3, [NEIGH_VAR_UCAST_PROBES] = 3, [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, .gc_thresh2 = 512, .gc_thresh3 = 1024, }; EXPORT_SYMBOL_GPL(nd_tbl); void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; memset(opt + 2, 0, pad); opt += pad; space -= pad; memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; space -= data_len; if (space > 0) memset(opt, 0, space); } EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); } static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, void *ha, const u8 *ops_data) { ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { int type; if (!cur || !end || cur >= end) return NULL; type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(const struct net_device *dev, struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_PREFIX_INFO || opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_6CO || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || opt->nd_opt_type == ND_OPT_PREF64; } static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && !ndisc_is_useropt(dev, cur)); return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; if (!nd_opt || opt_len < 0 || !ndopts) return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, "%s: duplicated ND6 option found: type=%d\n", __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO case ND_OPT_ROUTE_INFO: ndopts->nd_opts_ri_end = nd_opt; if (!ndopts->nd_opts_ri) ndopts->nd_opts_ri = nd_opt; break; #endif default: unknown = true; } if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; } else if (unknown) { /* * Unknown options must be silently ignored, * to accommodate future extension to the * protocol. */ ND_PRINTK(2, notice, "%s: ignored unsupported option; type=%d, len=%d\n", __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } return ndopts; } int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */ case ARPHRD_FDDI: ipv6_eth_mc_map(addr, buf); return 0; case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; case ARPHRD_IPGRE: return ipv6_ipgre_mc_map(addr, dev->broadcast, buf); default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); return 0; } } return -EINVAL; } EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { return ndisc_hashfn(pkey, dev, hash_rnd); } static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) { return neigh_key_eq128(n, pkey); } static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); if (!in6_dev) { return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh_direct_output; } else { if (is_multicast) { neigh->nud_state = NUD_NOARP; ndisc_mc_map(addr, neigh->ha, dev, 1); } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->dev_addr, dev->addr_len); if (dev->flags&IFF_LOOPBACK) neigh->type = RTN_LOCAL; } else if (dev->flags&IFF_POINTOPOINT) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else neigh->output = neigh->ops->output; } in6_dev_put(in6_dev); return 0; } static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); return 0; } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } /* called with rtnl held */ static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device"); return false; } return true; } static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int len) { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", __func__); return NULL; } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ skb_set_owner_w(skb, sk); return skb; } static void ip6_nd_hdr(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int hop_limit, int len) { struct ipv6hdr *hdr; struct inet6_dev *idev; unsigned tclass; rcu_read_lock(); idev = __in6_dev_get(skb->dev); tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = hop_limit; hdr->saddr = *saddr; hdr->daddr = *daddr; } void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); return; } skb_dst_set(skb, dst); } icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, csum_partial(icmp6h, skb->len, 0)); ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; struct nd_msg *msg; int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; src_addr = &tmpaddr; } if (!dev->addr_len) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, .icmp6_router = router, .icmp6_solicited = solicited, .icmp6_override = override, }, .target = *solicited_addr, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; idev = in6_dev_get(dev); if (!idev) return; read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { /* skip tentative addresses until dad completes */ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_OPTIMISTIC)) continue; ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); } read_unlock_bh(&idev->lock); in6_dev_put(idev); } struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *saddr, u64 nonce) { int inc_opt = dev->addr_len; struct sk_buff *skb; struct nd_msg *msg; int optlen = 0; if (!saddr) return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, }, .target = *solicit, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) { u8 *opt = skb_put(skb, 8); opt[0] = ND_OPT_NONCE; opt[1] = 8 >> 3; memcpy(opt + 2, &nonce, 6); } return skb; } EXPORT_SYMBOL(ndisc_ns_create); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce) { struct in6_addr addr_buf; struct sk_buff *skb; if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) return; saddr = &addr_buf; } skb = ndisc_ns_create(dev, solicit, saddr, nonce); if (skb) ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct sk_buff *skb; struct rs_msg *msg; int send_sllao = dev->addr_len; int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * According to section 2.2 of RFC 4429, we must not * send router solicitations with a sllao from * optimistic addresses, but we may send the solicitation * if we don't include the sllao. So here we check * if our address is optimistic, and if so, we * suppress the inclusion of the sllao. */ if (send_sllao) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; } in6_ifa_put(ifp); } else { send_sllao = 0; } } #endif if (send_sllao) optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct rs_msg) { .icmph = { .icmp6_type = NDISC_ROUTER_SOLICITATION, }, }; if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) { /* * "The sender MUST return an ICMP * destination unreachable" */ dst_link_failure(skb); kfree_skb(skb); } /* Called with locked neigh: either read or both */ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) { struct in6_addr *saddr = NULL; struct in6_addr mcaddr; struct net_device *dev = neigh->dev; struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } static int pndisc_is_router(const void *pkey, struct net_device *dev) { struct pneigh_entry *n; int ret = -1; read_lock_bh(&nd_tbl.lock); n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); if (n) ret = !!(n->flags & NTF_ROUTER); read_unlock_bh(&nd_tbl.lock); return ret; } void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; int dad = ipv6_addr_any(saddr); int is_router = -1; SKB_DR(reason); u64 nonce = 0; bool inc; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return reason; } /* * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return reason; } if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NS: invalid link-layer address length\n"); return reason; } /* RFC2461 7.1.1: * If the IP source address is the unspecified address, * there MUST NOT be source link-layer address option * in the message. */ if (dad) { ND_PRINTK(2, warn, "NS: bad DAD packet (link-layer address option)\n"); return reason; } } if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1) memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ ND_PRINTK(2, notice, "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", ifp->idev->dev->name, &ifp->addr, np); goto out; } /* * We are colliding with another node * who is doing DAD * so fail our DAD process */ addrconf_dad_failure(skb, ifp); return reason; } else { /* * This is not a dad solicitation. * If we are an optimistic node, * we should respond. * Otherwise, we should ignore it. */ if (!(ifp->flags & IFA_F_OPTIMISTIC)) goto out; } } idev = ifp->idev; } else { struct net *net = dev_net(dev); /* perhaps an address on the master device */ if (netif_is_l3_slave(dev)) { struct net_device *mdev; mdev = netdev_master_upper_dev_get_rcu(dev); if (mdev) { ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); if (ifp) goto have_ifp; } } idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ return reason; } if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response * by a random time between 0 and * MAX_ANYCAST_DELAY_TIME seconds. * (RFC2461) -- yoshfuji */ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); if (n) pneigh_enqueue(&nd_tbl, idev->nd_parms, n); goto out; } } else { SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST); goto out; } } if (is_router < 0) is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } if (inc) NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast); else NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast); /* * update / create cache entry * for the source address */ neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE, NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); reason = SKB_CONSUMED; } out: if (ifp) in6_ifa_put(ifp); else in6_dev_put(idev); return reason; } static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ return 1; case 2: /* Create new entries from untracked na only if saddr is in the * same subnet as an address configured on the interface that * received the na */ return !!ipv6_chk_prefix(saddr, dev); default: return 0; } } static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; SKB_DR(reason); u8 new_state; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); return reason; } /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NA: invalid link-layer address length\n"); return reason; } } ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { addrconf_dad_failure(skb, ifp); return reason; } /* What should we make now? The advertisement is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) We should not print the error if NA has been received from loopback - it is just our own unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, "NA: %pM advertised our address %pI6c on %s!\n", eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } neigh = neigh_lookup(&nd_tbl, &msg->target, dev); /* RFC 9131 updates original Neighbour Discovery RFC 4861. * NAs with Target LL Address option without a corresponding * entry in the neighbour cache can now create a STALE neighbour * cache entry on routers. * * entry accept fwding solicited behaviour * ------- ------ ------ --------- ---------------------- * present X X 0 Set state to STALE * present X X 1 Set state to REACHABLE * absent 0 X X Do nothing * absent 1 0 X Do nothing * absent 1 1 X Add a new STALE entry * * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; } } if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* * Don't update the neighbor cache entry on a proxy NA from * ourselves because either the proxied node is off link or it * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; } ndisc_update(dev, neigh, lladdr, new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0), NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* * Change: router to host */ rt6_clean_tohost(dev_net(dev), saddr); } reason = SKB_CONSUMED; out: neigh_release(neigh); } return reason; } static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) { struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb); unsigned long ndoptlen = skb->len - sizeof(*rs_msg); struct neighbour *neigh; struct inet6_dev *idev; const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct ndisc_options ndopts; u8 *lladdr = NULL; SKB_DR(reason); if (skb->len < sizeof(*rs_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; idev = __in6_dev_get(skb->dev); if (!idev) { ND_PRINTK(1, err, "RS: can't find in6 device\n"); return reason; } /* Don't accept RS if we're not in router mode */ if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* * Don't update NCE if src = ::; * this implies that the source node has no ip address assigned yet. */ if (ipv6_addr_any(saddr)) goto out; /* Parse ND options */ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) goto out; } neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER, NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); reason = SKB_CONSUMED; } out: return reason; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) { struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); if (!nlh) { goto nla_put_failure; } ndmsg = nlmsg_data(nlh); ndmsg->nduseropt_family = AF_INET6; ndmsg->nduseropt_ifindex = ra->dev->ifindex; ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); bool send_ifinfo_notify = false; struct neighbour *neigh = NULL; struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; struct net *net; SKB_DR(reason); int lifetime; int optlen; __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); ND_PRINTK(2, info, "RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return reason; } if (optlen < 0) return SKB_DROP_REASON_PKT_TOO_SMALL; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, did not accept ra for dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #endif if (in6_dev->if_flags & IF_RS_SENT) { /* * flag that an RA was received after an RS was sent * out on this interface. */ in6_dev->if_flags |= IF_RA_RCVD; } /* * Remember the managed/otherconf flags from most recently * received RA message (RFC 2462) -- yoshfuji */ old_if_flags = in6_dev->if_flags; in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED | IF_RA_OTHERCONF)) | (ra_msg->icmph.icmp6_addrconf_managed ? IF_RA_MANAGED : 0) | (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); goto skip_defrtr; } /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. */ net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); goto skip_defrtr; } #ifdef CONFIG_IPV6_ROUTER_PREF pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } } /* Set default route metric as specified by user */ defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; /* delete the route if lifetime is 0 or if metric needs change */ if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); if (neigh) neigh_release(neigh); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); return reason; } neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } neigh->flags |= NTF_ROUTER; } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) { struct nl_info nlinfo = { .nl_net = net, }; rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } if (rt) { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); fib6_set_expires(rt, jiffies + (HZ * lifetime)); fib6_add_gc_list(rt); spin_unlock_bh(&table->tb6_lock); } if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= ra_msg->icmph.icmp6_hop_limit) { WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } } skip_defrtr: /* * Update Reachable Time and Retrans Timer */ if (in6_dev->nd_parms) { unsigned long rtime = ntohl(ra_msg->retrans_timer); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { rtime = (rtime*HZ)/1000; if (rtime < HZ/100) rtime = HZ/100; NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } rtime = ntohl(ra_msg->reachable_time); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { NEIGH_VAR_SET(in6_dev->nd_parms, BASE_REACHABLE_TIME, rtime); NEIGH_VAR_SET(in6_dev->nd_parms, GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } } } skip_linkparms: /* * Process options. */ if (!neigh) neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr, skb->dev, 1); if (neigh) { u8 *lladdr = NULL; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { ND_PRINTK(2, warn, "RA: invalid link-layer address length\n"); goto out; } } ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| NEIGH_UPDATE_F_ISROUTER, NDISC_ROUTER_ADVERTISEMENT, &ndopts); reason = SKB_CONSUMED; } if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, accept_ra is false for dev: %s\n", __func__, skb->dev->name); goto out; } #ifdef CONFIG_IPV6_ROUTE_INFO if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: router info ignored.\n", skb->dev->name); goto skip_routeinfo; } if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) { struct route_info *ri = (struct route_info *)p; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT && ri->prefix_len == 0) continue; #endif if (ri->prefix_len == 0 && !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", __func__, skb->dev->name); goto out; } #endif if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { addrconf_prefix_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, ndopts.nd_opts_src_lladdr != NULL); } } if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (in6_dev->ra_mtu != mtu) { in6_dev->ra_mtu = mtu; send_ifinfo_notify = true; } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } if (ndopts.nd_useropts) { struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; p = ndisc_next_useropt(skb->dev, p, ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or * timer settings or ra_mtu value */ if (send_ifinfo_notify) inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); fib6_info_release(rt); if (neigh) neigh_release(neigh); return reason; } static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) { struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); struct ndisc_options ndopts; SKB_DR(reason); u8 *hdr; #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: ND_PRINTK(2, warn, "Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: source address is not link-local\n"); return reason; } if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), skb->dev->ifindex); return reason; } hdr = (u8 *)ndopts.nd_opts_rh; hdr += 8; if (!pskb_pull(skb, hdr - skb_transport_header(skb))) return SKB_DROP_REASON_PKT_TOO_SMALL; return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, struct sk_buff *orig_skb, int rd_len) { u8 *opt = skb_put(skb, rd_len); memset(opt, 0, 8); *(opt++) = ND_OPT_REDIRECT_HDR; *(opt++) = (rd_len >> 3); opt += 6; skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; struct rd_msg *msg; struct in6_addr saddr_buf; struct rt6_info *rt; struct dst_entry *dst; struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; if (netif_is_l3_master(skb->dev)) { dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: target address is not link-local unicast\n"); return; } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); return; } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, "Redirect: destination is not a neighbour\n"); goto release; } peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); if (!ret) goto release; if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { ND_PRINTK(2, warn, "Redirect: no neigh for target address\n"); goto release; } read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) { memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; optlen += ndisc_redirect_opt_addr_space(dev, neigh, ops_data_buf, &ops_data); } else read_unlock_bh(&neigh->lock); neigh_release(neigh); } rd_len = min_t(unsigned int, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, skb->len + 8); rd_len &= ~0x7; optlen += rd_len; buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!buff) goto release; msg = skb_put(buff, sizeof(*msg)); *msg = (struct rd_msg) { .icmph = { .icmp6_type = NDISC_REDIRECT, }, .target = *target, .dest = ipv6_hdr(skb)->daddr, }; /* * include target_address option */ if (ha) ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build redirect option and copy skb over to the new packet. */ if (rd_len) ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: dst_release(dst); } static void pndisc_redo(struct sk_buff *skb) { enum skb_drop_reason reason = ndisc_recv_ns(skb); kfree_skb_reason(skb, reason); } static int ndisc_is_multicast(const void *pkey) { return ipv6_addr_is_multicast((struct in6_addr *)pkey); } static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } return false; } enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; SKB_DR(reason); if (ndisc_suppress_frag_ndisc(skb)) return SKB_DROP_REASON_IPV6_NDISC_FRAG; if (skb_linearize(skb)) return SKB_DROP_REASON_NOMEM; msg = (struct nd_msg *)skb_transport_header(skb); __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); reason = ndisc_recv_ns(skb); break; case NDISC_NEIGHBOUR_ADVERTISEMENT: reason = ndisc_recv_na(skb); break; case NDISC_ROUTER_SOLICITATION: reason = ndisc_recv_rs(skb); break; case NDISC_ROUTER_ADVERTISEMENT: reason = ndisc_router_discovery(skb); break; case NDISC_REDIRECT: reason = ndisc_redirect_rcv(skb); break; } return reason; } static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) break; if (READ_ONCE(idev->cnf.ndisc_notify) || READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; case NETDEV_CHANGE: idev = in6_dev_get(dev); if (!idev) evict_nocarrier = true; else { evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl, const char *func, const char *dev_name) { static char warncomm[TASK_COMM_LEN]; static int warned; if (strcmp(warncomm, current->comm) && warned < 5) { strscpy(warncomm, current->comm); pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", warncomm, func, dev_name, ctl->procname, dev_name, ctl->procname); warned++; } } int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; int ret; if ((strcmp(ctl->procname, "retrans_time") == 0) || (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } return ret; } #endif static int __net_init ndisc_net_init(struct net *net) { struct ipv6_pinfo *np; struct sock *sk; int err; err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { ND_PRINTK(0, err, "NDISC: Failed to initialize the control socket (err %d)\n", err); return err; } net->ipv6.ndisc_sk = sk; np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ inet6_clear_bit(MC6_LOOP, sk); return 0; } static void __net_exit ndisc_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.ndisc_sk); } static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, }; int __init ndisc_init(void) { int err; err = register_pernet_subsys(&ndisc_net_ops); if (err) return err; /* * Initialize the neighbour table */ neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: #endif return err; #ifdef CONFIG_SYSCTL out_unregister_pernet: unregister_pernet_subsys(&ndisc_net_ops); goto out; #endif } int __init ndisc_late_init(void) { return register_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_late_cleanup(void) { unregister_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_cleanup(void) { #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); }
55 50 26 55 1 1 49 1 51 51 1 36 11 1 1 1 1 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/wrapper.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Handling of HFS wrappers around HFS+ volumes */ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/cdrom.h> #include <linux/unaligned.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" struct hfsplus_wd { u32 ablk_size; u16 ablk_start; u16 embed_start; u16 embed_count; }; /** * hfsplus_submit_bio - Perform block I/O * @sb: super block of volume for I/O * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes * @buf: buffer for I/O * @data: output pointer for location of requested data * @opf: I/O operation type and flags * * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads * @data will return a pointer to the start of the requested sector, * which may not be the same location as @buf. * * If @sector is not aligned to the bdev logical block size it will * be rounded down. For writes this means that @buf should contain data * that starts at the rounded-down address. As long as the data was * read using hfsplus_submit_bio() and the same buffer is used things * will work correctly. * * Returns: %0 on success else -errno code */ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, blk_opf_t opf) { const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; int ret = 0; u64 io_size; loff_t start; int offset; /* * Align sector to hardware sector size and find offset. We * assume that io_size is a power of two, which _should_ * be true. */ io_size = hfsplus_min_io_size(sb); start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; offset = start & (io_size - 1); sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); bio = bio_alloc(sb->s_bdev, 1, opf, GFP_NOIO); bio->bi_iter.bi_sector = sector; if (op != REQ_OP_WRITE && data) *data = (u8 *)buf + offset; while (io_size > 0) { unsigned int page_offset = offset_in_page(buf); unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset, io_size); ret = bio_add_page(bio, virt_to_page(buf), len, page_offset); if (ret != len) { ret = -EIO; goto out; } io_size -= len; buf = (u8 *)buf + len; } ret = submit_bio_wait(bio); out: bio_put(bio); return ret < 0 ? ret : 0; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) { u32 extent; u16 attrib; __be16 sig; sig = *(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG); if (sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIG) && sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) return 0; attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB)); if (!(attrib & HFSP_WRAP_ATTRIB_SLOCK) || !(attrib & HFSP_WRAP_ATTRIB_SPARED)) return 0; wd->ablk_size = be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE)); if (wd->ablk_size < HFSPLUS_SECTOR_SIZE) return 0; if (wd->ablk_size % HFSPLUS_SECTOR_SIZE) return 0; wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; wd->embed_count = extent & 0xFFFF; return 1; } static int hfsplus_get_last_session(struct super_block *sb, sector_t *start, sector_t *size) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); /* default values */ *start = 0; *size = bdev_nr_sectors(sb->s_bdev); if (HFSPLUS_SB(sb)->session >= 0) { struct cdrom_tocentry te; if (!cdi) return -EINVAL; te.cdte_track = HFSPLUS_SB(sb)->session; te.cdte_format = CDROM_LBA; if (cdrom_read_tocentry(cdi, &te) || (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) { pr_err("invalid session number or type of track\n"); return -EINVAL; } *start = (sector_t)te.cdte_addr.lba << 2; } else if (cdi) { struct cdrom_multisession ms_info; ms_info.addr_format = CDROM_LBA; if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag) *start = (sector_t)ms_info.addr.lba << 2; } return 0; } /* Find the volume header and fill in some minimum bits in superblock */ /* Takes in super block, returns true if good data read */ int hfsplus_read_wrapper(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_wd wd; sector_t part_start, part_size; u32 blocksize; int error = 0; error = -EINVAL; blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE); if (!blocksize) goto out; sbi->min_io_size = blocksize; if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; error = -ENOMEM; sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); if (!sbi->s_vhdr_buf) goto out; sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); if (!sbi->s_backup_vhdr_buf) goto out_free_vhdr; reread: error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, REQ_OP_READ); if (error) goto out_free_backup_vhdr; error = -EINVAL; switch (sbi->s_vhdr->signature) { case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX): set_bit(HFSPLUS_SB_HFSX, &sbi->flags); fallthrough; case cpu_to_be16(HFSPLUS_VOLHEAD_SIG): break; case cpu_to_be16(HFSP_WRAP_MAGIC): if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; part_start += (sector_t)wd.ablk_start + (sector_t)wd.embed_start * wd.ablk_size; part_size = (sector_t)wd.embed_count * wd.ablk_size; goto reread; default: /* * Check for a partition block. * * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) goto out_free_backup_vhdr; goto reread; } error = hfsplus_submit_bio(sb, part_start + part_size - 2, sbi->s_backup_vhdr_buf, (void **)&sbi->s_backup_vhdr, REQ_OP_READ); if (error) goto out_free_backup_vhdr; error = -EINVAL; if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) { pr_warn("invalid secondary volume header\n"); goto out_free_backup_vhdr; } blocksize = be32_to_cpu(sbi->s_vhdr->blocksize); /* * Block size must be at least as large as a sector and a multiple of 2. */ if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize)) goto out_free_backup_vhdr; sbi->alloc_blksz = blocksize; sbi->alloc_blksz_shift = ilog2(blocksize); blocksize = min_t(u32, sbi->alloc_blksz, PAGE_SIZE); /* * Align block size to block offset. */ while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) blocksize >>= 1; if (sb_set_blocksize(sb, blocksize) != blocksize) { pr_err("unable to set blocksize to %u!\n", blocksize); goto out_free_backup_vhdr; } sbi->blockoffset = part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); sbi->part_start = part_start; sbi->sect_count = part_size; sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; return 0; out_free_backup_vhdr: kfree(sbi->s_backup_vhdr_buf); out_free_vhdr: kfree(sbi->s_vhdr_buf); out: return error; }
19 1 18 28 5 1 1 3 1 3 2 2 3 1 2 2 2 1 1 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 // SPDX-License-Identifier: GPL-2.0-or-later /* user_defined.c: user defined key type * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/err.h> #include <keys/user-type.h> #include <linux/uaccess.h> #include "internal.h" static int logon_vet_description(const char *desc); /* * user defined keys take an arbitrary string as the description and an * arbitrary blob of data as the payload */ struct key_type key_type_user = { .name = "user", .preparse = user_preparse, .free_preparse = user_free_preparse, .instantiate = generic_key_instantiate, .update = user_update, .revoke = user_revoke, .destroy = user_destroy, .describe = user_describe, .read = user_read, }; EXPORT_SYMBOL_GPL(key_type_user); /* * This key type is essentially the same as key_type_user, but it does * not define a .read op. This is suitable for storing username and * password pairs in the keyring that you do not want to be readable * from userspace. */ struct key_type key_type_logon = { .name = "logon", .preparse = user_preparse, .free_preparse = user_free_preparse, .instantiate = generic_key_instantiate, .update = user_update, .revoke = user_revoke, .destroy = user_destroy, .describe = user_describe, .vet_description = logon_vet_description, }; EXPORT_SYMBOL_GPL(key_type_logon); /* * Preparse a user defined key payload */ int user_preparse(struct key_preparsed_payload *prep) { struct user_key_payload *upayload; size_t datalen = prep->datalen; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; upayload = kmalloc(sizeof(*upayload) + datalen, GFP_KERNEL); if (!upayload) return -ENOMEM; /* attach the data */ prep->quotalen = datalen; prep->payload.data[0] = upayload; upayload->datalen = datalen; memcpy(upayload->data, prep->data, datalen); return 0; } EXPORT_SYMBOL_GPL(user_preparse); /* * Free a preparse of a user defined key payload */ void user_free_preparse(struct key_preparsed_payload *prep) { kfree_sensitive(prep->payload.data[0]); } EXPORT_SYMBOL_GPL(user_free_preparse); static void user_free_payload_rcu(struct rcu_head *head) { struct user_key_payload *payload; payload = container_of(head, struct user_key_payload, rcu); kfree_sensitive(payload); } /* * update a user defined key * - the key's semaphore is write-locked */ int user_update(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *zap = NULL; int ret; /* check the quota and attach the new data */ ret = key_payload_reserve(key, prep->datalen); if (ret < 0) return ret; /* attach the new data, displacing the old */ key->expiry = prep->expiry; if (key_is_positive(key)) zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; if (zap) call_rcu(&zap->rcu, user_free_payload_rcu); return ret; } EXPORT_SYMBOL_GPL(user_update); /* * dispose of the links from a revoked keyring * - called with the key sem write-locked */ void user_revoke(struct key *key) { struct user_key_payload *upayload = user_key_payload_locked(key); /* clear the quota */ key_payload_reserve(key, 0); if (upayload) { rcu_assign_keypointer(key, NULL); call_rcu(&upayload->rcu, user_free_payload_rcu); } } EXPORT_SYMBOL(user_revoke); /* * dispose of the data dangling from the corpse of a user key */ void user_destroy(struct key *key) { struct user_key_payload *upayload = key->payload.data[0]; kfree_sensitive(upayload); } EXPORT_SYMBOL_GPL(user_destroy); /* * describe the user key */ void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); if (key_is_positive(key)) seq_printf(m, ": %u", key->datalen); } EXPORT_SYMBOL_GPL(user_describe); /* * read the key data * - the key's semaphore is read-locked */ long user_read(const struct key *key, char *buffer, size_t buflen) { const struct user_key_payload *upayload; long ret; upayload = user_key_payload_locked(key); ret = upayload->datalen; /* we can return the data as is */ if (buffer && buflen > 0) { if (buflen > upayload->datalen) buflen = upayload->datalen; memcpy(buffer, upayload->data, buflen); } return ret; } EXPORT_SYMBOL_GPL(user_read); /* Vet the description for a "logon" key */ static int logon_vet_description(const char *desc) { char *p; /* require a "qualified" description string */ p = strchr(desc, ':'); if (!p) return -EINVAL; /* also reject description with ':' as first char */ if (p == desc) return -EINVAL; return 0; }
136 135 1 136 136 134 2 1 6 3 4 114 112 114 2 113 1 1 1 107 116 110 107 46 46 36 36 25 1 116 116 116 133 131 130 46 1 1 46 143 141 113 131 131 87 131 5 2 3 3 1 2 54 56 2 2 72 72 2 3 3 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 // SPDX-License-Identifier: GPL-2.0-or-later /* * Spanning tree protocol; generic parts * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/kernel.h> #include <linux/rculist.h> #include <net/switchdev.h> #include "br_private.h" #include "br_private_stp.h" /* since time values in bpdu are in jiffies and then scaled (1/256) * before sending, make sure that is at least one STP tick. */ #define MESSAGE_AGE_INCR ((HZ / 256) + 1) static const char *const br_port_state_names[] = { [BR_STATE_DISABLED] = "disabled", [BR_STATE_LISTENING] = "listening", [BR_STATE_LEARNING] = "learning", [BR_STATE_FORWARDING] = "forwarding", [BR_STATE_BLOCKING] = "blocking", }; void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, }; int err; /* Don't change the state of the ports if they are driven by a different * protocol. */ if (p->flags & BR_MRP_AWARE) return; p->state = state; if (br_opt_get(p->br, BROPT_MST_ENABLED)) { err = br_mst_set_state(p, 0, state, NULL); if (err) br_warn(p->br, "error setting MST state on port %u(%s)\n", p->port_no, netdev_name(p->dev)); } err = switchdev_port_attr_set(p->dev, &attr, NULL); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); else br_info(p->br, "port %u(%s) entered %s state\n", (unsigned int) p->port_no, p->dev->name, br_port_state_names[p->state]); if (p->br->stp_enabled == BR_KERNEL_STP) { switch (p->state) { case BR_STATE_BLOCKING: p->stp_xstats.transition_blk++; break; case BR_STATE_FORWARDING: p->stp_xstats.transition_fwd++; break; } } } u8 br_port_get_stp_state(const struct net_device *dev) { struct net_bridge_port *p; ASSERT_RTNL(); p = br_port_get_rtnl(dev); if (!p) return BR_STATE_DISABLED; return p->state; } EXPORT_SYMBOL_GPL(br_port_get_stp_state); /* called under bridge lock */ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { struct net_bridge_port *p; list_for_each_entry_rcu(p, &br->port_list, list, lockdep_is_held(&br->lock)) { if (p->port_no == port_no) return p; } return NULL; } /* called under bridge lock */ static int br_should_become_root_port(const struct net_bridge_port *p, u16 root_port) { struct net_bridge *br; struct net_bridge_port *rp; int t; br = p->br; if (p->state == BR_STATE_DISABLED || br_is_designated_port(p)) return 0; if (memcmp(&br->bridge_id, &p->designated_root, 8) <= 0) return 0; if (!root_port) return 1; rp = br_get_port(br, root_port); t = memcmp(&p->designated_root, &rp->designated_root, 8); if (t < 0) return 1; else if (t > 0) return 0; if (p->designated_cost + p->path_cost < rp->designated_cost + rp->path_cost) return 1; else if (p->designated_cost + p->path_cost > rp->designated_cost + rp->path_cost) return 0; t = memcmp(&p->designated_bridge, &rp->designated_bridge, 8); if (t < 0) return 1; else if (t > 0) return 0; if (p->designated_port < rp->designated_port) return 1; else if (p->designated_port > rp->designated_port) return 0; if (p->port_id < rp->port_id) return 1; return 0; } static void br_root_port_block(const struct net_bridge *br, struct net_bridge_port *p) { br_notice(br, "port %u(%s) tried to become root port (blocked)", (unsigned int) p->port_no, p->dev->name); br_set_state(p, BR_STATE_LISTENING); br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay > 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } /* called under bridge lock */ static void br_root_selection(struct net_bridge *br) { struct net_bridge_port *p; u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { if (!br_should_become_root_port(p, root_port)) continue; if (p->flags & BR_ROOT_BLOCK) br_root_port_block(br, p); else root_port = p->port_no; } br->root_port = root_port; if (!root_port) { br->designated_root = br->bridge_id; br->root_path_cost = 0; } else { p = br_get_port(br, root_port); br->designated_root = p->designated_root; br->root_path_cost = p->designated_cost + p->path_cost; } } /* called under bridge lock */ void br_become_root_bridge(struct net_bridge *br) { br->max_age = br->bridge_max_age; br->hello_time = br->bridge_hello_time; br->forward_delay = br->bridge_forward_delay; br_topology_change_detection(br); del_timer(&br->tcn_timer); if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); mod_timer(&br->hello_timer, jiffies + br->hello_time); } } /* called under bridge lock */ void br_transmit_config(struct net_bridge_port *p) { struct br_config_bpdu bpdu; struct net_bridge *br; if (timer_pending(&p->hold_timer)) { p->config_pending = 1; return; } br = p->br; bpdu.topology_change = br->topology_change; bpdu.topology_change_ack = p->topology_change_ack; bpdu.root = br->designated_root; bpdu.root_path_cost = br->root_path_cost; bpdu.bridge_id = br->bridge_id; bpdu.port_id = p->port_id; if (br_is_root_bridge(br)) bpdu.message_age = 0; else { struct net_bridge_port *root = br_get_port(br, br->root_port); bpdu.message_age = (jiffies - root->designated_age) + MESSAGE_AGE_INCR; } bpdu.max_age = br->max_age; bpdu.hello_time = br->hello_time; bpdu.forward_delay = br->forward_delay; if (bpdu.message_age < br->max_age) { br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; if (p->br->stp_enabled == BR_KERNEL_STP) mod_timer(&p->hold_timer, round_jiffies(jiffies + BR_HOLD_TIME)); } } /* called under bridge lock */ static void br_record_config_information(struct net_bridge_port *p, const struct br_config_bpdu *bpdu) { p->designated_root = bpdu->root; p->designated_cost = bpdu->root_path_cost; p->designated_bridge = bpdu->bridge_id; p->designated_port = bpdu->port_id; p->designated_age = jiffies - bpdu->message_age; mod_timer(&p->message_age_timer, jiffies + (bpdu->max_age - bpdu->message_age)); } /* called under bridge lock */ static void br_record_config_timeout_values(struct net_bridge *br, const struct br_config_bpdu *bpdu) { br->max_age = bpdu->max_age; br->hello_time = bpdu->hello_time; br->forward_delay = bpdu->forward_delay; __br_set_topology_change(br, bpdu->topology_change); } /* called under bridge lock */ void br_transmit_tcn(struct net_bridge *br) { struct net_bridge_port *p; p = br_get_port(br, br->root_port); if (p) br_send_tcn_bpdu(p); else br_notice(br, "root port %u not found for topology notice\n", br->root_port); } /* called under bridge lock */ static int br_should_become_designated_port(const struct net_bridge_port *p) { struct net_bridge *br; int t; br = p->br; if (br_is_designated_port(p)) return 1; if (memcmp(&p->designated_root, &br->designated_root, 8)) return 1; if (br->root_path_cost < p->designated_cost) return 1; else if (br->root_path_cost > p->designated_cost) return 0; t = memcmp(&br->bridge_id, &p->designated_bridge, 8); if (t < 0) return 1; else if (t > 0) return 0; if (p->port_id < p->designated_port) return 1; return 0; } /* called under bridge lock */ static void br_designated_port_selection(struct net_bridge *br) { struct net_bridge_port *p; list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED && br_should_become_designated_port(p)) br_become_designated_port(p); } } /* called under bridge lock */ static int br_supersedes_port_info(const struct net_bridge_port *p, const struct br_config_bpdu *bpdu) { int t; t = memcmp(&bpdu->root, &p->designated_root, 8); if (t < 0) return 1; else if (t > 0) return 0; if (bpdu->root_path_cost < p->designated_cost) return 1; else if (bpdu->root_path_cost > p->designated_cost) return 0; t = memcmp(&bpdu->bridge_id, &p->designated_bridge, 8); if (t < 0) return 1; else if (t > 0) return 0; if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8)) return 1; if (bpdu->port_id <= p->designated_port) return 1; return 0; } /* called under bridge lock */ static void br_topology_change_acknowledged(struct net_bridge *br) { br->topology_change_detected = 0; del_timer(&br->tcn_timer); } /* called under bridge lock */ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); if (br->stp_enabled != BR_KERNEL_STP) return; br_info(br, "topology change detected, %s\n", isroot ? "propagating" : "sending tcn bpdu"); if (isroot) { __br_set_topology_change(br, 1); mod_timer(&br->topology_change_timer, jiffies + br->bridge_forward_delay + br->bridge_max_age); } else if (!br->topology_change_detected) { br_transmit_tcn(br); mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } br->topology_change_detected = 1; } /* called under bridge lock */ void br_config_bpdu_generation(struct net_bridge *br) { struct net_bridge_port *p; list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED && br_is_designated_port(p)) br_transmit_config(p); } } /* called under bridge lock */ static void br_reply(struct net_bridge_port *p) { br_transmit_config(p); } /* called under bridge lock */ void br_configuration_update(struct net_bridge *br) { br_root_selection(br); br_designated_port_selection(br); } /* called under bridge lock */ void br_become_designated_port(struct net_bridge_port *p) { struct net_bridge *br; br = p->br; p->designated_root = br->designated_root; p->designated_cost = br->root_path_cost; p->designated_bridge = br->bridge_id; p->designated_port = p->port_id; } /* called under bridge lock */ static void br_make_blocking(struct net_bridge_port *p) { if (p->state != BR_STATE_DISABLED && p->state != BR_STATE_BLOCKING) { if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) br_topology_change_detection(p->br); br_set_state(p, BR_STATE_BLOCKING); br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->forward_delay_timer); } } /* called under bridge lock */ static void br_make_forwarding(struct net_bridge_port *p) { struct net_bridge *br = p->br; if (p->state != BR_STATE_BLOCKING) return; if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { br_set_state(p, BR_STATE_FORWARDING); br_topology_change_detection(br); del_timer(&p->forward_delay_timer); } else if (br->stp_enabled == BR_KERNEL_STP) br_set_state(p, BR_STATE_LISTENING); else br_set_state(p, BR_STATE_LEARNING); br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay != 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } /* called under bridge lock */ void br_port_state_selection(struct net_bridge *br) { struct net_bridge_port *p; unsigned int liveports = 0; list_for_each_entry(p, &br->port_list, list) { if (p->state == BR_STATE_DISABLED) continue; /* Don't change port states if userspace is handling STP */ if (br->stp_enabled != BR_USER_STP) { if (p->port_no == br->root_port) { p->config_pending = 0; p->topology_change_ack = 0; br_make_forwarding(p); } else if (br_is_designated_port(p)) { del_timer(&p->message_age_timer); br_make_forwarding(p); } else { p->config_pending = 0; p->topology_change_ack = 0; br_make_blocking(p); } } if (p->state != BR_STATE_BLOCKING) br_multicast_enable_port(p); /* Multicast is not disabled for the port when it goes in * blocking state because the timers will expire and stop by * themselves without sending more queries. */ if (p->state == BR_STATE_FORWARDING) ++liveports; } if (liveports == 0) netif_carrier_off(br->dev); else netif_carrier_on(br->dev); } /* called under bridge lock */ static void br_topology_change_acknowledge(struct net_bridge_port *p) { p->topology_change_ack = 1; br_transmit_config(p); } /* called under bridge lock */ void br_received_config_bpdu(struct net_bridge_port *p, const struct br_config_bpdu *bpdu) { struct net_bridge *br; int was_root; p->stp_xstats.rx_bpdu++; br = p->br; was_root = br_is_root_bridge(br); if (br_supersedes_port_info(p, bpdu)) { br_record_config_information(p, bpdu); br_configuration_update(br); br_port_state_selection(br); if (!br_is_root_bridge(br) && was_root) { del_timer(&br->hello_timer); if (br->topology_change_detected) { del_timer(&br->topology_change_timer); br_transmit_tcn(br); mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } } if (p->port_no == br->root_port) { br_record_config_timeout_values(br, bpdu); br_config_bpdu_generation(br); if (bpdu->topology_change_ack) br_topology_change_acknowledged(br); } } else if (br_is_designated_port(p)) { br_reply(p); } } /* called under bridge lock */ void br_received_tcn_bpdu(struct net_bridge_port *p) { p->stp_xstats.rx_tcn++; if (br_is_designated_port(p)) { br_info(p->br, "port %u(%s) received tcn bpdu\n", (unsigned int) p->port_no, p->dev->name); br_topology_change_detection(p->br); br_topology_change_acknowledge(p); } } /* Change bridge STP parameter */ int br_set_hello_time(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME) return -ERANGE; spin_lock_bh(&br->lock); br->bridge_hello_time = t; if (br_is_root_bridge(br)) br->hello_time = br->bridge_hello_time; spin_unlock_bh(&br->lock); return 0; } int br_set_max_age(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE) return -ERANGE; spin_lock_bh(&br->lock); br->bridge_max_age = t; if (br_is_root_bridge(br)) br->max_age = br->bridge_max_age; spin_unlock_bh(&br->lock); return 0; } /* called under bridge lock */ int __set_ageing_time(struct net_device *dev, unsigned long t) { struct switchdev_attr attr = { .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, .u.ageing_time = jiffies_to_clock_t(t), }; int err; err = switchdev_port_attr_set(dev, &attr, NULL); if (err && err != -EOPNOTSUPP) return err; return 0; } /* Set time interval that dynamic forwarding entries live * For pure software bridge, allow values outside the 802.1 * standard specification for special cases: * 0 - entry never ages (all permanent) * 1 - entry disappears (no persistence) * * Offloaded switch entries maybe more restrictive */ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) { unsigned long t = clock_t_to_jiffies(ageing_time); int err; err = __set_ageing_time(br->dev, t); if (err) return err; spin_lock_bh(&br->lock); br->bridge_ageing_time = t; br->ageing_time = t; spin_unlock_bh(&br->lock); mod_delayed_work(system_long_wq, &br->gc_work, 0); return 0; } clock_t br_get_ageing_time(const struct net_device *br_dev) { const struct net_bridge *br; if (!netif_is_bridge_master(br_dev)) return 0; br = netdev_priv(br_dev); return jiffies_to_clock_t(br->ageing_time); } EXPORT_SYMBOL_GPL(br_get_ageing_time); /* called under bridge lock */ void __br_set_topology_change(struct net_bridge *br, unsigned char val) { unsigned long t; int err; if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) { /* On topology change, set the bridge ageing time to twice the * forward delay. Otherwise, restore its default ageing time. */ if (val) { t = 2 * br->forward_delay; br_debug(br, "decreasing ageing time to %lu\n", t); } else { t = br->bridge_ageing_time; br_debug(br, "restoring ageing time to %lu\n", t); } err = __set_ageing_time(br->dev, t); if (err) br_warn(br, "error offloading ageing time\n"); else br->ageing_time = t; } br->topology_change = val; } void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; if (br_is_root_bridge(br)) br->forward_delay = br->bridge_forward_delay; } int br_set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); int err = -ERANGE; spin_lock_bh(&br->lock); if (br->stp_enabled != BR_NO_STP && (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) goto unlock; __br_set_forward_delay(br, t); err = 0; unlock: spin_unlock_bh(&br->lock); return err; }
8 1 18 16 17 17 3 1560 1557 43 4 6 14 12 14 1 14 3 3 3 3 16 1 1 14 1 1 1 1 1 1 1 1 1 1 1 1 1 12 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 3 1 1 7 3 4 1 3 4 4 13 1 13 1 11 1 1 6 1 5 4 3 1 1 6 21 21 2 18 1 1 15 16 2 13 10 8 1 2 1 1 3 1 2 2 3 1 2 1 5 1 1 2 1 1 1 39 22 1 1 11 1 1 1 1 7 3 1 1 7 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) */ #include <linux/capability.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/stat.h> #include <net/net_namespace.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <net/rose.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/tcp_states.h> #include <net/ip.h> #include <net/arp.h> static int rose_ndevs = 10; int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0; int sysctl_rose_call_request_timeout = ROSE_DEFAULT_T1; int sysctl_rose_reset_request_timeout = ROSE_DEFAULT_T2; int sysctl_rose_clear_request_timeout = ROSE_DEFAULT_T3; int sysctl_rose_no_activity_timeout = ROSE_DEFAULT_IDLE; int sysctl_rose_ack_hold_back_timeout = ROSE_DEFAULT_HB; int sysctl_rose_routing_control = ROSE_DEFAULT_ROUTING; int sysctl_rose_link_fail_timeout = ROSE_DEFAULT_FAIL_TIMEOUT; int sysctl_rose_maximum_vcs = ROSE_DEFAULT_MAXVC; int sysctl_rose_window_size = ROSE_DEFAULT_WINDOW_SIZE; static HLIST_HEAD(rose_list); static DEFINE_SPINLOCK(rose_list_lock); static const struct proto_ops rose_proto_ops; ax25_address rose_callsign; /* * ROSE network devices are virtual network devices encapsulating ROSE * frames into AX.25 which will be sent through an AX.25 device, so form a * special "super class" of normal net devices; split their locks off into a * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; static struct lock_class_key rose_netdev_addr_lock_key; static void rose_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) { lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); } static void rose_set_lockdep_key(struct net_device *dev) { lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); } /* * Convert a ROSE address into text. */ char *rose2asc(char *buf, const rose_address *addr) { if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 && addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 && addr->rose_addr[4] == 0x00) { strcpy(buf, "*"); } else { sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, addr->rose_addr[1] & 0xFF, addr->rose_addr[2] & 0xFF, addr->rose_addr[3] & 0xFF, addr->rose_addr[4] & 0xFF); } return buf; } /* * Compare two ROSE addresses, 0 == equal. */ int rosecmp(const rose_address *addr1, const rose_address *addr2) { int i; for (i = 0; i < 5; i++) if (addr1->rose_addr[i] != addr2->rose_addr[i]) return 1; return 0; } /* * Compare two ROSE addresses for only mask digits, 0 == equal. */ int rosecmpm(const rose_address *addr1, const rose_address *addr2, unsigned short mask) { unsigned int i, j; if (mask > 10) return 1; for (i = 0; i < mask; i++) { j = i / 2; if ((i % 2) != 0) { if ((addr1->rose_addr[j] & 0x0F) != (addr2->rose_addr[j] & 0x0F)) return 1; } else { if ((addr1->rose_addr[j] & 0xF0) != (addr2->rose_addr[j] & 0xF0)) return 1; } } return 0; } /* * Socket removal during an interrupt is now safe. */ static void rose_remove_socket(struct sock *sk) { spin_lock_bh(&rose_list_lock); sk_del_node_init(sk); spin_unlock_bh(&rose_list_lock); } /* * Kill all bound sockets on a broken link layer connection to a * particular neighbour. */ void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; spin_lock_bh(&rose_list_lock); sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->neighbour == neigh) { rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); rose->neighbour->use--; rose->neighbour = NULL; } } spin_unlock_bh(&rose_list_lock); } /* * Kill all bound sockets on a dropped device. */ static void rose_kill_by_device(struct net_device *dev) { struct sock *sk, *array[16]; struct rose_sock *rose; bool rescan; int i, cnt; start: rescan = false; cnt = 0; spin_lock_bh(&rose_list_lock); sk_for_each(sk, &rose_list) { rose = rose_sk(sk); if (rose->device == dev) { if (cnt == ARRAY_SIZE(array)) { rescan = true; break; } sock_hold(sk); array[cnt++] = sk; } } spin_unlock_bh(&rose_list_lock); for (i = 0; i < cnt; i++) { sk = array[cnt]; rose = rose_sk(sk); lock_sock(sk); spin_lock_bh(&rose_list_lock); if (rose->device == dev) { rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } spin_unlock_bh(&rose_list_lock); release_sock(sk); sock_put(sk); cond_resched(); } if (rescan) goto start; } /* * Handle device status changes. */ static int rose_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN) return NOTIFY_DONE; switch (dev->type) { case ARPHRD_ROSE: rose_kill_by_device(dev); break; case ARPHRD_AX25: rose_link_device_down(dev); rose_rt_device_down(dev); break; } return NOTIFY_DONE; } /* * Add a socket to the bound sockets list. */ static void rose_insert_socket(struct sock *sk) { spin_lock_bh(&rose_list_lock); sk_add_node(sk, &rose_list); spin_unlock_bh(&rose_list_lock); } /* * Find a socket that wants to accept the Call Request we just * received. */ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; spin_lock_bh(&rose_list_lock); sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && !ax25cmp(&rose->source_call, call) && !rose->source_ndigis && s->sk_state == TCP_LISTEN) goto found; } sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && !ax25cmp(&rose->source_call, &null_ax25_address) && s->sk_state == TCP_LISTEN) goto found; } s = NULL; found: spin_unlock_bh(&rose_list_lock); return s; } /* * Find a connected ROSE socket given my LCI and device. */ struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; spin_lock_bh(&rose_list_lock); sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->lci == lci && rose->neighbour == neigh) goto found; } s = NULL; found: spin_unlock_bh(&rose_list_lock); return s; } /* * Find a unique LCI for a given device. */ unsigned int rose_new_lci(struct rose_neigh *neigh) { int lci; if (neigh->dce_mode) { for (lci = 1; lci <= sysctl_rose_maximum_vcs; lci++) if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL) return lci; } else { for (lci = sysctl_rose_maximum_vcs; lci > 0; lci--) if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL) return lci; } return 0; } /* * Deferred destroy. */ void rose_destroy_socket(struct sock *); /* * Handler for deferred kills. */ static void rose_destroy_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); rose_destroy_socket(sk); } /* * This is called from user mode and the timers. Thus it protects itself * against interrupt users but doesn't worry about being called during * work. Once it is removed from the queue no interrupt or bottom half * will touch it and we are (fairly 8-) ) safe. */ void rose_destroy_socket(struct sock *sk) { struct sk_buff *skb; rose_remove_socket(sk); rose_stop_heartbeat(sk); rose_stop_idletimer(sk); rose_stop_timer(sk); rose_clear_queues(sk); /* Flush the queues */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (skb->sk != sk) { /* A pending connection */ /* Queue the unaccepted socket for death */ sock_set_flag(skb->sk, SOCK_DEAD); rose_start_heartbeat(skb->sk); rose_sk(skb->sk)->state = ROSE_STATE_0; } kfree_skb(skb); } if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ timer_setup(&sk->sk_timer, rose_destroy_timer, 0); sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); } else sock_put(sk); } /* * Handling for system calls applied via the various interfaces to a * ROSE socket object. */ static int rose_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); int opt; if (level != SOL_ROSE) return -ENOPROTOOPT; if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&opt, optval, sizeof(int))) return -EFAULT; switch (optname) { case ROSE_DEFER: rose->defer = opt ? 1 : 0; return 0; case ROSE_T1: if (opt < 1) return -EINVAL; rose->t1 = opt * HZ; return 0; case ROSE_T2: if (opt < 1) return -EINVAL; rose->t2 = opt * HZ; return 0; case ROSE_T3: if (opt < 1) return -EINVAL; rose->t3 = opt * HZ; return 0; case ROSE_HOLDBACK: if (opt < 1) return -EINVAL; rose->hb = opt * HZ; return 0; case ROSE_IDLE: if (opt < 0) return -EINVAL; rose->idle = opt * 60 * HZ; return 0; case ROSE_QBITINCL: rose->qbitincl = opt ? 1 : 0; return 0; default: return -ENOPROTOOPT; } } static int rose_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); int val = 0; int len; if (level != SOL_ROSE) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case ROSE_DEFER: val = rose->defer; break; case ROSE_T1: val = rose->t1 / HZ; break; case ROSE_T2: val = rose->t2 / HZ; break; case ROSE_T3: val = rose->t3 / HZ; break; case ROSE_HOLDBACK: val = rose->hb / HZ; break; case ROSE_IDLE: val = rose->idle / (60 * HZ); break; case ROSE_QBITINCL: val = rose->qbitincl; break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, len, sizeof(int)); if (put_user(len, optlen)) return -EFAULT; return copy_to_user(optval, &val, len) ? -EFAULT : 0; } static int rose_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; lock_sock(sk); if (sock->state != SS_UNCONNECTED) { release_sock(sk); return -EINVAL; } if (sk->sk_state != TCP_LISTEN) { struct rose_sock *rose = rose_sk(sk); rose->dest_ndigis = 0; memset(&rose->dest_addr, 0, ROSE_ADDR_LEN); memset(&rose->dest_call, 0, AX25_ADDR_LEN); memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; release_sock(sk); return 0; } release_sock(sk); return -EOPNOTSUPP; } static struct proto rose_proto = { .name = "ROSE", .owner = THIS_MODULE, .obj_size = sizeof(struct rose_sock), }; static int rose_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct rose_sock *rose; if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); if (sk == NULL) return -ENOMEM; rose = rose_sk(sk); sock_init_data(sock, sk); skb_queue_head_init(&rose->ack_queue); #ifdef M_BIT skb_queue_head_init(&rose->frag_queue); rose->fraglen = 0; #endif sock->ops = &rose_proto_ops; sk->sk_protocol = protocol; timer_setup(&rose->timer, NULL, 0); timer_setup(&rose->idletimer, NULL, 0); rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); rose->t3 = msecs_to_jiffies(sysctl_rose_clear_request_timeout); rose->hb = msecs_to_jiffies(sysctl_rose_ack_hold_back_timeout); rose->idle = msecs_to_jiffies(sysctl_rose_no_activity_timeout); rose->state = ROSE_STATE_0; return 0; } static struct sock *rose_make_new(struct sock *osk) { struct sock *sk; struct rose_sock *rose, *orose; if (osk->sk_type != SOCK_SEQPACKET) return NULL; sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); if (sk == NULL) return NULL; rose = rose_sk(sk); sock_init_data(NULL, sk); skb_queue_head_init(&rose->ack_queue); #ifdef M_BIT skb_queue_head_init(&rose->frag_queue); rose->fraglen = 0; #endif sk->sk_type = osk->sk_type; sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; sock_copy_flags(sk, osk); timer_setup(&rose->timer, NULL, 0); timer_setup(&rose->idletimer, NULL, 0); orose = rose_sk(osk); rose->t1 = orose->t1; rose->t2 = orose->t2; rose->t3 = orose->t3; rose->hb = orose->hb; rose->idle = orose->idle; rose->defer = orose->defer; rose->device = orose->device; if (rose->device) netdev_hold(rose->device, &rose->dev_tracker, GFP_ATOMIC); rose->qbitincl = orose->qbitincl; return sk; } static int rose_release(struct socket *sock) { struct sock *sk = sock->sk; struct rose_sock *rose; if (sk == NULL) return 0; sock_hold(sk); sock_orphan(sk); lock_sock(sk); rose = rose_sk(sk); switch (rose->state) { case ROSE_STATE_0: release_sock(sk); rose_disconnect(sk, 0, -1, -1); lock_sock(sk); rose_destroy_socket(sk); break; case ROSE_STATE_2: rose->neighbour->use--; release_sock(sk); rose_disconnect(sk, 0, -1, -1); lock_sock(sk); rose_destroy_socket(sk); break; case ROSE_STATE_1: case ROSE_STATE_3: case ROSE_STATE_4: case ROSE_STATE_5: rose_clear_queues(sk); rose_stop_idletimer(sk); rose_write_internal(sk, ROSE_CLEAR_REQUEST); rose_start_t3timer(sk); rose->state = ROSE_STATE_2; sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); sock_set_flag(sk, SOCK_DESTROY); break; default: break; } spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; int n; if (!sock_flag(sk, SOCK_ZAPPED)) return -EINVAL; if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; if (addr->srose_family != AF_ROSE) return -EINVAL; if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) return -EADDRNOTAVAIL; source = &addr->srose_call; user = ax25_findbyuid(current_euid()); if (user) { rose->source_call = user->call; ax25_uid_put(user); } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); return -EACCES; } rose->source_call = *source; } rose->source_addr = addr->srose_addr; rose->device = dev; netdev_tracker_alloc(rose->device, &rose->dev_tracker, GFP_KERNEL); rose->source_ndigis = addr->srose_ndigis; if (addr_len == sizeof(struct full_sockaddr_rose)) { struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr; for (n = 0 ; n < addr->srose_ndigis ; n++) rose->source_digis[n] = full_addr->srose_digis[n]; } else { if (rose->source_ndigis == 1) { rose->source_digis[0] = addr->srose_digi; } } rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); return 0; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; unsigned char cause, diagnostic; ax25_uid_assoc *user; int n, err = 0; if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; if (addr->srose_family != AF_ROSE) return -EINVAL; if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ if ((rose->source_ndigis + addr->srose_ndigis) > ROSE_MAX_DIGIS) return -EINVAL; lock_sock(sk); if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { /* Connect completed during a ERESTARTSYS event */ sock->state = SS_CONNECTED; goto out_release; } if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { sock->state = SS_UNCONNECTED; err = -ECONNREFUSED; goto out_release; } if (sk->sk_state == TCP_ESTABLISHED) { /* No reconnect on a seqpacket socket */ err = -EISCONN; goto out_release; } sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); if (!rose->neighbour) { err = -ENETUNREACH; goto out_release; } rose->lci = rose_new_lci(rose->neighbour); if (!rose->lci) { err = -ENETUNREACH; goto out_release; } if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ struct net_device *dev; sock_reset_flag(sk, SOCK_ZAPPED); dev = rose_dev_first(); if (!dev) { err = -ENETUNREACH; goto out_release; } user = ax25_findbyuid(current_euid()); if (!user) { err = -EINVAL; dev_put(dev); goto out_release; } memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN); rose->source_call = user->call; rose->device = dev; netdev_tracker_alloc(rose->device, &rose->dev_tracker, GFP_KERNEL); ax25_uid_put(user); rose_insert_socket(sk); /* Finish the bind */ } rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; rose->dest_ndigis = addr->srose_ndigis; if (addr_len == sizeof(struct full_sockaddr_rose)) { struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr; for (n = 0 ; n < addr->srose_ndigis ; n++) rose->dest_digis[n] = full_addr->srose_digis[n]; } else { if (rose->dest_ndigis == 1) { rose->dest_digis[0] = addr->srose_digi; } } /* Move to connecting socket, start sending Connect Requests */ sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; rose->state = ROSE_STATE_1; rose->neighbour->use++; rose_write_internal(sk, ROSE_CALL_REQUEST); rose_start_heartbeat(sk); rose_start_t1timer(sk); /* Now the loop */ if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { err = -EINPROGRESS; goto out_release; } /* * A Connect Ack with Choke or timeout or failed routing will go to * closed. */ if (sk->sk_state == TCP_SYN_SENT) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; if (!signal_pending(current)) { release_sock(sk); schedule(); lock_sock(sk); continue; } err = -ERESTARTSYS; break; } finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; } if (sk->sk_state != TCP_ESTABLISHED) { sock->state = SS_UNCONNECTED; err = sock_error(sk); /* Always set at this point */ goto out_release; } sock->state = SS_CONNECTED; out_release: release_sock(sk); return err; } static int rose_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; DEFINE_WAIT(wait); struct sock *sk; int err = 0; if ((sk = sock->sk) == NULL) return -EINVAL; lock_sock(sk); if (sk->sk_type != SOCK_SEQPACKET) { err = -EOPNOTSUPP; goto out_release; } if (sk->sk_state != TCP_LISTEN) { err = -EINVAL; goto out_release; } /* * The write queue this time is holding sockets ready to use * hooked into the SABM we saved */ for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } if (!signal_pending(current)) { release_sock(sk); schedule(); lock_sock(sk); continue; } err = -ERESTARTSYS; break; } finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; newsk = skb->sk; sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk_acceptq_removed(sk); out_release: release_sock(sk); return err; } static int rose_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr; struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); int n; memset(srose, 0, sizeof(*srose)); if (peer != 0) { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; for (n = 0; n < rose->dest_ndigis; n++) srose->srose_digis[n] = rose->dest_digis[n]; } else { srose->srose_family = AF_ROSE; srose->srose_addr = rose->source_addr; srose->srose_call = rose->source_call; srose->srose_ndigis = rose->source_ndigis; for (n = 0; n < rose->source_ndigis; n++) srose->srose_digis[n] = rose->source_digis[n]; } return sizeof(struct full_sockaddr_rose); } int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci) { struct sock *sk; struct sock *make; struct rose_sock *make_rose; struct rose_facilities_struct facilities; int n; skb->sk = NULL; /* Initially we don't know who it's for */ /* * skb->data points to the rose frame start */ memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, skb->len - ROSE_CALL_REQ_FACILITIES_OFF, &facilities)) { rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); return 0; } sk = rose_find_listener(&facilities.source_addr, &facilities.source_call); /* * We can't accept the Call Request. */ if (sk == NULL || sk_acceptq_is_full(sk) || (make = rose_make_new(sk)) == NULL) { rose_transmit_clear_request(neigh, lci, ROSE_NETWORK_CONGESTION, 120); return 0; } skb->sk = make; make->sk_state = TCP_ESTABLISHED; make_rose = rose_sk(make); make_rose->lci = lci; make_rose->dest_addr = facilities.dest_addr; make_rose->dest_call = facilities.dest_call; make_rose->dest_ndigis = facilities.dest_ndigis; for (n = 0 ; n < facilities.dest_ndigis ; n++) make_rose->dest_digis[n] = facilities.dest_digis[n]; make_rose->source_addr = facilities.source_addr; make_rose->source_call = facilities.source_call; make_rose->source_ndigis = facilities.source_ndigis; for (n = 0 ; n < facilities.source_ndigis ; n++) make_rose->source_digis[n] = facilities.source_digis[n]; make_rose->neighbour = neigh; make_rose->device = dev; /* Caller got a reference for us. */ netdev_tracker_alloc(make_rose->device, &make_rose->dev_tracker, GFP_ATOMIC); make_rose->facilities = facilities; make_rose->neighbour->use++; if (rose_sk(sk)->defer) { make_rose->state = ROSE_STATE_5; } else { rose_write_internal(make, ROSE_CALL_ACCEPTED); make_rose->state = ROSE_STATE_3; rose_start_idletimer(make); } make_rose->condition = 0x00; make_rose->vs = 0; make_rose->va = 0; make_rose->vr = 0; make_rose->vl = 0; sk_acceptq_added(sk); rose_insert_socket(make); skb_queue_head(&sk->sk_receive_queue, skb); rose_start_heartbeat(make); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk); return 1; } static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); DECLARE_SOCKADDR(struct sockaddr_rose *, usrose, msg->msg_name); int err; struct full_sockaddr_rose srose; struct sk_buff *skb; unsigned char *asmptr; int n, size, qbit = 0; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; if (sock_flag(sk, SOCK_ZAPPED)) return -EADDRNOTAVAIL; if (sk->sk_shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 0); return -EPIPE; } if (rose->neighbour == NULL || rose->device == NULL) return -ENETUNREACH; if (usrose != NULL) { if (msg->msg_namelen != sizeof(struct sockaddr_rose) && msg->msg_namelen != sizeof(struct full_sockaddr_rose)) return -EINVAL; memset(&srose, 0, sizeof(struct full_sockaddr_rose)); memcpy(&srose, usrose, msg->msg_namelen); if (rosecmp(&rose->dest_addr, &srose.srose_addr) != 0 || ax25cmp(&rose->dest_call, &srose.srose_call) != 0) return -EISCONN; if (srose.srose_ndigis != rose->dest_ndigis) return -EISCONN; if (srose.srose_ndigis == rose->dest_ndigis) { for (n = 0 ; n < srose.srose_ndigis ; n++) if (ax25cmp(&rose->dest_digis[n], &srose.srose_digis[n])) return -EISCONN; } if (srose.srose_family != AF_ROSE) return -EINVAL; } else { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; srose.srose_family = AF_ROSE; srose.srose_addr = rose->dest_addr; srose.srose_call = rose->dest_call; srose.srose_ndigis = rose->dest_ndigis; for (n = 0 ; n < rose->dest_ndigis ; n++) srose.srose_digis[n] = rose->dest_digis[n]; } /* Build a packet */ /* Sanity check the packet size */ if (len > 65535) return -EMSGSIZE; size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) return err; skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN); /* * Put the data on the end */ skb_reset_transport_header(skb); skb_put(skb, len); err = memcpy_from_msg(skb_transport_header(skb), msg, len); if (err) { kfree_skb(skb); return err; } /* * If the Q BIT Include socket option is in force, the first * byte of the user data is the logical value of the Q Bit. */ if (rose->qbitincl) { qbit = skb->data[0]; skb_pull(skb, 1); } /* * Push down the ROSE header */ asmptr = skb_push(skb, ROSE_MIN_LEN); /* Build a ROSE Network header */ asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI; asmptr[1] = (rose->lci >> 0) & 0xFF; asmptr[2] = ROSE_DATA; if (qbit) asmptr[0] |= ROSE_Q_BIT; if (sk->sk_state != TCP_ESTABLISHED) { kfree_skb(skb); return -ENOTCONN; } #ifdef M_BIT #define ROSE_PACLEN (256-ROSE_MIN_LEN) if (skb->len - ROSE_MIN_LEN > ROSE_PACLEN) { unsigned char header[ROSE_MIN_LEN]; struct sk_buff *skbn; int frontlen; int lg; /* Save a copy of the Header */ skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN); skb_pull(skb, ROSE_MIN_LEN); frontlen = skb_headroom(skb); while (skb->len > 0) { if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_PACLEN, 0, &err)) == NULL) { kfree_skb(skb); return err; } skbn->sk = sk; skbn->free = 1; skbn->arp = 1; skb_reserve(skbn, frontlen); lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN; /* Copy the user data */ skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg); skb_pull(skb, lg); /* Duplicate the Header */ skb_push(skbn, ROSE_MIN_LEN); skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN); if (skb->len > 0) skbn->data[2] |= M_BIT; skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */ } skb->free = 1; kfree_skb(skb); } else { skb_queue_tail(&sk->sk_write_queue, skb); /* Throw it on the queue */ } #else skb_queue_tail(&sk->sk_write_queue, skb); /* Shove it onto the queue */ #endif rose_kick(sk); return len; } static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); size_t copied; unsigned char *asmptr; struct sk_buff *skb; int n, er, qbit; /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though */ if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; /* Now we can treat all alike */ skb = skb_recv_datagram(sk, flags, &er); if (!skb) return er; qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT; skb_pull(skb, ROSE_MIN_LEN); if (rose->qbitincl) { asmptr = skb_push(skb, 1); *asmptr = qbit; } skb_reset_transport_header(skb); copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } skb_copy_datagram_msg(skb, 0, msg, copied); if (msg->msg_name) { struct sockaddr_rose *srose; DECLARE_SOCKADDR(struct full_sockaddr_rose *, full_srose, msg->msg_name); memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; for (n = 0 ; n < rose->dest_ndigis ; n++) full_srose->srose_digis[n] = rose->dest_digis[n]; msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); return copied; } static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); void __user *argp = (void __user *)arg; switch (cmd) { case TIOCOUTQ: { long amount; amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; return put_user(amount, (unsigned int __user *) argp); } case TIOCINQ: { struct sk_buff *skb; long amount = 0L; spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFMETRIC: case SIOCSIFMETRIC: return -EINVAL; case SIOCADDRT: case SIOCDELRT: case SIOCRSCLRRT: if (!capable(CAP_NET_ADMIN)) return -EPERM; return rose_rt_ioctl(cmd, argp); case SIOCRSGCAUSE: { struct rose_cause_struct rose_cause; rose_cause.cause = rose->cause; rose_cause.diagnostic = rose->diagnostic; return copy_to_user(argp, &rose_cause, sizeof(struct rose_cause_struct)) ? -EFAULT : 0; } case SIOCRSSCAUSE: { struct rose_cause_struct rose_cause; if (copy_from_user(&rose_cause, argp, sizeof(struct rose_cause_struct))) return -EFAULT; rose->cause = rose_cause.cause; rose->diagnostic = rose_cause.diagnostic; return 0; } case SIOCRSSL2CALL: if (!capable(CAP_NET_ADMIN)) return -EPERM; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_release(&rose_callsign, NULL); if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address))) return -EFAULT; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) return ax25_listen_register(&rose_callsign, NULL); return 0; case SIOCRSGL2CALL: return copy_to_user(argp, &rose_callsign, sizeof(ax25_address)) ? -EFAULT : 0; case SIOCRSACCEPT: if (rose->state == ROSE_STATE_5) { rose_write_internal(sk, ROSE_CALL_ACCEPTED); rose_start_idletimer(sk); rose->condition = 0x00; rose->vs = 0; rose->va = 0; rose->vr = 0; rose->vl = 0; rose->state = ROSE_STATE_3; } return 0; default: return -ENOIOCTLCMD; } return 0; } #ifdef CONFIG_PROC_FS static void *rose_info_start(struct seq_file *seq, loff_t *pos) __acquires(rose_list_lock) { spin_lock_bh(&rose_list_lock); return seq_hlist_start_head(&rose_list, *pos); } static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_hlist_next(v, &rose_list, pos); } static void rose_info_stop(struct seq_file *seq, void *v) __releases(rose_list_lock) { spin_unlock_bh(&rose_list_lock); } static int rose_info_show(struct seq_file *seq, void *v) { char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, "dest_addr dest_call src_addr src_call dev lci neigh st vs vr va t t1 t2 t3 hb idle Snd-Q Rcv-Q inode\n"); else { struct sock *s = sk_entry(v); struct rose_sock *rose = rose_sk(s); const char *devname, *callsign; const struct net_device *dev = rose->device; if (!dev) devname = "???"; else devname = dev->name; seq_printf(seq, "%-10s %-9s ", rose2asc(rsbuf, &rose->dest_addr), ax2asc(buf, &rose->dest_call)); if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) callsign = "??????-?"; else callsign = ax2asc(buf, &rose->source_call); seq_printf(seq, "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n", rose2asc(rsbuf, &rose->source_addr), callsign, devname, rose->lci & 0x0FFF, (rose->neighbour) ? rose->neighbour->number : 0, rose->state, rose->vs, rose->vr, rose->va, ax25_display_timer(&rose->timer) / HZ, rose->t1 / HZ, rose->t2 / HZ, rose->t3 / HZ, rose->hb / HZ, ax25_display_timer(&rose->idletimer) / (60 * HZ), rose->idle / (60 * HZ), sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); } return 0; } static const struct seq_operations rose_info_seqops = { .start = rose_info_start, .next = rose_info_next, .stop = rose_info_stop, .show = rose_info_show, }; #endif /* CONFIG_PROC_FS */ static const struct net_proto_family rose_family_ops = { .family = PF_ROSE, .create = rose_create, .owner = THIS_MODULE, }; static const struct proto_ops rose_proto_ops = { .family = PF_ROSE, .owner = THIS_MODULE, .release = rose_release, .bind = rose_bind, .connect = rose_connect, .socketpair = sock_no_socketpair, .accept = rose_accept, .getname = rose_getname, .poll = datagram_poll, .ioctl = rose_ioctl, .gettstamp = sock_gettstamp, .listen = rose_listen, .shutdown = sock_no_shutdown, .setsockopt = rose_setsockopt, .getsockopt = rose_getsockopt, .sendmsg = rose_sendmsg, .recvmsg = rose_recvmsg, .mmap = sock_no_mmap, }; static struct notifier_block rose_dev_notifier = { .notifier_call = rose_device_event, }; static struct net_device **dev_rose; static struct ax25_protocol rose_pid = { .pid = AX25_P_ROSE, .func = rose_route_frame }; static struct ax25_linkfail rose_linkfail_notifier = { .func = rose_link_failed }; static int __init rose_proto_init(void) { int i; int rc; if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter too large\n"); rc = -EINVAL; goto out; } rc = proto_register(&rose_proto, 0); if (rc != 0) goto out; rose_callsign = null_ax25_address; dev_rose = kcalloc(rose_ndevs, sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); rc = -ENOMEM; goto out_proto_unregister; } for (i = 0; i < rose_ndevs; i++) { struct net_device *dev; char name[IFNAMSIZ]; sprintf(name, "rose%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); rc = -ENOMEM; goto fail; } rc = register_netdev(dev); if (rc) { printk(KERN_ERR "ROSE: netdevice registration failed\n"); free_netdev(dev); goto fail; } rose_set_lockdep_key(dev); dev_rose[i] = dev; } sock_register(&rose_family_ops); register_netdevice_notifier(&rose_dev_notifier); ax25_register_pid(&rose_pid); ax25_linkfail_register(&rose_linkfail_notifier); #ifdef CONFIG_SYSCTL rose_register_sysctl(); #endif rose_loopback_init(); rose_add_loopback_neigh(); proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops); proc_create_seq("rose_neigh", 0444, init_net.proc_net, &rose_neigh_seqops); proc_create_seq("rose_nodes", 0444, init_net.proc_net, &rose_node_seqops); proc_create_seq("rose_routes", 0444, init_net.proc_net, &rose_route_seqops); out: return rc; fail: while (--i >= 0) { unregister_netdev(dev_rose[i]); free_netdev(dev_rose[i]); } kfree(dev_rose); out_proto_unregister: proto_unregister(&rose_proto); goto out; } module_init(rose_proto_init); module_param(rose_ndevs, int, 0); MODULE_PARM_DESC(rose_ndevs, "number of ROSE devices"); MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>"); MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_ROSE); static void __exit rose_exit(void) { int i; remove_proc_entry("rose", init_net.proc_net); remove_proc_entry("rose_neigh", init_net.proc_net); remove_proc_entry("rose_nodes", init_net.proc_net); remove_proc_entry("rose_routes", init_net.proc_net); rose_loopback_clear(); rose_rt_free(); ax25_protocol_release(AX25_P_ROSE); ax25_linkfail_release(&rose_linkfail_notifier); if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_release(&rose_callsign, NULL); #ifdef CONFIG_SYSCTL rose_unregister_sysctl(); #endif unregister_netdevice_notifier(&rose_dev_notifier); sock_unregister(PF_ROSE); for (i = 0; i < rose_ndevs; i++) { struct net_device *dev = dev_rose[i]; if (dev) { unregister_netdev(dev); free_netdev(dev); } } kfree(dev_rose); proto_unregister(&rose_proto); } module_exit(rose_exit);
3 2 7 1 1 1 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 /* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/wireless/trace.h */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg802154 #if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __RDEV_CFG802154_OPS_TRACE #include <linux/tracepoint.h> #include <net/cfg802154.h> #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) #define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" #define WPAN_PHY_PR_ARG __entry->wpan_phy_name #define WPAN_DEV_ENTRY __field(u32, identifier) #define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \ ? wpan_dev->identifier : 0) #define WPAN_DEV_PR_FMT "wpan_dev(%u)" #define WPAN_DEV_PR_ARG (__entry->identifier) #define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ __field(enum nl802154_cca_opts, cca_opt) #define WPAN_CCA_ASSIGN \ do { \ (__entry->cca_mode) = cca->mode; \ (__entry->cca_opt) = cca->opt; \ } while (0) #define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d" #define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt #define BOOL_TO_STR(bo) (bo) ? "true" : "false" /************************************************************* * rdev->ops traces * *************************************************************/ DECLARE_EVENT_CLASS(wpan_phy_only_evt, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy), TP_STRUCT__entry( WPAN_PHY_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT, WPAN_PHY_PR_ARG) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_suspend, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_resume, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); TRACE_EVENT(802154_rdev_add_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, char *name, enum nl802154_iftype type, __le64 extended_addr), TP_ARGS(wpan_phy, name, type, extended_addr), TP_STRUCT__entry( WPAN_PHY_ENTRY __string(vir_intf_name, name ? name : "<noname>") __field(enum nl802154_iftype, type) __field(__le64, extended_addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; __assign_str(vir_intf_name); __entry->type = type; __entry->extended_addr = extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, extended addr: 0x%llx", WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type, __le64_to_cpu(__entry->extended_addr)) ); TRACE_EVENT(802154_rdev_del_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG) ); TRACE_EVENT(802154_rdev_set_channel, TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel), TP_ARGS(wpan_phy, page, channel), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, page) __field(u8, channel) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->page = page; __entry->channel = channel; ), TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG, __entry->page, __entry->channel) ); TRACE_EVENT(802154_rdev_set_tx_power, TP_PROTO(struct wpan_phy *wpan_phy, s32 power), TP_ARGS(wpan_phy, power), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, power) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->power = power; ), TP_printk(WPAN_PHY_PR_FMT ", mbm: %d", WPAN_PHY_PR_ARG, __entry->power) ); TRACE_EVENT(802154_rdev_set_cca_mode, TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), TP_ARGS(wpan_phy, cca), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_CCA_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_CCA_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG, WPAN_CCA_PR_ARG) ); TRACE_EVENT(802154_rdev_set_cca_ed_level, TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level), TP_ARGS(wpan_phy, ed_level), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, ed_level) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ed_level = ed_level; ), TP_printk(WPAN_PHY_PR_FMT ", ed level: %d", WPAN_PHY_PR_ARG, __entry->ed_level) ); DECLARE_EVENT_CLASS(802154_le16_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le16, le16arg) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->le16arg = le16arg; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg) ); DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", short addr: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); TRACE_EVENT(802154_rdev_set_backoff_exponent, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be), TP_ARGS(wpan_phy, wpan_dev, min_be, max_be), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, min_be) __field(u8, max_be) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->min_be = min_be; __entry->max_be = max_be; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", min be: %d, max be: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be) ); TRACE_EVENT(802154_rdev_set_csma_backoffs, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 max_csma_backoffs), TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, max_csma_backoffs) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_csma_backoffs = max_csma_backoffs; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max csma backoffs: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_csma_backoffs) ); TRACE_EVENT(802154_rdev_set_max_frame_retries, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, s8 max_frame_retries), TP_ARGS(wpan_phy, wpan_dev, max_frame_retries), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(s8, max_frame_retries) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_frame_retries = max_frame_retries; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max frame retries: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_frame_retries) ); TRACE_EVENT(802154_rdev_set_lbt_mode, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode), TP_ARGS(wpan_phy, wpan_dev, mode), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, mode) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->mode = mode; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", lbt mode: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) ); TRACE_EVENT(802154_rdev_set_ackreq_default, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool ackreq), TP_ARGS(wpan_phy, wpan_dev, ackreq), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, ackreq) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->ackreq = ackreq; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", ackreq default: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq)) ); TRACE_EVENT(802154_rdev_trigger_scan, TP_PROTO(struct wpan_phy *wpan_phy, struct cfg802154_scan_request *request), TP_ARGS(wpan_phy, request), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, page) __field(u32, channels) __field(u8, duration) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->page = request->page; __entry->channels = request->channels; __entry->duration = request->duration; ), TP_printk(WPAN_PHY_PR_FMT ", scan, page: %d, channels: %x, duration %d", WPAN_PHY_PR_ARG, __entry->page, __entry->channels, __entry->duration) ); TRACE_EVENT(802154_rdev_send_beacons, TP_PROTO(struct wpan_phy *wpan_phy, struct cfg802154_beacon_request *request), TP_ARGS(wpan_phy, request), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, interval) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->interval = request->interval; ), TP_printk(WPAN_PHY_PR_FMT ", sending beacons (interval order: %d)", WPAN_PHY_PR_ARG, __entry->interval) ); DECLARE_EVENT_CLASS(802154_wdev_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG) ); DEFINE_EVENT(802154_wdev_template, 802154_rdev_abort_scan, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev) ); DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev) ); TRACE_EVENT(802154_rdev_associate, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_addr *coord), TP_ARGS(wpan_phy, wpan_dev, coord), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le64, addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->addr = coord->extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) ); TRACE_EVENT(802154_rdev_disassociate, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_addr *target), TP_ARGS(wpan_phy, wpan_dev, target), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le64, addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->addr = target->extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) ); TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(int, ret) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ret = ret; ), TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG, __entry->ret) ); #endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * configfs_internal.h - Internal stuff for configfs * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. */ #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> struct configfs_fragment { atomic_t frag_count; struct rw_semaphore frag_sem; bool frag_dead; }; void put_fragment(struct configfs_fragment *); struct configfs_fragment *get_fragment(struct configfs_fragment *); struct configfs_dirent { atomic_t s_count; int s_dependent_count; struct list_head s_sibling; struct list_head s_children; int s_links; void * s_element; int s_type; umode_t s_mode; struct dentry * s_dentry; struct iattr * s_iattr; #ifdef CONFIG_LOCKDEP int s_depth; #endif struct configfs_fragment *s_frag; }; #define CONFIGFS_ROOT 0x0001 #define CONFIGFS_DIR 0x0002 #define CONFIGFS_ITEM_ATTR 0x0004 #define CONFIGFS_ITEM_BIN_ATTR 0x0008 #define CONFIGFS_ITEM_LINK 0x0020 #define CONFIGFS_USET_DIR 0x0040 #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 #define CONFIGFS_USET_IN_MKDIR 0x0200 #define CONFIGFS_USET_CREATING 0x0400 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR | CONFIGFS_ITEM_BIN_ATTR) #define CONFIGFS_PINNED \ (CONFIGFS_ROOT | CONFIGFS_DIR | CONFIGFS_ITEM_LINK) extern struct mutex configfs_symlink_mutex; extern spinlock_t configfs_dirent_lock; extern struct kmem_cache *configfs_dir_cachep; extern int configfs_is_root(struct config_item *item); extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *); extern struct inode *configfs_create(struct dentry *, umode_t mode); extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_create_bin_file(struct config_item *, const struct configfs_bin_attribute *); extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, void *, umode_t, int, struct configfs_fragment *); extern int configfs_dirent_is_ready(struct configfs_dirent *); extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); extern int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); extern struct dentry *configfs_pin_fs(void); extern void configfs_release_fs(void); extern const struct file_operations configfs_dir_operations; extern const struct file_operations configfs_file_operations; extern const struct file_operations configfs_bin_file_operations; extern const struct inode_operations configfs_dir_inode_operations; extern const struct inode_operations configfs_root_inode_operations; extern const struct inode_operations configfs_symlink_inode_operations; extern const struct dentry_operations configfs_dentry_ops; extern int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname); extern int configfs_unlink(struct inode *dir, struct dentry *dentry); int configfs_create_link(struct configfs_dirent *target, struct dentry *parent, struct dentry *dentry, char *body); static inline struct config_item * to_item(struct dentry * dentry) { struct configfs_dirent * sd = dentry->d_fsdata; return ((struct config_item *) sd->s_element); } static inline struct configfs_attribute * to_attr(struct dentry * dentry) { struct configfs_dirent * sd = dentry->d_fsdata; return ((struct configfs_attribute *) sd->s_element); } static inline struct configfs_bin_attribute *to_bin_attr(struct dentry *dentry) { struct configfs_attribute *attr = to_attr(dentry); return container_of(attr, struct configfs_bin_attribute, cb_attr); } static inline struct config_item *configfs_get_config_item(struct dentry *dentry) { struct config_item * item = NULL; spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; item = config_item_get(sd->s_element); } spin_unlock(&dentry->d_lock); return item; } static inline void release_configfs_dirent(struct configfs_dirent * sd) { if (!(sd->s_type & CONFIGFS_ROOT)) { kfree(sd->s_iattr); put_fragment(sd->s_frag); kmem_cache_free(configfs_dir_cachep, sd); } } static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd) { if (sd) { WARN_ON(!atomic_read(&sd->s_count)); atomic_inc(&sd->s_count); } return sd; } static inline void configfs_put(struct configfs_dirent * sd) { WARN_ON(!atomic_read(&sd->s_count)); if (atomic_dec_and_test(&sd->s_count)) release_configfs_dirent(sd); }
2 13 1 6 6 18 2 3 16 15 7 7 2 7 1 7 1 1 22 2 20 1 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 // SPDX-License-Identifier: GPL-2.0-only /* * kexec.c - kexec_load system call * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/mm.h> #include <linux/file.h> #include <linux/security.h> #include <linux/kexec.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/syscalls.h> #include <linux/vmalloc.h> #include <linux/slab.h> #include "kexec_internal.h" static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment *segments, unsigned long flags) { int ret; struct kimage *image; bool kexec_on_panic = flags & KEXEC_ON_CRASH; #ifdef CONFIG_CRASH_DUMP if (kexec_on_panic) { /* Verify we have a valid entry point */ if ((entry < phys_to_boot_phys(crashk_res.start)) || (entry > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL; } #endif /* Allocate and initialize a controlling structure */ image = do_kimage_alloc_init(); if (!image) return -ENOMEM; image->start = entry; image->nr_segments = nr_segments; memcpy(image->segment, segments, nr_segments * sizeof(*segments)); #ifdef CONFIG_CRASH_DUMP if (kexec_on_panic) { /* Enable special crash kernel control page alloc policy. */ image->control_page = crashk_res.start; image->type = KEXEC_TYPE_CRASH; } #endif ret = sanity_check_segment_list(image); if (ret) goto out_free_image; /* * Find a location for the control code buffer, and add it * the vector of segments so that it's pages will also be * counted as destination pages. */ ret = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { pr_err("Could not allocate control_code_buffer\n"); goto out_free_image; } if (!kexec_on_panic) { image->swap_page = kimage_alloc_control_pages(image, 0); if (!image->swap_page) { pr_err("Could not allocate swap buffer\n"); goto out_free_control_pages; } } *rimage = image; return 0; out_free_control_pages: kimage_free_page_list(&image->control_pages); out_free_image: kfree(image); return ret; } static int do_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment *segments, unsigned long flags) { struct kimage **dest_image, *image; unsigned long i; int ret; /* * Because we write directly to the reserved memory region when loading * crash kernels we need a serialization here to prevent multiple crash * kernels from attempting to load simultaneously. */ if (!kexec_trylock()) return -EBUSY; #ifdef CONFIG_CRASH_DUMP if (flags & KEXEC_ON_CRASH) { dest_image = &kexec_crash_image; if (kexec_crash_image) arch_kexec_unprotect_crashkres(); } else #endif dest_image = &kexec_image; if (nr_segments == 0) { /* Uninstall image */ kimage_free(xchg(dest_image, NULL)); ret = 0; goto out_unlock; } if (flags & KEXEC_ON_CRASH) { /* * Loading another kernel to switch to if this one * crashes. Free any current crash dump kernel before * we corrupt it. */ kimage_free(xchg(&kexec_crash_image, NULL)); } ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags); if (ret) goto out_unlock; if (flags & KEXEC_PRESERVE_CONTEXT) image->preserve_context = 1; #ifdef CONFIG_CRASH_HOTPLUG if ((flags & KEXEC_ON_CRASH) && arch_crash_hotplug_support(image, flags)) image->hotplug_support = 1; #endif ret = machine_kexec_prepare(image); if (ret) goto out; /* * Some architecture(like S390) may touch the crash memory before * machine_kexec_prepare(), we must copy vmcoreinfo data after it. */ ret = kimage_crash_copy_vmcoreinfo(image); if (ret) goto out; for (i = 0; i < nr_segments; i++) { ret = kimage_load_segment(image, &image->segment[i]); if (ret) goto out; } kimage_terminate(image); ret = machine_kexec_post_load(image); if (ret) goto out; /* Install the new kernel and uninstall the old */ image = xchg(dest_image, image); out: #ifdef CONFIG_CRASH_DUMP if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) arch_kexec_protect_crashkres(); #endif kimage_free(image); out_unlock: kexec_unlock(); return ret; } /* * Exec Kernel system call: for obvious reasons only root may call it. * * This call breaks up into three pieces. * - A generic part which loads the new kernel from the current * address space, and very carefully places the data in the * allocated pages. * * - A generic part that interacts with the kernel and tells all of * the devices to shut down. Preventing on-going dmas, and placing * the devices in a consistent state so a later kernel can * reinitialize them. * * - A machine specific part that includes the syscall number * and then copies the image to it's final destination. And * jumps into the image at entry. * * kexec does not sync, or unmount filesystems so if you need * that to happen you need to do that yourself. */ static inline int kexec_load_check(unsigned long nr_segments, unsigned long flags) { int image_type = (flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT; int result; /* We only trust the superuser with rebooting the system. */ if (!kexec_load_permitted(image_type)) return -EPERM; /* Permit LSMs and IMA to fail the kexec */ result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false); if (result < 0) return result; /* * kexec can be used to circumvent module loading restrictions, so * prevent loading in that case */ result = security_locked_down(LOCKDOWN_KEXEC); if (result) return result; /* * Verify we have a legal set of flags * This leaves us room for future extensions. */ if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) return -EINVAL; /* Put an artificial cap on the number * of segments passed to kexec_load. */ if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; return 0; } SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, struct kexec_segment __user *, segments, unsigned long, flags) { struct kexec_segment *ksegments; unsigned long result; result = kexec_load_check(nr_segments, flags); if (result) return result; /* Verify we are on the appropriate architecture */ if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) return -EINVAL; ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0])); if (IS_ERR(ksegments)) return PTR_ERR(ksegments); result = do_kexec_load(entry, nr_segments, ksegments, flags); kfree(ksegments); return result; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, compat_ulong_t, nr_segments, struct compat_kexec_segment __user *, segments, compat_ulong_t, flags) { struct compat_kexec_segment in; struct kexec_segment *ksegments; unsigned long i, result; result = kexec_load_check(nr_segments, flags); if (result) return result; /* Don't allow clients that don't understand the native * architecture to do anything. */ if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) return -EINVAL; ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]), GFP_KERNEL); if (!ksegments) return -ENOMEM; for (i = 0; i < nr_segments; i++) { result = copy_from_user(&in, &segments[i], sizeof(in)); if (result) goto fail; ksegments[i].buf = compat_ptr(in.buf); ksegments[i].bufsz = in.bufsz; ksegments[i].mem = in.mem; ksegments[i].memsz = in.memsz; } result = do_kexec_load(entry, nr_segments, ksegments, flags); fail: kfree(ksegments); return result; } #endif
9 113 116 116 71 71 58 58 303 303 12 302 304 102 284 282 282 55 55 36 50 3 74 39 56 56 1 54 55 40 36 64 64 7 64 2 64 65 1 64 64 64 94 32 16 63 93 1 94 73 17 4 20 19 21 5 16 16 36 25 225 6 60 192 7 212 5 8 33 1 2 2 36 197 1 3 3 197 26 6 21 36 36 15 15 21 32 7 7 7 36 3 3 12 1 19 7 4 4 2 5 12 9 8 12 3 12 2 2 6 1 2 3 3 1 1 1 60 1 44 26 8 74 1 71 35 4 67 4 68 2 27 2 28 67 2 67 59 60 26 295 250 74 60 36 12 295 296 294 2 293 293 307 95 241 165 37 128 79 41 45 35 3 32 27 1 26 5 4 5 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_sb.h" #include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_inode.h" #include "xfs_ag.h" #include "xfs_health.h" #include "xfs_rmap_item.h" struct kmem_cache *xfs_rmap_intent_cache; /* * Lookup the first record less than or equal to [bno, len, owner, offset] * in the btree given by cur. */ int xfs_rmap_lookup_le( struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat) { int get_stat = 0; int error; cur->bc_rec.r.rm_startblock = bno; cur->bc_rec.r.rm_blockcount = 0; cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); if (error || !(*stat) || !irec) return error; error = xfs_rmap_get_rec(cur, irec, &get_stat); if (error) return error; if (!get_stat) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } return 0; } /* * Lookup the record exactly matching [bno, len, owner, offset] * in the btree given by cur. */ int xfs_rmap_lookup_eq( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner, uint64_t offset, unsigned int flags, int *stat) { cur->bc_rec.r.rm_startblock = bno; cur->bc_rec.r.rm_blockcount = len; cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } /* * Update the record referred to by cur to the value given * by [bno, len, owner, offset]. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int xfs_rmap_update( struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec) { union xfs_btree_rec rec; int error; trace_xfs_rmap_update(cur, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock); rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount); rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner); rec.rmap.rm_offset = cpu_to_be64( xfs_rmap_irec_offset_pack(irec)); error = xfs_btree_update(cur, &rec); if (error) trace_xfs_rmap_update_error(cur, error, _RET_IP_); return error; } int xfs_rmap_insert( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner, uint64_t offset, unsigned int flags) { int i; int error; trace_xfs_rmap_insert(rcur, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) { xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } rcur->bc_rec.r.rm_startblock = agbno; rcur->bc_rec.r.rm_blockcount = len; rcur->bc_rec.r.rm_owner = owner; rcur->bc_rec.r.rm_offset = offset; rcur->bc_rec.r.rm_flags = flags; error = xfs_btree_insert(rcur, &i); if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } done: if (error) trace_xfs_rmap_insert_error(rcur, error, _RET_IP_); return error; } STATIC int xfs_rmap_delete( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner, uint64_t offset, unsigned int flags) { int i; int error; trace_xfs_rmap_delete(rcur, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_delete(rcur, &i); if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } done: if (error) trace_xfs_rmap_delete_error(rcur, error, _RET_IP_); return error; } /* Convert an internal btree record to an rmap record. */ xfs_failaddr_t xfs_rmap_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_rmap_irec *irec) { irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock); irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner); return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset), irec); } /* Simple checks for rmap records. */ xfs_failaddr_t xfs_rmap_check_irec( struct xfs_perag *pag, const struct xfs_rmap_irec *irec) { struct xfs_mount *mp = pag_mount(pag); bool is_inode; bool is_unwritten; bool is_bmbt; bool is_attr; if (irec->rm_blockcount == 0) return __this_address; if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) { if (irec->rm_owner != XFS_RMAP_OWN_FS) return __this_address; if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) return __this_address; } else { /* check for valid extent range, including overflow */ if (!xfs_verify_agbext(pag, irec->rm_startblock, irec->rm_blockcount)) return __this_address; } if (!(xfs_verify_ino(mp, irec->rm_owner) || (irec->rm_owner <= XFS_RMAP_OWN_FS && irec->rm_owner >= XFS_RMAP_OWN_MIN))) return __this_address; /* Check flags. */ is_inode = !XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; if (is_bmbt && irec->rm_offset != 0) return __this_address; if (!is_inode && irec->rm_offset != 0) return __this_address; if (is_unwritten && (is_bmbt || !is_inode || is_attr)) return __this_address; if (!is_inode && (is_bmbt || is_unwritten || is_attr)) return __this_address; /* Check for a valid fork offset, if applicable. */ if (is_inode && !is_bmbt && !xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount)) return __this_address; return NULL; } static inline xfs_failaddr_t xfs_rmap_check_btrec( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *irec) { return xfs_rmap_check_irec(to_perag(cur->bc_group), irec); } static inline int xfs_rmap_complain_bad_rec( struct xfs_btree_cur *cur, xfs_failaddr_t fa, const struct xfs_rmap_irec *irec) { struct xfs_mount *mp = cur->bc_mp; if (xfs_btree_is_mem_rmap(cur->bc_ops)) xfs_warn(mp, "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa); else xfs_warn(mp, "Reverse Mapping BTree record corruption in AG %d detected at %pS!", cur->bc_group->xg_gno, fa); xfs_warn(mp, "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", irec->rm_owner, irec->rm_flags, irec->rm_startblock, irec->rm_blockcount); xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } /* * Get the data from the pointed-to record. */ int xfs_rmap_get_rec( struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec, int *stat) { union xfs_btree_rec *rec; xfs_failaddr_t fa; int error; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) return error; fa = xfs_rmap_btrec_to_irec(rec, irec); if (!fa) fa = xfs_rmap_check_btrec(cur, irec); if (fa) return xfs_rmap_complain_bad_rec(cur, fa, irec); return 0; } struct xfs_find_left_neighbor_info { struct xfs_rmap_irec high; struct xfs_rmap_irec *irec; }; /* For each rmap given, figure out if it matches the key we want. */ STATIC int xfs_rmap_find_left_neighbor_helper( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *rec, void *priv) { struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_find_left_neighbor_candidate(cur, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); if (rec->rm_owner != info->high.rm_owner) return 0; if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset) return 0; *info->irec = *rec; return -ECANCELED; } /* * Find the record to the left of the given extent, being careful only to * return a match with the same owner and adjacent physical and logical * block ranges. */ STATIC int xfs_rmap_find_left_neighbor( struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat) { struct xfs_find_left_neighbor_info info; int found = 0; int error; *stat = 0; if (bno == 0) return 0; info.high.rm_startblock = bno - 1; info.high.rm_owner = owner; if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK)) { if (offset == 0) return 0; info.high.rm_offset = offset - 1; } else info.high.rm_offset = 0; info.high.rm_flags = flags; info.high.rm_blockcount = 0; info.irec = irec; trace_xfs_rmap_find_left_neighbor_query(cur, bno, 0, owner, offset, flags); /* * Historically, we always used the range query to walk every reverse * mapping that could possibly overlap the key that the caller asked * for, and filter out the ones that don't. That is very slow when * there are a lot of records. * * However, there are two scenarios where the classic btree search can * produce correct results -- if the index contains a record that is an * exact match for the lookup key; and if there are no other records * between the record we want and the key we supplied. * * As an optimization, try a non-overlapped lookup first. This makes * extent conversion and remap operations run a bit faster if the * physical extents aren't being shared. If we don't find what we * want, we fall back to the overlapped query. */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec, &found); if (error) return error; if (found) error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info); if (!error) error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_find_left_neighbor_helper, &info); if (error != -ECANCELED) return error; *stat = 1; trace_xfs_rmap_find_left_neighbor_result(cur, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return 0; } /* For each rmap given, figure out if it matches the key we want. */ STATIC int xfs_rmap_lookup_le_range_helper( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *rec, void *priv) { struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_lookup_le_range_candidate(cur, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); if (rec->rm_owner != info->high.rm_owner) return 0; if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && (rec->rm_offset > info->high.rm_offset || rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset)) return 0; *info->irec = *rec; return -ECANCELED; } /* * Find the record to the left of the given extent, being careful only to * return a match with the same owner and overlapping physical and logical * block ranges. This is the overlapping-interval version of * xfs_rmap_lookup_le. */ int xfs_rmap_lookup_le_range( struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat) { struct xfs_find_left_neighbor_info info; int found = 0; int error; info.high.rm_startblock = bno; info.high.rm_owner = owner; if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK)) info.high.rm_offset = offset; else info.high.rm_offset = 0; info.high.rm_flags = flags; info.high.rm_blockcount = 0; *stat = 0; info.irec = irec; trace_xfs_rmap_lookup_le_range(cur, bno, 0, owner, offset, flags); /* * Historically, we always used the range query to walk every reverse * mapping that could possibly overlap the key that the caller asked * for, and filter out the ones that don't. That is very slow when * there are a lot of records. * * However, there are two scenarios where the classic btree search can * produce correct results -- if the index contains a record that is an * exact match for the lookup key; and if there are no other records * between the record we want and the key we supplied. * * As an optimization, try a non-overlapped lookup first. This makes * scrub run much faster on most filesystems because bmbt records are * usually an exact match for rmap records. If we don't find what we * want, we fall back to the overlapped query. */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec, &found); if (error) return error; if (found) error = xfs_rmap_lookup_le_range_helper(cur, irec, &info); if (!error) error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_lookup_le_range_helper, &info); if (error != -ECANCELED) return error; *stat = 1; trace_xfs_rmap_lookup_le_range_result(cur, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return 0; } /* * Perform all the relevant owner checks for a removal op. If we're doing an * unknown-owner removal then we have no owner information to check. */ static int xfs_rmap_free_check_owner( struct xfs_btree_cur *cur, uint64_t ltoff, struct xfs_rmap_irec *rec, xfs_filblks_t len, uint64_t owner, uint64_t offset, unsigned int flags) { struct xfs_mount *mp = cur->bc_mp; int error = 0; if (owner == XFS_RMAP_OWN_UNKNOWN) return 0; /* Make sure the unwritten flag matches. */ if (XFS_IS_CORRUPT(mp, (flags & XFS_RMAP_UNWRITTEN) != (rec->rm_flags & XFS_RMAP_UNWRITTEN))) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } /* Make sure the owner matches what we expect to find in the tree. */ if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } /* Check the offset, if necessary. */ if (XFS_RMAP_NON_INODE_OWNER(owner)) goto out; if (flags & XFS_RMAP_BMBT_BLOCK) { if (XFS_IS_CORRUPT(mp, !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } } else { if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } if (XFS_IS_CORRUPT(mp, offset + len > ltoff + rec->rm_blockcount)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } } out: return error; } /* * Find the extent in the rmap btree and remove it. * * The record we find should always be an exact match for the extent that we're * looking for, since we insert them into the btree without modification. * * Special Case #1: when growing the filesystem, we "free" an extent when * growing the last AG. This extent is new space and so it is not tracked as * used space in the btree. The growfs code will pass in an owner of * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this * extent. We verify that - the extent lookup result in a record that does not * overlap. * * Special Case #2: EFIs do not record the owner of the extent, so when * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap * btree to ignore the owner (i.e. wildcard match) so we don't trigger * corruption checks during log recovery. */ STATIC int xfs_rmap_unmap( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec ltrec; uint64_t ltoff; int error = 0; int i; uint64_t owner; uint64_t offset; unsigned int flags; bool ignore_off; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) || (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo); /* * We should always have a left record because there's a static record * for the AG headers at rm_startblock == 0 created by mkfs/growfs that * will not ever be removed from the tree. */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); ltoff = ltrec.rm_offset; /* * For growfs, the incoming extent must be beyond the left record we * just found as it is new space and won't be used by anyone. This is * just a corruption check as we don't actually do anything with this * extent. Note that we need to use >= instead of > because it might * be the case that the "left" extent goes all the way to EOFS. */ if (owner == XFS_RMAP_OWN_NULL) { if (XFS_IS_CORRUPT(mp, bno < ltrec.rm_startblock + ltrec.rm_blockcount)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } goto out_done; } /* * If we're doing an unknown-owner removal for EFI recovery, we expect * to find the full range in the rmapbt or nothing at all. If we * don't find any rmaps overlapping either end of the range, we're * done. Hopefully this means that the EFI creator already queued * (and finished) a RUI to remove the rmap. */ if (owner == XFS_RMAP_OWN_UNKNOWN && ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { struct xfs_rmap_irec rtrec; error = xfs_btree_increment(cur, 0, &i); if (error) goto out_error; if (i == 0) goto out_done; error = xfs_rmap_get_rec(cur, &rtrec, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (rtrec.rm_startblock >= bno + len) goto out_done; } /* Make sure the extent we found covers the entire freeing range. */ if (XFS_IS_CORRUPT(mp, ltrec.rm_startblock > bno || ltrec.rm_startblock + ltrec.rm_blockcount < bno + len)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Check owner information. */ error = xfs_rmap_free_check_owner(cur, ltoff, &ltrec, len, owner, offset, flags); if (error) goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ trace_xfs_rmap_delete(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } else if (ltrec.rm_startblock == bno) { /* * overlap left hand side of extent: move the start, trim the * length and update the current record. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrrrrrrr| * bno len */ ltrec.rm_startblock += len; ltrec.rm_blockcount -= len; if (!ignore_off) ltrec.rm_offset += len; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) { /* * overlap right hand side of extent: trim the length and update * the current record. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrrrrrrr| * bno len */ ltrec.rm_blockcount -= len; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; } else { /* * overlap middle of extent: trim the length of the existing * record to the length of the new left-extent size, increment * the insertion position so we can insert a new record * containing the remaining right-extent space. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrr| |rrrr| * bno len */ xfs_extlen_t orig_len = ltrec.rm_blockcount; ltrec.rm_blockcount = bno - ltrec.rm_startblock; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; error = xfs_btree_increment(cur, 0, &i); if (error) goto out_error; cur->bc_rec.r.rm_startblock = bno + len; cur->bc_rec.r.rm_blockcount = orig_len - len - ltrec.rm_blockcount; cur->bc_rec.r.rm_owner = ltrec.rm_owner; if (ignore_off) cur->bc_rec.r.rm_offset = 0; else cur->bc_rec.r.rm_offset = offset + len; cur->bc_rec.r.rm_flags = flags; trace_xfs_rmap_insert(cur, cur->bc_rec.r.rm_startblock, cur->bc_rec.r.rm_blockcount, cur->bc_rec.r.rm_owner, cur->bc_rec.r.rm_offset, cur->bc_rec.r.rm_flags); error = xfs_btree_insert(cur, &i); if (error) goto out_error; } out_done: trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_unmap_error(cur, error, _RET_IP_); return error; } #ifdef CONFIG_XFS_LIVE_HOOKS /* * Use a static key here to reduce the overhead of rmapbt live updates. If * the compiler supports jump labels, the static branch will be replaced by a * nop sled when there are no hook users. Online fsck is currently the only * caller, so this is a reasonable tradeoff. * * Note: Patching the kernel code requires taking the cpu hotplug lock. Other * parts of the kernel allocate memory with that lock held, which means that * XFS callers cannot hold any locks that might be used by memory reclaim or * writeback when calling the static_branch_{inc,dec} functions. */ DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_rmap_hooks_switch); void xfs_rmap_hook_disable(void) { xfs_hooks_switch_off(&xfs_rmap_hooks_switch); } void xfs_rmap_hook_enable(void) { xfs_hooks_switch_on(&xfs_rmap_hooks_switch); } /* Call downstream hooks for a reverse mapping update. */ static inline void xfs_rmap_update_hook( struct xfs_trans *tp, struct xfs_group *xg, enum xfs_rmap_intent_type op, xfs_agblock_t startblock, xfs_extlen_t blockcount, bool unwritten, const struct xfs_owner_info *oinfo) { if (xfs_hooks_switched_on(&xfs_rmap_hooks_switch)) { struct xfs_rmap_update_params p = { .startblock = startblock, .blockcount = blockcount, .unwritten = unwritten, .oinfo = *oinfo, /* struct copy */ }; if (xg) xfs_hooks_call(&xg->xg_rmap_update_hooks, op, &p); } } /* Call the specified function during a reverse mapping update. */ int xfs_rmap_hook_add( struct xfs_group *xg, struct xfs_rmap_hook *hook) { return xfs_hooks_add(&xg->xg_rmap_update_hooks, &hook->rmap_hook); } /* Stop calling the specified function during a reverse mapping update. */ void xfs_rmap_hook_del( struct xfs_group *xg, struct xfs_rmap_hook *hook) { xfs_hooks_del(&xg->xg_rmap_update_hooks, &hook->rmap_hook); } /* Configure rmap update hook functions. */ void xfs_rmap_hook_setup( struct xfs_rmap_hook *hook, notifier_fn_t mod_fn) { xfs_hook_setup(&hook->rmap_hook, mod_fn); } #else # define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while (0) #endif /* CONFIG_XFS_LIVE_HOOKS */ /* * Remove a reference to an extent in the rmap btree. */ int xfs_rmap_free( struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *cur; int error; if (!xfs_has_rmapbt(mp)) return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_UNMAP, bno, len, false, oinfo); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); return error; } /* * A mergeable rmap must have the same owner and the same values for * the unwritten, attr_fork, and bmbt flags. The startblock and * offset are checked separately. */ static bool xfs_rmap_is_mergeable( struct xfs_rmap_irec *irec, uint64_t owner, unsigned int flags) { if (irec->rm_owner == XFS_RMAP_OWN_NULL) return false; if (irec->rm_owner != owner) return false; if ((flags & XFS_RMAP_UNWRITTEN) ^ (irec->rm_flags & XFS_RMAP_UNWRITTEN)) return false; if ((flags & XFS_RMAP_ATTR_FORK) ^ (irec->rm_flags & XFS_RMAP_ATTR_FORK)) return false; if ((flags & XFS_RMAP_BMBT_BLOCK) ^ (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)) return false; return true; } /* * When we allocate a new block, the first thing we do is add a reference to * the extent in the rmap btree. This takes the form of a [agbno, length, * owner, offset] record. Flags are encoded in the high bits of the offset * field. */ STATIC int xfs_rmap_map( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec ltrec; struct xfs_rmap_irec gtrec; int have_gt; int have_lt; int error = 0; int i; uint64_t owner; uint64_t offset; unsigned int flags = 0; bool ignore_off; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); ASSERT(owner != 0); ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) || (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo); ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent * record for our insertion point. This will also give us the record for * start block contiguity tests. */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &have_lt); if (error) goto out_error; if (have_lt) { trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); if (!xfs_rmap_is_mergeable(&ltrec, owner, flags)) have_lt = 0; } if (XFS_IS_CORRUPT(mp, have_lt != 0 && ltrec.rm_startblock + ltrec.rm_blockcount > bno)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* * Increment the cursor to see if we have a right-adjacent record to our * insertion point. This will give us the record for end block * contiguity tests. */ error = xfs_btree_increment(cur, 0, &have_gt); if (error) goto out_error; if (have_gt) { error = xfs_rmap_get_rec(cur, &gtrec, &have_gt); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, have_gt != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) have_gt = 0; } /* * Note: cursor currently points one record to the right of ltrec, even * if there is no record in the tree to the right. */ if (have_lt && ltrec.rm_startblock + ltrec.rm_blockcount == bno && (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) { /* * left edge contiguous, merge into left record. * * ltbno ltlen * orig: |ooooooooo| * adding: |aaaaaaaaa| * result: |rrrrrrrrrrrrrrrrrrr| * bno len */ ltrec.rm_blockcount += len; if (have_gt && bno + len == gtrec.rm_startblock && (ignore_off || offset + len == gtrec.rm_offset) && (unsigned long)ltrec.rm_blockcount + len + gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) { /* * right edge also contiguous, delete right record * and merge into left record. * * ltbno ltlen gtbno gtlen * orig: |ooooooooo| |ooooooooo| * adding: |aaaaaaaaa| * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| */ ltrec.rm_blockcount += gtrec.rm_blockcount; trace_xfs_rmap_delete(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } /* point the cursor back to the left record and update */ error = xfs_btree_decrement(cur, 0, &have_gt); if (error) goto out_error; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; } else if (have_gt && bno + len == gtrec.rm_startblock && (ignore_off || offset + len == gtrec.rm_offset)) { /* * right edge contiguous, merge into right record. * * gtbno gtlen * Orig: |ooooooooo| * adding: |aaaaaaaaa| * Result: |rrrrrrrrrrrrrrrrrrr| * bno len */ gtrec.rm_startblock = bno; gtrec.rm_blockcount += len; if (!ignore_off) gtrec.rm_offset = offset; error = xfs_rmap_update(cur, &gtrec); if (error) goto out_error; } else { /* * no contiguous edge with identical owner, insert * new record at current cursor position. */ cur->bc_rec.r.rm_startblock = bno; cur->bc_rec.r.rm_blockcount = len; cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; trace_xfs_rmap_insert(cur, bno, len, owner, offset, flags); error = xfs_btree_insert(cur, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_map_error(cur, error, _RET_IP_); return error; } /* * Add a reference to an extent in the rmap btree. */ int xfs_rmap_alloc( struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *cur; int error; if (!xfs_has_rmapbt(mp)) return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_MAP, bno, len, false, oinfo); error = xfs_rmap_map(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); return error; } #define RMAP_LEFT_CONTIG (1 << 0) #define RMAP_RIGHT_CONTIG (1 << 1) #define RMAP_LEFT_FILLING (1 << 2) #define RMAP_RIGHT_FILLING (1 << 3) #define RMAP_LEFT_VALID (1 << 6) #define RMAP_RIGHT_VALID (1 << 7) #define LEFT r[0] #define RIGHT r[1] #define PREV r[2] #define NEW r[3] /* * Convert an unwritten extent to a real extent or vice versa. * Does not handle overlapping extents. */ STATIC int xfs_rmap_convert( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec r[4]; /* neighbor extent entries */ /* left is 0, right is 1, */ /* prev is 2, new is 3 */ uint64_t owner; uint64_t offset; uint64_t new_endoff; unsigned int oldext; unsigned int newext; unsigned int flags = 0; int i; int state = 0; int error; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) || (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo); /* * For the initial lookup, look for an exact match or the left-adjacent * record for our insertion point. This will also give us the record for * start block contiguity tests. */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_lookup_le_range_result(cur, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); ASSERT(PREV.rm_offset <= offset); ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff); ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext); newext = ~oldext & XFS_RMAP_UNWRITTEN; /* * Set flags determining what part of the previous oldext allocation * extent is being replaced by a newext allocation. */ if (PREV.rm_offset == offset) state |= RMAP_LEFT_FILLING; if (PREV.rm_offset + PREV.rm_blockcount == new_endoff) state |= RMAP_RIGHT_FILLING; /* * Decrement the cursor to see if we have a left-adjacent record to our * insertion point. This will give us the record for end block * contiguity tests. */ error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; if (i) { state |= RMAP_LEFT_VALID; error = xfs_rmap_get_rec(cur, &LEFT, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, LEFT.rm_startblock + LEFT.rm_blockcount > bno)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_find_left_neighbor_result(cur, LEFT.rm_startblock, LEFT.rm_blockcount, LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags); if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && LEFT.rm_offset + LEFT.rm_blockcount == offset && xfs_rmap_is_mergeable(&LEFT, owner, newext)) state |= RMAP_LEFT_CONTIG; } /* * Increment the cursor to see if we have a right-adjacent record to our * insertion point. This will give us the record for end block * contiguity tests. */ error = xfs_btree_increment(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_increment(cur, 0, &i); if (error) goto done; if (i) { state |= RMAP_RIGHT_VALID; error = xfs_rmap_get_rec(cur, &RIGHT, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_find_right_neighbor_result(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (bno + len == RIGHT.rm_startblock && offset + len == RIGHT.rm_offset && xfs_rmap_is_mergeable(&RIGHT, owner, newext)) state |= RMAP_RIGHT_CONTIG; } /* check that left + prev + right is not too long */ if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) == (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) && (unsigned long)LEFT.rm_blockcount + len + RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; trace_xfs_rmap_convert_state(cur, state, _RET_IP_); /* reset the cursor back to PREV */ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } /* * Switch out based on the FILLING and CONTIG state bits. */ switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) { case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ error = xfs_btree_increment(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_delete(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_delete(cur, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW = LEFT; NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ trace_xfs_rmap_delete(cur, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW = LEFT; NEW.rm_blockcount += PREV.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ error = xfs_btree_increment(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_delete(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); error = xfs_btree_delete(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW = PREV; NEW.rm_blockcount = len + RIGHT.rm_blockcount; NEW.rm_flags = newext; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING: /* * Setting all of a previous oldext extent to newext. * Neither the left nor right neighbors are contiguous with * the new one. */ NEW = PREV; NEW.rm_flags = newext; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. */ NEW = PREV; NEW.rm_offset += len; NEW.rm_startblock += len; NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; NEW = LEFT; NEW.rm_blockcount += len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ NEW = PREV; NEW.rm_startblock += len; NEW.rm_offset += len; NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; NEW.rm_startblock = bno; NEW.rm_owner = owner; NEW.rm_offset = offset; NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } break; case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ NEW = PREV; NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; error = xfs_btree_increment(cur, 0, &i); if (error) goto done; NEW = RIGHT; NEW.rm_offset = offset; NEW.rm_startblock = bno; NEW.rm_blockcount += len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_RIGHT_FILLING: /* * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ NEW = PREV; NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset, oldext, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_startblock = bno; NEW.rm_owner = owner; NEW.rm_offset = offset; NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } break; case 0: /* * Setting the middle part of a previous oldext extent to * newext. Contiguity is impossible here. * One extent becomes three extents. */ /* new right extent - oldext */ NEW.rm_startblock = bno + len; NEW.rm_owner = owner; NEW.rm_offset = new_endoff; NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount - new_endoff; NEW.rm_flags = PREV.rm_flags; error = xfs_rmap_update(cur, &NEW); if (error) goto done; /* new left extent - oldext */ NEW = PREV; NEW.rm_blockcount = offset - PREV.rm_offset; cur->bc_rec.r = NEW; trace_xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); error = xfs_btree_insert(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } /* * Reset the cursor to the position of the new extent * we are about to insert as we can't trust it after * the previous insert. */ error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset, oldext, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } /* new middle extent - newext */ cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; cur->bc_rec.r.rm_flags |= newext; trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } break; case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG: case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_LEFT_CONTIG: case RMAP_RIGHT_CONTIG: /* * These cases are all impossible. */ ASSERT(0); } trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur, error, _RET_IP_); return error; } /* * Convert an unwritten extent to a real extent or vice versa. If there is no * possibility of overlapping extents, delegate to the simpler convert * function. */ STATIC int xfs_rmap_convert_shared( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec r[4]; /* neighbor extent entries */ /* left is 0, right is 1, */ /* prev is 2, new is 3 */ uint64_t owner; uint64_t offset; uint64_t new_endoff; unsigned int oldext; unsigned int newext; unsigned int flags = 0; int i; int state = 0; int error; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) || (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo); /* * For the initial lookup, look for and exact match or the left-adjacent * record for our insertion point. This will also give us the record for * start block contiguity tests. */ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } ASSERT(PREV.rm_offset <= offset); ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff); ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext); newext = ~oldext & XFS_RMAP_UNWRITTEN; /* * Set flags determining what part of the previous oldext allocation * extent is being replaced by a newext allocation. */ if (PREV.rm_offset == offset) state |= RMAP_LEFT_FILLING; if (PREV.rm_offset + PREV.rm_blockcount == new_endoff) state |= RMAP_RIGHT_FILLING; /* Is there a left record that abuts our range? */ error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, newext, &LEFT, &i); if (error) goto done; if (i) { state |= RMAP_LEFT_VALID; if (XFS_IS_CORRUPT(mp, LEFT.rm_startblock + LEFT.rm_blockcount > bno)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (xfs_rmap_is_mergeable(&LEFT, owner, newext)) state |= RMAP_LEFT_CONTIG; } /* Is there a right record that abuts our range? */ error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len, newext, &i); if (error) goto done; if (i) { state |= RMAP_RIGHT_VALID; error = xfs_rmap_get_rec(cur, &RIGHT, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } trace_xfs_rmap_find_right_neighbor_result(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (xfs_rmap_is_mergeable(&RIGHT, owner, newext)) state |= RMAP_RIGHT_CONTIG; } /* check that left + prev + right is not too long */ if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) == (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) && (unsigned long)LEFT.rm_blockcount + len + RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; trace_xfs_rmap_convert_state(cur, state, _RET_IP_); /* * Switch out based on the FILLING and CONTIG state bits. */ switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) { case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ error = xfs_rmap_delete(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (error) goto done; error = xfs_rmap_delete(cur, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); if (error) goto done; NEW = LEFT; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ error = xfs_rmap_delete(cur, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); if (error) goto done; NEW = LEFT; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount += PREV.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ error = xfs_rmap_delete(cur, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (error) goto done; NEW = PREV; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount += RIGHT.rm_blockcount; NEW.rm_flags = RIGHT.rm_flags; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING: /* * Setting all of a previous oldext extent to newext. * Neither the left nor right neighbors are contiguous with * the new one. */ NEW = PREV; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_flags = newext; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. */ NEW = PREV; error = xfs_rmap_delete(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; NEW.rm_offset += len; NEW.rm_startblock += len; NEW.rm_blockcount -= len; error = xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; NEW = LEFT; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount += len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; break; case RMAP_LEFT_FILLING: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ NEW = PREV; error = xfs_rmap_delete(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; NEW.rm_offset += len; NEW.rm_startblock += len; NEW.rm_blockcount -= len; error = xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; error = xfs_rmap_insert(cur, bno, len, owner, offset, newext); if (error) goto done; break; case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: /* * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ NEW = PREV; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount = offset - NEW.rm_offset; error = xfs_rmap_update(cur, &NEW); if (error) goto done; NEW = RIGHT; error = xfs_rmap_delete(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; NEW.rm_offset = offset; NEW.rm_startblock = bno; NEW.rm_blockcount += len; error = xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; break; case RMAP_RIGHT_FILLING: /* * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ NEW = PREV; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) goto done; error = xfs_rmap_insert(cur, bno, len, owner, offset, newext); if (error) goto done; break; case 0: /* * Setting the middle part of a previous oldext extent to * newext. Contiguity is impossible here. * One extent becomes three extents. */ /* new right extent - oldext */ NEW.rm_startblock = bno + len; NEW.rm_owner = owner; NEW.rm_offset = new_endoff; NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount - new_endoff; NEW.rm_flags = PREV.rm_flags; error = xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; /* new left extent - oldext */ NEW = PREV; error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } NEW.rm_blockcount = offset - NEW.rm_offset; error = xfs_rmap_update(cur, &NEW); if (error) goto done; /* new middle extent - newext */ NEW.rm_startblock = bno; NEW.rm_blockcount = len; NEW.rm_owner = owner; NEW.rm_offset = offset; NEW.rm_flags = newext; error = xfs_rmap_insert(cur, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); if (error) goto done; break; case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG: case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: case RMAP_LEFT_CONTIG: case RMAP_RIGHT_CONTIG: /* * These cases are all impossible. */ ASSERT(0); } trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur, error, _RET_IP_); return error; } #undef NEW #undef LEFT #undef RIGHT #undef PREV /* * Find an extent in the rmap btree and unmap it. For rmap extent types that * can overlap (data fork rmaps on reflink filesystems) we must be careful * that the prev/next records in the btree might belong to another owner. * Therefore we must use delete+insert to alter any of the key fields. * * For every other situation there can only be one owner for a given extent, * so we can call the regular _free function. */ STATIC int xfs_rmap_unmap_shared( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec ltrec; uint64_t ltoff; int error = 0; int i; uint64_t owner; uint64_t offset; unsigned int flags; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo); /* * We should always have a left record because there's a static record * for the AG headers at rm_startblock == 0 created by mkfs/growfs that * will not ever be removed from the tree. */ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, &ltrec, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } ltoff = ltrec.rm_offset; /* Make sure the extent we found covers the entire freeing range. */ if (XFS_IS_CORRUPT(mp, ltrec.rm_startblock > bno || ltrec.rm_startblock + ltrec.rm_blockcount < bno + len)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Make sure the owner matches what we expect to find in the tree. */ if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Make sure the unwritten flag matches. */ if (XFS_IS_CORRUPT(mp, (flags & XFS_RMAP_UNWRITTEN) != (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Check the offset. */ if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* Exact match, simply remove the record from rmap tree. */ error = xfs_rmap_delete(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); if (error) goto out_error; } else if (ltrec.rm_startblock == bno) { /* * Overlap left hand side of extent: move the start, trim the * length and update the current record. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrrrrrrr| * bno len */ /* Delete prev rmap. */ error = xfs_rmap_delete(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); if (error) goto out_error; /* Add an rmap at the new offset. */ ltrec.rm_startblock += len; ltrec.rm_blockcount -= len; ltrec.rm_offset += len; error = xfs_rmap_insert(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); if (error) goto out_error; } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) { /* * Overlap right hand side of extent: trim the length and * update the current record. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrrrrrrr| * bno len */ error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } ltrec.rm_blockcount -= len; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; } else { /* * Overlap middle of extent: trim the length of the existing * record to the length of the new left-extent size, increment * the insertion position so we can insert a new record * containing the remaining right-extent space. * * ltbno ltlen * Orig: |oooooooooooooooooooo| * Freeing: |fffffffff| * Result: |rrrrr| |rrrr| * bno len */ xfs_extlen_t orig_len = ltrec.rm_blockcount; /* Shrink the left side of the rmap */ error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } ltrec.rm_blockcount = bno - ltrec.rm_startblock; error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; /* Add an rmap at the new offset */ error = xfs_rmap_insert(cur, bno + len, orig_len - len - ltrec.rm_blockcount, ltrec.rm_owner, offset + len, ltrec.rm_flags); if (error) goto out_error; } trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_unmap_error(cur, error, _RET_IP_); return error; } /* * Find an extent in the rmap btree and map it. For rmap extent types that * can overlap (data fork rmaps on reflink filesystems) we must be careful * that the prev/next records in the btree might belong to another owner. * Therefore we must use delete+insert to alter any of the key fields. * * For every other situation there can only be one owner for a given extent, * so we can call the regular _alloc function. */ STATIC int xfs_rmap_map_shared( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rmap_irec ltrec; struct xfs_rmap_irec gtrec; int have_gt; int have_lt; int error = 0; int i; uint64_t owner; uint64_t offset; unsigned int flags = 0; xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo); /* Is there a left record that abuts our range? */ error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags, &ltrec, &have_lt); if (error) goto out_error; if (have_lt && !xfs_rmap_is_mergeable(&ltrec, owner, flags)) have_lt = 0; /* Is there a right record that abuts our range? */ error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len, flags, &have_gt); if (error) goto out_error; if (have_gt) { error = xfs_rmap_get_rec(cur, &gtrec, &have_gt); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, have_gt != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) have_gt = 0; } if (have_lt && ltrec.rm_startblock + ltrec.rm_blockcount == bno && ltrec.rm_offset + ltrec.rm_blockcount == offset) { /* * Left edge contiguous, merge into left record. * * ltbno ltlen * orig: |ooooooooo| * adding: |aaaaaaaaa| * result: |rrrrrrrrrrrrrrrrrrr| * bno len */ ltrec.rm_blockcount += len; if (have_gt && bno + len == gtrec.rm_startblock && offset + len == gtrec.rm_offset) { /* * Right edge also contiguous, delete right record * and merge into left record. * * ltbno ltlen gtbno gtlen * orig: |ooooooooo| |ooooooooo| * adding: |aaaaaaaaa| * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| */ ltrec.rm_blockcount += gtrec.rm_blockcount; error = xfs_rmap_delete(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (error) goto out_error; } /* Point the cursor back to the left record and update. */ error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } error = xfs_rmap_update(cur, &ltrec); if (error) goto out_error; } else if (have_gt && bno + len == gtrec.rm_startblock && offset + len == gtrec.rm_offset) { /* * Right edge contiguous, merge into right record. * * gtbno gtlen * Orig: |ooooooooo| * adding: |aaaaaaaaa| * Result: |rrrrrrrrrrrrrrrrrrr| * bno len */ /* Delete the old record. */ error = xfs_rmap_delete(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (error) goto out_error; /* Move the start and re-add it. */ gtrec.rm_startblock = bno; gtrec.rm_blockcount += len; gtrec.rm_offset = offset; error = xfs_rmap_insert(cur, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (error) goto out_error; } else { /* * No contiguous edge with identical owner, insert * new record at current cursor position. */ error = xfs_rmap_insert(cur, bno, len, owner, offset, flags); if (error) goto out_error; } trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_map_error(cur, error, _RET_IP_); return error; } /* Insert a raw rmap into the rmapbt. */ int xfs_rmap_map_raw( struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap) { struct xfs_owner_info oinfo; xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset, rmap->rm_flags); if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | XFS_RMAP_UNWRITTEN)) || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) return xfs_rmap_map(cur, rmap->rm_startblock, rmap->rm_blockcount, rmap->rm_flags & XFS_RMAP_UNWRITTEN, &oinfo); return xfs_rmap_map_shared(cur, rmap->rm_startblock, rmap->rm_blockcount, rmap->rm_flags & XFS_RMAP_UNWRITTEN, &oinfo); } struct xfs_rmap_query_range_info { xfs_rmap_query_range_fn fn; void *priv; }; /* Format btree record and pass to our callback. */ STATIC int xfs_rmap_query_range_helper( struct xfs_btree_cur *cur, const union xfs_btree_rec *rec, void *priv) { struct xfs_rmap_query_range_info *query = priv; struct xfs_rmap_irec irec; xfs_failaddr_t fa; fa = xfs_rmap_btrec_to_irec(rec, &irec); if (!fa) fa = xfs_rmap_check_btrec(cur, &irec); if (fa) return xfs_rmap_complain_bad_rec(cur, fa, &irec); return query->fn(cur, &irec, query->priv); } /* Find all rmaps between two keys. */ int xfs_rmap_query_range( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *low_rec, const struct xfs_rmap_irec *high_rec, xfs_rmap_query_range_fn fn, void *priv) { union xfs_btree_irec low_brec = { .r = *low_rec }; union xfs_btree_irec high_brec = { .r = *high_rec }; struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn }; return xfs_btree_query_range(cur, &low_brec, &high_brec, xfs_rmap_query_range_helper, &query); } /* Find all rmaps. */ int xfs_rmap_query_all( struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn, void *priv) { struct xfs_rmap_query_range_info query; query.priv = priv; query.fn = fn; return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query); } /* Commit an rmap operation into the ondisk tree. */ int __xfs_rmap_finish_intent( struct xfs_btree_cur *rcur, enum xfs_rmap_intent_type op, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, bool unwritten) { switch (op) { case XFS_RMAP_ALLOC: case XFS_RMAP_MAP: return xfs_rmap_map(rcur, bno, len, unwritten, oinfo); case XFS_RMAP_MAP_SHARED: return xfs_rmap_map_shared(rcur, bno, len, unwritten, oinfo); case XFS_RMAP_FREE: case XFS_RMAP_UNMAP: return xfs_rmap_unmap(rcur, bno, len, unwritten, oinfo); case XFS_RMAP_UNMAP_SHARED: return xfs_rmap_unmap_shared(rcur, bno, len, unwritten, oinfo); case XFS_RMAP_CONVERT: return xfs_rmap_convert(rcur, bno, len, !unwritten, oinfo); case XFS_RMAP_CONVERT_SHARED: return xfs_rmap_convert_shared(rcur, bno, len, !unwritten, oinfo); default: ASSERT(0); return -EFSCORRUPTED; } } /* * Process one of the deferred rmap operations. We pass back the * btree cursor to maintain our lock on the rmapbt between calls. * This saves time and eliminates a buffer deadlock between the * superblock and the AGF because we'll always grab them in the same * order. */ int xfs_rmap_finish_one( struct xfs_trans *tp, struct xfs_rmap_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_owner_info oinfo; struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *rcur = *pcur; struct xfs_buf *agbp = NULL; xfs_agblock_t bno; bool unwritten; int error = 0; trace_xfs_rmap_deferred(mp, ri); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) return -EIO; /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ if (rcur != NULL && rcur->bc_group != ri->ri_group) { xfs_btree_del_cursor(rcur, 0); rcur = NULL; *pcur = NULL; } if (rcur == NULL) { struct xfs_perag *pag = to_perag(ri->ri_group); /* * Refresh the freelist before we start changing the * rmapbt, because a shape change could cause us to * allocate blocks. */ error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) { xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); return error; } if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) { xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); return -EFSCORRUPTED; } *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); } xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork, ri->ri_bmap.br_startoff); unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN; bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock); error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno, ri->ri_bmap.br_blockcount, &oinfo, unwritten); if (error) return error; xfs_rmap_update_hook(tp, ri->ri_group, ri->ri_type, bno, ri->ri_bmap.br_blockcount, unwritten, &oinfo); return 0; } /* * Don't defer an rmap if we aren't an rmap filesystem. */ static bool xfs_rmap_update_is_needed( struct xfs_mount *mp, int whichfork) { return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK; } /* * Record a rmap intent; the list is kept sorted first by AG and then by * increasing age. */ static void __xfs_rmap_add( struct xfs_trans *tp, enum xfs_rmap_intent_type type, uint64_t owner, int whichfork, struct xfs_bmbt_irec *bmap) { struct xfs_rmap_intent *ri; ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; ri->ri_whichfork = whichfork; ri->ri_bmap = *bmap; xfs_rmap_defer_add(tp, ri); } /* Map an extent into a file. */ void xfs_rmap_map_extent( struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) { enum xfs_rmap_intent_type type = XFS_RMAP_MAP; if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) type = XFS_RMAP_MAP_SHARED; __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Unmap an extent out of a file. */ void xfs_rmap_unmap_extent( struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) { enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP; if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) type = XFS_RMAP_UNMAP_SHARED; __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* * Convert a data fork extent from unwritten to real or vice versa. * * Note that tp can be NULL here as no transaction is used for COW fork * unwritten conversion. */ void xfs_rmap_convert_extent( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) { enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT; if (!xfs_rmap_update_is_needed(mp, whichfork)) return; if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) type = XFS_RMAP_CONVERT_SHARED; __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Schedule the creation of an rmap for non-file data. */ void xfs_rmap_alloc_extent( struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner) { struct xfs_bmbt_irec bmap; if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) return; bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); } /* Schedule the deletion of an rmap for non-file data. */ void xfs_rmap_free_extent( struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner) { struct xfs_bmbt_irec bmap; if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) return; bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); } /* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ int xfs_rmap_compare( const struct xfs_rmap_irec *a, const struct xfs_rmap_irec *b) { __u64 oa; __u64 ob; oa = xfs_rmap_irec_offset_pack(a); ob = xfs_rmap_irec_offset_pack(b); if (a->rm_startblock < b->rm_startblock) return -1; else if (a->rm_startblock > b->rm_startblock) return 1; else if (a->rm_owner < b->rm_owner) return -1; else if (a->rm_owner > b->rm_owner) return 1; else if (oa < ob) return -1; else if (oa > ob) return 1; else return 0; } /* * Scan the physical storage part of the keyspace of the reverse mapping index * and tell us if the area has no records, is fully mapped by records, or is * partially filled. */ int xfs_rmap_has_records( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, enum xbtree_recpacking *outcome) { union xfs_btree_key mask = { .rmap.rm_startblock = cpu_to_be32(-1U), }; union xfs_btree_irec low; union xfs_btree_irec high; memset(&low, 0, sizeof(low)); low.r.rm_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.r.rm_startblock = bno + len - 1; return xfs_btree_has_records(cur, &low, &high, &mask, outcome); } struct xfs_rmap_ownercount { /* Owner that we're looking for. */ struct xfs_rmap_irec good; /* rmap search keys */ struct xfs_rmap_irec low; struct xfs_rmap_irec high; struct xfs_rmap_matches *results; /* Stop early if we find a nonmatch? */ bool stop_on_nonmatch; }; /* Does this rmap represent space that can have multiple owners? */ static inline bool xfs_rmap_shareable( struct xfs_mount *mp, const struct xfs_rmap_irec *rmap) { if (!xfs_has_reflink(mp)) return false; if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) return false; if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) return false; return true; } static inline void xfs_rmap_ownercount_init( struct xfs_rmap_ownercount *roc, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, struct xfs_rmap_matches *results) { memset(roc, 0, sizeof(*roc)); roc->results = results; roc->low.rm_startblock = bno; memset(&roc->high, 0xFF, sizeof(roc->high)); roc->high.rm_startblock = bno + len - 1; memset(results, 0, sizeof(*results)); roc->good.rm_startblock = bno; roc->good.rm_blockcount = len; roc->good.rm_owner = oinfo->oi_owner; roc->good.rm_offset = oinfo->oi_offset; if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) roc->good.rm_flags |= XFS_RMAP_ATTR_FORK; if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) roc->good.rm_flags |= XFS_RMAP_BMBT_BLOCK; } /* Figure out if this is a match for the owner. */ STATIC int xfs_rmap_count_owners_helper( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *rec, void *priv) { struct xfs_rmap_ownercount *roc = priv; struct xfs_rmap_irec check = *rec; unsigned int keyflags; bool filedata; int64_t delta; filedata = !XFS_RMAP_NON_INODE_OWNER(check.rm_owner) && !(check.rm_flags & XFS_RMAP_BMBT_BLOCK); /* Trim the part of check that comes before the comparison range. */ delta = (int64_t)roc->good.rm_startblock - check.rm_startblock; if (delta > 0) { check.rm_startblock += delta; check.rm_blockcount -= delta; if (filedata) check.rm_offset += delta; } /* Trim the part of check that comes after the comparison range. */ delta = (check.rm_startblock + check.rm_blockcount) - (roc->good.rm_startblock + roc->good.rm_blockcount); if (delta > 0) check.rm_blockcount -= delta; /* Don't care about unwritten status for establishing ownership. */ keyflags = check.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK); if (check.rm_startblock == roc->good.rm_startblock && check.rm_blockcount == roc->good.rm_blockcount && check.rm_owner == roc->good.rm_owner && check.rm_offset == roc->good.rm_offset && keyflags == roc->good.rm_flags) { roc->results->matches++; } else { roc->results->non_owner_matches++; if (xfs_rmap_shareable(cur->bc_mp, &roc->good) ^ xfs_rmap_shareable(cur->bc_mp, &check)) roc->results->bad_non_owner_matches++; } if (roc->results->non_owner_matches && roc->stop_on_nonmatch) return -ECANCELED; return 0; } /* Count the number of owners and non-owners of this range of blocks. */ int xfs_rmap_count_owners( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, struct xfs_rmap_matches *results) { struct xfs_rmap_ownercount roc; int error; xfs_rmap_ownercount_init(&roc, bno, len, oinfo, results); error = xfs_rmap_query_range(cur, &roc.low, &roc.high, xfs_rmap_count_owners_helper, &roc); if (error) return error; /* * There can't be any non-owner rmaps that conflict with the given * owner if we didn't find any rmaps matching the owner. */ if (!results->matches) results->bad_non_owner_matches = 0; return 0; } /* * Given an extent and some owner info, can we find records overlapping * the extent whose owner info does not match the given owner? */ int xfs_rmap_has_other_keys( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, bool *has_other) { struct xfs_rmap_matches res; struct xfs_rmap_ownercount roc; int error; xfs_rmap_ownercount_init(&roc, bno, len, oinfo, &res); roc.stop_on_nonmatch = true; error = xfs_rmap_query_range(cur, &roc.low, &roc.high, xfs_rmap_count_owners_helper, &roc); if (error == -ECANCELED) { *has_other = true; return 0; } if (error) return error; *has_other = false; return 0; } const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { .oi_owner = XFS_RMAP_OWN_NULL, }; const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER = { .oi_owner = XFS_RMAP_OWN_UNKNOWN, }; const struct xfs_owner_info XFS_RMAP_OINFO_FS = { .oi_owner = XFS_RMAP_OWN_FS, }; const struct xfs_owner_info XFS_RMAP_OINFO_LOG = { .oi_owner = XFS_RMAP_OWN_LOG, }; const struct xfs_owner_info XFS_RMAP_OINFO_AG = { .oi_owner = XFS_RMAP_OWN_AG, }; const struct xfs_owner_info XFS_RMAP_OINFO_INOBT = { .oi_owner = XFS_RMAP_OWN_INOBT, }; const struct xfs_owner_info XFS_RMAP_OINFO_INODES = { .oi_owner = XFS_RMAP_OWN_INODES, }; const struct xfs_owner_info XFS_RMAP_OINFO_REFC = { .oi_owner = XFS_RMAP_OWN_REFC, }; const struct xfs_owner_info XFS_RMAP_OINFO_COW = { .oi_owner = XFS_RMAP_OWN_COW, }; int __init xfs_rmap_intent_init_cache(void) { xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent", sizeof(struct xfs_rmap_intent), 0, 0, NULL); return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM; } void xfs_rmap_intent_destroy_cache(void) { kmem_cache_destroy(xfs_rmap_intent_cache); xfs_rmap_intent_cache = NULL; }
10 14 1 24 14 1 20 5 3 285 2 15 4 250 191 83 83 24 24 24 169 60 14 14 14 14 167 3 3 3 23 7 12 10 3 1 28 2 2 2 2 5 57 3 3 13 5 5 5 1 5 336 335 13 331 329 9 332 335 3 6 3 3 153 154 152 154 148 6 153 168 169 169 124 49 37 5 48 48 166 8 1 166 3 169 169 1 38 38 38 6 16 16 16 16 8 8 1 7 4 2 6 2 4 1 1 2 150 8 60 60 60 38 1 34 1 3 37 152 3 150 250 205 5 9 30 28 1 21 11 11 264 250 22 5 1 4 4 1 1 4 14 1 7 1 7 7 6 1 2 31 1 1 2 21 8 3 5 8 3 14 4 22 1 1 11 3 9 2 1 10 2 1 6 2 10 10 1 9 5 4 10 5 8 8 36 2 34 2 33 1 34 15 1 1 1 2 5 1 3 16 16 16 7 20 11 9 18 18 18 7 1 8 7 1 10 10 1 1 1 8 8 12 3 9 3 1 7 11 8 1 13 308 305 1 308 322 323 311 13 1 1 1 1 302 3 299 5 1 1 1 266 292 10 16 9 283 13 1 2 280 1 282 1 1 279 4 1 2 283 277 1 5 214 1 4 3 1 2 17 250 8 259 3 3 266 1 219 29 251 22 1 1 1 1 16 56 43 20 31 1 25 1 1 2 51 3 2 1 22 2 2 25 23 4 18 3 1 16 16 31 31 1 28 4 18 1 17 46 2 36 1 36 18 23 2 10 8 31 1 30 30 5 26 3 1 29 2 30 16 7 23 9 6 3 31 25 7 35 2 1 1 25 24 4 6 2 23 24 5 2 1 2 1 2 1 2 42 6 5 2 2 2 1 1 1 1 4 4 1 56 1 51 51 51 28 42 8 3 5 27 9 1 25 2 2 25 1 4 41 6 23 18 19 1 33 31 2 15 8 1 9 1 7 3 37 10 48 34 3 50 7 43 10 10 9 1 12 1 10 1 9 1 8 6 3 10 6 1 6 6 4 2 6 1 1 1 1 1 116 101 1 5 1 5 1 1 1 1 14 14 14 14 14 21 21 20 21 21 2 6 6 2 8 9 7 9 9 1 6 9 6 5 8 6 6 9 8 2 8 6 2 3 1 4 4 32 32 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 // SPDX-License-Identifier: GPL-2.0-or-later /* * NET4: Implementation of BSD Unix domain sockets. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. * Carsten Paeth : PF_UNIX check, address fixes. * Alan Cox : Limit size of allocated blocks. * Alan Cox : Fixed the stupid socketpair bug. * Alan Cox : BSD compatibility fine tuning. * Alan Cox : Fixed a bug in connect when interrupted. * Alan Cox : Sorted out a proper draft version of * file descriptor passing hacked up from * Mike Shaver's work. * Marty Leisner : Fixes to fd passing * Nick Nevin : recvmsg bugfix. * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * Alan Cox : Started POSIXisms * Andreas Schwab : Replace inode by dentry for proper * reference counting * Kirk Petersen : Made this a module * Christoph Rohland : Elegant non-blocking accept/connect algorithm. * Lots of bug fixes. * Alexey Kuznetosv : Repaired (I hope) bugs introduces * by above two patches. * Andrea Arcangeli : If possible we block in connect(2) * if the max backlog of the listen socket * is been reached. This won't break * old apps and it will avoid huge amount * of socks hashed (this for unix_gc() * performances reasons). * Security fix that limits the max * number of socks to 2*max_files and * the number of skb queueable in the * dgram receiver. * Artur Skawina : Hash function optimizations * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) * Malcolm Beattie : Set peercred for socketpair * Michal Ostrowski : Module initialization cleanup. * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, * the core infrastructure is doing that * for all net proto families now (2.5.69+) * * Known differences from reference BSD that was tested: * * [TO FIX] * ECONNREFUSED is not returned from one end of a connected() socket to the * other the moment one end closes. * fstat() doesn't return st_dev=0, and give the blksize as high water mark * and a fake inode identifier (nor the BSD first socket fstat twice bug). * [NOT TO FIX] * accept() returns a path name even if the connecting socket has closed * in the meantime (BSD loses the path and gives up). * accept() returns 0 length path for an unbound connector. BSD returns 16 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) * socketpair(...SOCK_RAW..) doesn't panic the kernel. * BSD af_unix apparently has connect forgetting to block properly. * (need to check this with the POSIX spec in detail) * * Differences from 2.0.0-11-... (ANK) * Bug fixes and improvements. * - client shutdown killed server socket. * - removed all useless cli/sti pairs. * * Semantic changes/extensions. * - generic control message passing. * - SCM_CREDENTIALS control message. * - "Abstract" (not FS based) socket bindings. * Abstract names are sequences of bytes (not zero terminated) * started by 0, so that this name space does not intersect * with BSD names. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/socket.h> #include <linux/un.h> #include <linux/fcntl.h> #include <linux/filter.h> #include <linux/termios.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/af_unix.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/scm.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/rtnetlink.h> #include <linux/mount.h> #include <net/checksum.h> #include <linux/security.h> #include <linux/splice.h> #include <linux/freezer.h> #include <linux/file.h> #include <linux/btf_ids.h> #include <linux/bpf-cgroup.h> static atomic_long_t unix_nr_socks; static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; /* SMP locking strategy: * hash table is protected with spinlock. * each socket state is protected by separate spinlock. */ #ifdef CONFIG_PROVE_LOCKING #define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r))) static int unix_table_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { return cmp_ptr(a, b); } static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, const struct lockdep_map *_b) { const struct unix_sock *a, *b; a = container_of(_a, struct unix_sock, lock.dep_map); b = container_of(_b, struct unix_sock, lock.dep_map); if (a->sk.sk_state == TCP_LISTEN) { /* unix_stream_connect(): Before the 2nd unix_state_lock(), * * 1. a is TCP_LISTEN. * 2. b is not a. * 3. concurrent connect(b -> a) must fail. * * Except for 2. & 3., the b's state can be any possible * value due to concurrent connect() or listen(). * * 2. is detected in debug_spin_lock_before(), and 3. cannot * be expressed as lock_cmp_fn. */ switch (b->sk.sk_state) { case TCP_CLOSE: case TCP_ESTABLISHED: case TCP_LISTEN: return -1; default: /* Invalid case. */ return 0; } } /* Should never happen. Just to be symmetric. */ if (b->sk.sk_state == TCP_LISTEN) { switch (b->sk.sk_state) { case TCP_CLOSE: case TCP_ESTABLISHED: return 1; default: return 0; } } /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a, const struct lockdep_map *_b) { const struct sock *a, *b; a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map); b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map); /* unix_collect_skb(): listener -> embryo order. */ if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a) return -1; /* Should never happen. Just to be symmetric. */ if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b) return 1; return 0; } #endif static unsigned int unix_unbound_hash(struct sock *sk) { unsigned long hash = (unsigned long)sk; hash ^= hash >> 16; hash ^= hash >> 8; hash ^= sk->sk_type; return hash & UNIX_HASH_MOD; } static unsigned int unix_bsd_hash(struct inode *i) { return i->i_ino & UNIX_HASH_MOD; } static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, int addr_len, int type) { __wsum csum = csum_partial(sunaddr, addr_len, 0); unsigned int hash; hash = (__force unsigned int)csum_fold(csum); hash ^= hash >> 8; hash ^= type; return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); } static void unix_table_double_lock(struct net *net, unsigned int hash1, unsigned int hash2) { if (hash1 == hash2) { spin_lock(&net->unx.table.locks[hash1]); return; } if (hash1 > hash2) swap(hash1, hash2); spin_lock(&net->unx.table.locks[hash1]); spin_lock(&net->unx.table.locks[hash2]); } static void unix_table_double_unlock(struct net *net, unsigned int hash1, unsigned int hash2) { if (hash1 == hash2) { spin_unlock(&net->unx.table.locks[hash1]); return; } spin_unlock(&net->unx.table.locks[hash1]); spin_unlock(&net->unx.table.locks[hash2]); } #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { UNIXCB(skb).secid = scm->secid; } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { scm->secid = UNIXCB(skb).secid; } static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) { return (scm->secid == UNIXCB(skb).secid); } #else static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) { return true; } #endif /* CONFIG_SECURITY_NETWORK */ static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; } static inline int unix_may_send(struct sock *sk, struct sock *osk) { return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } static inline int unix_recvq_full_lockless(const struct sock *sk) { return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } struct sock *unix_peer_get(struct sock *s) { struct sock *peer; unix_state_lock(s); peer = unix_peer(s); if (peer) sock_hold(peer); unix_state_unlock(s); return peer; } EXPORT_SYMBOL_GPL(unix_peer_get); static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, int addr_len) { struct unix_address *addr; addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); if (!addr) return NULL; refcount_set(&addr->refcnt, 1); addr->len = addr_len; memcpy(addr->name, sunaddr, addr_len); return addr; } static inline void unix_release_addr(struct unix_address *addr) { if (refcount_dec_and_test(&addr->refcnt)) kfree(addr); } /* * Check unix socket name: * - should be not zero length. * - if started by not zero, should be NULL terminated (FS object) * - if started by zero, it is abstract name. */ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) { if (addr_len <= offsetof(struct sockaddr_un, sun_path) || addr_len > sizeof(*sunaddr)) return -EINVAL; if (sunaddr->sun_family != AF_UNIX) return -EINVAL; return 0; } static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) { struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr; short offset = offsetof(struct sockaddr_storage, __data); BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path)); /* This may look like an off by one error but it is a bit more * subtle. 108 is the longest valid AF_UNIX path for a binding. * sun_path[108] doesn't as such exist. However in kernel space * we are guaranteed that it is a valid memory location in our * kernel address buffer because syscall functions always pass * a pointer of struct sockaddr_storage which has a bigger buffer * than 108. Also, we must terminate sun_path for strlen() in * getname_kernel(). */ addr->__data[addr_len - offset] = 0; /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen() * know the actual buffer. */ return strlen(addr->__data) + offset + 1; } static void __unix_remove_socket(struct sock *sk) { sk_del_node_init(sk); } static void __unix_insert_socket(struct net *net, struct sock *sk) { DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); } static void __unix_set_addr_hash(struct net *net, struct sock *sk, struct unix_address *addr, unsigned int hash) { __unix_remove_socket(sk); smp_store_release(&unix_sk(sk)->addr, addr); sk->sk_hash = hash; __unix_insert_socket(net, sk); } static void unix_remove_socket(struct net *net, struct sock *sk) { spin_lock(&net->unx.table.locks[sk->sk_hash]); __unix_remove_socket(sk); spin_unlock(&net->unx.table.locks[sk->sk_hash]); } static void unix_insert_unbound_socket(struct net *net, struct sock *sk) { spin_lock(&net->unx.table.locks[sk->sk_hash]); __unix_insert_socket(net, sk); spin_unlock(&net->unx.table.locks[sk->sk_hash]); } static void unix_insert_bsd_socket(struct sock *sk) { spin_lock(&bsd_socket_locks[sk->sk_hash]); sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); spin_unlock(&bsd_socket_locks[sk->sk_hash]); } static void unix_remove_bsd_socket(struct sock *sk) { if (!hlist_unhashed(&sk->sk_bind_node)) { spin_lock(&bsd_socket_locks[sk->sk_hash]); __sk_del_bind_node(sk); spin_unlock(&bsd_socket_locks[sk->sk_hash]); sk_node_init(&sk->sk_bind_node); } } static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, int len, unsigned int hash) { struct sock *s; sk_for_each(s, &net->unx.table.buckets[hash]) { struct unix_sock *u = unix_sk(s); if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) return s; } return NULL; } static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, int len, unsigned int hash) { struct sock *s; spin_lock(&net->unx.table.locks[hash]); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); spin_unlock(&net->unx.table.locks[hash]); return s; } static struct sock *unix_find_socket_byinode(struct inode *i) { unsigned int hash = unix_bsd_hash(i); struct sock *s; spin_lock(&bsd_socket_locks[hash]); sk_for_each_bound(s, &bsd_socket_buckets[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); spin_unlock(&bsd_socket_locks[hash]); return s; } } spin_unlock(&bsd_socket_locks[hash]); return NULL; } /* Support code for asymmetrically connected dgram sockets * * If a datagram socket is connected to a socket not itself connected * to the first socket (eg, /dev/log), clients may only enqueue more * messages if the present receive queue of the server socket is not * "too large". This means there's a second writeability condition * poll and sendmsg need to test. The dgram recv code will do a wake * up on the peer_wait wait queue of a socket upon reception of a * datagram which needs to be propagated to sleeping would-be writers * since these might not have sent anything so far. This can't be * accomplished via poll_wait because the lifetime of the server * socket might be less than that of its clients if these break their * association with it or if the server socket is closed while clients * are still connected to it and there's no way to inform "a polling * implementation" that it should let go of a certain wait queue * * In order to propagate a wake up, a wait_queue_entry_t of the client * socket is enqueued on the peer_wait queue of the server socket * whose wake function does a wake_up on the ordinary client socket * wait queue. This connection is established whenever a write (or * poll for write) hit the flow control condition and broken when the * association to the server socket is dissolved or after a wake up * was relayed. */ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, void *key) { struct unix_sock *u; wait_queue_head_t *u_sleep; u = container_of(q, struct unix_sock, peer_wake); __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, q); u->peer_wake.private = NULL; /* relaying can only happen while the wq still exists */ u_sleep = sk_sleep(&u->sk); if (u_sleep) wake_up_interruptible_poll(u_sleep, key_to_poll(key)); return 0; } static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) { struct unix_sock *u, *u_other; int rc; u = unix_sk(sk); u_other = unix_sk(other); rc = 0; spin_lock(&u_other->peer_wait.lock); if (!u->peer_wake.private) { u->peer_wake.private = other; __add_wait_queue(&u_other->peer_wait, &u->peer_wake); rc = 1; } spin_unlock(&u_other->peer_wait.lock); return rc; } static void unix_dgram_peer_wake_disconnect(struct sock *sk, struct sock *other) { struct unix_sock *u, *u_other; u = unix_sk(sk); u_other = unix_sk(other); spin_lock(&u_other->peer_wait.lock); if (u->peer_wake.private == other) { __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); u->peer_wake.private = NULL; } spin_unlock(&u_other->peer_wait.lock); } static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, struct sock *other) { unix_dgram_peer_wake_disconnect(sk, other); wake_up_interruptible_poll(sk_sleep(sk), EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); } /* preconditions: * - unix_peer(sk) == other * - association is stable */ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) { int connected; connected = unix_dgram_peer_wake_connect(sk, other); /* If other is SOCK_DEAD, we want to make sure we signal * POLLOUT, such that a subsequent write() can get a * -ECONNREFUSED. Otherwise, if we haven't queued any skbs * to other and its full, we will hang waiting for POLLOUT. */ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) return 1; if (connected) unix_dgram_peer_wake_disconnect(sk, other); return 0; } static int unix_writable(const struct sock *sk, unsigned char state) { return state != TCP_LISTEN && (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf); } static void unix_write_space(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); if (unix_writable(sk, READ_ONCE(sk->sk_state))) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive * queue of packets arrived from previous peer. First, it allows to do * flow control based only on wmem_alloc; second, sk connected to peer * may receive messages only from that peer. */ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) { if (!skb_queue_empty(&sk->sk_receive_queue)) { skb_queue_purge(&sk->sk_receive_queue); wake_up_interruptible_all(&unix_sk(sk)->peer_wait); /* If one link of bidirectional dgram pipe is disconnected, * we signal error. Messages are lost. Do not make this, * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { WRITE_ONCE(other->sk_err, ECONNRESET); sk_error_report(other); } } } static void unix_sock_destructor(struct sock *sk) { struct unix_sock *u = unix_sk(sk); skb_queue_purge(&sk->sk_receive_queue); DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { pr_info("Attempt to release alive unix socket: %p\n", sk); return; } if (u->addr) unix_release_addr(u->addr); atomic_long_dec(&unix_nr_socks); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); #ifdef UNIX_REFCNT_DEBUG pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, atomic_long_read(&unix_nr_socks)); #endif } static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct sock *skpair; struct sk_buff *skb; struct path path; int state; unix_remove_socket(sock_net(sk), sk); unix_remove_bsd_socket(sk); /* Clear state */ unix_state_lock(sk); sock_orphan(sk); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); path = u->path; u->path.dentry = NULL; u->path.mnt = NULL; state = sk->sk_state; WRITE_ONCE(sk->sk_state, TCP_CLOSE); skpair = unix_peer(sk); unix_peer(sk) = NULL; unix_state_unlock(sk); #if IS_ENABLED(CONFIG_AF_UNIX_OOB) u->oob_skb = NULL; #endif wake_up_interruptible_all(&u->peer_wait); if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ } /* Try to flush out this socket. Throw out buffers at least */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ kfree_skb(skb); } if (path.dentry) path_put(&path); sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ /* * Fixme: BSD difference: In BSD all sockets connected to us get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. * * Can't we simply set sock->err? * * What the above comment does talk about? --ANK(980817) */ if (READ_ONCE(unix_tot_inflight)) unix_gc(); /* Garbage collect fds */ } static void init_peercred(struct sock *sk) { sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); } static void update_peercred(struct sock *sk) { const struct cred *old_cred; struct pid *old_pid; spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; init_peercred(sk); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { lockdep_assert_held(&unix_sk(peersk)->lock); spin_lock(&sk->sk_peer_lock); sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); } static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; if (!READ_ONCE(u->addr)) goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; WRITE_ONCE(sk->sk_state, TCP_LISTEN); /* set credentials so connect can copy them */ update_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); out: return err; } static int unix_release(struct socket *); static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg); static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); #ifdef CONFIG_COMPAT static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); #endif static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, struct pipe_inode_info *, size_t size, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); #ifdef CONFIG_PROC_FS static int unix_count_nr_fds(struct sock *sk) { struct sk_buff *skb; struct unix_sock *u; int nr_fds = 0; spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); while (skb) { u = unix_sk(skb->sk); nr_fds += atomic_read(&u->scm_stat.nr_fds); skb = skb_peek_next(skb, &sk->sk_receive_queue); } spin_unlock(&sk->sk_receive_queue.lock); return nr_fds; } static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) { struct sock *sk = sock->sk; unsigned char s_state; struct unix_sock *u; int nr_fds = 0; if (sk) { s_state = READ_ONCE(sk->sk_state); u = unix_sk(sk); /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN. * SOCK_DGRAM is ordinary. So, no lock is needed. */ if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED) nr_fds = atomic_read(&u->scm_stat.nr_fds); else if (s_state == TCP_LISTEN) nr_fds = unix_count_nr_fds(sk); seq_printf(m, "scm_fds: %u\n", nr_fds); } } #else #define unix_show_fdinfo NULL #endif static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_stream_connect, .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = unix_listen, .shutdown = unix_shutdown, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .splice_read = unix_stream_splice_read, .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_dgram_connect, .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = sock_no_listen, .shutdown = unix_shutdown, .sendmsg = unix_dgram_sendmsg, .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_stream_connect, .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = unix_listen, .shutdown = unix_shutdown, .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; static void unix_close(struct sock *sk, long timeout) { /* Nothing to do here, unix socket does not need a ->close(). * This is merely for sockmap. */ } static void unix_unhash(struct sock *sk) { /* Nothing to do here, unix socket does not need a ->unhash(). * This is merely for sockmap. */ } static bool unix_bpf_bypass_getsockopt(int level, int optname) { if (level == SOL_SOCKET) { switch (optname) { case SO_PEERPIDFD: return true; default: return false; } } return false; } struct proto unix_dgram_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_dgram_bpf_update_proto, #endif }; struct proto unix_stream_proto = { .name = "UNIX-STREAM", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, .unhash = unix_unhash, .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_stream_bpf_update_proto, #endif }; static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) { struct unix_sock *u; struct sock *sk; int err; atomic_long_inc(&unix_nr_socks); if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { err = -ENFILE; goto err; } if (type == SOCK_STREAM) sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); else /*dgram and seqpacket */ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); if (!sk) { err = -ENOMEM; goto err; } sock_init_data(sock, sk); sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL); u = unix_sk(sk); u->listener = NULL; u->vertex = NULL; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_unbound_socket(net, sk); sock_prot_inuse_add(net, sk->sk_prot, 1); return sk; err: atomic_long_dec(&unix_nr_socks); return ERR_PTR(err); } static int unix_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; switch (sock->type) { case SOCK_STREAM: sock->ops = &unix_stream_ops; break; /* * Believe it or not BSD has AF_UNIX, SOCK_RAW though * nothing uses it. */ case SOCK_RAW: sock->type = SOCK_DGRAM; fallthrough; case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; case SOCK_SEQPACKET: sock->ops = &unix_seqpacket_ops; break; default: return -ESOCKTNOSUPPORT; } sk = unix_create1(net, sock, kern, sock->type); if (IS_ERR(sk)) return PTR_ERR(sk); return 0; } static int unix_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return 0; sk->sk_prot->close(sk, 0); unix_release_sock(sk, 0); sock->sk = NULL; return 0; } static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, int type) { struct inode *inode; struct path path; struct sock *sk; int err; unix_mkname_bsd(sunaddr, addr_len); err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; err = path_permission(&path, MAY_WRITE); if (err) goto path_put; err = -ECONNREFUSED; inode = d_backing_inode(path.dentry); if (!S_ISSOCK(inode->i_mode)) goto path_put; sk = unix_find_socket_byinode(inode); if (!sk) goto path_put; err = -EPROTOTYPE; if (sk->sk_type == type) touch_atime(&path); else goto sock_put; path_put(&path); return sk; sock_put: sock_put(sk); path_put: path_put(&path); fail: return ERR_PTR(err); } static struct sock *unix_find_abstract(struct net *net, struct sockaddr_un *sunaddr, int addr_len, int type) { unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); struct dentry *dentry; struct sock *sk; sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); if (!sk) return ERR_PTR(-ECONNREFUSED); dentry = unix_sk(sk)->path.dentry; if (dentry) touch_atime(&unix_sk(sk)->path); return sk; } static struct sock *unix_find_other(struct net *net, struct sockaddr_un *sunaddr, int addr_len, int type) { struct sock *sk; if (sunaddr->sun_path[0]) sk = unix_find_bsd(sunaddr, addr_len, type); else sk = unix_find_abstract(net, sunaddr, addr_len, type); return sk; } static int unix_autobind(struct sock *sk) { struct unix_sock *u = unix_sk(sk); unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; u32 lastnum, ordernum; int err; err = mutex_lock_interruptible(&u->bindlock); if (err) return err; if (u->addr) goto out; err = -ENOMEM; addr = kzalloc(sizeof(*addr) + offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); if (!addr) goto out; addr->len = offsetof(struct sockaddr_un, sun_path) + 6; addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); old_hash = sk->sk_hash; ordernum = get_random_u32(); lastnum = ordernum & 0xFFFFF; retry: ordernum = (ordernum + 1) & 0xFFFFF; sprintf(addr->name->sun_path + 1, "%05x", ordernum); new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); unix_table_double_lock(net, old_hash, new_hash); if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { unix_table_double_unlock(net, old_hash, new_hash); /* __unix_find_socket_byname() may take long time if many names * are already in use. */ cond_resched(); if (ordernum == lastnum) { /* Give up if all names seems to be in use. */ err = -ENOSPC; unix_release_addr(addr); goto out; } goto retry; } __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); err = 0; out: mutex_unlock(&u->bindlock); return err; } static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len) { umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); struct unix_sock *u = unix_sk(sk); unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct mnt_idmap *idmap; struct unix_address *addr; struct dentry *dentry; struct path parent; int err; addr_len = unix_mkname_bsd(sunaddr, addr_len); addr = unix_create_addr(sunaddr, addr_len); if (!addr) return -ENOMEM; /* * Get the parent directory, calculate the hash for last * component. */ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out; } /* * All right, let's create it. */ idmap = mnt_idmap(parent.mnt); err = security_path_mknod(&parent, dentry, mode, 0); if (!err) err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); if (err) goto out_path; err = mutex_lock_interruptible(&u->bindlock); if (err) goto out_unlink; if (u->addr) goto out_unlock; old_hash = sk->sk_hash; new_hash = unix_bsd_hash(d_backing_inode(dentry)); unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); unix_insert_bsd_socket(sk); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; out_unlock: mutex_unlock(&u->bindlock); err = -EINVAL; out_unlink: /* failed after successful mknod? unlink what we'd created... */ vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL); out_path: done_path_create(&parent, dentry); out: unix_release_addr(addr); return err == -EEXIST ? -EADDRINUSE : err; } static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len) { struct unix_sock *u = unix_sk(sk); unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; int err; addr = unix_create_addr(sunaddr, addr_len); if (!addr) return -ENOMEM; err = mutex_lock_interruptible(&u->bindlock); if (err) goto out; if (u->addr) { err = -EINVAL; goto out_mutex; } old_hash = sk->sk_hash; new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); unix_table_double_lock(net, old_hash, new_hash); if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) goto out_spin; __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); mutex_unlock(&u->bindlock); return 0; out_spin: unix_table_double_unlock(net, old_hash, new_hash); err = -EADDRINUSE; out_mutex: mutex_unlock(&u->bindlock); out: unix_release_addr(addr); return err; } static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; int err; if (addr_len == offsetof(struct sockaddr_un, sun_path) && sunaddr->sun_family == AF_UNIX) return unix_autobind(sk); err = unix_validate_addr(sunaddr, addr_len); if (err) return err; if (sunaddr->sun_path[0]) err = unix_bind_bsd(sk, sunaddr, addr_len); else err = unix_bind_abstract(sk, sunaddr, addr_len); return err; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) { if (unlikely(sk1 == sk2) || !sk2) { unix_state_lock(sk1); return; } if (sk1 > sk2) swap(sk1, sk2); unix_state_lock(sk1); unix_state_lock(sk2); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) { if (unlikely(sk1 == sk2) || !sk2) { unix_state_unlock(sk1); return; } unix_state_unlock(sk1); unix_state_unlock(sk2); } static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *sk = sock->sk; struct sock *other; int err; err = -EINVAL; if (alen < offsetofend(struct sockaddr, sa_family)) goto out; if (addr->sa_family != AF_UNSPEC) { err = unix_validate_addr(sunaddr, alen); if (err) goto out; err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); if (err) goto out; if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !READ_ONCE(unix_sk(sk)->addr)) { err = unix_autobind(sk); if (err) goto out; } restart: other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); if (IS_ERR(other)) { err = PTR_ERR(other); goto out; } unix_state_double_lock(sk, other); /* Apparently VFS overslept socket death. Retry. */ if (sock_flag(other, SOCK_DEAD)) { unix_state_double_unlock(sk, other); sock_put(other); goto restart; } err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) goto out_unlock; WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); WRITE_ONCE(other->sk_state, TCP_ESTABLISHED); } else { /* * 1003.1g breaking connected state with AF_UNSPEC */ other = NULL; unix_state_double_lock(sk, other); } /* * If it was connected, reconnect. */ if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; if (!other) WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); unix_state_double_unlock(sk, other); if (other != old_peer) { unix_dgram_disconnected(sk, old_peer); unix_state_lock(old_peer); if (!unix_peer(old_peer)) WRITE_ONCE(old_peer->sk_state, TCP_CLOSE); unix_state_unlock(old_peer); } sock_put(old_peer); } else { unix_peer(sk) = other; unix_state_double_unlock(sk, other); } return 0; out_unlock: unix_state_double_unlock(sk, other); sock_put(other); out: return err; } static long unix_wait_for_peer(struct sock *other, long timeo) __releases(&unix_sk(other)->lock) { struct unix_sock *u = unix_sk(other); int sched; DEFINE_WAIT(wait); prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && unix_recvq_full_lockless(other); unix_state_unlock(other); if (sched) timeo = schedule_timeout(timeo); finish_wait(&u->peer_wait, &wait); return timeo; } static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct net *net = sock_net(sk); struct sk_buff *skb = NULL; unsigned char state; long timeo; int err; err = unix_validate_addr(sunaddr, addr_len); if (err) goto out; err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); if (err) goto out; if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; } timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); /* First of all allocate resources. If we will make it after state is locked, we will have to recheck all again in any case. */ /* create new sock for complete connection */ newsk = unix_create1(net, NULL, 0, sock->type); if (IS_ERR(newsk)) { err = PTR_ERR(newsk); newsk = NULL; goto out; } err = -ENOMEM; /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); if (skb == NULL) goto out; restart: /* Find listening sock. */ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL; goto out; } unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. */ if (sock_flag(other, SOCK_DEAD)) { unix_state_unlock(other); sock_put(other); goto restart; } err = -ECONNREFUSED; if (other->sk_state != TCP_LISTEN) goto out_unlock; if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; if (unix_recvq_full_lockless(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; timeo = unix_wait_for_peer(other, timeo); err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; sock_put(other); goto restart; } /* self connect and simultaneous connect are eliminated * by rejecting TCP_LISTEN socket to avoid deadlock. */ state = READ_ONCE(sk->sk_state); if (unlikely(state != TCP_CLOSE)) { err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; goto out_unlock; } unix_state_lock(sk); if (unlikely(sk->sk_state != TCP_CLOSE)) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; unix_state_unlock(sk); goto out_unlock; } err = security_unix_stream_connect(sk, other, newsk); if (err) { unix_state_unlock(sk); goto out_unlock; } /* The way is open! Fastly set all the necessary fields... */ sock_hold(sk); unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, since we have found * otheru in hash under its lock. Insertion into the * hash chain we'd found it in had been done in an * earlier critical area protected by the chain's lock, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. * * Using smp_store_release() here to set newu->addr * is enough to make those stores, as well as stores * to newu->path visible to anyone who gets newu->addr * by smp_load_acquire(). IOW, the same warranties * as for unix_sock instances bound in unix_bind() or * in unix_autobind(). */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } refcount_inc(&otheru->addr->refcnt); smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); sock->state = SS_CONNECTED; WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); sock_hold(newsk); smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ unix_peer(sk) = newsk; unix_state_unlock(sk); /* take ten and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); return 0; out_unlock: if (other) unix_state_unlock(other); out: kfree_skb(skb); if (newsk) unix_release_sock(newsk, 0); if (other) sock_put(other); return err; } static int unix_socketpair(struct socket *socka, struct socket *sockb) { struct sock *ska = socka->sk, *skb = sockb->sk; /* Join our sockets back to back */ sock_hold(ska); sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; init_peercred(ska); init_peercred(skb); ska->sk_state = TCP_ESTABLISHED; skb->sk_state = TCP_ESTABLISHED; socka->state = SS_CONNECTED; sockb->state = SS_CONNECTED; return 0; } static void unix_sock_inherit_flags(const struct socket *old, struct socket *new) { if (test_bit(SOCK_PASSCRED, &old->flags)) set_bit(SOCK_PASSCRED, &new->flags); if (test_bit(SOCK_PASSPIDFD, &old->flags)) set_bit(SOCK_PASSPIDFD, &new->flags); if (test_bit(SOCK_PASSSEC, &old->flags)) set_bit(SOCK_PASSSEC, &new->flags); } static int unix_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sk_buff *skb; struct sock *tsk; arg->err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; arg->err = -EINVAL; if (READ_ONCE(sk->sk_state) != TCP_LISTEN) goto out; /* If socket state is TCP_LISTEN it cannot change (for now...), * so that no locks are necessary. */ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, &arg->err); if (!skb) { /* This means receive shutdown. */ if (arg->err == 0) arg->err = -EINVAL; goto out; } tsk = skb->sk; skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); /* attach accepted sock to socket */ unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; out: return arg->err; } static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; if (peer) { sk = unix_peer_get(sk); err = -ENOTCONN; if (!sk) goto out; err = 0; } else { sock_hold(sk); } addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; err = offsetof(struct sockaddr_un, sun_path); } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); if (peer) BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, CGROUP_UNIX_GETPEERNAME); else BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: return err; } /* The "user->unix_inflight" variable is protected by the garbage * collection lock, and we just read it locklessly here. If you go * over the limit, there might be a tiny race in actually noticing * it across threads. Tough. */ static inline bool too_many_unix_fds(struct task_struct *p) { struct user_struct *user = current_user(); if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); return false; } static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { if (too_many_unix_fds(current)) return -ETOOMANYREFS; UNIXCB(skb).fp = scm->fp; scm->fp = NULL; if (unix_prepare_fpl(UNIXCB(skb).fp)) return -ENOMEM; return 0; } static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = UNIXCB(skb).fp; UNIXCB(skb).fp = NULL; unix_destroy_fpl(scm->fp); } static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); } static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); sock_wfree(skb); } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; unix_get_secdata(scm, skb); if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); skb->destructor = unix_destruct_scm; return err; } static bool unix_passcred_enabled(const struct socket *sock, const struct sock *other) { return test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); } /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket * asserted SOCK_PASSCRED. */ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { if (UNIXCB(skb).pid) return; if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } static bool unix_skb_scm_eq(struct sk_buff *skb, struct scm_cookie *scm) { return UNIXCB(skb).pid == scm->pid && uid_eq(UNIXCB(skb).uid, scm->creds.uid) && gid_eq(UNIXCB(skb).gid, scm->creds.gid) && unix_secdata_eq(scm, skb); } static void scm_stat_add(struct sock *sk, struct sk_buff *skb) { struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); if (unlikely(fp && fp->count)) { atomic_add(fp->count, &u->scm_stat.nr_fds); unix_add_edges(fp, u); } } static void scm_stat_del(struct sock *sk, struct sk_buff *skb) { struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); if (unlikely(fp && fp->count)) { atomic_sub(fp->count, &u->scm_stat.nr_fds); unix_del_edges(fp); } } /* * Send AF_UNIX data. */ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *sk = sock->sk, *other = NULL; struct unix_sock *u = unix_sk(sk); struct scm_cookie scm; struct sk_buff *skb; int data_len = 0; int sk_locked; long timeo; int err; err = scm_send(sock, msg, &scm, false); if (err < 0) return err; wait_for_unix_gc(scm.fp); err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out; if (msg->msg_namelen) { err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, msg->msg_name, &msg->msg_namelen, NULL); if (err) goto out; } else { sunaddr = NULL; err = -ENOTCONN; other = unix_peer_get(sk); if (!other) goto out; } if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; } err = -EMSGSIZE; if (len > READ_ONCE(sk->sk_sndbuf) - 32) goto out; if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, len - SKB_MAX_ALLOC, MAX_SKB_FRAGS * PAGE_SIZE); data_len = PAGE_ALIGN(data_len); BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); } skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, PAGE_ALLOC_COSTLY_ORDER); if (skb == NULL) goto out; err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; skb_put(skb, len - data_len); skb->data_len = data_len; skb->len = len; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); if (err) goto out_free; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); restart: if (!other) { err = -ECONNRESET; if (sunaddr == NULL) goto out_free; other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL; goto out_free; } } if (sk_filter(other, skb) < 0) { /* Toss the packet but do not return any error to the sender */ err = len; goto out_free; } sk_locked = 0; unix_state_lock(other); restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error */ unix_state_unlock(other); sock_put(other); if (!sk_locked) unix_state_lock(sk); err = 0; if (sk->sk_type == SOCK_SEQPACKET) { /* We are here only when racing with unix_release_sock() * is clearing @other. Never change state to TCP_CLOSE * unlike SOCK_DGRAM wants. */ unix_state_unlock(sk); err = -EPIPE; } else if (unix_peer(sk) == other) { unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_state_unlock(sk); unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; } else { unix_state_unlock(sk); } other = NULL; if (err) goto out_free; goto restart; } err = -EPIPE; if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) goto out_unlock; } /* other == sk && unix_peer(other) != sk if * - unix_peer(sk) == NULL, destination address bound to sk * - unix_peer(sk) == sk by time of get but disconnected before lock */ if (other != sk && unlikely(unix_peer(other) != sk && unix_recvq_full_lockless(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo); err = sock_intr_errno(timeo); if (signal_pending(current)) goto out_free; goto restart; } if (!sk_locked) { unix_state_unlock(other); unix_state_double_lock(sk, other); } if (unix_peer(sk) != other || unix_dgram_peer_wake_me(sk, other)) { err = -EAGAIN; sk_locked = 1; goto out_unlock; } if (!sk_locked) { sk_locked = 1; goto restart_locked; } } if (unlikely(sk_locked)) unix_state_unlock(sk); if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); scm_destroy(&scm); return len; out_unlock: if (sk_locked) unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); out: if (other) sock_put(other); scm_destroy(&scm); return err; } /* We use paged skbs for stream sockets, and limit occupancy to 32768 * bytes, and a minimum of a full page. */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; int err = 0; skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) return err; err = unix_scm_to_skb(scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); return err; } skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); if (err) { kfree_skb(skb); return err; } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) { unix_state_unlock(other); kfree_skb(skb); return -EPIPE; } maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); spin_lock(&other->sk_receive_queue.lock); WRITE_ONCE(ousk->oob_skb, skb); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); return err; } #endif static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; struct scm_cookie scm; bool fds_sent = false; int data_len; err = scm_send(sock, msg, &scm, false); if (err < 0) return err; wait_for_unix_gc(scm.fp); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (len) len--; else #endif goto out_err; } if (msg->msg_namelen) { err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { err = -ENOTCONN; other = unix_peer(sk); if (!other) goto out_err; } if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { size = len - sent; if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb = sock_alloc_send_pskb(sk, 0, 0, msg->msg_flags & MSG_DONTWAIT, &err, 0); } else { /* Keep two messages in the pipe so it schedules better */ size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64); /* allow fallback to order-0 allocations */ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); skb = sock_alloc_send_pskb(sk, size - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, get_order(UNIX_SKB_FRAGS_SZ)); } if (!skb) goto out_err; /* Only send the fds in the first buffer */ err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; } fds_sent = true; if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb->ip_summed = CHECKSUM_UNNECESSARY; err = skb_splice_from_iter(skb, &msg->msg_iter, size, sk->sk_allocation); if (err < 0) { kfree_skb(skb); goto out_err; } size = err; refcount_add(size, &sk->sk_wmem_alloc); } else { skb_put(skb, size - data_len); skb->data_len = data_len; skb->len = size; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); if (err) { kfree_skb(skb); goto out_err; } } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sent += size; } #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { err = queue_oob(sock, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; } #endif scm_destroy(&scm); return sent; pipe_err_free: unix_state_unlock(other); kfree_skb(skb); pipe_err: if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: scm_destroy(&scm); return sent ? : err; } static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { int err; struct sock *sk = sock->sk; err = sock_error(sk); if (err) return err; if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; if (msg->msg_namelen) msg->msg_namelen = 0; return unix_dgram_sendmsg(sock, msg, len); } static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (addr) { msg->msg_namelen = addr->len; memcpy(msg->msg_name, addr->name, addr->len); } } int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, int flags) { struct scm_cookie scm; struct socket *sock = sk->sk_socket; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; int skip; int err; err = -EOPNOTSUPP; if (flags&MSG_OOB) goto out; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, &skip, &err, &last); if (skb) { if (!(flags & MSG_PEEK)) scm_stat_del(sk, skb); break; } mutex_unlock(&u->iolock); if (err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, &err, &timeo, last)); if (!skb) { /* implies iolock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) err = 0; unix_state_unlock(sk); goto out; } if (wq_has_sleeper(&u->peer_wait)) wake_up_interruptible_sync_poll(&u->peer_wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); if (msg->msg_name) { unix_copy_addr(msg, skb->sk); BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name, &msg->msg_namelen); } if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) msg->msg_flags |= MSG_TRUNC; err = skb_copy_datagram_msg(skb, skip, msg, size); if (err) goto out_free; if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); memset(&scm, 0, sizeof(scm)); scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) - return fds, and do not return them on read (old strategy, apparently wrong) - clone fds (I chose it for now, it is the most universal solution) POSIX 1003.1g does not actually define this clearly at all. POSIX 1003.1g doesn't define a lot of things clearly however! */ sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; scm_recv_unix(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); mutex_unlock(&u->iolock); out: return err; } static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; #ifdef CONFIG_BPF_SYSCALL const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) return prot->recvmsg(sk, msg, size, flags, NULL); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int err; mutex_lock(&u->iolock); skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; return recv_actor(sk, skb); } /* * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, struct sk_buff *last, unsigned int last_len, bool freezable) { unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; struct sk_buff *tail; DEFINE_WAIT(wait); unix_state_lock(sk); for (;;) { prepare_to_wait(sk_sleep(sk), &wait, state); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) break; sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } static unsigned int unix_skb_len(const struct sk_buff *skb) { return skb->len - UNIXCB(skb).consumed; } struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); struct socket *socket; struct msghdr *msg; struct pipe_inode_info *pipe; size_t size; int flags; unsigned int splice_flags; }; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int unix_stream_recv_urg(struct unix_stream_read_state *state) { struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; struct sk_buff *oob_skb; mutex_lock(&u->iolock); unix_state_lock(sk); spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; } oob_skb = u->oob_skb; if (!(state->flags & MSG_PEEK)) WRITE_ONCE(u->oob_skb, NULL); spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); if (!(state->flags & MSG_PEEK)) UNIXCB(oob_skb).consumed += 1; mutex_unlock(&u->iolock); if (chunk < 0) return -EFAULT; state->msg->msg_flags |= MSG_OOB; return 1; } static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, int flags, int copied) { struct sk_buff *read_skb = NULL, *unread_skb = NULL; struct unix_sock *u = unix_sk(sk); if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb))) return skb; spin_lock(&sk->sk_receive_queue.lock); if (!unix_skb_len(skb)) { if (copied && (!u->oob_skb || skb == u->oob_skb)) { skb = NULL; } else if (flags & MSG_PEEK) { skb = skb_peek_next(skb, &sk->sk_receive_queue); } else { read_skb = skb; skb = skb_peek_next(skb, &sk->sk_receive_queue); __skb_unlink(read_skb, &sk->sk_receive_queue); } if (!skb) goto unlock; } if (skb != u->oob_skb) goto unlock; if (copied) { skb = NULL; } else if (!(flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); if (!sock_flag(sk, SOCK_URGINLINE)) { __skb_unlink(skb, &sk->sk_receive_queue); unread_skb = skb; skb = skb_peek(&sk->sk_receive_queue); } } else if (!sock_flag(sk, SOCK_URGINLINE)) { skb = skb_peek_next(skb, &sk->sk_receive_queue); } unlock: spin_unlock(&sk->sk_receive_queue.lock); consume_skb(read_skb); kfree_skb(unread_skb); return skb; } #endif static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int err; if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; mutex_lock(&u->iolock); skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (unlikely(skb == READ_ONCE(u->oob_skb))) { bool drop = false; unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) { unix_state_unlock(sk); kfree_skb(skb); return -ECONNRESET; } spin_lock(&sk->sk_receive_queue.lock); if (likely(skb == u->oob_skb)) { WRITE_ONCE(u->oob_skb, NULL); drop = true; } spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); if (drop) { kfree_skb(skb); return -EAGAIN; } } #endif return recv_actor(sk, skb); } static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int copied = 0; int flags = state->flags; int noblock = flags & MSG_DONTWAIT; bool check_creds = false; int target; int err = 0; long timeo; int skip; size_t size = state->size; unsigned int last_len; if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; goto out; } if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) err = unix_stream_recv_urg(state); #endif goto out; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); memset(&scm, 0, sizeof(scm)); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ mutex_lock(&u->iolock); skip = max(sk_peek_offset(sk, flags), 0); do { struct sk_buff *skb, *last; int chunk; redo: unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) { err = -ECONNRESET; goto unlock; } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); if (!skb && copied) { unix_state_unlock(sk); break; } } #endif if (skb == NULL) { if (copied >= target) goto unlock; /* * POSIX 1003.1g mandates this order. */ err = sock_error(sk); if (err) goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) goto unlock; unix_state_unlock(sk); if (!timeo) { err = -EAGAIN; break; } mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); scm_destroy(&scm); goto out; } mutex_lock(&u->iolock); goto redo; unlock: unix_state_unlock(sk); break; } while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; } unix_state_unlock(sk); if (check_creds) { /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); check_creds = true; } /* Copy address just once */ if (state->msg && state->msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, state->msg->msg_name, &state->msg->msg_namelen); sunaddr = NULL; } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); chunk = state->recv_actor(skb, skip, chunk, state); if (chunk < 0) { if (copied == 0) copied = -EFAULT; break; } copied += chunk; size -= chunk; /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { UNIXCB(skb).consumed += chunk; sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) { scm_stat_del(sk, skb); unix_detach_fds(&scm, skb); } if (unix_skb_len(skb)) break; skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); if (UNIXCB(skb).fp) break; skip = 0; last = skb; last_len = skb->len; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb) goto again; unix_state_unlock(sk); break; } } while (size); mutex_unlock(&u->iolock); if (state->msg) scm_recv_unix(sock, state->msg, &scm, flags); else scm_destroy(&scm); out: return copied ? : err; } static int unix_stream_read_actor(struct sk_buff *skb, int skip, int chunk, struct unix_stream_read_state *state) { int ret; ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, state->msg, chunk); return ret ?: chunk; } int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, int flags) { struct unix_stream_read_state state = { .recv_actor = unix_stream_read_actor, .socket = sk->sk_socket, .msg = msg, .size = size, .flags = flags }; return unix_stream_read_generic(&state, true); } static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct unix_stream_read_state state = { .recv_actor = unix_stream_read_actor, .socket = sock, .msg = msg, .size = size, .flags = flags }; #ifdef CONFIG_BPF_SYSCALL struct sock *sk = sock->sk; const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) return prot->recvmsg(sk, msg, size, flags, NULL); #endif return unix_stream_read_generic(&state, true); } static int unix_stream_splice_actor(struct sk_buff *skb, int skip, int chunk, struct unix_stream_read_state *state) { return skb_splice_bits(skb, state->socket->sk, UNIXCB(skb).consumed + skip, state->pipe, chunk, state->splice_flags); } static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t size, unsigned int flags) { struct unix_stream_read_state state = { .recv_actor = unix_stream_splice_actor, .socket = sock, .pipe = pipe, .size = size, .splice_flags = flags, }; if (unlikely(*ppos)) return -ESPIPE; if (sock->file->f_flags & O_NONBLOCK || flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) { struct sock *sk = sock->sk; struct sock *other; if (mode < SHUT_RD || mode > SHUT_RDWR) return -EINVAL; /* This maps: * SHUT_RD (0) -> RCV_SHUTDOWN (1) * SHUT_WR (1) -> SEND_SHUTDOWN (2) * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) */ ++mode; unix_state_lock(sk); WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); other = unix_peer(sk); if (other) sock_hold(other); unix_state_unlock(sk); sk->sk_state_change(sk); if (other && (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { int peer_mode = 0; const struct proto *prot = READ_ONCE(other->sk_prot); if (prot->unhash) prot->unhash(other); if (mode&RCV_SHUTDOWN) peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; unix_state_lock(other); WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); unix_state_unlock(other); other->sk_state_change(other); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); } if (other) sock_put(other); return 0; } long unix_inq_len(struct sock *sk) { struct sk_buff *skb; long amount = 0; if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { skb_queue_walk(&sk->sk_receive_queue, skb) amount += unix_skb_len(skb); } else { skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len; } spin_unlock(&sk->sk_receive_queue.lock); return amount; } EXPORT_SYMBOL_GPL(unix_inq_len); long unix_outq_len(struct sock *sk) { return sk_wmem_alloc_get(sk); } EXPORT_SYMBOL_GPL(unix_outq_len); static int unix_open_file(struct sock *sk) { struct path path; struct file *f; int fd; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!smp_load_acquire(&unix_sk(sk)->addr)) return -ENOENT; path = unix_sk(sk)->path; if (!path.dentry) return -ENOENT; path_get(&path); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) goto out; f = dentry_open(&path, O_PATH, current_cred()); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); goto out; } fd_install(fd, f); out: path_put(&path); return fd; } static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; long amount = 0; int err; switch (cmd) { case SIOCOUTQ: amount = unix_outq_len(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: amount = unix_inq_len(sk); if (amount < 0) err = amount; else err = put_user(amount, (int __user *)arg); break; case SIOCUNIXFILE: err = unix_open_file(sk); break; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) case SIOCATMARK: { struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int answ = 0; mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue); if (skb) { struct sk_buff *oob_skb = READ_ONCE(u->oob_skb); struct sk_buff *next_skb; next_skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb == oob_skb || (!unix_skb_len(skb) && (!oob_skb || next_skb == oob_skb))) answ = 1; } mutex_unlock(&u->iolock); err = put_user(answ, (int __user *)arg); } break; #endif default: err = -ENOIOCTLCMD; break; } return err; } #ifdef CONFIG_COMPAT static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (READ_ONCE(unix_sk(sk)->oob_skb)) mask |= EPOLLPRI; #endif /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && state == TCP_CLOSE) mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ if (unix_writable(sk, state)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk, *other; unsigned int writable; unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE) mask |= EPOLLHUP; /* No write status requested, avoid expensive OUT tests. */ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk, state); if (writable) { unix_state_lock(sk); other = unix_peer(sk); if (other && unix_peer(other) != sk && unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; unix_state_unlock(sk); } if (writable) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } #ifdef CONFIG_PROC_FS #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { unsigned long offset = get_offset(*pos); unsigned long bucket = get_bucket(*pos); unsigned long count = 0; struct sock *sk; for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); sk; sk = sk_next(sk)) { if (++count == offset) break; } return sk; } static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); struct net *net = seq_file_net(seq); struct sock *sk; while (bucket < UNIX_HASH_SIZE) { spin_lock(&net->unx.table.locks[bucket]); sk = unix_from_bucket(seq, pos); if (sk) return sk; spin_unlock(&net->unx.table.locks[bucket]); *pos = set_bucket_offset(++bucket, 1); } return NULL; } static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, loff_t *pos) { unsigned long bucket = get_bucket(*pos); sk = sk_next(sk); if (sk) return sk; spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); *pos = set_bucket_offset(++bucket, 1); return unix_get_first(seq, pos); } static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { if (!*pos) return SEQ_START_TOKEN; return unix_get_first(seq, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; if (v == SEQ_START_TOKEN) return unix_get_first(seq, pos); return unix_get_next(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) { struct sock *sk = v; if (sk) spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); } static int unix_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Num RefCount Protocol Flags Type St " "Inode Path\n"); else { struct sock *s = v; struct unix_sock *u = unix_sk(s); unix_state_lock(s); seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", s, refcount_read(&s->sk_refcnt), 0, s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->sk_type, s->sk_socket ? (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); if (u->addr) { // under a hash table lock here int i, len; seq_putc(seq, ' '); i = 0; len = u->addr->len - offsetof(struct sockaddr_un, sun_path); if (u->addr->name->sun_path[0]) { len--; } else { seq_putc(seq, '@'); i++; } for ( ; i < len; i++) seq_putc(seq, u->addr->name->sun_path[i] ?: '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); } return 0; } static const struct seq_operations unix_seq_ops = { .start = unix_seq_start, .next = unix_seq_next, .stop = unix_seq_stop, .show = unix_seq_show, }; #ifdef CONFIG_BPF_SYSCALL struct bpf_unix_iter_state { struct seq_net_private p; unsigned int cur_sk; unsigned int end_sk; unsigned int max_sk; struct sock **batch; bool st_bucket_done; }; struct bpf_iter__unix { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct unix_sock *, unix_sk); uid_t uid __aligned(8); }; static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) { struct bpf_iter__unix ctx; meta->seq_num--; /* skip SEQ_START_TOKEN */ ctx.meta = meta; ctx.unix_sk = unix_sk; ctx.uid = uid; return bpf_iter_run_prog(prog, &ctx); } static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) { struct bpf_unix_iter_state *iter = seq->private; unsigned int expected = 1; struct sock *sk; sock_hold(start_sk); iter->batch[iter->end_sk++] = start_sk; for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { if (iter->end_sk < iter->max_sk) { sock_hold(sk); iter->batch[iter->end_sk++] = sk; } expected++; } spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); return expected; } static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) { while (iter->cur_sk < iter->end_sk) sock_put(iter->batch[iter->cur_sk++]); } static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, unsigned int new_batch_sz) { struct sock **new_batch; new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, GFP_USER | __GFP_NOWARN); if (!new_batch) return -ENOMEM; bpf_iter_unix_put_batch(iter); kvfree(iter->batch); iter->batch = new_batch; iter->max_sk = new_batch_sz; return 0; } static struct sock *bpf_iter_unix_batch(struct seq_file *seq, loff_t *pos) { struct bpf_unix_iter_state *iter = seq->private; unsigned int expected; bool resized = false; struct sock *sk; if (iter->st_bucket_done) *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); again: /* Get a new batch */ iter->cur_sk = 0; iter->end_sk = 0; sk = unix_get_first(seq, pos); if (!sk) return NULL; /* Done */ expected = bpf_iter_unix_hold_batch(seq, sk); if (iter->end_sk == expected) { iter->st_bucket_done = true; return sk; } if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { resized = true; goto again; } return sk; } static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) { if (!*pos) return SEQ_START_TOKEN; /* bpf iter does not support lseek, so it always * continue from where it was stop()-ped. */ return bpf_iter_unix_batch(seq, pos); } static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_unix_iter_state *iter = seq->private; struct sock *sk; /* Whenever seq_next() is called, the iter->cur_sk is * done with seq_show(), so advance to the next sk in * the batch. */ if (iter->cur_sk < iter->end_sk) sock_put(iter->batch[iter->cur_sk++]); ++*pos; if (iter->cur_sk < iter->end_sk) sk = iter->batch[iter->cur_sk]; else sk = bpf_iter_unix_batch(seq, pos); return sk; } static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; uid_t uid; bool slow; int ret; if (v == SEQ_START_TOKEN) return 0; slow = lock_sock_fast(sk); if (unlikely(sk_unhashed(sk))) { ret = SEQ_SKIP; goto unlock; } uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); ret = unix_prog_seq_show(prog, &meta, v, uid); unlock: unlock_sock_fast(sk, slow); return ret; } static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) { struct bpf_unix_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; if (!v) { meta.seq = seq; prog = bpf_iter_get_info(&meta, true); if (prog) (void)unix_prog_seq_show(prog, &meta, v, 0); } if (iter->cur_sk < iter->end_sk) bpf_iter_unix_put_batch(iter); } static const struct seq_operations bpf_iter_unix_seq_ops = { .start = bpf_iter_unix_seq_start, .next = bpf_iter_unix_seq_next, .stop = bpf_iter_unix_seq_stop, .show = bpf_iter_unix_seq_show, }; #endif #endif static const struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, .owner = THIS_MODULE, }; static int __net_init unix_net_init(struct net *net) { int i; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) goto out; #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, sizeof(struct seq_net_private))) goto err_sysctl; #endif net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, sizeof(spinlock_t), GFP_KERNEL); if (!net->unx.table.locks) goto err_proc; net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!net->unx.table.buckets) goto free_locks; for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock_init(&net->unx.table.locks[i]); lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL); INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } return 0; free_locks: kvfree(net->unx.table.locks); err_proc: #ifdef CONFIG_PROC_FS remove_proc_entry("unix", net->proc_net); err_sysctl: #endif unix_sysctl_unregister(net); out: return -ENOMEM; } static void __net_exit unix_net_exit(struct net *net) { kvfree(net->unx.table.buckets); kvfree(net->unx.table.locks); unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) #define INIT_BATCH_SZ 16 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_unix_iter_state *iter = priv_data; int err; err = bpf_iter_init_seq_net(priv_data, aux); if (err) return err; err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); if (err) { bpf_iter_fini_seq_net(priv_data); return err; } return 0; } static void bpf_iter_fini_unix(void *priv_data) { struct bpf_unix_iter_state *iter = priv_data; bpf_iter_fini_seq_net(priv_data); kvfree(iter->batch); } static const struct bpf_iter_seq_info unix_seq_info = { .seq_ops = &bpf_iter_unix_seq_ops, .init_seq_private = bpf_iter_init_unix, .fini_seq_private = bpf_iter_fini_unix, .seq_priv_size = sizeof(struct bpf_unix_iter_state), }; static const struct bpf_func_proto * bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_sk_setsockopt_proto; case BPF_FUNC_getsockopt: return &bpf_sk_getsockopt_proto; default: return NULL; } } static struct bpf_iter_reg unix_reg_info = { .target = "unix", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__unix, unix_sk), PTR_TO_BTF_ID_OR_NULL }, }, .get_func_proto = bpf_iter_unix_get_func_proto, .seq_info = &unix_seq_info, }; static void __init bpf_iter_register(void) { unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; if (bpf_iter_reg_target(&unix_reg_info)) pr_warn("Warning: could not register bpf iterator unix\n"); } #endif static int __init af_unix_init(void) { int i, rc = -1; BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { spin_lock_init(&bsd_socket_locks[i]); INIT_HLIST_HEAD(&bsd_socket_buckets[i]); } rc = proto_register(&unix_dgram_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); goto out; } rc = proto_register(&unix_stream_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); proto_unregister(&unix_dgram_proto); goto out; } sock_register(&unix_family_ops); register_pernet_subsys(&unix_net_ops); unix_bpf_build_proto(); #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_register(); #endif out: return rc; } /* Later than subsys_initcall() because we depend on stuff initialised there */ fs_initcall(af_unix_init);
75 75 86 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 // SPDX-License-Identifier: LGPL-2.0+ /* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Paul Eggert (eggert@twinsun.com). */ /* * dgb 10/02/98: ripped this from glibc source to help convert timestamps * to unix time * 10/04/98: added new table-based lookup after seeing how ugly * the gnu code is * blf 09/27/99: ripped out all the old code and inserted new table from * John Brockmeyer (without leap second corrections) * rewrote udf_stamp_to_time and fixed timezone accounting in * udf_time_to_stamp. */ /* * We don't take into account leap seconds. This may be correct or incorrect. * For more NIST information (especially dealing with leap seconds), see: * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm */ #include "udfdecl.h" #include <linux/types.h> #include <linux/kernel.h> #include <linux/time.h> void udf_disk_stamp_to_time(struct timespec64 *dest, struct timestamp src) { u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); u16 year = le16_to_cpu(src.year); uint8_t type = typeAndTimezone >> 12; int16_t offset; if (type == 1) { offset = typeAndTimezone << 4; /* sign extent offset */ offset = (offset >> 4); if (offset == -2047) /* unspecified offset */ offset = 0; } else offset = 0; dest->tv_sec = mktime64(year, src.month, src.day, src.hour, src.minute, src.second); dest->tv_sec -= offset * 60; /* * Sanitize nanosecond field since reportedly some filesystems are * recorded with bogus sub-second values. */ if (src.centiseconds < 100 && src.hundredsOfMicroseconds < 100 && src.microseconds < 100) { dest->tv_nsec = 1000 * (src.centiseconds * 10000 + src.hundredsOfMicroseconds * 100 + src.microseconds); } else { dest->tv_nsec = 0; } } void udf_time_to_disk_stamp(struct timestamp *dest, struct timespec64 ts) { time64_t seconds; int16_t offset; struct tm tm; offset = -sys_tz.tz_minuteswest; dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF)); seconds = ts.tv_sec + offset * 60; time64_to_tm(seconds, 0, &tm); dest->year = cpu_to_le16(tm.tm_year + 1900); dest->month = tm.tm_mon + 1; dest->day = tm.tm_mday; dest->hour = tm.tm_hour; dest->minute = tm.tm_min; dest->second = tm.tm_sec; dest->centiseconds = ts.tv_nsec / 10000000; dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000) / 100; dest->microseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000 - dest->hundredsOfMicroseconds * 100); } /* EOF */
49 48 49 49 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2020 Facebook * Copyright 2020 Google LLC. */ #include <linux/pid.h> #include <linux/sched.h> #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> #include <linux/bpf_local_storage.h> #include <linux/filter.h> #include <uapi/linux/btf.h> #include <linux/btf_ids.h> #include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(task_cache); static DEFINE_PER_CPU(int, bpf_task_storage_busy); static void bpf_task_storage_lock(void) { migrate_disable(); this_cpu_inc(bpf_task_storage_busy); } static void bpf_task_storage_unlock(void) { this_cpu_dec(bpf_task_storage_busy); migrate_enable(); } static bool bpf_task_storage_trylock(void) { migrate_disable(); if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { this_cpu_dec(bpf_task_storage_busy); migrate_enable(); return false; } return true; } static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) { struct task_struct *task = owner; return &task->bpf_storage; } static struct bpf_local_storage_data * task_storage_lookup(struct task_struct *task, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; task_storage = rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); if (!task_storage) return NULL; smap = (struct bpf_local_storage_map *)map; return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); } void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); if (!local_storage) { rcu_read_unlock(); return; } bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); rcu_read_unlock(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return ERR_CAST(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = task_storage_lookup(task, map, true); bpf_task_storage_unlock(); put_pid(pid); return sdata ? sdata->data : NULL; out: put_pid(pid); return ERR_PTR(err); } static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; if ((map_flags & BPF_F_LOCK) && btf_record_has_field(map->record, BPF_UPTR)) return -EOPNOTSUPP; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, true, GFP_ATOMIC); bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); out: put_pid(pid); return err; } static int task_storage_delete(struct task_struct *task, struct bpf_map *map, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, false); if (!sdata) return -ENOENT; if (!nobusy) return -EBUSY; bpf_selem_unlink(SELEM(sdata), false); return 0; } static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) { struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); err = task_storage_delete(task, map, true); bpf_task_storage_unlock(); out: put_pid(pid); return err; } /* Called by bpf_task_storage_get*() helpers */ static void *__bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, nobusy); if (sdata) return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); return IS_ERR(sdata) ? NULL : sdata->data; } return NULL; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { bool nobusy; void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; nobusy = bpf_task_storage_trylock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, nobusy); if (nobusy) bpf_task_storage_unlock(); return (unsigned long)data; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; bpf_task_storage_lock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, true); bpf_task_storage_unlock(); return (unsigned long)data; } BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { bool nobusy; int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; nobusy = bpf_task_storage_trylock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, nobusy); if (nobusy) bpf_task_storage_unlock(); return ret; } BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, true); bpf_task_storage_unlock(); return ret; } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; } static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) { return bpf_local_storage_map_alloc(attr, &task_cache, true); } static void task_storage_map_free(struct bpf_map *map) { bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); } BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) const struct bpf_map_ops task_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, .map_alloc = task_storage_map_alloc, .map_free = task_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_pid_task_storage_lookup_elem, .map_update_elem = bpf_pid_task_storage_update_elem, .map_delete_elem = bpf_pid_task_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = task_storage_ptr, }; const struct bpf_func_proto bpf_task_storage_get_recur_proto = { .func = bpf_task_storage_get_recur, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .func = bpf_task_storage_delete_recur, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], };
5 3 5 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 /* * linux/fs/nls/nls_cp864.c * * Charset cp864 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x066a, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x00b0, 0x00b7, 0x2219, 0x221a, 0x2592, 0x2500, 0x2502, 0x253c, 0x2524, 0x252c, 0x251c, 0x2534, 0x2510, 0x250c, 0x2514, 0x2518, /* 0x90*/ 0x03b2, 0x221e, 0x03c6, 0x00b1, 0x00bd, 0x00bc, 0x2248, 0x00ab, 0x00bb, 0xfef7, 0xfef8, 0x0000, 0x0000, 0xfefb, 0xfefc, 0x0000, /* 0xa0*/ 0x00a0, 0x00ad, 0xfe82, 0x00a3, 0x00a4, 0xfe84, 0x0000, 0x0000, 0xfe8e, 0xfe8f, 0xfe95, 0xfe99, 0x060c, 0xfe9d, 0xfea1, 0xfea5, /* 0xb0*/ 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669, 0xfed1, 0x061b, 0xfeb1, 0xfeb5, 0xfeb9, 0x061f, /* 0xc0*/ 0x00a2, 0xfe80, 0xfe81, 0xfe83, 0xfe85, 0xfeca, 0xfe8b, 0xfe8d, 0xfe91, 0xfe93, 0xfe97, 0xfe9b, 0xfe9f, 0xfea3, 0xfea7, 0xfea9, /* 0xd0*/ 0xfeab, 0xfead, 0xfeaf, 0xfeb3, 0xfeb7, 0xfebb, 0xfebf, 0xfec1, 0xfec5, 0xfecb, 0xfecf, 0x00a6, 0x00ac, 0x00f7, 0x00d7, 0xfec9, /* 0xe0*/ 0x0640, 0xfed3, 0xfed7, 0xfedb, 0xfedf, 0xfee3, 0xfee7, 0xfeeb, 0xfeed, 0xfeef, 0xfef3, 0xfebd, 0xfecc, 0xfece, 0xfecd, 0xfee1, /* 0xf0*/ 0xfe7d, 0x0651, 0xfee5, 0xfee9, 0xfeec, 0xfef0, 0xfef2, 0xfed0, 0xfed5, 0xfef5, 0xfef6, 0xfedd, 0xfed9, 0xfef1, 0x25a0, 0x0000, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x00, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0x00, 0xc0, 0xa3, 0xa4, 0x00, 0xdb, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x97, 0xdc, 0xa1, 0x00, 0x00, /* 0xa8-0xaf */ 0x80, 0x93, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x98, 0x95, 0x94, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x92, 0x00, /* 0xc0-0xc7 */ }; static const unsigned char page06[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xac, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0xbb, 0x00, 0x00, 0x00, 0xbf, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0x60-0x67 */ 0xb8, 0xb9, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x82, 0x83, 0x00, 0x00, 0x00, 0x91, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x96, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ }; static const unsigned char page25[256] = { 0x85, 0x00, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x8d, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x8c, 0x00, 0x00, 0x00, 0x8e, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x8f, 0x00, 0x00, 0x00, 0x8a, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x8b, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char pagefe[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, /* 0x78-0x7f */ 0xc1, 0xc2, 0xa2, 0xc3, 0xa5, 0xc4, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc7, 0xa8, 0xa9, /* 0x88-0x8f */ 0x00, 0xc8, 0x00, 0xc9, 0x00, 0xaa, 0x00, 0xca, /* 0x90-0x97 */ 0x00, 0xab, 0x00, 0xcb, 0x00, 0xad, 0x00, 0xcc, /* 0x98-0x9f */ 0x00, 0xae, 0x00, 0xcd, 0x00, 0xaf, 0x00, 0xce, /* 0xa0-0xa7 */ 0x00, 0xcf, 0x00, 0xd0, 0x00, 0xd1, 0x00, 0xd2, /* 0xa8-0xaf */ 0x00, 0xbc, 0x00, 0xd3, 0x00, 0xbd, 0x00, 0xd4, /* 0xb0-0xb7 */ 0x00, 0xbe, 0x00, 0xd5, 0x00, 0xeb, 0x00, 0xd6, /* 0xb8-0xbf */ 0x00, 0xd7, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0xdf, 0xc5, 0xd9, 0xec, 0xee, 0xed, 0xda, /* 0xc8-0xcf */ 0xf7, 0xba, 0x00, 0xe1, 0x00, 0xf8, 0x00, 0xe2, /* 0xd0-0xd7 */ 0x00, 0xfc, 0x00, 0xe3, 0x00, 0xfb, 0x00, 0xe4, /* 0xd8-0xdf */ 0x00, 0xef, 0x00, 0xe5, 0x00, 0xf2, 0x00, 0xe6, /* 0xe0-0xe7 */ 0x00, 0xf3, 0x00, 0xe7, 0xf4, 0xe8, 0x00, 0xe9, /* 0xe8-0xef */ 0xf5, 0xfd, 0xf6, 0xea, 0x00, 0xf9, 0xfa, 0x99, /* 0xf0-0xf7 */ 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, page06, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page22, NULL, NULL, page25, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, pagefe, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0x00, 0x00, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x00, 0x91, 0x00, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0x00, 0x00, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp864", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp864(void) { return register_nls(&table); } static void __exit exit_nls_cp864(void) { unregister_nls(&table); } module_init(init_nls_cp864) module_exit(exit_nls_cp864) MODULE_DESCRIPTION("NLS Codepage 864 (Arabic)"); MODULE_LICENSE("Dual BSD/GPL");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 2001-2006 Ian Kent <raven@themaw.net> */ #include <linux/sched/signal.h> #include "autofs_i.h" /* We make this a static variable rather than a part of the superblock; it * is better if we don't reassign numbers easily even across filesystems */ static autofs_wqt_t autofs_next_wait_queue = 1; void autofs_catatonic_mode(struct autofs_sb_info *sbi) { struct autofs_wait_queue *wq, *nwq; mutex_lock(&sbi->wq_mutex); if (sbi->flags & AUTOFS_SBI_CATATONIC) { mutex_unlock(&sbi->wq_mutex); return; } pr_debug("entering catatonic mode\n"); sbi->flags |= AUTOFS_SBI_CATATONIC; wq = sbi->queues; sbi->queues = NULL; /* Erase all wait queues */ while (wq) { nwq = wq->next; wq->status = -ENOENT; /* Magic is gone - report failure */ kfree(wq->name.name - wq->offset); wq->name.name = NULL; wake_up(&wq->queue); if (!--wq->wait_ctr) kfree(wq); wq = nwq; } fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; sbi->pipefd = -1; mutex_unlock(&sbi->wq_mutex); } static int autofs_write(struct autofs_sb_info *sbi, struct file *file, const void *addr, int bytes) { unsigned long sigpipe, flags; const char *data = (const char *)addr; ssize_t wr = 0; sigpipe = sigismember(&current->pending.signal, SIGPIPE); mutex_lock(&sbi->pipe_mutex); while (bytes) { wr = __kernel_write(file, data, bytes, NULL); if (wr <= 0) break; data += wr; bytes -= wr; } mutex_unlock(&sbi->pipe_mutex); /* Keep the currently executing process from receiving a * SIGPIPE unless it was already supposed to get one */ if (wr == -EPIPE && !sigpipe) { spin_lock_irqsave(&current->sighand->siglock, flags); sigdelset(&current->pending.signal, SIGPIPE); recalc_sigpending(); spin_unlock_irqrestore(&current->sighand->siglock, flags); } /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */ return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; } static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq, int type) { union { struct autofs_packet_hdr hdr; union autofs_packet_union v4_pkt; union autofs_v5_packet_union v5_pkt; } pkt; struct file *pipe = NULL; size_t pktsz; int ret; pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; switch (type) { /* Kernel protocol v4 missing and expire packets */ case autofs_ptype_missing: { struct autofs_packet_missing *mp = &pkt.v4_pkt.missing; pktsz = sizeof(*mp); mp->wait_queue_token = wq->wait_queue_token; mp->len = wq->name.len; memcpy(mp->name, wq->name.name, wq->name.len); mp->name[wq->name.len] = '\0'; break; } case autofs_ptype_expire_multi: { struct autofs_packet_expire_multi *ep = &pkt.v4_pkt.expire_multi; pktsz = sizeof(*ep); ep->wait_queue_token = wq->wait_queue_token; ep->len = wq->name.len; memcpy(ep->name, wq->name.name, wq->name.len); ep->name[wq->name.len] = '\0'; break; } /* * Kernel protocol v5 packet for handling indirect and direct * mount missing and expire requests */ case autofs_ptype_missing_indirect: case autofs_ptype_expire_indirect: case autofs_ptype_missing_direct: case autofs_ptype_expire_direct: { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns; pktsz = sizeof(*packet); packet->wait_queue_token = wq->wait_queue_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); packet->name[wq->name.len] = '\0'; packet->dev = wq->dev; packet->ino = wq->ino; packet->uid = from_kuid_munged(user_ns, wq->uid); packet->gid = from_kgid_munged(user_ns, wq->gid); packet->pid = wq->pid; packet->tgid = wq->tgid; break; } default: pr_warn("bad type %d!\n", type); mutex_unlock(&sbi->wq_mutex); return; } pipe = get_file(sbi->pipe); mutex_unlock(&sbi->wq_mutex); switch (ret = autofs_write(sbi, pipe, &pkt, pktsz)) { case 0: break; case -ENOMEM: case -ERESTARTSYS: /* Just fail this one */ autofs_wait_release(sbi, wq->wait_queue_token, ret); break; default: autofs_catatonic_mode(sbi); break; } fput(pipe); } static struct autofs_wait_queue * autofs_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr) { struct autofs_wait_queue *wq; for (wq = sbi->queues; wq; wq = wq->next) { if (wq->name.hash == qstr->hash && wq->name.len == qstr->len && wq->name.name && !memcmp(wq->name.name, qstr->name, qstr->len)) break; } return wq; } /* * Check if we have a valid request. * Returns * 1 if the request should continue. * In this case we can return an autofs_wait_queue entry if one is * found or NULL to idicate a new wait needs to be created. * 0 or a negative errno if the request shouldn't continue. */ static int validate_request(struct autofs_wait_queue **wait, struct autofs_sb_info *sbi, const struct qstr *qstr, const struct path *path, enum autofs_notify notify) { struct dentry *dentry = path->dentry; struct autofs_wait_queue *wq; struct autofs_info *ino; if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* Wait in progress, continue; */ wq = autofs_find_wait(sbi, qstr); if (wq) { *wait = wq; return 1; } *wait = NULL; /* If we don't yet have any info this is a new request */ ino = autofs_dentry_ino(dentry); if (!ino) return 1; /* * If we've been asked to wait on an existing expire (NFY_NONE) * but there is no wait in the queue ... */ if (notify == NFY_NONE) { /* * Either we've betean the pending expire to post it's * wait or it finished while we waited on the mutex. * So we need to wait till either, the wait appears * or the expire finishes. */ while (ino->flags & AUTOFS_INF_EXPIRING) { mutex_unlock(&sbi->wq_mutex); schedule_timeout_interruptible(HZ/10); if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; wq = autofs_find_wait(sbi, qstr); if (wq) { *wait = wq; return 1; } } /* * Not ideal but the status has already gone. Of the two * cases where we wait on NFY_NONE neither depend on the * return status of the wait. */ return 0; } /* * If we've been asked to trigger a mount and the request * completed while we waited on the mutex ... */ if (notify == NFY_MOUNT) { struct dentry *new = NULL; struct path this; int valid = 1; /* * If the dentry was successfully mounted while we slept * on the wait queue mutex we can return success. If it * isn't mounted (doesn't have submounts for the case of * a multi-mount with no mount at it's base) we can * continue on and create a new request. */ if (!IS_ROOT(dentry)) { if (d_unhashed(dentry) && d_really_is_positive(dentry)) { struct dentry *parent = dentry->d_parent; new = d_lookup(parent, &dentry->d_name); if (new) dentry = new; } } this.mnt = path->mnt; this.dentry = dentry; if (path_has_submounts(&this)) valid = 0; if (new) dput(new); return valid; } return 1; } int autofs_wait(struct autofs_sb_info *sbi, const struct path *path, enum autofs_notify notify) { struct dentry *dentry = path->dentry; struct autofs_wait_queue *wq; struct qstr qstr; char *name; int status, ret, type; unsigned int offset = 0; pid_t pid; pid_t tgid; /* In catatonic mode, we don't wait for nobody */ if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* * Try translating pids to the namespace of the daemon. * * Zero means failure: we are in an unrelated pid namespace. */ pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); if (pid == 0 || tgid == 0) return -ENOENT; if (d_really_is_negative(dentry)) { /* * A wait for a negative dentry is invalid for certain * cases. A direct or offset mount "always" has its mount * point directory created and so the request dentry must * be positive or the map key doesn't exist. The situation * is very similar for indirect mounts except only dentrys * in the root of the autofs file system may be negative. */ if (autofs_type_trigger(sbi->type)) return -ENOENT; else if (!IS_ROOT(dentry->d_parent)) return -ENOENT; } name = kmalloc(NAME_MAX + 1, GFP_KERNEL); if (!name) return -ENOMEM; /* If this is a direct mount request create a dummy name */ if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) { qstr.name = name; qstr.len = sprintf(name, "%p", dentry); } else { char *p = dentry_path_raw(dentry, name, NAME_MAX); if (IS_ERR(p)) { kfree(name); return -ENOENT; } qstr.name = ++p; // skip the leading slash qstr.len = strlen(p); offset = p - name; } qstr.hash = full_name_hash(dentry, qstr.name, qstr.len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); return -EINTR; } ret = validate_request(&wq, sbi, &qstr, path, notify); if (ret <= 0) { if (ret != -EINTR) mutex_unlock(&sbi->wq_mutex); kfree(name); return ret; } if (!wq) { /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL); if (!wq) { kfree(name); mutex_unlock(&sbi->wq_mutex); return -ENOMEM; } wq->wait_queue_token = autofs_next_wait_queue; if (++autofs_next_wait_queue == 0) autofs_next_wait_queue = 1; wq->next = sbi->queues; sbi->queues = wq; init_waitqueue_head(&wq->queue); memcpy(&wq->name, &qstr, sizeof(struct qstr)); wq->offset = offset; wq->dev = autofs_get_dev(sbi); wq->ino = autofs_get_ino(sbi); wq->uid = current_uid(); wq->gid = current_gid(); wq->pid = pid; wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ wq->wait_ctr = 2; if (sbi->version < 5) { if (notify == NFY_MOUNT) type = autofs_ptype_missing; else type = autofs_ptype_expire_multi; } else { if (notify == NFY_MOUNT) type = autofs_type_trigger(sbi->type) ? autofs_ptype_missing_direct : autofs_ptype_missing_indirect; else type = autofs_type_trigger(sbi->type) ? autofs_ptype_expire_direct : autofs_ptype_expire_indirect; } pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); /* * autofs_notify_daemon() may block; it will unlock ->wq_mutex */ autofs_notify_daemon(sbi, wq, type); } else { wq->wait_ctr++; pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); mutex_unlock(&sbi->wq_mutex); kfree(name); } /* * wq->name.name is NULL iff the lock is already released * or the mount has been made catatonic. */ wait_event_killable(wq->queue, wq->name.name == NULL); status = wq->status; /* * For direct and offset mounts we need to track the requester's * uid and gid in the dentry info struct. This is so it can be * supplied, on request, by the misc device ioctl interface. * This is needed during daemon resatart when reconnecting * to existing, active, autofs mounts. The uid and gid (and * related string values) may be used for macro substitution * in autofs mount maps. */ if (!status) { struct autofs_info *ino; struct dentry *de = NULL; /* direct mount or browsable map */ ino = autofs_dentry_ino(dentry); if (!ino) { /* If not lookup actual dentry used */ de = d_lookup(dentry->d_parent, &dentry->d_name); if (de) ino = autofs_dentry_ino(de); } /* Set mount requester */ if (ino) { spin_lock(&sbi->fs_lock); ino->uid = wq->uid; ino->gid = wq->gid; spin_unlock(&sbi->fs_lock); } if (de) dput(de); } /* Are we the last process to need status? */ mutex_lock(&sbi->wq_mutex); if (!--wq->wait_ctr) kfree(wq); mutex_unlock(&sbi->wq_mutex); return status; } int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status) { struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) { if (wq->wait_queue_token == wait_queue_token) break; } if (!wq) { mutex_unlock(&sbi->wq_mutex); return -EINVAL; } *wql = wq->next; /* Unlink from chain */ kfree(wq->name.name - wq->offset); wq->name.name = NULL; /* Do not wait on this queue */ wq->status = status; wake_up(&wq->queue); if (!--wq->wait_ctr) kfree(wq); mutex_unlock(&sbi->wq_mutex); return 0; }
1 1 112 112 2 112 1 114 113 113 113 113 113 11 11 9 9 7 8 8 5 1 3 9 2 4 8 3 3 2 2 1 22 29 29 29 9 19 7 36 1 32 34 2 31 31 2 1 8 12 9 9 6 5 1 1 2 1 1 1 2 1 1 1 1 1 3 1 1 40 1 1 1 1 32 3 2 2 1 2 1 1 48 1 41 6 15 1 4 5 2 1 1 1 4 4 4 4 32 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 // SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code for ARP packets. * * Based heavily, if not almost entirely, upon ip_tables.c framework. * * Some ARP specific bits are: * * Copyright (C) 2002 David S. Miller (davem@redhat.com) * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net> * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/capability.h> #include <linux/if_arp.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/init.h> #include <linux/mutex.h> #include <linux/err.h> #include <net/compat.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> #include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); MODULE_DESCRIPTION("arptables core"); void *arpt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(arpt, ARPT); } EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, const char *hdr_addr, int len) { int i, ret; if (len > ARPT_DEV_ADDR_LEN_MAX) len = ARPT_DEV_ADDR_LEN_MAX; ret = 0; for (i = 0; i < len; i++) ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; return ret != 0; } /* * Unfortunately, _b and _mask are not aligned to an int (or long int) * Some arches dont care, unrolling the loop is a win on them. * For other arches, we only have a 16bit alignement. */ static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) { #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS unsigned long ret = ifname_compare_aligned(_a, _b, _mask); #else unsigned long ret = 0; const u16 *a = (const u16 *)_a; const u16 *b = (const u16 *)_b; const u16 *mask = (const u16 *)_mask; int i; for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) ret |= (a[i] ^ b[i]) & mask[i]; #endif return ret; } /* Returns whether packet matches rule or not. */ static inline int arp_packet_match(const struct arphdr *arphdr, struct net_device *dev, const char *indev, const char *outdev, const struct arpt_arp *arpinfo) { const char *arpptr = (char *)(arphdr + 1); const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; long ret; if (NF_INVF(arpinfo, ARPT_INV_ARPOP, (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop)) return 0; if (NF_INVF(arpinfo, ARPT_INV_ARPHRD, (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd)) return 0; if (NF_INVF(arpinfo, ARPT_INV_ARPPRO, (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro)) return 0; if (NF_INVF(arpinfo, ARPT_INV_ARPHLN, (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln)) return 0; src_devaddr = arpptr; arpptr += dev->addr_len; memcpy(&src_ipaddr, arpptr, sizeof(u32)); arpptr += sizeof(u32); tgt_devaddr = arpptr; arpptr += dev->addr_len; memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len)) || NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len))) return 0; if (NF_INVF(arpinfo, ARPT_INV_SRCIP, (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) || NF_INVF(arpinfo, ARPT_INV_TGTIP, (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr)) return 0; /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0)) return 0; ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0)) return 0; return 1; } static inline int arp_checkentry(const struct arpt_arp *arp) { if (arp->flags & ~ARPT_F_MASK) return 0; if (arp->invflags & ~ARPT_INV_MASK) return 0; return 1; } static unsigned int arpt_error(struct sk_buff *skb, const struct xt_action_param *par) { net_err_ratelimited("arp_tables: error: '%s'\n", (const char *)par->targinfo); return NF_DROP; } static inline const struct xt_entry_target * arpt_get_target_c(const struct arpt_entry *e) { return arpt_get_target((struct arpt_entry *)e); } static inline struct arpt_entry * get_entry(const void *base, unsigned int offset) { return (struct arpt_entry *)(base + offset); } static inline struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) { return (void *)entry + entry->next_offset; } unsigned int arpt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; /* No TEE support for arptables, so no need to switch to alternate * stack. All targets that reenter must return absolute verdicts. */ e = get_entry(table_base, private->hook_entry[hook]); acpar.state = state; acpar.hotdrop = false; arp = arp_hdr(skb); do { const struct xt_entry_target *t; struct xt_counters *counter; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); } else { e = jumpstack[--stackidx]; e = arpt_next_entry(e); } continue; } if (table_base + v != arpt_next_entry(e)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) { /* Target might have changed stuff. */ arp = arp_hdr(skb); e = arpt_next_entry(e); } else { /* Verdict */ break; } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* All zeroes == unconditional rule. */ static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; return e->target_offset == sizeof(struct arpt_entry) && memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset * to 0 as we leave), and comefrom to save source hook bitmask. */ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct arpt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last * big jump. */ do { e->comefrom ^= (1<<NF_ARP_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static int check_target(struct arpt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = arpt_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_ARP, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } static int find_check_entry(struct arpt_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); out: xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct arpt_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if (!arp_checkentry(&e->arp)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct arpt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; t = arpt_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_ARP; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in * newinfo). */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct arpt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct arpt_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(arpt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct arpt_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct arpt_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care * about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct arpt_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } t = arpt_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(NFPROTO_ARP, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static int compat_calc_entry(const struct arpt_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); entry_offset = (void *)e - base; t = arpt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct arpt_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct arpt_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct arpt_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(NFPROTO_ARP, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct arpt_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(NFPROTO_ARP); #endif t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); memcpy(info.underflow, private->underflow, sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; strscpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; xt_table_unlock(t); module_put(t->me); } else ret = PTR_ERR(t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_unlock(NFPROTO_ARP); #endif return ret; } static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, const int *len) { int ret; struct arpt_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct arpt_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else ret = -EAGAIN; module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); return ret; } static int __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { int ret; struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; struct arpt_entry *iter; ret = 0; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; } t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } /* You lied! */ if (valid_hooks != t->valid_hooks) { ret = -EINVAL; goto put_module; } oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); xt_table_unlock(t); get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); } vfree(counters); return ret; put_module: module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: vfree(counters); out: return ret; } static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct arpt_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free; } local_bh_disable(); private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: vfree(paddc); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct compat_arpt_replace { char name[XT_TABLE_MAXNAMELEN]; u32 valid_hooks; u32 num_entries; u32 size; u32 hook_entry[NF_ARP_NUMHOOKS]; u32 underflow[NF_ARP_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; struct compat_arpt_entry entries[]; }; static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; t = compat_arpt_get_target(e); module_put(t->u.kernel.target->me); } static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; int ret, off; if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct compat_arpt_entry) + sizeof(struct compat_xt_entry_target)) return -EINVAL; if (!arp_checkentry(&e->arp)) return -EINVAL; ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) goto release_target; return 0; release_target: module_put(t->u.kernel.target->me); out: return ret; } static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct arpt_entry *de; unsigned int origsize; int h; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct arpt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct arpt_entry); *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); de->target_offset = e->target_offset - (origsize - *size); t = compat_arpt_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; struct arpt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(NFPROTO_ARP); ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone */ xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_ARP_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct arpt_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t *size, struct xt_counters *counters, unsigned int i) { struct xt_entry_target *t; struct compat_arpt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; int ret; origsize = *size; ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) return -EFAULT; *dstptr += sizeof(struct compat_arpt_entry); *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); target_offset = e->target_offset - (origsize - *size); t = arpt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(target_offset, &ce->target_offset) != 0 || put_user(next_offset, &ce->next_offset) != 0) return -EFAULT; return 0; } static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct arpt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } struct compat_arpt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_arpt_entry entrytable[]; }; static int compat_get_entries(struct net *net, struct compat_arpt_get_entries __user *uptr, int *len) { int ret; struct compat_arpt_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_arpt_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) { ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); } else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(NFPROTO_ARP); return ret; } #endif static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case ARPT_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case ARPT_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case ARPT_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case ARPT_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case ARPT_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __arpt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct arpt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct arpt_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } num_ops = hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __arpt_unregister_table(net, new_table); return ret; } void arpt_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(arpt_unregister_table_pre_exit); void arpt_unregister_table(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); if (table) __arpt_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ static struct xt_target arpt_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_ARP, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = arpt_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_ARP, }, }; static struct nf_sockopt_ops arpt_sockopts = { .pf = PF_INET, .set_optmin = ARPT_BASE_CTL, .set_optmax = ARPT_SO_SET_MAX+1, .set = do_arpt_set_ctl, .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, .owner = THIS_MODULE, }; static int __net_init arp_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_ARP); } static void __net_exit arp_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_ARP); } static struct pernet_operations arp_tables_net_ops = { .init = arp_tables_net_init, .exit = arp_tables_net_exit, }; static int __init arp_tables_init(void) { int ret; ret = register_pernet_subsys(&arp_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); err2: unregister_pernet_subsys(&arp_tables_net_ops); err1: return ret; } static void __exit arp_tables_fini(void) { nf_unregister_sockopt(&arpt_sockopts); xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); unregister_pernet_subsys(&arp_tables_net_ops); } EXPORT_SYMBOL(arpt_register_table); EXPORT_SYMBOL(arpt_unregister_table); EXPORT_SYMBOL(arpt_do_table); module_init(arp_tables_init); module_exit(arp_tables_fini);
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM udp #if !defined(_TRACE_UDP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_UDP_H #include <linux/udp.h> #include <linux/tracepoint.h> #include <trace/events/net_probe_common.h> TRACE_EVENT(udp_fail_queue_rcv_skb, TP_PROTO(int rc, struct sock *sk, struct sk_buff *skb), TP_ARGS(rc, sk, skb), TP_STRUCT__entry( __field(int, rc) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct udphdr *uh = (const struct udphdr *)udp_hdr(skb); __entry->rc = rc; /* for filtering use */ __entry->sport = ntohs(uh->source); __entry->dport = ntohs(uh->dest); __entry->family = sk->sk_family; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, uh, __entry->saddr, __entry->daddr); ), TP_printk("rc=%d family=%s src=%pISpc dest=%pISpc", __entry->rc, show_family_name(__entry->family), __entry->saddr, __entry->daddr) ); #endif /* _TRACE_UDP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
26 26 6 2 1 32 26 26 26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* * linux/can/skb.h * * Definitions for the CAN network socket buffer * * Copyright (C) 2012 Oliver Hartkopp <socketcan@hartkopp.net> * */ #ifndef _CAN_SKB_H #define _CAN_SKB_H #include <linux/types.h> #include <linux/skbuff.h> #include <linux/can.h> #include <net/sock.h> void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *len_ptr, unsigned int *frame_len_ptr); unsigned int __must_check can_get_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); void can_free_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); struct sk_buff *alloc_canxl_skb(struct net_device *dev, struct canxl_frame **cxl, unsigned int data_len); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb); /* * The struct can_skb_priv is used to transport additional information along * with the stored struct can(fd)_frame that can not be contained in existing * struct sk_buff elements. * N.B. that this information must not be modified in cloned CAN sk_buffs. * To modify the CAN frame content or the struct can_skb_priv content * skb_copy() needs to be used instead of skb_clone(). */ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on * @skbcnt: atomic counter to have an unique id together with skb pointer * @frame_len: length of CAN frame in data link layer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; int skbcnt; unsigned int frame_len; struct can_frame cf[]; }; static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb) { return (struct can_skb_priv *)(skb->head); } static inline void can_skb_reserve(struct sk_buff *skb) { skb_reserve(skb, sizeof(struct can_skb_priv)); } static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { /* If the socket has already been closed by user space, the * refcount may already be 0 (and the socket will be freed * after the last TX skb has been freed). So only increase * socket refcount if the refcount is > 0. */ if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } } /* * returns an unshared skb owned by the original sock to be echo'ed back */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { struct sk_buff *nskb; nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { kfree_skb(skb); return NULL; } can_skb_set_owner(nskb, skb->sk); consume_skb(skb); return nskb; } static inline bool can_is_can_skb(const struct sk_buff *skb) { struct can_frame *cf = (struct can_frame *)skb->data; /* the CAN specific type of skb is identified by its data length */ return (skb->len == CAN_MTU && cf->len <= CAN_MAX_DLEN); } static inline bool can_is_canfd_skb(const struct sk_buff *skb) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; /* the CAN specific type of skb is identified by its data length */ return (skb->len == CANFD_MTU && cfd->len <= CANFD_MAX_DLEN); } static inline bool can_is_canxl_skb(const struct sk_buff *skb) { const struct canxl_frame *cxl = (struct canxl_frame *)skb->data; if (skb->len < CANXL_HDR_SIZE + CANXL_MIN_DLEN || skb->len > CANXL_MTU) return false; /* this also checks valid CAN XL data length boundaries */ if (skb->len != CANXL_HDR_SIZE + cxl->len) return false; return cxl->flags & CANXL_XLF; } /* get length element value from can[|fd|xl]_frame structure */ static inline unsigned int can_skb_get_len_val(struct sk_buff *skb) { const struct canxl_frame *cxl = (struct canxl_frame *)skb->data; const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; if (can_is_canxl_skb(skb)) return cxl->len; return cfd->len; } /* get needed data length inside CAN frame for all frame types (RTR aware) */ static inline unsigned int can_skb_get_data_len(struct sk_buff *skb) { unsigned int len = can_skb_get_len_val(skb); const struct can_frame *cf = (struct can_frame *)skb->data; /* RTR frames have an actual length of zero */ if (can_is_can_skb(skb) && cf->can_id & CAN_RTR_FLAG) return 0; return len; } #endif /* !_CAN_SKB_H */
77 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 /* SPDX-License-Identifier: GPL-2.0 OR MIT */ #ifndef _CRYPTO_BLAKE2B_H #define _CRYPTO_BLAKE2B_H #include <linux/bug.h> #include <linux/types.h> #include <linux/string.h> enum blake2b_lengths { BLAKE2B_BLOCK_SIZE = 128, BLAKE2B_HASH_SIZE = 64, BLAKE2B_KEY_SIZE = 64, BLAKE2B_160_HASH_SIZE = 20, BLAKE2B_256_HASH_SIZE = 32, BLAKE2B_384_HASH_SIZE = 48, BLAKE2B_512_HASH_SIZE = 64, }; struct blake2b_state { /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ u64 h[8]; u64 t[2]; u64 f[2]; u8 buf[BLAKE2B_BLOCK_SIZE]; unsigned int buflen; unsigned int outlen; }; enum blake2b_iv { BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL, BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL, BLAKE2B_IV2 = 0x3C6EF372FE94F82BULL, BLAKE2B_IV3 = 0xA54FF53A5F1D36F1ULL, BLAKE2B_IV4 = 0x510E527FADE682D1ULL, BLAKE2B_IV5 = 0x9B05688C2B3E6C1FULL, BLAKE2B_IV6 = 0x1F83D9ABFB41BD6BULL, BLAKE2B_IV7 = 0x5BE0CD19137E2179ULL, }; static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, const void *key, size_t keylen) { state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen); state->h[1] = BLAKE2B_IV1; state->h[2] = BLAKE2B_IV2; state->h[3] = BLAKE2B_IV3; state->h[4] = BLAKE2B_IV4; state->h[5] = BLAKE2B_IV5; state->h[6] = BLAKE2B_IV6; state->h[7] = BLAKE2B_IV7; state->t[0] = 0; state->t[1] = 0; state->f[0] = 0; state->f[1] = 0; state->buflen = 0; state->outlen = outlen; if (keylen) { memcpy(state->buf, key, keylen); memset(&state->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen); state->buflen = BLAKE2B_BLOCK_SIZE; } } #endif /* _CRYPTO_BLAKE2B_H */
8 24 1 17 2 56 22 3 6 16 3 2 20 17 11 9 9 6 6 7 13566 94 35 126 42 91 57 396 13444 5008 8537 8485 391 149 3 7 1 1 3 35 69 13030 63 13035 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * VLAN An implementation of 802.1Q VLAN tagging. * * Authors: Ben Greear <greearb@candelatech.com> */ #ifndef _LINUX_IF_VLAN_H_ #define _LINUX_IF_VLAN_H_ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/bug.h> #include <uapi/linux/if_vlan.h> #define VLAN_HLEN 4 /* The additional bytes required by VLAN * (in addition to the Ethernet header) */ #define VLAN_ETH_HLEN 18 /* Total octets in header. */ #define VLAN_ETH_ZLEN 64 /* Min. octets in frame sans FCS */ /* * According to 802.3ac, the packet can be 4 bytes longer. --Klika Jan */ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ #define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_hdr { __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; }; /** * struct vlan_ethhdr - vlan ethernet header (ethhdr + vlan_hdr) * @h_dest: destination ethernet address * @h_source: source ethernet address * @h_vlan_proto: ethernet protocol * @h_vlan_TCI: priority and VLAN ID * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { struct_group(addrs, unsigned char h_dest[ETH_ALEN]; unsigned char h_source[ETH_ALEN]; ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; }; #include <linux/skbuff.h> static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) { return (struct vlan_ethhdr *)skb_mac_header(skb); } /* Prefer this version in TX path, instead of * skb_reset_mac_header() + vlan_eth_hdr() */ static inline struct vlan_ethhdr *skb_vlan_eth_hdr(const struct sk_buff *skb) { return (struct vlan_ethhdr *)skb->data; } #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator / Drop Eligible Indicator */ #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ #define VLAN_N_VID 4096 /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); static inline bool is_vlan_dev(const struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } #define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) #define skb_vlan_tag_get_prio(__skb) (((__skb)->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) static inline int vlan_get_rx_ctag_filter_info(struct net_device *dev) { ASSERT_RTNL(); return notifier_to_errno(call_netdevice_notifiers(NETDEV_CVLAN_FILTER_PUSH_INFO, dev)); } static inline void vlan_drop_rx_ctag_filter_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_CVLAN_FILTER_DROP_INFO, dev); } static inline int vlan_get_rx_stag_filter_info(struct net_device *dev) { ASSERT_RTNL(); return notifier_to_errno(call_netdevice_notifiers(NETDEV_SVLAN_FILTER_PUSH_INFO, dev)); } static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_SVLAN_FILTER_DROP_INFO, dev); } /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets * @tx_packets: number of transmitted packets * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters * @rx_errors: number of rx errors * @tx_dropped: number of tx drops */ struct vlan_pcpu_stats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t rx_multicast; u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_errors; u32 tx_dropped; }; #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern int vlan_for_each(struct net_device *dev, int (*action)(struct net_device *dev, int vid, void *arg), void *arg); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern __be16 vlan_dev_vlan_proto(const struct net_device *dev); /** * struct vlan_priority_tci_mapping - vlan egress priority mappings * @priority: skb priority * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 * @next: pointer to next struct */ struct vlan_priority_tci_mapping { u32 priority; u16 vlan_qos; struct vlan_priority_tci_mapping *next; }; struct proc_dir_entry; struct netpoll; /** * struct vlan_dev_priv - VLAN private device data * @nr_ingress_mappings: number of ingress priority mappings * @ingress_priority_map: ingress priority mappings * @nr_egress_mappings: number of egress priority mappings * @egress_priority_map: hash of egress priority mappings * @vlan_proto: VLAN encapsulation protocol * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice * @dev_tracker: refcount tracker for @real_dev reference * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats */ struct vlan_dev_priv { unsigned int nr_ingress_mappings; u32 ingress_priority_map[8]; unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; __be16 vlan_proto; u16 vlan_id; u16 flags; struct net_device *real_dev; netdevice_tracker dev_tracker; unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif }; static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) { return netdev_priv(dev); } static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) { struct vlan_priority_tci_mapping *mp; smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)]; while (mp) { if (mp->priority == skprio) { return mp->vlan_qos; /* This should already be shifted * to mask correctly with the * VLAN's TCI */ } mp = mp->next; } return 0; } extern bool vlan_do_receive(struct sk_buff **skb); extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); extern int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev); extern void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev); extern bool vlan_uses_dev(const struct net_device *dev); #else static inline struct net_device * __vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) { return NULL; } static inline int vlan_for_each(struct net_device *dev, int (*action)(struct net_device *dev, int vid, void *arg), void *arg) { return 0; } static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { BUG(); return NULL; } static inline u16 vlan_dev_vlan_id(const struct net_device *dev) { BUG(); return 0; } static inline __be16 vlan_dev_vlan_proto(const struct net_device *dev) { BUG(); return 0; } static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) { return 0; } static inline bool vlan_do_receive(struct sk_buff **skb) { return false; } static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; } static inline void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { } static inline int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev) { return 0; } static inline void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev) { } static inline bool vlan_uses_dev(const struct net_device *dev) { return false; } #endif /** * eth_type_vlan - check for valid vlan ether type. * @ethertype: ether type to check * * Returns true if the ether type is a vlan ether type. */ static inline bool eth_type_vlan(__be16 ethertype) { switch (ethertype) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): return true; default: return false; } } static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX) return true; if (proto == htons(ETH_P_8021AD) && features & NETIF_F_HW_VLAN_STAG_TX) return true; return false; } /** * __vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns error if skb_cow_head fails. * * Does not change skb->protocol so this function can be used during receive. */ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, unsigned int mac_len) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; skb_push(skb, VLAN_HLEN); /* Move the mac header sans proto to the beginning of the new header. */ if (likely(mac_len > ETH_TLEN)) memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); if (skb_mac_header_was_set(skb)) skb->mac_header -= VLAN_HLEN; veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); /* first, the ethernet type */ if (likely(mac_len >= ETH_TLEN)) { /* h_vlan_encapsulated_proto should already be populated, and * skb->data has space for h_vlan_proto */ veth->h_vlan_proto = vlan_proto; } else { /* h_vlan_encapsulated_proto should not be populated, and * skb->data has no space for h_vlan_proto */ veth->h_vlan_encapsulated_proto = skb->protocol; } /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); return 0; } /** * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload * Returns error if skb_cow_head fails. * * Does not change skb->protocol so this function can be used during receive. */ static inline int __vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); } /** * vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. */ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, unsigned int mac_len) { int err; err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len); if (err) { dev_kfree_skb_any(skb); return NULL; } return skb; } /** * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. */ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); } /** * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. */ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { skb = vlan_insert_tag(skb, vlan_proto, vlan_tci); if (skb) skb->protocol = vlan_proto; return skb; } /** * __vlan_hwaccel_clear_tag - clear hardware accelerated VLAN info * @skb: skbuff to clear * * Clears the VLAN information from @skb */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { skb->vlan_all = 0; } /** * __vlan_hwaccel_copy_tag - copy hardware accelerated VLAN info from another skb * @dst: skbuff to copy to * @src: skbuff to copy from * * Copies VLAN information from @src to @dst (for branchless code) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { dst->vlan_all = src->vlan_all; } /* * __vlan_hwaccel_push_inside - pushes vlan tag to the payload * @skb: skbuff to tag * * Pushes the VLAN tag from @skb->vlan_tci inside to the payload. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. */ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) { skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); if (likely(skb)) __vlan_hwaccel_clear_tag(skb); return skb; } /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest */ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { skb->vlan_proto = vlan_proto; skb->vlan_tci = vlan_tci; } /** * __vlan_get_tag - get the VLAN ID that is part of the payload * @skb: skbuff to query * @vlan_tci: buffer to store value * * Returns error if the skb is not of VLAN type */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) return -ENODATA; *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; } /** * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[] * @skb: skbuff to query * @vlan_tci: buffer to store value * * Returns error if @skb->vlan_tci is not set correctly */ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { if (skb_vlan_tag_present(skb)) { *vlan_tci = skb_vlan_tag_get(skb); return 0; } else { *vlan_tci = 0; return -ENODATA; } } /** * vlan_get_tag - get the VLAN ID from the skb * @skb: skbuff to query * @vlan_tci: buffer to store value * * Returns error if the skb is not VLAN tagged */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) { return __vlan_hwaccel_get_tag(skb, vlan_tci); } else { return __vlan_get_tag(skb, vlan_tci); } } /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * @type: first vlan protocol * @depth: buffer to store length of eth and vlan tags in bytes * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at * ETH_HLEN otherwise */ if (eth_type_vlan(type)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; vlan_depth -= VLAN_HLEN; } else { vlan_depth = ETH_HLEN; } do { struct vlan_hdr vhdr, *vh; vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); if (unlikely(!vh || !--parse_depth)) return 0; type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (eth_type_vlan(type)); } if (depth) *depth = vlan_depth; return type; } /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } /* This version of __vlan_get_protocol() also pulls mac header in skb->head */ static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb, __be16 type, int *depth) { int maclen; type = __vlan_get_protocol(skb, type, &maclen); if (type) { if (!pskb_may_pull(skb, maclen)) type = 0; else if (depth) *depth = maclen; } return type; } /* A getter for the SKB protocol field which will handle VLAN tags consistently * whether VLAN acceleration is enabled or not. */ static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) { if (!skip_vlan) /* VLAN acceleration strips the VLAN header from the skb and * moves it to skb->vlan_proto */ return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; return vlan_get_protocol(skb); } static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { __be16 proto; unsigned short *rawp; /* * Was a VLAN packet, grab the encapsulated protocol, which the layer * three protocols care about. */ proto = vhdr->h_vlan_encapsulated_proto; if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } rawp = (unsigned short *)(vhdr + 1); if (*rawp == 0xFFFF) /* * This is a magic hack to spot IPX packets. Older Novell * breaks the protocol design and runs IPX over 802.3 without * an 802.2 LLC layer. We look for FFFF which isn't a used * 802.2 SSAP/DSAP. This won't work for fault tolerant netware * but does for the rest. */ skb->protocol = htons(ETH_P_802_3); else /* * Real 802.2 LLC */ skb->protocol = htons(ETH_P_802_2); } /** * vlan_remove_tag - remove outer VLAN tag from payload * @skb: skbuff to remove tag from * @vlan_tci: buffer to store value * * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. * * Returns a new pointer to skb->data, or NULL on failure to pull. */ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *vlan_tci = ntohs(vhdr->h_vlan_TCI); memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); vlan_set_encap_proto(skb, vhdr); return __skb_pull(skb, VLAN_HLEN); } /** * skb_vlan_tagged - check if skb is vlan tagged. * @skb: skbuff to query * * Returns true if the skb is tagged, regardless of whether it is hardware * accelerated or not. */ static inline bool skb_vlan_tagged(const struct sk_buff *skb) { if (!skb_vlan_tag_present(skb) && likely(!eth_type_vlan(skb->protocol))) return false; return true; } /** * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. * @skb: skbuff to query * * Returns true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) { __be16 protocol = skb->protocol; if (!skb_vlan_tag_present(skb)) { struct vlan_ethhdr *veh; if (likely(!eth_type_vlan(protocol))) return false; if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) return false; veh = skb_vlan_eth_hdr(skb); protocol = veh->h_vlan_encapsulated_proto; } if (!eth_type_vlan(protocol)) return false; return true; } /** * vlan_features_check - drop unsafe features for skb with multiple tags. * @skb: skbuff to query * @features: features to be checked * * Returns features without unsafe ones if the skb has multiple tags. */ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tagged_multi(skb)) { /* In the case of multi-tagged packets, use a direct mask * instead of using netdev_interesect_features(), to make * sure that only devices supporting NETIF_F_HW_CSUM will * have checksum offloading support. */ features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } return features; } /** * compare_vlan_header - Compare two vlan headers * @h1: Pointer to vlan header * @h2: Pointer to vlan header * * Compare two vlan headers, returns 0 if equal. * * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. */ static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, const struct vlan_hdr *h2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return *(u32 *)h1 ^ *(u32 *)h2; #else return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) | ((__force u32)h1->h_vlan_encapsulated_proto ^ (__force u32)h2->h_vlan_encapsulated_proto); #endif } #endif /* !(_LINUX_IF_VLAN_H_) */
9 6 2 2 1 5 2 3 1 1 2 1 3 5 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/befs/linuxvfs.c * * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/nls.h> #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/exportfs.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include "befs.h" #include "btree.h" #include "inode.h" #include "datastream.h" #include "super.h" #include "io.h" MODULE_DESCRIPTION("BeOS File System (BeFS) driver"); MODULE_AUTHOR("Will Dyson"); MODULE_LICENSE("GPL"); /* The units the vfs expects inode->i_blocks to be in */ #define VFS_BLOCK_SIZE 512 static int befs_readdir(struct file *, struct dir_context *); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_read_folio(struct file *file, struct folio *folio); static sector_t befs_bmap(struct address_space *mapping, sector_t block); static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int); static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_free_inode(struct inode *inode); static void befs_destroy_inodecache(void); static int befs_symlink_read_folio(struct file *, struct folio *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static void befs_put_super(struct super_block *); static int befs_statfs(struct dentry *, struct kstatfs *); static int befs_show_options(struct seq_file *, struct dentry *); static struct dentry *befs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); static struct dentry *befs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); static struct dentry *befs_get_parent(struct dentry *child); static void befs_free_fc(struct fs_context *fc); static const struct super_operations befs_sops = { .alloc_inode = befs_alloc_inode, /* allocate a new inode */ .free_inode = befs_free_inode, /* deallocate an inode */ .put_super = befs_put_super, /* uninit super */ .statfs = befs_statfs, /* statfs */ .show_options = befs_show_options, }; /* slab cache for befs_inode_info objects */ static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, .iterate_shared = befs_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations befs_dir_inode_operations = { .lookup = befs_lookup, }; static const struct address_space_operations befs_aops = { .read_folio = befs_read_folio, .bmap = befs_bmap, }; static const struct address_space_operations befs_symlink_aops = { .read_folio = befs_symlink_read_folio, }; static const struct export_operations befs_export_operations = { .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = befs_fh_to_dentry, .fh_to_parent = befs_fh_to_parent, .get_parent = befs_get_parent, }; /* * Called by generic_file_read() to read a folio of data * * In turn, simply calls a generic block read function and * passes it the address of befs_get_block, for mapping file * positions to disk blocks. */ static int befs_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, befs_get_block); } static sector_t befs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, befs_get_block); } /* * Generic function to map a file position (block) to a * disk offset (passed back in bh_result). * * Used by many higher level functions. * * Calls befs_fblock2brun() in datastream.c to do the real work. */ static int befs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_block_run run = BAD_IADDR; int res; ulong disk_off; befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld", (unsigned long)inode->i_ino, (long)block); if (create) { befs_error(sb, "befs_get_block() was asked to write to " "block %ld in inode %lu", (long)block, (unsigned long)inode->i_ino); return -EPERM; } res = befs_fblock2brun(sb, ds, block, &run); if (res != BEFS_OK) { befs_error(sb, "<--- %s for inode %lu, block %ld ERROR", __func__, (unsigned long)inode->i_ino, (long)block); return -EFBIG; } disk_off = (ulong) iaddr2blockno(sb, &run); map_bh(bh_result, inode->i_sb, disk_off); befs_debug(sb, "<--- %s for inode %lu, block %ld, disk address %lu", __func__, (unsigned long)inode->i_ino, (long)block, (unsigned long)disk_off); return 0; } static struct dentry * befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct super_block *sb = dir->i_sb; const befs_data_stream *ds = &BEFS_I(dir)->i_data.ds; befs_off_t offset; int ret; int utfnamelen; char *utfname; const char *name = dentry->d_name.name; befs_debug(sb, "---> %s name %pd inode %ld", __func__, dentry, dir->i_ino); /* Convert to UTF-8 */ if (BEFS_SB(sb)->nls) { ret = befs_nls2utf(sb, name, strlen(name), &utfname, &utfnamelen); if (ret < 0) { befs_debug(sb, "<--- %s ERROR", __func__); return ERR_PTR(ret); } ret = befs_btree_find(sb, ds, utfname, &offset); kfree(utfname); } else { ret = befs_btree_find(sb, ds, name, &offset); } if (ret == BEFS_BT_NOT_FOUND) { befs_debug(sb, "<--- %s %pd not found", __func__, dentry); inode = NULL; } else if (ret != BEFS_OK || offset == 0) { befs_error(sb, "<--- %s Error", __func__); inode = ERR_PTR(-ENODATA); } else { inode = befs_iget(dir->i_sb, (ino_t) offset); } befs_debug(sb, "<--- %s", __func__); return d_splice_alias(inode, dentry); } static int befs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; const befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_off_t value; int result; size_t keysize; char keybuf[BEFS_NAME_LEN + 1]; befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld", __func__, file, inode->i_ino, ctx->pos); while (1) { result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, keybuf, &keysize, &value); if (result == BEFS_ERR) { befs_debug(sb, "<--- %s ERROR", __func__); befs_error(sb, "IO error reading %pD (inode %lu)", file, inode->i_ino); return -EIO; } else if (result == BEFS_BT_END) { befs_debug(sb, "<--- %s END", __func__); return 0; } else if (result == BEFS_BT_EMPTY) { befs_debug(sb, "<--- %s Empty directory", __func__); return 0; } /* Convert to NLS */ if (BEFS_SB(sb)->nls) { char *nlsname; int nlsnamelen; result = befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); if (result < 0) { befs_debug(sb, "<--- %s ERROR", __func__); return result; } if (!dir_emit(ctx, nlsname, nlsnamelen, (ino_t) value, DT_UNKNOWN)) { kfree(nlsname); return 0; } kfree(nlsname); } else { if (!dir_emit(ctx, keybuf, keysize, (ino_t) value, DT_UNKNOWN)) return 0; } ctx->pos++; } } static struct inode * befs_alloc_inode(struct super_block *sb) { struct befs_inode_info *bi; bi = alloc_inode_sb(sb, befs_inode_cachep, GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; } static void befs_free_inode(struct inode *inode) { kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; inode_init_once(&bi->vfs_inode); } static struct inode *befs_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; befs_inode *raw_inode; struct befs_sb_info *befs_sb = BEFS_SB(sb); struct befs_inode_info *befs_ino; struct inode *inode; befs_debug(sb, "---> %s inode = %lu", __func__, ino); inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; befs_ino = BEFS_I(inode); /* convert from vfs's inode number to befs's inode number */ befs_ino->i_inode_num = blockno2iaddr(sb, inode->i_ino); befs_debug(sb, " real inode number [%u, %hu, %hu]", befs_ino->i_inode_num.allocation_group, befs_ino->i_inode_num.start, befs_ino->i_inode_num.len); bh = sb_bread(sb, inode->i_ino); if (!bh) { befs_error(sb, "unable to read inode block - " "inode = %lu", inode->i_ino); goto unacquire_none; } raw_inode = (befs_inode *) bh->b_data; befs_dump_inode(sb, raw_inode); if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) { befs_error(sb, "Bad inode: %lu", inode->i_ino); goto unacquire_bh; } inode->i_mode = (umode_t) fs32_to_cpu(sb, raw_inode->mode); /* * set uid and gid. But since current BeOS is single user OS, so * you can change by "uid" or "gid" options. */ inode->i_uid = befs_sb->mount_opts.use_uid ? befs_sb->mount_opts.uid : make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid)); inode->i_gid = befs_sb->mount_opts.use_gid ? befs_sb->mount_opts.gid : make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid)); set_nlink(inode, 1); /* * BEFS's time is 64 bits, but current VFS is 32 bits... * BEFS don't have access time. Nor inode change time. VFS * doesn't have creation time. * Also, the lower 16 bits of the last_modified_time and * create_time are just a counter to help ensure uniqueness * for indexing purposes. (PFD, page 54) */ inode_set_mtime(inode, fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16, 0);/* lower 16 bits are not a time */ inode_set_ctime_to_ts(inode, inode_get_mtime(inode)); inode_set_atime_to_ts(inode, inode_get_mtime(inode)); befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent); befs_ino->i_attribute = fsrun_to_cpu(sb, raw_inode->attributes); befs_ino->i_flags = fs32_to_cpu(sb, raw_inode->flags); if (S_ISLNK(inode->i_mode) && !(befs_ino->i_flags & BEFS_LONG_SYMLINK)){ inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; strscpy(befs_ino->i_data.symlink, raw_inode->data.symlink, BEFS_SYMLINK_LEN); } else { int num_blks; befs_ino->i_data.ds = fsds_to_cpu(sb, &raw_inode->data.datastream); num_blks = befs_count_blocks(sb, &befs_ino->i_data.ds); inode->i_blocks = num_blks * (befs_sb->block_size / VFS_BLOCK_SIZE); inode->i_size = befs_ino->i_data.ds.size; } inode->i_mapping->a_ops = &befs_aops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &befs_symlink_aops; } else { inode->i_link = befs_ino->i_data.symlink; inode->i_op = &simple_symlink_inode_operations; } } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " "on disk special files", inode->i_ino); goto unacquire_bh; } brelse(bh); befs_debug(sb, "<--- %s", __func__); unlock_new_inode(inode); return inode; unacquire_bh: brelse(bh); unacquire_none: iget_failed(inode); befs_debug(sb, "<--- %s - Bad inode", __func__); return ERR_PTR(-EIO); } /* Initialize the inode cache. Called at fs setup. * * Taken from NFS implementation by Al Viro. */ static int __init befs_init_inodecache(void) { befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache", sizeof(struct befs_inode_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, offsetof(struct befs_inode_info, i_data.symlink), sizeof_field(struct befs_inode_info, i_data.symlink), init_once); if (befs_inode_cachep == NULL) return -ENOMEM; return 0; } /* Called at fs teardown. * * Taken from NFS implementation by Al Viro. */ static void befs_destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } /* * The inode of symbolic link is different to data stream. * The data stream become link name. Unless the LONG_SYMLINK * flag is set. */ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) { struct inode *inode = folio->mapping->host; struct super_block *sb = inode->i_sb; struct befs_inode_info *befs_ino = BEFS_I(inode); befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; char *link = folio_address(folio); int err = -EIO; if (len == 0 || len > PAGE_SIZE) { befs_error(sb, "Long symlink with illegal length"); goto fail; } befs_debug(sb, "Follow long symlink"); if (befs_read_lsymlink(sb, data, link, len) != len) { befs_error(sb, "Failed to read entire long symlink"); goto fail; } link[len - 1] = '\0'; err = 0; fail: folio_end_read(folio, err == 0); return err; } /* * UTF-8 to NLS charset convert routine * * Uses uni2char() / char2uni() rather than the nls tables directly */ static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; unicode_t uni; int unilen, utflen; char *result; /* The utf8->nls conversion won't make the final nls string bigger * than the utf one, but if the string is pure ascii they'll have the * same width and an extra char is needed to save the additional \0 */ int maxlen = in_len + 1; befs_debug(sb, "---> %s", __func__); if (!nls) { befs_error(sb, "%s called with no NLS table loaded", __func__); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) return -ENOMEM; for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ utflen = utf8_to_utf32(&in[i], in_len - i, &uni); if (utflen < 0) goto conv_err; /* convert from Unicode to nls */ if (uni > MAX_WCHAR_T) goto conv_err; unilen = nls->uni2char(uni, &result[o], in_len - o); if (unilen < 0) goto conv_err; } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- %s", __func__); return o; conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); kfree(result); return -EILSEQ; } /** * befs_nls2utf - Convert NLS string to utf8 encodeing * @sb: Superblock * @in: Input string buffer in NLS format * @in_len: Length of input string in bytes * @out: The output string in UTF-8 format * @out_len: Length of the output buffer * * Converts input string @in, which is in the format of the loaded NLS map, * into a utf8 string. * * The destination string @out is allocated by this function and the caller is * responsible for freeing it with kfree() * * On return, *@out_len is the length of @out in bytes. * * On success, the return value is the number of utf8 characters written to * the output buffer @out. * * On Failure, a negative number coresponding to the error code is returned. */ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; wchar_t uni; int unilen, utflen; char *result; /* * There are nls characters that will translate to 3-chars-wide UTF-8 * characters, an additional byte is needed to save the final \0 * in special cases */ int maxlen = (3 * in_len) + 1; befs_debug(sb, "---> %s\n", __func__); if (!nls) { befs_error(sb, "%s called with no NLS table loaded.", __func__); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) { *out_len = 0; return -ENOMEM; } for (i = o = 0; i < in_len; i += unilen, o += utflen) { /* convert from nls to unicode */ unilen = nls->char2uni(&in[i], in_len - i, &uni); if (unilen < 0) goto conv_err; /* convert from unicode to UTF-8 */ utflen = utf32_to_utf8(uni, &result[o], 3); if (utflen <= 0) goto conv_err; } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- %s", __func__); return i; conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); kfree(result); return -EILSEQ; } static struct inode *befs_nfs_get_inode(struct super_block *sb, uint64_t ino, uint32_t generation) { /* No need to handle i_generation */ return befs_iget(sb, ino); } /* * Map a NFS file handle to a corresponding dentry */ static struct dentry *befs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, befs_nfs_get_inode); } /* * Find the parent for a file specified by NFS handle */ static struct dentry *befs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, befs_nfs_get_inode); } static struct dentry *befs_get_parent(struct dentry *child) { struct inode *parent; struct befs_inode_info *befs_ino = BEFS_I(d_inode(child)); parent = befs_iget(child->d_sb, (unsigned long)befs_ino->i_parent.start); return d_obtain_alias(parent); } enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, }; static const struct fs_parameter_spec befs_param_spec[] = { fsparam_uid ("uid", Opt_uid), fsparam_gid ("gid", Opt_gid), fsparam_string ("iocharset", Opt_charset), fsparam_flag ("debug", Opt_debug), {} }; static int befs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct befs_mount_options *opts = fc->fs_private; int token; struct fs_parse_result result; /* befs ignores all options on remount */ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) return 0; token = fs_parse(fc, befs_param_spec, param, &result); if (token < 0) return token; switch (token) { case Opt_uid: opts->uid = result.uid; opts->use_uid = 1; break; case Opt_gid: opts->gid = result.gid; opts->use_gid = 1; break; case Opt_charset: kfree(opts->iocharset); opts->iocharset = param->string; param->string = NULL; break; case Opt_debug: opts->debug = 1; break; default: return -EINVAL; } return 0; } static int befs_show_options(struct seq_file *m, struct dentry *root) { struct befs_sb_info *befs_sb = BEFS_SB(root->d_sb); struct befs_mount_options *opts = &befs_sb->mount_opts; if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid)); if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); if (opts->iocharset) seq_printf(m, ",charset=%s", opts->iocharset); if (opts->debug) seq_puts(m, ",debug"); return 0; } /* This function has the responsibiltiy of getting the * filesystem ready for unmounting. * Basically, we free everything that we allocated in * befs_read_inode */ static void befs_put_super(struct super_block *sb) { kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; unload_nls(BEFS_SB(sb)->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } /* * Copy the parsed options into the sbi mount_options member */ static void befs_set_options(struct befs_sb_info *sbi, struct befs_mount_options *opts) { sbi->mount_opts.uid = opts->uid; sbi->mount_opts.gid = opts->gid; sbi->mount_opts.use_uid = opts->use_uid; sbi->mount_opts.use_gid = opts->use_gid; sbi->mount_opts.debug = opts->debug; sbi->mount_opts.iocharset = opts->iocharset; opts->iocharset = NULL; } /* Allocate private field of the superblock, fill it. * * Finish filling the public superblock fields * Make the root directory * Load a set of NLS translations if needed. */ static int befs_fill_super(struct super_block *sb, struct fs_context *fc) { struct buffer_head *bh; struct befs_sb_info *befs_sb; befs_super_block *disk_sb; struct inode *root; long ret = -EINVAL; const unsigned long sb_block = 0; const off_t x86_sb_off = 512; int blocksize; struct befs_mount_options *parsed_opts = fc->fs_private; int silent = fc->sb_flags & SB_SILENT; sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL); if (sb->s_fs_info == NULL) goto unacquire_none; befs_sb = BEFS_SB(sb); befs_set_options(befs_sb, parsed_opts); befs_debug(sb, "---> %s", __func__); if (!sb_rdonly(sb)) { befs_warning(sb, "No write support. Marking filesystem read-only"); sb->s_flags |= SB_RDONLY; } /* * Set dummy blocksize to read super block. * Will be set to real fs blocksize later. * * Linux 2.4.10 and later refuse to read blocks smaller than * the logical block size for the device. But we also need to read at * least 1k to get the second 512 bytes of the volume. */ blocksize = sb_min_blocksize(sb, 1024); if (!blocksize) { if (!silent) befs_error(sb, "unable to set blocksize"); goto unacquire_priv_sbp; } bh = sb_bread(sb, sb_block); if (!bh) { if (!silent) befs_error(sb, "unable to read superblock"); goto unacquire_priv_sbp; } /* account for offset of super block on x86 */ disk_sb = (befs_super_block *) bh->b_data; if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) || (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) { befs_debug(sb, "Using PPC superblock location"); } else { befs_debug(sb, "Using x86 superblock location"); disk_sb = (befs_super_block *) ((void *) bh->b_data + x86_sb_off); } if ((befs_load_sb(sb, disk_sb) != BEFS_OK) || (befs_check_sb(sb) != BEFS_OK)) goto unacquire_bh; befs_dump_super_block(sb, disk_sb); brelse(bh); if (befs_sb->num_blocks > ~((sector_t)0)) { if (!silent) befs_error(sb, "blocks count: %llu is larger than the host can use", befs_sb->num_blocks); goto unacquire_priv_sbp; } /* * set up enough so that it can read an inode * Fill in kernel superblock fields from private sb */ sb->s_magic = BEFS_SUPER_MAGIC; /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); sb->s_op = &befs_sops; sb->s_export_op = &befs_export_operations; sb->s_time_min = 0; sb->s_time_max = 0xffffffffffffll; root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); if (IS_ERR(root)) { ret = PTR_ERR(root); goto unacquire_priv_sbp; } sb->s_root = d_make_root(root); if (!sb->s_root) { if (!silent) befs_error(sb, "get root inode failed"); goto unacquire_priv_sbp; } /* load nls library */ if (befs_sb->mount_opts.iocharset) { befs_debug(sb, "Loading nls: %s", befs_sb->mount_opts.iocharset); befs_sb->nls = load_nls(befs_sb->mount_opts.iocharset); if (!befs_sb->nls) { befs_warning(sb, "Cannot load nls %s" " loading default nls", befs_sb->mount_opts.iocharset); befs_sb->nls = load_nls_default(); } /* load default nls if none is specified in mount options */ } else { befs_debug(sb, "Loading default nls"); befs_sb->nls = load_nls_default(); } return 0; unacquire_bh: brelse(bh); unacquire_priv_sbp: kfree(befs_sb->mount_opts.iocharset); kfree(sb->s_fs_info); sb->s_fs_info = NULL; unacquire_none: return ret; } static int befs_reconfigure(struct fs_context *fc) { sync_filesystem(fc->root->d_sb); if (!(fc->sb_flags & SB_RDONLY)) return -EINVAL; return 0; } static int befs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); befs_debug(sb, "---> %s", __func__); buf->f_type = BEFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = BEFS_SB(sb)->num_blocks; buf->f_bfree = BEFS_SB(sb)->num_blocks - BEFS_SB(sb)->used_blocks; buf->f_bavail = buf->f_bfree; buf->f_files = 0; /* UNKNOWN */ buf->f_ffree = 0; /* UNKNOWN */ buf->f_fsid = u64_to_fsid(id); buf->f_namelen = BEFS_NAME_LEN; befs_debug(sb, "<--- %s", __func__); return 0; } static int befs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, befs_fill_super); } static const struct fs_context_operations befs_context_ops = { .parse_param = befs_parse_param, .get_tree = befs_get_tree, .reconfigure = befs_reconfigure, .free = befs_free_fc, }; static int befs_init_fs_context(struct fs_context *fc) { struct befs_mount_options *opts; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) return -ENOMEM; /* Initialize options */ opts->uid = GLOBAL_ROOT_UID; opts->gid = GLOBAL_ROOT_GID; fc->fs_private = opts; fc->ops = &befs_context_ops; return 0; } static void befs_free_fc(struct fs_context *fc) { struct befs_mount_options *opts = fc->fs_private; kfree(opts->iocharset); kfree(fc->fs_private); } static struct file_system_type befs_fs_type = { .owner = THIS_MODULE, .name = "befs", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = befs_init_fs_context, .parameters = befs_param_spec, }; MODULE_ALIAS_FS("befs"); static int __init init_befs_fs(void) { int err; pr_info("version: %s\n", BEFS_VERSION); err = befs_init_inodecache(); if (err) goto unacquire_none; err = register_filesystem(&befs_fs_type); if (err) goto unacquire_inodecache; return 0; unacquire_inodecache: befs_destroy_inodecache(); unacquire_none: return err; } static void __exit exit_befs_fs(void) { befs_destroy_inodecache(); unregister_filesystem(&befs_fs_type); } /* * Macros that typecheck the init and exit functions, * ensures that they are called at init and cleanup, * and eliminates warnings about unused functions. */ module_init(init_befs_fs) module_exit(exit_befs_fs)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi_bus.h - ACPI Bus Driver ($Revision: 22 $) * * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef __ACPI_BUS_H__ #define __ACPI_BUS_H__ #include <linux/completion.h> #include <linux/container_of.h> #include <linux/device.h> #include <linux/kobject.h> #include <linux/mutex.h> #include <linux/property.h> #include <linux/types.h> struct acpi_handle_list { u32 count; acpi_handle *handles; }; /* acpi_utils.h */ acpi_status acpi_extract_package(union acpi_object *package, struct acpi_buffer *format, struct acpi_buffer *buffer); acpi_status acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); bool acpi_evaluate_reference(acpi_handle handle, acpi_string pathname, struct acpi_object_list *arguments, struct acpi_handle_list *list); bool acpi_handle_list_equal(struct acpi_handle_list *list1, struct acpi_handle_list *list2); void acpi_handle_list_replace(struct acpi_handle_list *dst, struct acpi_handle_list *src); void acpi_handle_list_free(struct acpi_handle_list *list); bool acpi_device_dep(acpi_handle target, acpi_handle match); acpi_status acpi_evaluate_ost(acpi_handle handle, u32 source_event, u32 status_code, struct acpi_buffer *status_buf); acpi_status acpi_get_physical_device_location(acpi_handle handle, struct acpi_pld_info **pld); bool acpi_has_method(acpi_handle handle, char *name); acpi_status acpi_execute_simple_method(acpi_handle handle, char *method, u64 arg); acpi_status acpi_evaluate_ej0(acpi_handle handle); acpi_status acpi_evaluate_lck(acpi_handle handle, int lock); acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function); bool acpi_ata_match(acpi_handle handle); bool acpi_bay_match(acpi_handle handle); bool acpi_dock_match(acpi_handle handle); bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs); union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4); #ifdef CONFIG_ACPI static inline union acpi_object * acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4, acpi_object_type type) { union acpi_object *obj; obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4); if (obj && obj->type != type) { ACPI_FREE(obj); obj = NULL; } return obj; } #endif #define ACPI_INIT_DSM_ARGV4(cnt, eles) \ { \ .package.type = ACPI_TYPE_PACKAGE, \ .package.count = (cnt), \ .package.elements = (eles) \ } bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); bool acpi_reduced_hardware(void); #ifdef CONFIG_ACPI struct proc_dir_entry; #define ACPI_BUS_FILE_ROOT "acpi" extern struct proc_dir_entry *acpi_root_dir; enum acpi_bus_device_type { ACPI_BUS_TYPE_DEVICE = 0, ACPI_BUS_TYPE_POWER, ACPI_BUS_TYPE_PROCESSOR, ACPI_BUS_TYPE_THERMAL, ACPI_BUS_TYPE_POWER_BUTTON, ACPI_BUS_TYPE_SLEEP_BUTTON, ACPI_BUS_TYPE_ECDT_EC, ACPI_BUS_DEVICE_TYPE_COUNT }; struct acpi_driver; struct acpi_device; /* * ACPI Scan Handler * ----------------- */ struct acpi_hotplug_profile { struct kobject kobj; int (*scan_dependent)(struct acpi_device *adev); void (*notify_online)(struct acpi_device *adev); bool enabled:1; bool demand_offline:1; }; static inline struct acpi_hotplug_profile *to_acpi_hotplug_profile( struct kobject *kobj) { return container_of(kobj, struct acpi_hotplug_profile, kobj); } struct acpi_scan_handler { struct list_head list_node; const struct acpi_device_id *ids; bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); void (*post_eject)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; }; /* * ACPI Hotplug Context * -------------------- */ typedef int (*acpi_hp_notify) (struct acpi_device *, u32); typedef void (*acpi_hp_uevent) (struct acpi_device *, u32); typedef void (*acpi_hp_fixup) (struct acpi_device *); struct acpi_hotplug_context { struct acpi_device *self; acpi_hp_notify notify; acpi_hp_uevent uevent; acpi_hp_fixup fixup; }; /* * ACPI Driver * ----------- */ typedef int (*acpi_op_add) (struct acpi_device * device); typedef void (*acpi_op_remove) (struct acpi_device *device); typedef void (*acpi_op_notify) (struct acpi_device * device, u32 event); struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; acpi_op_notify notify; }; #define ACPI_DRIVER_ALL_NOTIFY_EVENTS 0x1 /* system AND device events */ struct acpi_driver { char name[80]; char class[80]; const struct acpi_device_id *ids; /* Supported Hardware IDs */ unsigned int flags; struct acpi_device_ops ops; struct device_driver drv; }; /* * ACPI Device * ----------- */ /* Status (_STA) */ struct acpi_device_status { u32 present:1; u32 enabled:1; u32 show_in_ui:1; u32 functional:1; u32 battery_present:1; u32 reserved:27; }; /* Flags */ struct acpi_device_flags { u32 dynamic_status:1; u32 removable:1; u32 ejectable:1; u32 power_manageable:1; u32 match_driver:1; u32 initialized:1; u32 visited:1; u32 hotplug_notify:1; u32 is_dock_station:1; u32 of_compatible_ok:1; u32 coherent_dma:1; u32 cca_seen:1; u32 enumeration_by_parent:1; u32 honor_deps:1; u32 reserved:18; }; /* File System */ struct acpi_device_dir { struct proc_dir_entry *entry; }; #define acpi_device_dir(d) ((d)->dir.entry) /* Plug and Play */ #define MAX_ACPI_DEVICE_NAME_LEN 40 #define MAX_ACPI_CLASS_NAME_LEN 20 typedef char acpi_bus_id[8]; typedef u64 acpi_bus_address; typedef char acpi_device_name[MAX_ACPI_DEVICE_NAME_LEN]; typedef char acpi_device_class[MAX_ACPI_CLASS_NAME_LEN]; struct acpi_hardware_id { struct list_head list; const char *id; }; struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; u32 backlight:1; u32 reserved:28; }; struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ int instance_no; /* Instance number of this object */ struct acpi_pnp_type type; /* ID type */ acpi_bus_address bus_address; /* _ADR */ char *unique_id; /* _UID */ struct list_head ids; /* _HID and _CIDs */ acpi_device_name device_name; /* Driver-determined */ acpi_device_class device_class; /* " */ }; #define acpi_device_bid(d) ((d)->pnp.bus_id) #define acpi_device_adr(d) ((d)->pnp.bus_address) const char *acpi_device_hid(struct acpi_device *device); #define acpi_device_uid(d) ((d)->pnp.unique_id) #define acpi_device_name(d) ((d)->pnp.device_name) #define acpi_device_class(d) ((d)->pnp.device_class) /* Power Management */ struct acpi_device_power_flags { u32 explicit_get:1; /* _PSC present? */ u32 power_resources:1; /* Power resources */ u32 inrush_current:1; /* Serialize Dx->D0 */ u32 power_removed:1; /* Optimize Dx->D0 */ u32 ignore_parent:1; /* Power is independent of parent power state */ u32 dsw_present:1; /* _DSW present? */ u32 reserved:26; }; struct acpi_device_power_state { struct list_head resources; /* Power resources referenced */ struct { u8 valid:1; u8 explicit_set:1; /* _PSx present? */ u8 reserved:6; } flags; int power; /* % Power (compared to D0) */ int latency; /* Dx->D0 time (microseconds) */ }; struct acpi_device_power { int state; /* Current state */ struct acpi_device_power_flags flags; struct acpi_device_power_state states[ACPI_D_STATE_COUNT]; /* Power states (D0-D3Cold) */ u8 state_for_enumeration; /* Deepest power state for enumeration */ }; struct acpi_dep_data { struct list_head node; acpi_handle supplier; acpi_handle consumer; bool honor_dep; bool met; bool free_when_met; }; /* Performance Management */ struct acpi_device_perf_flags { u8 reserved:8; }; struct acpi_device_perf_state { struct { u8 valid:1; u8 reserved:7; } flags; u8 power; /* % Power (compared to P0) */ u8 performance; /* % Performance ( " ) */ int latency; /* Px->P0 time (microseconds) */ }; struct acpi_device_perf { int state; struct acpi_device_perf_flags flags; int state_count; struct acpi_device_perf_state *states; }; /* Wakeup Management */ struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ u8 notifier_present:1; /* Wake-up notify handler has been installed */ }; struct acpi_device_wakeup_context { void (*func)(struct acpi_device_wakeup_context *context); struct device *dev; }; struct acpi_device_wakeup { acpi_handle gpe_device; u64 gpe_number; u64 sleep_state; struct list_head resources; struct acpi_device_wakeup_flags flags; struct acpi_device_wakeup_context context; struct wakeup_source *ws; int prepare_count; int enable_count; }; struct acpi_device_physical_node { struct list_head node; struct device *dev; unsigned int node_id; bool put_online:1; }; struct acpi_device_properties { struct list_head list; const guid_t *guid; union acpi_object *properties; void **bufs; }; /* ACPI Device Specific Data (_DSD) */ struct acpi_device_data { const union acpi_object *pointer; struct list_head properties; const union acpi_object *of_compatible; struct list_head subnodes; }; struct acpi_gpio_mapping; #define ACPI_DEVICE_SWNODE_ROOT 0 /* * The maximum expected number of CSI-2 data lanes. * * This number is not expected to ever have to be equal to or greater than the * number of bits in an unsigned long variable, but if it needs to be increased * above that limit, code will need to be adjusted accordingly. */ #define ACPI_DEVICE_CSI2_DATA_LANES 8 #define ACPI_DEVICE_SWNODE_PORT_NAME_LENGTH 8 enum acpi_device_swnode_dev_props { ACPI_DEVICE_SWNODE_DEV_ROTATION, ACPI_DEVICE_SWNODE_DEV_CLOCK_FREQUENCY, ACPI_DEVICE_SWNODE_DEV_LED_MAX_MICROAMP, ACPI_DEVICE_SWNODE_DEV_FLASH_MAX_MICROAMP, ACPI_DEVICE_SWNODE_DEV_FLASH_MAX_TIMEOUT_US, ACPI_DEVICE_SWNODE_DEV_NUM_OF, ACPI_DEVICE_SWNODE_DEV_NUM_ENTRIES }; enum acpi_device_swnode_port_props { ACPI_DEVICE_SWNODE_PORT_REG, ACPI_DEVICE_SWNODE_PORT_NUM_OF, ACPI_DEVICE_SWNODE_PORT_NUM_ENTRIES }; enum acpi_device_swnode_ep_props { ACPI_DEVICE_SWNODE_EP_REMOTE_EP, ACPI_DEVICE_SWNODE_EP_BUS_TYPE, ACPI_DEVICE_SWNODE_EP_REG, ACPI_DEVICE_SWNODE_EP_CLOCK_LANES, ACPI_DEVICE_SWNODE_EP_DATA_LANES, ACPI_DEVICE_SWNODE_EP_LANE_POLARITIES, /* TX only */ ACPI_DEVICE_SWNODE_EP_LINK_FREQUENCIES, ACPI_DEVICE_SWNODE_EP_NUM_OF, ACPI_DEVICE_SWNODE_EP_NUM_ENTRIES }; /* * Each device has a root software node plus two times as many nodes as the * number of CSI-2 ports. */ #define ACPI_DEVICE_SWNODE_PORT(port) (2 * (port) + 1) #define ACPI_DEVICE_SWNODE_EP(endpoint) \ (ACPI_DEVICE_SWNODE_PORT(endpoint) + 1) /** * struct acpi_device_software_node_port - MIPI DisCo for Imaging CSI-2 port * @port_name: Port name. * @data_lanes: "data-lanes" property values. * @lane_polarities: "lane-polarities" property values. * @link_frequencies: "link_frequencies" property values. * @port_nr: Port number. * @crs_crs2_local: _CRS CSI2 record present (i.e. this is a transmitter one). * @port_props: Port properties. * @ep_props: Endpoint properties. * @remote_ep: Reference to the remote endpoint. */ struct acpi_device_software_node_port { char port_name[ACPI_DEVICE_SWNODE_PORT_NAME_LENGTH + 1]; u32 data_lanes[ACPI_DEVICE_CSI2_DATA_LANES]; u32 lane_polarities[ACPI_DEVICE_CSI2_DATA_LANES + 1 /* clock lane */]; u64 link_frequencies[ACPI_DEVICE_CSI2_DATA_LANES]; unsigned int port_nr; bool crs_csi2_local; struct property_entry port_props[ACPI_DEVICE_SWNODE_PORT_NUM_ENTRIES]; struct property_entry ep_props[ACPI_DEVICE_SWNODE_EP_NUM_ENTRIES]; struct software_node_ref_args remote_ep[1]; }; /** * struct acpi_device_software_nodes - Software nodes for an ACPI device * @dev_props: Device properties. * @nodes: Software nodes for root as well as ports and endpoints. * @nodeprts: Array of software node pointers, for (un)registering them. * @ports: Information related to each port and endpoint within a port. * @num_ports: The number of ports. */ struct acpi_device_software_nodes { struct property_entry dev_props[ACPI_DEVICE_SWNODE_DEV_NUM_ENTRIES]; struct software_node *nodes; const struct software_node **nodeptrs; struct acpi_device_software_node_port *ports; unsigned int num_ports; }; /* Device */ struct acpi_device { u32 pld_crc; int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; struct list_head wakeup_list; struct list_head del_list; struct acpi_device_status status; struct acpi_device_flags flags; struct acpi_device_pnp pnp; struct acpi_device_power power; struct acpi_device_wakeup wakeup; struct acpi_device_perf performance; struct acpi_device_dir dir; struct acpi_device_data data; struct acpi_scan_handler *handler; struct acpi_hotplug_context *hp; struct acpi_device_software_nodes *swnodes; const struct acpi_gpio_mapping *driver_gpios; void *driver_data; struct device dev; unsigned int physical_node_count; unsigned int dep_unmet; struct list_head physical_node_list; struct mutex physical_node_lock; void (*remove)(struct acpi_device *); }; /* Non-device subnode */ struct acpi_data_node { struct list_head sibling; const char *name; acpi_handle handle; struct fwnode_handle fwnode; struct fwnode_handle *parent; struct acpi_device_data data; struct kobject kobj; struct completion kobj_done; }; extern const struct fwnode_operations acpi_device_fwnode_ops; extern const struct fwnode_operations acpi_data_fwnode_ops; extern const struct fwnode_operations acpi_static_fwnode_ops; bool is_acpi_device_node(const struct fwnode_handle *fwnode); bool is_acpi_data_node(const struct fwnode_handle *fwnode); static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode)); } #define to_acpi_device_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_device_node_fwnode = __fwnode; \ \ is_acpi_device_node(__to_acpi_device_node_fwnode) ? \ container_of(__to_acpi_device_node_fwnode, \ struct acpi_device, fwnode) : \ NULL; \ }) #define to_acpi_data_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \ \ is_acpi_data_node(__to_acpi_data_node_fwnode) ? \ container_of(__to_acpi_data_node_fwnode, \ struct acpi_data_node, fwnode) : \ NULL; \ }) static inline bool is_acpi_static_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_static_fwnode_ops; } static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode, const char *name) { return is_acpi_data_node(fwnode) ? (!strcmp(to_acpi_data_node(fwnode)->name, name)) : false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return &adev->fwnode; } static inline void *acpi_driver_data(struct acpi_device *d) { return d->driver_data; } #define to_acpi_device(d) container_of(d, struct acpi_device, dev) #define to_acpi_driver(d) container_of_const(d, struct acpi_driver, drv) static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) { if (adev->dev.parent) return to_acpi_device(adev->dev.parent); return NULL; } static inline void acpi_set_device_status(struct acpi_device *adev, u32 sta) { *((u32 *)&adev->status) = sta; } static inline void acpi_set_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp) { hp->self = adev; adev->hp = hp; } void acpi_initialize_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp, acpi_hp_notify notify, acpi_hp_uevent uevent); /* acpi_device.dev.bus == &acpi_bus_type */ extern const struct bus_type acpi_bus_type; int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); int acpi_dev_for_each_child_reverse(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); /* * Events * ------ */ struct acpi_bus_event { struct list_head node; acpi_device_class device_class; acpi_bus_id bus_id; u32 type; u32 data; }; extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); int acpi_bus_attach_private_data(acpi_handle, void *); void acpi_bus_detach_private_data(acpi_handle); int acpi_dev_install_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler, void *context); void acpi_dev_remove_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); extern int unregister_acpi_notifier(struct notifier_block *); /* * External Functions */ acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_set_power(acpi_handle handle, int state); const char *acpi_power_state_string(int state); int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); void acpi_device_fix_up_power_extended(struct acpi_device *adev); void acpi_device_fix_up_power_children(struct acpi_device *adev); int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); void acpi_dev_power_up_children_with_adr(struct acpi_device *adev); u8 acpi_dev_power_state_for_wake(struct acpi_device *adev); int acpi_device_power_add_dependent(struct acpi_device *adev, struct device *dev); void acpi_device_power_remove_dependent(struct acpi_device *adev, struct device *dev); #ifdef CONFIG_PM bool acpi_bus_can_wakeup(acpi_handle handle); #else static inline bool acpi_bus_can_wakeup(acpi_handle handle) { return false; } #endif void acpi_scan_lock_acquire(void); void acpi_scan_lock_release(void); void acpi_lock_hp_context(void); void acpi_unlock_hp_context(void); int acpi_scan_add_handler(struct acpi_scan_handler *handler); /* * use a macro to avoid include chaining to get THIS_MODULE */ #define acpi_bus_register_driver(drv) \ __acpi_bus_register_driver(drv, THIS_MODULE) int __acpi_bus_register_driver(struct acpi_driver *driver, struct module *owner); void acpi_bus_unregister_driver(struct acpi_driver *driver); int acpi_bus_scan(acpi_handle handle); void acpi_bus_trim(struct acpi_device *start); acpi_status acpi_bus_get_ejd(acpi_handle handle, acpi_handle * ejd); int acpi_match_device_ids(struct acpi_device *device, const struct acpi_device_id *ids); void acpi_set_modalias(struct acpi_device *adev, const char *default_id, char *modalias, size_t len); static inline bool acpi_device_enumerated(struct acpi_device *adev) { return adev && adev->flags.initialized && adev->flags.visited; } /** * module_acpi_driver(acpi_driver) - Helper macro for registering an ACPI driver * @__acpi_driver: acpi_driver struct * * Helper macro for ACPI drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_acpi_driver(__acpi_driver) \ module_driver(__acpi_driver, acpi_bus_register_driver, \ acpi_bus_unregister_driver) /* * Bind physical devices with ACPI devices */ struct acpi_bus_type { struct list_head list; const char *name; bool (*match)(struct device *dev); struct acpi_device * (*find_companion)(struct device *); void (*setup)(struct device *); }; int register_acpi_bus_type(struct acpi_bus_type *); int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); enum acpi_bridge_type { ACPI_BRIDGE_TYPE_PCIE = 1, ACPI_BRIDGE_TYPE_CXL, }; struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; int bridge_type; struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ u32 osc_ext_support_set; /* _OSC state of extended support bits */ u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; /* helper */ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, struct fwnode_handle *fwnode); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return acpi_dma_configure_id(dev, attr, NULL); } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); struct acpi_device *acpi_find_child_by_adr(struct acpi_device *adev, acpi_bus_address adr); int acpi_is_root_bridge(acpi_handle); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); bool acpi_quirk_skip_acpi_ac_and_battery(void); int acpi_install_cmos_rtc_space_handler(acpi_handle handle); void acpi_remove_cmos_rtc_space_handler(acpi_handle handle); int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); #else static inline bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status) { return false; } static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) { return false; } static inline int acpi_install_cmos_rtc_space_handler(acpi_handle handle) { return 1; } static inline void acpi_remove_cmos_rtc_space_handler(acpi_handle handle) { } static inline int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) { *skip = false; return 0; } #endif #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); bool acpi_quirk_skip_gpio_event_handlers(void); #else static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) { return false; } static inline bool acpi_quirk_skip_gpio_event_handlers(void) { return false; } #endif #ifdef CONFIG_PM void acpi_pm_wakeup_event(struct device *dev); acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*func)(struct acpi_device_wakeup_context *context)); acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); int acpi_pm_set_device_wakeup(struct device *dev, bool enable); #else static inline void acpi_pm_wakeup_event(struct device *dev) { } static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*func)(struct acpi_device_wakeup_context *context)) { return AE_SUPPORT; } static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { return AE_SUPPORT; } static inline bool acpi_pm_device_can_wakeup(struct device *dev) { return false; } static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) { if (p) *p = ACPI_STATE_D0; return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3_COLD) ? m : ACPI_STATE_D0; } static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT bool acpi_sleep_state_supported(u8 sleep_state); #else static inline bool acpi_sleep_state_supported(u8 sleep_state) { return false; } #endif #ifdef CONFIG_ACPI_SLEEP u32 acpi_target_system_state(void); #else static inline u32 acpi_target_system_state(void) { return ACPI_STATE_S0; } #endif static inline bool acpi_device_power_manageable(struct acpi_device *adev) { return adev->flags.power_manageable; } static inline bool acpi_device_can_wakeup(struct acpi_device *adev) { return adev->wakeup.flags.valid; } static inline bool acpi_device_can_poweroff(struct acpi_device *adev) { return adev->power.states[ACPI_STATE_D3_COLD].flags.valid || ((acpi_gbl_FADT.header.revision < 6) && adev->power.states[ACPI_STATE_D3_HOT].flags.explicit_set); } int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer); static inline bool acpi_dev_hid_match(struct acpi_device *adev, const char *hid2) { const char *hid1 = acpi_device_hid(adev); return hid1 && hid2 && !strcmp(hid1, hid2); } static inline bool acpi_str_uid_match(struct acpi_device *adev, const char *uid2) { const char *uid1 = acpi_device_uid(adev); return uid1 && uid2 && !strcmp(uid1, uid2); } static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2) { u64 uid1; return !acpi_dev_uid_to_integer(adev, &uid1) && uid1 == uid2; } #define TYPE_ENTRY(type, x) \ const type: x, \ type: x #define ACPI_STR_TYPES(match) \ TYPE_ENTRY(unsigned char *, match), \ TYPE_ENTRY(signed char *, match), \ TYPE_ENTRY(char *, match), \ TYPE_ENTRY(void *, match) /** * acpi_dev_uid_match - Match device by supplied UID * @adev: ACPI device to match. * @uid2: Unique ID of the device. * * Matches UID in @adev with given @uid2. * * Returns: %true if matches, %false otherwise. */ #define acpi_dev_uid_match(adev, uid2) \ _Generic(uid2, \ /* Treat @uid2 as a string for acpi string types */ \ ACPI_STR_TYPES(acpi_str_uid_match), \ /* Treat as an integer otherwise */ \ default: acpi_int_uid_match)(adev, uid2) /** * acpi_dev_hid_uid_match - Match device by supplied HID and UID * @adev: ACPI device to match. * @hid2: Hardware ID of the device. * @uid2: Unique ID of the device, pass NULL to not check _UID. * * Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2 * will be treated as a match. If user wants to validate @uid2, it should be * done before calling this function. * * Returns: %true if matches or @uid2 is NULL, %false otherwise. */ #define acpi_dev_hid_uid_match(adev, hid2, uid2) \ (acpi_dev_hid_match(adev, hid2) && \ /* Distinguish integer 0 from NULL @uid2 */ \ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \ acpi_dev_uid_match(adev, uid2))) void acpi_dev_clear_dependencies(struct acpi_device *supplier); bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier, struct acpi_device *start); /** * for_each_acpi_consumer_dev - iterate over the consumer ACPI devices for a * given supplier * @supplier: Pointer to the supplier's ACPI device * @consumer: Pointer to &struct acpi_device to hold the consumer, initially NULL */ #define for_each_acpi_consumer_dev(supplier, consumer) \ for (consumer = acpi_dev_get_next_consumer_dev(supplier, NULL); \ consumer; \ consumer = acpi_dev_get_next_consumer_dev(supplier, consumer)) struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); /** * for_each_acpi_dev_match - iterate over ACPI devices that matching the criteria * @adev: pointer to the matching ACPI device, NULL at the end of the loop * @hid: Hardware ID of the device. * @uid: Unique ID of the device, pass NULL to not check _UID * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * * The caller is responsible for invoking acpi_dev_put() on the returned device. */ #define for_each_acpi_dev_match(adev, hid, uid, hrv) \ for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \ adev; \ adev = acpi_dev_get_next_match_dev(adev, hid, uid, hrv)) static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev) { return adev ? to_acpi_device(get_device(&adev->dev)) : NULL; } static inline void acpi_dev_put(struct acpi_device *adev) { if (adev) put_device(&adev->dev); } struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); struct acpi_device *acpi_get_acpi_dev(acpi_handle handle); static inline void acpi_put_acpi_dev(struct acpi_device *adev) { acpi_dev_put(adev); } int acpi_wait_for_acpi_ipmi(void); int acpi_scan_add_dep(acpi_handle handle, struct acpi_handle_list *dep_devices); u32 arch_acpi_add_auto_dep(acpi_handle handle); #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } static inline int unregister_acpi_bus_type(void *bus) { return 0; } static inline int acpi_wait_for_acpi_ipmi(void) { return 0; } #endif /* CONFIG_ACPI */ #endif /*__ACPI_BUS_H__*/
3 1578 4419 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 /* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/idr.h * * 2002-10-18 written by Jim Houston jim.houston@ccur.com * Copyright (C) 2002 by Concurrent Computer Corporation * * Small id to pointer translation service avoiding fixed sized * tables. */ #ifndef __IDR_H__ #define __IDR_H__ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> struct idr { struct radix_tree_root idr_rt; unsigned int idr_base; unsigned int idr_next; }; /* * The IDR API does not expose the tagging functionality of the radix tree * to users. Use tag 0 to track whether a node has free space below it. */ #define IDR_FREE 0 /* Set the IDR flag and the IDR_FREE tag */ #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ (1 << (ROOT_TAG_SHIFT + IDR_FREE))) #define IDR_INIT_BASE(name, base) { \ .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \ .idr_base = (base), \ .idr_next = 0, \ } /** * IDR_INIT() - Initialise an IDR. * @name: Name of IDR. * * A freshly-initialised IDR contains no IDs. */ #define IDR_INIT(name) IDR_INIT_BASE(name, 0) /** * DEFINE_IDR() - Define a statically-allocated IDR. * @name: Name of IDR. * * An IDR defined using this macro is ready for use with no additional * initialisation required. It contains no IDs. */ #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) /** * idr_get_cursor - Return the current position of the cyclic allocator * @idr: idr handle * * The value returned is the value that will be next returned from * idr_alloc_cyclic() if it is free (otherwise the search will start from * this position). */ static inline unsigned int idr_get_cursor(const struct idr *idr) { return READ_ONCE(idr->idr_next); } /** * idr_set_cursor - Set the current position of the cyclic allocator * @idr: idr handle * @val: new position * * The next call to idr_alloc_cyclic() will return @val if it is free * (otherwise the search will start from this position). */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { WRITE_ONCE(idr->idr_next, val); } /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must * ensure calls to this function are made within rcu_read_lock() regions. * Other readers (lock-free or otherwise) and modifications may be running * concurrently. * * It is still required that the caller manage the synchronization and * lifetimes of the items. So if RCU lock-free lookups are used, typically * this would mean that the items have their own locks, or are amenable to * lock-free access; and that the items are freed by RCU (or only freed after * having been deleted from the idr tree *and* a synchronize_rcu() grace * period). */ #define idr_lock(idr) xa_lock(&(idr)->idr_rt) #define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) #define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) #define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) #define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) #define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) #define idr_lock_irqsave(idr, flags) \ xa_lock_irqsave(&(idr)->idr_rt, flags) #define idr_unlock_irqrestore(idr, flags) \ xa_unlock_irqrestore(&(idr)->idr_rt, flags) void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *id, unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *ptr, int start, int end, gfp_t); void *idr_remove(struct idr *, unsigned long id); void *idr_find(const struct idr *, unsigned long id); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. * @base: The base value for the IDR. * * This variation of idr_init() creates an IDR which will allocate IDs * starting at %base. */ static inline void idr_init_base(struct idr *idr, int base) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); idr->idr_base = base; idr->idr_next = 0; } /** * idr_init() - Initialise an IDR. * @idr: IDR handle. * * Initialise a dynamically allocated IDR. To initialise a * statically allocated IDR, use DEFINE_IDR(). */ static inline void idr_init(struct idr *idr) { idr_init_base(idr, 0); } /** * idr_is_empty() - Are there any IDs allocated? * @idr: IDR handle. * * Return: %true if any IDs have been allocated from this IDR. */ static inline bool idr_is_empty(const struct idr *idr) { return radix_tree_empty(&idr->idr_rt) && radix_tree_tagged(&idr->idr_rt, IDR_FREE); } /** * idr_preload_end - end preload section started with idr_preload() * * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ static inline void idr_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } /** * idr_for_each_entry() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U) /** * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. */ #define idr_for_each_entry_continue(idr, entry, id) \ for ((entry) = idr_get_next((idr), &(id)); \ entry; \ ++id, (entry) = idr_get_next((idr), &(id))) /** * idr_for_each_entry_continue_ul() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. * After normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { struct xarray xa; }; #define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) #define IDA_INIT(name) { \ .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); /** * ida_alloc() - Allocate an unused ID. * @ida: IDA handle. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc(struct ida *ida, gfp_t gfp) { return ida_alloc_range(ida, 0, ~0, gfp); } /** * ida_alloc_min() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) { return ida_alloc_range(ida, min, ~0, gfp); } /** * ida_alloc_max() - Allocate an unused ID. * @ida: IDA handle. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and @max, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) { return ida_alloc_range(ida, 0, max, gfp); } static inline void ida_init(struct ida *ida) { xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } /* * ida_simple_get() and ida_simple_remove() are deprecated. Use * ida_alloc() and ida_free() instead respectively. */ #define ida_simple_get(ida, start, end, gfp) \ ida_alloc_range(ida, start, (end) - 1, gfp) #define ida_simple_remove(ida, id) ida_free(ida, id) static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } #endif /* __IDR_H__ */
9 9 5 5 5 5 5 9 9 5 5 2 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 // SPDX-License-Identifier: GPL-2.0-only #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/delay.h> #include <linux/hpet.h> #include <linux/cpu.h> #include <linux/irq.h> #include <asm/irq_remapping.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/mwait.h> #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt enum hpet_mode { HPET_MODE_UNUSED, HPET_MODE_LEGACY, HPET_MODE_CLOCKEVT, HPET_MODE_DEVICE, }; struct hpet_channel { struct clock_event_device evt; unsigned int num; unsigned int cpu; unsigned int irq; unsigned int in_use; enum hpet_mode mode; unsigned int boot_cfg; char name[10]; }; struct hpet_base { unsigned int nr_channels; unsigned int nr_clockevents; unsigned int boot_cfg; struct hpet_channel *channels; }; #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_MIN_CYCLES 128 #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ bool hpet_msi_disable; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_GENERIC_MSI_IRQ) static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel); static struct irq_domain *hpet_domain; #endif static void __iomem *hpet_virt_address; static struct hpet_base hpet_base; static bool hpet_legacy_int_enabled; static unsigned long hpet_freq; bool boot_hpet_disable; bool hpet_force_user; static bool hpet_verbose; static inline struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt) { return container_of(evt, struct hpet_channel, evt); } inline unsigned int hpet_readl(unsigned int a) { return readl(hpet_virt_address + a); } static inline void hpet_writel(unsigned int d, unsigned int a) { writel(d, hpet_virt_address + a); } static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap(hpet_address, HPET_MMAP_SIZE); } static inline void hpet_clear_mapping(void) { iounmap(hpet_virt_address); hpet_virt_address = NULL; } /* * HPET command line enable / disable */ static int __init hpet_setup(char *str) { while (str) { char *next = strchr(str, ','); if (next) *next++ = 0; if (!strncmp("disable", str, 7)) boot_hpet_disable = true; if (!strncmp("force", str, 5)) hpet_force_user = true; if (!strncmp("verbose", str, 7)) hpet_verbose = true; str = next; } return 1; } __setup("hpet=", hpet_setup); static int __init disable_hpet(char *str) { boot_hpet_disable = true; return 1; } __setup("nohpet", disable_hpet); static inline int is_hpet_capable(void) { return !boot_hpet_disable && hpet_address; } /** * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled */ int is_hpet_enabled(void) { return is_hpet_capable() && hpet_legacy_int_enabled; } EXPORT_SYMBOL_GPL(is_hpet_enabled); static void _hpet_print_config(const char *function, int line) { u32 i, id, period, cfg, status, channels, l, h; pr_info("%s(%d):\n", function, line); id = hpet_readl(HPET_ID); period = hpet_readl(HPET_PERIOD); pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period); cfg = hpet_readl(HPET_CFG); status = hpet_readl(HPET_STATUS); pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status); l = hpet_readl(HPET_COUNTER); h = hpet_readl(HPET_COUNTER+4); pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; for (i = 0; i < channels; i++) { l = hpet_readl(HPET_Tn_CFG(i)); h = hpet_readl(HPET_Tn_CFG(i)+4); pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h); l = hpet_readl(HPET_Tn_CMP(i)); h = hpet_readl(HPET_Tn_CMP(i)+4); pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h); l = hpet_readl(HPET_Tn_ROUTE(i)); h = hpet_readl(HPET_Tn_ROUTE(i)+4); pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h); } } #define hpet_print_config() \ do { \ if (hpet_verbose) \ _hpet_print_config(__func__, __LINE__); \ } while (0) /* * When the HPET driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET static void __init hpet_reserve_platform_timers(void) { struct hpet_data hd; unsigned int i; memset(&hd, 0, sizeof(hd)); hd.hd_phys_address = hpet_address; hd.hd_address = hpet_virt_address; hd.hd_nirqs = hpet_base.nr_channels; /* * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 * is wrong for i8259!) not the output IRQ. Many BIOS writers * don't bother configuring *any* comparator interrupts. */ hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; if (i >= 2) hd.hd_irq[i] = hc->irq; switch (hc->mode) { case HPET_MODE_UNUSED: case HPET_MODE_DEVICE: hc->mode = HPET_MODE_DEVICE; break; case HPET_MODE_CLOCKEVT: case HPET_MODE_LEGACY: hpet_reserve_timer(&hd, hc->num); break; } } hpet_alloc(&hd); } static void __init hpet_select_device_channel(void) { int i; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; /* Associate the first unused channel to /dev/hpet */ if (hc->mode == HPET_MODE_UNUSED) { hc->mode = HPET_MODE_DEVICE; return; } } } #else static inline void hpet_reserve_platform_timers(void) { } static inline void hpet_select_device_channel(void) {} #endif /* Common HPET functions */ static void hpet_stop_counter(void) { u32 cfg = hpet_readl(HPET_CFG); cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } static void hpet_reset_counter(void) { hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); } static void hpet_start_counter(void) { unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } static void hpet_restart_counter(void) { hpet_stop_counter(); hpet_reset_counter(); hpet_start_counter(); } static void hpet_resume_device(void) { force_hpet_resume(); } static void hpet_resume_counter(struct clocksource *cs) { hpet_resume_device(); hpet_restart_counter(); } static void hpet_enable_legacy_int(void) { unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_LEGACY; hpet_writel(cfg, HPET_CFG); hpet_legacy_int_enabled = true; } static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg, cmp, now; uint64_t delta; hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC / HZ)) * evt->mult; delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned int)delta; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(channel)); hpet_writel(cmp, HPET_Tn_CMP(channel)); udelay(1); /* * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL * bit is automatically cleared after the first write. * (See AMD-8111 HyperTransport I/O Hub Data Sheet, * Publication # 24674) */ hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel)); hpet_start_counter(); hpet_print_config(); return 0; } static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } static int hpet_clkevt_legacy_resume(struct clock_event_device *evt) { hpet_enable_legacy_int(); hpet_print_config(); return 0; } static int hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; u32 cnt; s32 res; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; hpet_writel(cnt, HPET_Tn_CMP(channel)); /* * HPETs are a complete disaster. The compare register is * based on a equal comparison and neither provides a less * than or equal functionality (which would require to take * the wraparound into account) nor a simple count down event * mode. Further the write to the comparator register is * delayed internally up to two HPET clock cycles in certain * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even * longer delays. We worked around that by reading back the * compare register, but that required another workaround for * ICH9,10 chips where the first readout after write can * return the old stale value. We already had a minimum * programming delta of 5us enforced, but a NMI or SMI hitting * between the counter readout and the comparator write can * move us behind that point easily. Now instead of reading * the compare register back several times, we make the ETIME * decision based on the following: Return ETIME if the * counter value after the write is less than HPET_MIN_CYCLES * away from the event or if the counter is already ahead of * the event. The minimum programming delta for the generic * clockevents code is set to 1.5 * HPET_MIN_CYCLES. */ res = (s32)(cnt - hpet_readl(HPET_COUNTER)); return res < HPET_MIN_CYCLES ? -ETIME : 0; } static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating) { struct clock_event_device *evt = &hc->evt; evt->rating = rating; evt->irq = hc->irq; evt->name = hc->name; evt->cpumask = cpumask_of(hc->cpu); evt->set_state_oneshot = hpet_clkevt_set_state_oneshot; evt->set_next_event = hpet_clkevt_set_next_event; evt->set_state_shutdown = hpet_clkevt_set_state_shutdown; evt->features = CLOCK_EVT_FEAT_ONESHOT; if (hc->boot_cfg & HPET_TN_PERIODIC) { evt->features |= CLOCK_EVT_FEAT_PERIODIC; evt->set_state_periodic = hpet_clkevt_set_state_periodic; } } static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc) { /* * Start HPET with the boot CPU's cpumask and make it global after * the IO_APIC has been initialized. */ hc->cpu = boot_cpu_data.cpu_index; strscpy(hc->name, "hpet", sizeof(hc->name)); hpet_init_clockevent(hc, 50); hc->evt.tick_resume = hpet_clkevt_legacy_resume; /* * Legacy horrors and sins from the past. HPET used periodic mode * unconditionally forever on the legacy channel 0. Removing the * below hack and using the conditional in hpet_init_clockevent() * makes at least Qemu and one hardware machine fail to boot. * There are two issues which cause the boot failure: * * #1 After the timer delivery test in IOAPIC and the IOAPIC setup * the next interrupt is not delivered despite the HPET channel * being programmed correctly. Reprogramming the HPET after * switching to IOAPIC makes it work again. After fixing this, * the next issue surfaces: * * #2 Due to the unconditional periodic mode availability the Local * APIC timer calibration can hijack the global clockevents * event handler without causing damage. Using oneshot at this * stage makes if hang because the HPET does not get * reprogrammed due to the handler hijacking. Duh, stupid me! * * Both issues require major surgery and especially the kick HPET * again after enabling IOAPIC results in really nasty hackery. * This 'assume periodic works' magic has survived since HPET * support got added, so it's questionable whether this should be * fixed. Both Qemu and the failing hardware machine support * periodic mode despite the fact that both don't advertise it in * the configuration register and both need that extra kick after * switching to IOAPIC. Seems to be a feature... */ hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC; hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic; /* Start HPET legacy interrupts */ hpet_enable_legacy_int(); clockevents_config_and_register(&hc->evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); global_clock_event = &hc->evt; pr_debug("Clockevent registered\n"); } /* * HPET MSI Support */ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_GENERIC_MSI_IRQ) static void hpet_msi_unmask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg |= HPET_TN_ENABLE | HPET_TN_FSB; hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } static void hpet_msi_mask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } static void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) { hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num)); hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4); } static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { hpet_msi_write(irq_data_get_irq_handler_data(data), msg); } static struct irq_chip hpet_msi_controller __ro_after_init = { .name = "HPET-MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, .irq_ack = irq_chip_ack_parent, .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq, irq_hw_number_t hwirq, msi_alloc_info_t *arg) { irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL, handle_edge_irq, arg->data, "edge"); return 0; } static void hpet_msi_free(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq) { irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); } static struct msi_domain_ops hpet_msi_domain_ops = { .msi_init = hpet_msi_init, .msi_free = hpet_msi_free, }; static struct msi_domain_info hpet_msi_domain_info = { .ops = &hpet_msi_domain_ops, .chip = &hpet_msi_controller, .flags = MSI_FLAG_USE_DEF_DOM_OPS, }; static struct irq_domain *hpet_create_irq_domain(int hpet_id) { struct msi_domain_info *domain_info; struct irq_domain *parent, *d; struct fwnode_handle *fn; struct irq_fwspec fwspec; if (x86_vector_domain == NULL) return NULL; domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); if (!domain_info) return NULL; *domain_info = hpet_msi_domain_info; domain_info->data = (void *)(long)hpet_id; fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, hpet_id); if (!fn) { kfree(domain_info); return NULL; } fwspec.fwnode = fn; fwspec.param_count = 1; fwspec.param[0] = hpet_id; parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI); if (!parent) { irq_domain_free_fwnode(fn); kfree(domain_info); return NULL; } if (parent != x86_vector_domain) hpet_msi_controller.name = "IR-HPET-MSI"; d = msi_create_irq_domain(fn, domain_info, parent); if (!d) { irq_domain_free_fwnode(fn); kfree(domain_info); } return d; } static inline int hpet_dev_id(struct irq_domain *domain) { struct msi_domain_info *info = msi_get_domain_info(domain); return (int)(long)info->data; } static int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, int dev_num) { struct irq_alloc_info info; init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_HPET; info.data = hc; info.devid = hpet_dev_id(domain); info.hwirq = dev_num; return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); } static int hpet_clkevt_msi_resume(struct clock_event_device *evt) { struct hpet_channel *hc = clockevent_to_channel(evt); struct irq_data *data = irq_get_irq_data(hc->irq); struct msi_msg msg; /* Restore the MSI msg and unmask the interrupt */ irq_chip_compose_msi_msg(data, &msg); hpet_msi_write(hc, &msg); hpet_msi_unmask(data); return 0; } static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data) { struct hpet_channel *hc = data; struct clock_event_device *evt = &hc->evt; if (!evt->event_handler) { pr_info("Spurious interrupt HPET channel %d\n", hc->num); return IRQ_HANDLED; } evt->event_handler(evt); return IRQ_HANDLED; } static int hpet_setup_msi_irq(struct hpet_channel *hc) { if (request_irq(hc->irq, hpet_msi_interrupt_handler, IRQF_TIMER | IRQF_NOBALANCING, hc->name, hc)) return -1; disable_irq(hc->irq); irq_set_affinity(hc->irq, cpumask_of(hc->cpu)); enable_irq(hc->irq); pr_debug("%s irq %u for MSI\n", hc->name, hc->irq); return 0; } /* Invoked from the hotplug callback on @cpu */ static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu) { struct clock_event_device *evt = &hc->evt; hc->cpu = cpu; per_cpu(cpu_hpet_channel, cpu) = hc; hpet_setup_msi_irq(hc); hpet_init_clockevent(hc, 110); evt->tick_resume = hpet_clkevt_msi_resume; clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); } static struct hpet_channel *hpet_get_unused_clockevent(void) { int i; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use) continue; hc->in_use = 1; return hc; } return NULL; } static int hpet_cpuhp_online(unsigned int cpu) { struct hpet_channel *hc = hpet_get_unused_clockevent(); if (hc) init_one_hpet_msi_clockevent(hc, cpu); return 0; } static int hpet_cpuhp_dead(unsigned int cpu) { struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu); if (!hc) return 0; free_irq(hc->irq, hc); hc->in_use = 0; per_cpu(cpu_hpet_channel, cpu) = NULL; return 0; } static void __init hpet_select_clockevents(void) { unsigned int i; hpet_base.nr_clockevents = 0; /* No point if MSI is disabled or CPU has an Always Running APIC Timer */ if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT)) return; hpet_print_config(); hpet_domain = hpet_create_irq_domain(hpet_blockid); if (!hpet_domain) return; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; int irq; if (hc->mode != HPET_MODE_UNUSED) continue; /* Only consider HPET channel with MSI support */ if (!(hc->boot_cfg & HPET_TN_FSB_CAP)) continue; sprintf(hc->name, "hpet%d", i); irq = hpet_assign_irq(hpet_domain, hc, hc->num); if (irq <= 0) continue; hc->irq = irq; hc->mode = HPET_MODE_CLOCKEVT; if (++hpet_base.nr_clockevents == num_possible_cpus()) break; } pr_info("%d channels of %d reserved for per-cpu timers\n", hpet_base.nr_channels, hpet_base.nr_clockevents); } #else static inline void hpet_select_clockevents(void) { } #define hpet_cpuhp_online NULL #define hpet_cpuhp_dead NULL #endif /* * Clock source related code */ #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) /* * Reading the HPET counter is a very slow operation. If a large number of * CPUs are trying to access the HPET counter simultaneously, it can cause * massive delays and slow down system performance dramatically. This may * happen when HPET is the default clock source instead of TSC. For a * really large system with hundreds of CPUs, the slowdown may be so * severe, that it can actually crash the system because of a NMI watchdog * soft lockup, for example. * * If multiple CPUs are trying to access the HPET counter at the same time, * we don't actually need to read the counter multiple times. Instead, the * other CPUs can use the counter value read by the first CPU in the group. * * This special feature is only enabled on x86-64 systems. It is unlikely * that 32-bit x86 systems will have enough CPUs to require this feature * with its associated locking overhead. We also need 64-bit atomic read. * * The lock and the HPET value are stored together and can be read in a * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t * is 32 bits in size. */ union hpet_lock { struct { arch_spinlock_t lock; u32 value; }; u64 lockval; }; static union hpet_lock hpet __cacheline_aligned = { { .lock = __ARCH_SPIN_LOCK_UNLOCKED, }, }; static u64 read_hpet(struct clocksource *cs) { unsigned long flags; union hpet_lock old, new; BUILD_BUG_ON(sizeof(union hpet_lock) != 8); /* * Read HPET directly if in NMI. */ if (in_nmi()) return (u64)hpet_readl(HPET_COUNTER); /* * Read the current state of the lock and HPET value atomically. */ old.lockval = READ_ONCE(hpet.lockval); if (arch_spin_is_locked(&old.lock)) goto contended; local_irq_save(flags); if (arch_spin_trylock(&hpet.lock)) { new.value = hpet_readl(HPET_COUNTER); /* * Use WRITE_ONCE() to prevent store tearing. */ WRITE_ONCE(hpet.value, new.value); arch_spin_unlock(&hpet.lock); local_irq_restore(flags); return (u64)new.value; } local_irq_restore(flags); contended: /* * Contended case * -------------- * Wait until the HPET value change or the lock is free to indicate * its value is up-to-date. * * It is possible that old.value has already contained the latest * HPET value while the lock holder was in the process of releasing * the lock. Checking for lock state change will enable us to return * the value immediately instead of waiting for the next HPET reader * to come along. */ do { cpu_relax(); new.lockval = READ_ONCE(hpet.lockval); } while ((new.value == old.value) && arch_spin_is_locked(&new.lock)); return (u64)new.value; } #else /* * For UP or 32-bit. */ static u64 read_hpet(struct clocksource *cs) { return (u64)hpet_readl(HPET_COUNTER); } #endif static struct clocksource clocksource_hpet = { .name = "hpet", .rating = 250, .read = read_hpet, .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, }; /* * AMD SB700 based systems with spread spectrum enabled use a SMM based * HPET emulation to provide proper frequency setting. * * On such systems the SMM code is initialized with the first HPET register * access and takes some time to complete. During this time the config * register reads 0xffffffff. We check for max 1000 loops whether the * config register reads a non-0xffffffff value to make sure that the * HPET is up and running before we proceed any further. * * A counting loop is safe, as the HPET access takes thousands of CPU cycles. * * On non-SB700 based machines this check is only done once and has no * side effects. */ static bool __init hpet_cfg_working(void) { int i; for (i = 0; i < 1000; i++) { if (hpet_readl(HPET_CFG) != 0xFFFFFFFF) return true; } pr_warn("Config register invalid. Disabling HPET\n"); return false; } static bool __init hpet_counting(void) { u64 start, now, t1; hpet_restart_counter(); t1 = hpet_readl(HPET_COUNTER); start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for * 200000 TSC cycles is safe: * 4 GHz == 50us * 1 GHz == 200us */ do { if (t1 != hpet_readl(HPET_COUNTER)) return true; now = rdtsc(); } while ((now - start) < 200000UL); pr_warn("Counter not counting. HPET disabled\n"); return false; } static bool __init mwait_pc10_supported(void) { unsigned int eax, ebx, ecx, mwait_substates; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) return false; if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) return false; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && (ecx & CPUID5_ECX_INTERRUPT_BREAK) && (mwait_substates & (0xF << 28)); } /* * Check whether the system supports PC10. If so force disable HPET as that * stops counting in PC10. This check is overbroad as it does not take any * of the following into account: * * - ACPI tables * - Enablement of intel_idle * - Command line arguments which limit intel_idle C-state support * * That's perfectly fine. HPET is a piece of hardware designed by committee * and the only reasons why it is still in use on modern systems is the * fact that it is impossible to reliably query TSC and CPU frequency via * CPUID or firmware. * * If HPET is functional it is useful for calibrating TSC, but this can be * done via PMTIMER as well which seems to be the last remaining timer on * X86/INTEL platforms that has not been completely wreckaged by feature * creep. * * In theory HPET support should be removed altogether, but there are older * systems out there which depend on it because TSC and APIC timer are * dysfunctional in deeper C-states. * * It's only 20 years now that hardware people have been asked to provide * reliable and discoverable facilities which can be used for timekeeping * and per CPU timer interrupts. * * The probability that this problem is going to be solved in the * foreseeable future is close to zero, so the kernel has to be cluttered * with heuristics to keep up with the ever growing amount of hardware and * firmware trainwrecks. Hopefully some day hardware people will understand * that the approach of "This can be fixed in software" is not sustainable. * Hope dies last... */ static bool __init hpet_is_pc10_damaged(void) { unsigned long long pcfg; /* Check whether PC10 substates are supported */ if (!mwait_pc10_supported()) return false; /* Check whether PC10 is enabled in PKG C-state limit */ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); if ((pcfg & 0xF) < 8) return false; if (hpet_force_user) { pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); return false; } pr_info("HPET dysfunctional in PC10. Force disabled.\n"); boot_hpet_disable = true; return true; } /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ int __init hpet_enable(void) { u32 hpet_period, cfg, id, irq; unsigned int i, channels; struct hpet_channel *hc; u64 freq; if (!is_hpet_capable()) return 0; if (hpet_is_pc10_damaged()) return 0; hpet_set_mapping(); if (!hpet_virt_address) return 0; /* Validate that the config register is working */ if (!hpet_cfg_working()) goto out_nohpet; /* * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; /* The period is a femtoseconds value. Convert it to a frequency. */ freq = FSEC_PER_SEC; do_div(freq, hpet_period); hpet_freq = freq; /* * Read the HPET ID register to retrieve the IRQ routing * information and the number of channels */ id = hpet_readl(HPET_ID); hpet_print_config(); /* This is the HPET channel number which is zero based */ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; /* * The legacy routing mode needs at least two channels, tick timer * and the rtc emulation channel. */ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2) goto out_nohpet; hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL); if (!hc) { pr_warn("Disabling HPET.\n"); goto out_nohpet; } hpet_base.channels = hc; hpet_base.nr_channels = channels; /* Read, store and sanitize the global configuration */ cfg = hpet_readl(HPET_CFG); hpet_base.boot_cfg = cfg; cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); hpet_writel(cfg, HPET_CFG); if (cfg) pr_warn("Global config: Unknown bits %#x\n", cfg); /* Read, store and sanitize the per channel configuration */ for (i = 0; i < channels; i++, hc++) { hc->num = i; cfg = hpet_readl(HPET_Tn_CFG(i)); hc->boot_cfg = cfg; irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; hc->irq = irq; cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(i)); cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_FSB | HPET_TN_FSB_CAP); if (cfg) pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg); } hpet_print_config(); /* * Validate that the counter is counting. This needs to be done * after sanitizing the config registers to properly deal with * force enabled HPETs. */ if (!hpet_counting()) goto out_nohpet; if (tsc_clocksource_watchdog_disabled()) clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY; clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(&hpet_base.channels[0]); hpet_base.channels[0].mode = HPET_MODE_LEGACY; if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC)) hpet_base.channels[1].mode = HPET_MODE_LEGACY; return 1; } return 0; out_nohpet: kfree(hpet_base.channels); hpet_base.channels = NULL; hpet_base.nr_channels = 0; hpet_clear_mapping(); hpet_address = 0; return 0; } /* * The late initialization runs after the PCI quirks have been invoked * which might have detected a system on which the HPET can be enforced. * * Also, the MSI machinery is not working yet when the HPET is initialized * early. * * If the HPET is enabled, then: * * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents * 3) Setup /dev/hpet if CONFIG_HPET=y * 4) Register hotplug callbacks when clockevents are available */ static __init int hpet_late_init(void) { int ret; if (!hpet_address) { if (!force_hpet_address) return -ENODEV; hpet_address = force_hpet_address; hpet_enable(); } if (!hpet_virt_address) return -ENODEV; hpet_select_device_channel(); hpet_select_clockevents(); hpet_reserve_platform_timers(); hpet_print_config(); if (!hpet_base.nr_clockevents) return 0; ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", hpet_cpuhp_online, NULL); if (ret) return ret; ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "x86/hpet:dead", NULL, hpet_cpuhp_dead); if (ret) goto err_cpuhp; return 0; err_cpuhp: cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE); return ret; } fs_initcall(hpet_late_init); void hpet_disable(void) { unsigned int i; u32 cfg; if (!is_hpet_capable() || !hpet_virt_address) return; /* Restore boot configuration with the enable bit cleared */ cfg = hpet_base.boot_cfg; cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); /* Restore the channel boot configuration */ for (i = 0; i < hpet_base.nr_channels; i++) hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i)); /* If the HPET was enabled at boot time, reenable it */ if (hpet_base.boot_cfg & HPET_CFG_ENABLE) hpet_writel(hpet_base.boot_cfg, HPET_CFG); } #ifdef CONFIG_HPET_EMULATE_RTC /* * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET * is enabled, we support RTC interrupt functionality in software. * * RTC has 3 kinds of interrupts: * * 1) Update Interrupt - generate an interrupt, every second, when the * RTC clock is updated * 2) Alarm Interrupt - generate an interrupt at a specific time of day * 3) Periodic Interrupt - generate periodic interrupt, with frequencies * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2) * * (1) and (2) above are implemented using polling at a frequency of 64 Hz: * DEFAULT_RTC_INT_FREQ. * * The exact frequency is a tradeoff between accuracy and interrupt overhead. * * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency, * if it's higher. */ #include <linux/mc146818rtc.h> #include <linux/rtc.h> #define DEFAULT_RTC_INT_FREQ 64 #define DEFAULT_RTC_SHIFT 6 #define RTC_NUM_INTS 1 static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; static u32 hpet_t1_cmp; static u32 hpet_default_delta; static u32 hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* * Check that the HPET counter c1 is ahead of c2 */ static inline int hpet_cnt_ahead(u32 c1, u32 c2) { return (s32)(c2 - c1) < 0; } /* * Registers a IRQ handler. */ int hpet_register_irq_handler(rtc_irq_handler handler) { if (!is_hpet_enabled()) return -ENODEV; if (irq_handler) return -EBUSY; irq_handler = handler; return 0; } EXPORT_SYMBOL_GPL(hpet_register_irq_handler); /* * Deregisters the IRQ handler registered with hpet_register_irq_handler() * and does cleanup. */ void hpet_unregister_irq_handler(rtc_irq_handler handler) { if (!is_hpet_enabled()) return; irq_handler = NULL; hpet_rtc_flags = 0; } EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); /* * Channel 1 for RTC emulation. We use one shot mode, as periodic mode * is not supported by all HPET implementations for channel 1. * * hpet_rtc_timer_init() is called when the rtc is initialized. */ int hpet_rtc_timer_init(void) { unsigned int cfg, cnt, delta; unsigned long flags; if (!is_hpet_enabled()) return 0; if (!hpet_default_delta) { struct clock_event_device *evt = &hpet_base.channels[0].evt; uint64_t clc; clc = (uint64_t) evt->mult * NSEC_PER_SEC; clc >>= evt->shift + DEFAULT_RTC_SHIFT; hpet_default_delta = clc; } if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) delta = hpet_default_delta; else delta = hpet_pie_delta; local_irq_save(flags); cnt = delta + hpet_readl(HPET_COUNTER); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); local_irq_restore(flags); return 1; } EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); static void hpet_disable_rtc_channel(void) { u32 cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T1_CFG); } /* * The functions below are called from rtc driver. * Return 0 if HPET is not being used. * Otherwise do the necessary changes and return 1. */ int hpet_mask_rtc_irq_bit(unsigned long bit_mask) { if (!is_hpet_enabled()) return 0; hpet_rtc_flags &= ~bit_mask; if (unlikely(!hpet_rtc_flags)) hpet_disable_rtc_channel(); return 1; } EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); int hpet_set_rtc_irq_bit(unsigned long bit_mask) { unsigned long oldbits = hpet_rtc_flags; if (!is_hpet_enabled()) return 0; hpet_rtc_flags |= bit_mask; if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) hpet_prev_update_sec = -1; if (!oldbits) hpet_rtc_timer_init(); return 1; } EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) { if (!is_hpet_enabled()) return 0; hpet_alarm_time.tm_hour = hrs; hpet_alarm_time.tm_min = min; hpet_alarm_time.tm_sec = sec; return 1; } EXPORT_SYMBOL_GPL(hpet_set_alarm_time); int hpet_set_periodic_freq(unsigned long freq) { uint64_t clc; if (!is_hpet_enabled()) return 0; if (freq <= DEFAULT_RTC_INT_FREQ) { hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; } else { struct clock_event_device *evt = &hpet_base.channels[0].evt; clc = (uint64_t) evt->mult * NSEC_PER_SEC; do_div(clc, freq); clc >>= evt->shift; hpet_pie_delta = clc; hpet_pie_limit = 0; } return 1; } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); int hpet_rtc_dropped_irq(void) { return is_hpet_enabled(); } EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); static void hpet_rtc_timer_reinit(void) { unsigned int delta; int lost_ints = -1; if (unlikely(!hpet_rtc_flags)) hpet_disable_rtc_channel(); if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) delta = hpet_default_delta; else delta = hpet_pie_delta; /* * Increment the comparator value until we are ahead of the * current count. */ do { hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) hpet_pie_count += lost_ints; if (printk_ratelimit()) pr_warn("Lost %d RTC interrupts\n", lost_ints); } } irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) { struct rtc_time curr_time; unsigned long rtc_int_flag = 0; hpet_rtc_timer_reinit(); memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } } if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { if (hpet_prev_update_sec >= 0) rtc_int_flag = RTC_UF; hpet_prev_update_sec = curr_time.tm_sec; } if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) { rtc_int_flag |= RTC_PF; hpet_pie_count = 0; } if (hpet_rtc_flags & RTC_AIE && (curr_time.tm_sec == hpet_alarm_time.tm_sec) && (curr_time.tm_min == hpet_alarm_time.tm_min) && (curr_time.tm_hour == hpet_alarm_time.tm_hour)) rtc_int_flag |= RTC_AF; if (rtc_int_flag) { rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); if (irq_handler) irq_handler(rtc_int_flag, dev_id); } return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(hpet_rtc_interrupt); #endif
38 38 38 38 35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 // SPDX-License-Identifier: GPL-2.0-or-later /* * Crypto library utility functions * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <linux/unaligned.h> #include <crypto/utils.h> #include <linux/module.h> /* * XOR @len bytes from @src1 and @src2 together, writing the result to @dst * (which may alias one of the sources). Don't call this directly; call * crypto_xor() or crypto_xor_cpy() instead. */ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len) { int relalign = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { int size = sizeof(unsigned long); int d = (((unsigned long)dst ^ (unsigned long)src1) | ((unsigned long)dst ^ (unsigned long)src2)) & (size - 1); relalign = d ? 1 << __ffs(d) : size; /* * If we care about alignment, process as many bytes as * needed to advance dst and src to values whose alignments * equal their relative alignment. This will allow us to * process the remainder of the input using optimal strides. */ while (((unsigned long)dst & (relalign - 1)) && len > 0) { *dst++ = *src1++ ^ *src2++; len--; } } while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { u64 l = get_unaligned((u64 *)src1) ^ get_unaligned((u64 *)src2); put_unaligned(l, (u64 *)dst); } else { *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2; } dst += 8; src1 += 8; src2 += 8; len -= 8; } while (len >= 4 && !(relalign & 3)) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { u32 l = get_unaligned((u32 *)src1) ^ get_unaligned((u32 *)src2); put_unaligned(l, (u32 *)dst); } else { *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2; } dst += 4; src1 += 4; src2 += 4; len -= 4; } while (len >= 2 && !(relalign & 1)) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { u16 l = get_unaligned((u16 *)src1) ^ get_unaligned((u16 *)src2); put_unaligned(l, (u16 *)dst); } else { *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2; } dst += 2; src1 += 2; src2 += 2; len -= 2; } while (len--) *dst++ = *src1++ ^ *src2++; } EXPORT_SYMBOL_GPL(__crypto_xor); MODULE_DESCRIPTION("Crypto library utility functions"); MODULE_LICENSE("GPL");
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 // SPDX-License-Identifier: GPL-2.0-or-later /* * dma-bufs for virtio exported objects * * Copyright (C) 2020 Google, Inc. */ #include <linux/module.h> #include <linux/virtio_dma_buf.h> /** * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops * struct embedded in a virtio_dma_buf_ops. * * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf * for an virtio exported object that can be queried by other virtio drivers * for the object's UUID. */ struct dma_buf *virtio_dma_buf_export (const struct dma_buf_export_info *exp_info) { const struct virtio_dma_buf_ops *virtio_ops = container_of(exp_info->ops, const struct virtio_dma_buf_ops, ops); if (!exp_info->ops || exp_info->ops->attach != &virtio_dma_buf_attach || !virtio_ops->get_uuid) { return ERR_PTR(-EINVAL); } return dma_buf_export(exp_info); } EXPORT_SYMBOL(virtio_dma_buf_export); /** * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs */ int virtio_dma_buf_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { int ret; const struct virtio_dma_buf_ops *ops = container_of(dma_buf->ops, const struct virtio_dma_buf_ops, ops); if (ops->device_attach) { ret = ops->device_attach(dma_buf, attach); if (ret) return ret; } return 0; } EXPORT_SYMBOL(virtio_dma_buf_attach); /** * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf * @dma_buf: buffer to query */ bool is_virtio_dma_buf(struct dma_buf *dma_buf) { return dma_buf->ops->attach == &virtio_dma_buf_attach; } EXPORT_SYMBOL(is_virtio_dma_buf); /** * virtio_dma_buf_get_uuid - gets a virtio dma-buf's exported object's uuid * @dma_buf: [in] buffer to query * @uuid: [out] the uuid * * Returns: 0 on success, negative on failure. */ int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, uuid_t *uuid) { const struct virtio_dma_buf_ops *ops = container_of(dma_buf->ops, const struct virtio_dma_buf_ops, ops); if (!is_virtio_dma_buf(dma_buf)) return -EINVAL; return ops->get_uuid(dma_buf, uuid); } EXPORT_SYMBOL(virtio_dma_buf_get_uuid); MODULE_DESCRIPTION("dma-bufs for virtio exported objects"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("DMA_BUF");
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/team/team_mode_roundrobin.c - Round-robin mode for team * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> */ #include <linux/kernel.h> #include <linux/types.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/if_team.h> struct rr_priv { unsigned int sent_packets; }; static struct rr_priv *rr_priv(struct team *team) { return (struct rr_priv *) &team->mode_priv; } static bool rr_transmit(struct team *team, struct sk_buff *skb) { struct team_port *port; int port_index; port_index = team_num_to_port_index(team, rr_priv(team)->sent_packets++); port = team_get_port_by_index_rcu(team, port_index); if (unlikely(!port)) goto drop; port = team_get_first_port_txable_rcu(team, port); if (unlikely(!port)) goto drop; if (team_dev_queue_xmit(team, port, skb)) return false; return true; drop: dev_kfree_skb_any(skb); return false; } static const struct team_mode_ops rr_mode_ops = { .transmit = rr_transmit, .port_enter = team_modeop_port_enter, .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode rr_mode = { .kind = "roundrobin", .owner = THIS_MODULE, .priv_size = sizeof(struct rr_priv), .ops = &rr_mode_ops, .lag_tx_type = NETDEV_LAG_TX_TYPE_ROUNDROBIN, }; static int __init rr_init_module(void) { return team_mode_register(&rr_mode); } static void __exit rr_cleanup_module(void) { team_mode_unregister(&rr_mode); } module_init(rr_init_module); module_exit(rr_cleanup_module); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); MODULE_DESCRIPTION("Round-robin mode for team"); MODULE_ALIAS_TEAM_MODE("roundrobin");
16 1 1 1 2 2 1 1 1 3 3 17 1 2 1 1 1 2 3 1 1 1 1 1 1 1 1 1 1 2 3 5 1 1 1 1 1 2 1 2 1 1 1 2 10 2 1 1 1 5 1 1 1 1 2 2 1 1 2 1 1 1 1 1 2 6 1 1 1 1 1 1 1916 1827 5 5 9 2 7 1 2 1 1 3 1 1 1 1 2 1 1 1 2 1 15 5 3 7 9 1 3 1955 3 4 17 1 2 1 1 6 1 6 104 1 1813 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 // SPDX-License-Identifier: GPL-2.0 #include <linux/capability.h> #include <linux/compat.h> #include <linux/blkdev.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/blkpg.h> #include <linux/hdreg.h> #include <linux/backing-dev.h> #include <linux/fs.h> #include <linux/blktrace_api.h> #include <linux/pr.h> #include <linux/uaccess.h> #include <linux/pagemap.h> #include <linux/io_uring/cmd.h> #include <uapi/linux/blkdev.h> #include "blk.h" static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition __user *upart, int op) { struct gendisk *disk = bdev->bd_disk; struct blkpg_partition p; sector_t start, length, capacity, end; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; if (bdev_is_partition(bdev)) return -EINVAL; if (p.pno <= 0) return -EINVAL; if (op == BLKPG_DEL_PARTITION) return bdev_del_partition(disk, p.pno); if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) return -EINVAL; /* Check that the partition is aligned to the block size */ if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) return -EINVAL; start = p.start >> SECTOR_SHIFT; length = p.length >> SECTOR_SHIFT; capacity = get_capacity(disk); if (check_add_overflow(start, length, &end)) return -EINVAL; if (start >= capacity || end > capacity) return -EINVAL; switch (op) { case BLKPG_ADD_PARTITION: return bdev_add_partition(disk, p.pno, start, length); case BLKPG_RESIZE_PARTITION: return bdev_resize_partition(disk, p.pno, start, length); default: return -EINVAL; } } static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) { struct blkpg_partition __user *udata; int op; if (get_user(op, &arg->op) || get_user(udata, &arg->data)) return -EFAULT; return blkpg_do_ioctl(bdev, udata, op); } #ifdef CONFIG_COMPAT struct compat_blkpg_ioctl_arg { compat_int_t op; compat_int_t flags; compat_int_t datalen; compat_caddr_t data; }; static int compat_blkpg_ioctl(struct block_device *bdev, struct compat_blkpg_ioctl_arg __user *arg) { compat_caddr_t udata; int op; if (get_user(op, &arg->op) || get_user(udata, &arg->data)) return -EFAULT; return blkpg_do_ioctl(bdev, compat_ptr(udata), op); } #endif /* * Check that [start, start + len) is a valid range from the block device's * perspective, including verifying that it can be correctly translated into * logical block addresses. */ static int blk_validate_byte_range(struct block_device *bdev, uint64_t start, uint64_t len) { unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; uint64_t end; if ((start | len) & bs_mask) return -EINVAL; if (!len) return -EINVAL; if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) return -EINVAL; return 0; } static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { uint64_t range[2], start, len; struct bio *prev = NULL, *bio; sector_t sector, nr_sects; struct blk_plug plug; int err; if (copy_from_user(range, (void __user *)arg, sizeof(range))) return -EFAULT; start = range[0]; len = range[1]; if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (!(mode & BLK_OPEN_WRITE)) return -EBADF; if (bdev_read_only(bdev)) return -EPERM; err = blk_validate_byte_range(bdev, start, len); if (err) return err; filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) goto fail; sector = start >> SECTOR_SHIFT; nr_sects = len >> SECTOR_SHIFT; blk_start_plug(&plug); while (1) { if (fatal_signal_pending(current)) { if (prev) bio_await_chain(prev); err = -EINTR; goto out_unplug; } bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, GFP_KERNEL); if (!bio) break; prev = bio_chain_and_submit(prev, bio); } if (prev) { err = submit_bio_wait(prev); if (err == -EOPNOTSUPP) err = 0; bio_put(prev); } out_unplug: blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); return err; } static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, void __user *argp) { uint64_t start, len, end; uint64_t range[2]; int err; if (!(mode & BLK_OPEN_WRITE)) return -EBADF; if (!bdev_max_secure_erase_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(range, argp, sizeof(range))) return -EFAULT; start = range[0]; len = range[1]; if ((start & 511) || (len & 511)) return -EINVAL; if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) return -EINVAL; filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); return err; } static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { uint64_t range[2]; uint64_t start, end, len; int err; if (!(mode & BLK_OPEN_WRITE)) return -EBADF; if (copy_from_user(range, (void __user *)arg, sizeof(range))) return -EFAULT; start = range[0]; len = range[1]; end = start + len - 1; if (start & 511) return -EINVAL; if (len & 511) return -EINVAL; if (end >= (uint64_t)bdev_nr_bytes(bdev)) return -EINVAL; if (end < start) return -EINVAL; /* Invalidate the page cache, including dirty pages */ filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) goto fail; err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); fail: filemap_invalidate_unlock(bdev->bd_mapping); return err; } static int put_ushort(unsigned short __user *argp, unsigned short val) { return put_user(val, argp); } static int put_int(int __user *argp, int val) { return put_user(val, argp); } static int put_uint(unsigned int __user *argp, unsigned int val) { return put_user(val, argp); } static int put_long(long __user *argp, long val) { return put_user(val, argp); } static int put_ulong(unsigned long __user *argp, unsigned long val) { return put_user(val, argp); } static int put_u64(u64 __user *argp, u64 val) { return put_user(val, argp); } #ifdef CONFIG_COMPAT static int compat_put_long(compat_long_t __user *argp, long val) { return put_user(val, argp); } static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) { return put_user(val, argp); } #endif #ifdef CONFIG_COMPAT /* * This is the equivalent of compat_ptr_ioctl(), to be used by block * drivers that implement only commands that are completely compatible * between 32-bit and 64-bit user space */ int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned cmd, unsigned long arg) { struct gendisk *disk = bdev->bd_disk; if (disk->fops->ioctl) return disk->fops->ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); return -ENOIOCTLCMD; } EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); #endif static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) { /* no sense to make reservations for partitions */ if (bdev_is_partition(bdev)) return false; if (capable(CAP_SYS_ADMIN)) return true; /* * Only allow unprivileged reservations if the file descriptor is open * for writing. */ return mode & BLK_OPEN_WRITE; } static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, struct pr_registration __user *arg) { const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; struct pr_registration reg; if (!blkdev_pr_allowed(bdev, mode)) return -EPERM; if (!ops || !ops->pr_register) return -EOPNOTSUPP; if (copy_from_user(&reg, arg, sizeof(reg))) return -EFAULT; if (reg.flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); } static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, struct pr_reservation __user *arg) { const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; struct pr_reservation rsv; if (!blkdev_pr_allowed(bdev, mode)) return -EPERM; if (!ops || !ops->pr_reserve) return -EOPNOTSUPP; if (copy_from_user(&rsv, arg, sizeof(rsv))) return -EFAULT; if (rsv.flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); } static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, struct pr_reservation __user *arg) { const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; struct pr_reservation rsv; if (!blkdev_pr_allowed(bdev, mode)) return -EPERM; if (!ops || !ops->pr_release) return -EOPNOTSUPP; if (copy_from_user(&rsv, arg, sizeof(rsv))) return -EFAULT; if (rsv.flags) return -EOPNOTSUPP; return ops->pr_release(bdev, rsv.key, rsv.type); } static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, struct pr_preempt __user *arg, bool abort) { const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; struct pr_preempt p; if (!blkdev_pr_allowed(bdev, mode)) return -EPERM; if (!ops || !ops->pr_preempt) return -EOPNOTSUPP; if (copy_from_user(&p, arg, sizeof(p))) return -EFAULT; if (p.flags) return -EOPNOTSUPP; return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); } static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, struct pr_clear __user *arg) { const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; struct pr_clear c; if (!blkdev_pr_allowed(bdev, mode)) return -EPERM; if (!ops || !ops->pr_clear) return -EOPNOTSUPP; if (copy_from_user(&c, arg, sizeof(c))) return -EFAULT; if (c.flags) return -EOPNOTSUPP; return ops->pr_clear(bdev, c.key); } static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, unsigned long arg) { if (!capable(CAP_SYS_ADMIN)) return -EACCES; mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) bdev->bd_holder_ops->sync(bdev); else { mutex_unlock(&bdev->bd_holder_lock); sync_blockdev(bdev); } invalidate_bdev(bdev); return 0; } static int blkdev_roset(struct block_device *bdev, unsigned cmd, unsigned long arg) { int ret, n; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (get_user(n, (int __user *)arg)) return -EFAULT; if (bdev->bd_disk->fops->set_read_only) { ret = bdev->bd_disk->fops->set_read_only(bdev, n); if (ret) return ret; } if (n) bdev_set_flag(bdev, BD_READ_ONLY); else bdev_clear_flag(bdev, BD_READ_ONLY); return 0; } static int blkdev_getgeo(struct block_device *bdev, struct hd_geometry __user *argp) { struct gendisk *disk = bdev->bd_disk; struct hd_geometry geo; int ret; if (!argp) return -EINVAL; if (!disk->fops->getgeo) return -ENOTTY; /* * We need to set the startsect first, the driver may * want to override it. */ memset(&geo, 0, sizeof(geo)); geo.start = get_start_sect(bdev); ret = disk->fops->getgeo(bdev, &geo); if (ret) return ret; if (copy_to_user(argp, &geo, sizeof(geo))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT struct compat_hd_geometry { unsigned char heads; unsigned char sectors; unsigned short cylinders; u32 start; }; static int compat_hdio_getgeo(struct block_device *bdev, struct compat_hd_geometry __user *ugeo) { struct gendisk *disk = bdev->bd_disk; struct hd_geometry geo; int ret; if (!ugeo) return -EINVAL; if (!disk->fops->getgeo) return -ENOTTY; memset(&geo, 0, sizeof(geo)); /* * We need to set the startsect first, the driver may * want to override it. */ geo.start = get_start_sect(bdev); ret = disk->fops->getgeo(bdev, &geo); if (ret) return ret; ret = copy_to_user(ugeo, &geo, 4); ret |= put_user(geo.start, &ugeo->start); if (ret) ret = -EFAULT; return ret; } #endif /* set the logical block size */ static int blkdev_bszset(struct file *file, blk_mode_t mode, int __user *argp) { // this one might be file_inode(file)->i_rdev - a rare valid // use of file_inode() for those. dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; struct file *excl_file; int ret, n; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (!argp) return -EINVAL; if (get_user(n, argp)) return -EFAULT; if (mode & BLK_OPEN_EXCL) return set_blocksize(file, n); excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); if (IS_ERR(excl_file)) return -EBUSY; ret = set_blocksize(excl_file, n); fput(excl_file); return ret; } /* * Common commands that are handled the same way on native and compat * user space. Note the separate arg/argp parameters that are needed * to deal with the compat_ptr() conversion. */ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg, void __user *argp) { unsigned int max_sectors; switch (cmd) { case BLKFLSBUF: return blkdev_flushbuf(bdev, cmd, arg); case BLKROSET: return blkdev_roset(bdev, cmd, arg); case BLKDISCARD: return blk_ioctl_discard(bdev, mode, arg); case BLKSECDISCARD: return blk_ioctl_secure_erase(bdev, mode, argp); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); case BLKGETDISKSEQ: return put_u64(argp, bdev->bd_disk->diskseq); case BLKREPORTZONE: return blkdev_report_zones_ioctl(bdev, cmd, arg); case BLKRESETZONE: case BLKOPENZONE: case BLKCLOSEZONE: case BLKFINISHZONE: return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); case BLKGETZONESZ: return put_uint(argp, bdev_zone_sectors(bdev)); case BLKGETNRZONES: return put_uint(argp, bdev_nr_zones(bdev)); case BLKROGET: return put_int(argp, bdev_read_only(bdev) != 0); case BLKSSZGET: /* get block device logical block size */ return put_int(argp, bdev_logical_block_size(bdev)); case BLKPBSZGET: /* get block device physical block size */ return put_uint(argp, bdev_physical_block_size(bdev)); case BLKIOMIN: return put_uint(argp, bdev_io_min(bdev)); case BLKIOOPT: return put_uint(argp, bdev_io_opt(bdev)); case BLKALIGNOFF: return put_int(argp, bdev_alignment_offset(bdev)); case BLKDISCARDZEROES: return put_uint(argp, 0); case BLKSECTGET: max_sectors = min_t(unsigned int, USHRT_MAX, queue_max_sectors(bdev_get_queue(bdev))); return put_ushort(argp, max_sectors); case BLKROTATIONAL: return put_ushort(argp, !bdev_nonrot(bdev)); case BLKRASET: case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (bdev_is_partition(bdev)) return -EINVAL; return disk_scan_partitions(bdev->bd_disk, mode | BLK_OPEN_STRICT_SCAN); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); case IOC_PR_REGISTER: return blkdev_pr_register(bdev, mode, argp); case IOC_PR_RESERVE: return blkdev_pr_reserve(bdev, mode, argp); case IOC_PR_RELEASE: return blkdev_pr_release(bdev, mode, argp); case IOC_PR_PREEMPT: return blkdev_pr_preempt(bdev, mode, argp, false); case IOC_PR_PREEMPT_ABORT: return blkdev_pr_preempt(bdev, mode, argp, true); case IOC_PR_CLEAR: return blkdev_pr_clear(bdev, mode, argp); default: return -ENOIOCTLCMD; } } /* * Always keep this in sync with compat_blkdev_ioctl() * to handle all incompatible commands in both functions. * * New commands must be compatible and go into blkdev_common_ioctl */ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); void __user *argp = (void __user *)arg; blk_mode_t mode = file_to_blk_mode(file); int ret; switch (cmd) { /* These need separate implementations for the data structure */ case HDIO_GETGEO: return blkdev_getgeo(bdev, argp); case BLKPG: return blkpg_ioctl(bdev, argp); /* Compat mode returns 32-bit data instead of 'long' */ case BLKRAGET: case BLKFRAGET: if (!argp) return -EINVAL; return put_long(argp, (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: if (bdev_nr_sectors(bdev) > ~0UL) return -EFBIG; return put_ulong(argp, bdev_nr_sectors(bdev)); /* The data is compatible, but the command number is different */ case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ return put_int(argp, block_size(bdev)); case BLKBSZSET: return blkdev_bszset(file, mode, argp); case BLKGETSIZE64: return put_u64(argp, bdev_nr_bytes(bdev)); /* Incompatible alignment on i386 */ case BLKTRACESETUP: return blk_trace_ioctl(bdev, cmd, argp); default: break; } ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); if (ret != -ENOIOCTLCMD) return ret; if (!bdev->bd_disk->fops->ioctl) return -ENOTTY; return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); } #ifdef CONFIG_COMPAT #define BLKBSZGET_32 _IOR(0x12, 112, int) #define BLKBSZSET_32 _IOW(0x12, 113, int) #define BLKGETSIZE64_32 _IOR(0x12, 114, int) /* Most of the generic ioctls are handled in the normal fallback path. This assumes the blkdev's low level compat_ioctl always returns ENOIOCTLCMD for unknown ioctls. */ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret; void __user *argp = compat_ptr(arg); struct block_device *bdev = I_BDEV(file->f_mapping->host); struct gendisk *disk = bdev->bd_disk; blk_mode_t mode = file_to_blk_mode(file); switch (cmd) { /* These need separate implementations for the data structure */ case HDIO_GETGEO: return compat_hdio_getgeo(bdev, argp); case BLKPG: return compat_blkpg_ioctl(bdev, argp); /* Compat mode returns 32-bit data instead of 'long' */ case BLKRAGET: case BLKFRAGET: if (!argp) return -EINVAL; return compat_put_long(argp, (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) return -EFBIG; return compat_put_ulong(argp, bdev_nr_sectors(bdev)); /* The data is compatible, but the command number is different */ case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ return put_int(argp, bdev_logical_block_size(bdev)); case BLKBSZSET_32: return blkdev_bszset(file, mode, argp); case BLKGETSIZE64_32: return put_u64(argp, bdev_nr_bytes(bdev)); /* Incompatible alignment on i386 */ case BLKTRACESETUP32: return blk_trace_ioctl(bdev, cmd, argp); default: break; } ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); return ret; } #endif struct blk_iou_cmd { int res; bool nowait; }; static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); if (bic->res == -EAGAIN && bic->nowait) io_uring_cmd_issue_blocking(cmd); else io_uring_cmd_done(cmd, bic->res, 0, issue_flags); } static void bio_cmd_bio_end_io(struct bio *bio) { struct io_uring_cmd *cmd = bio->bi_private; struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); if (unlikely(bio->bi_status) && !bic->res) bic->res = blk_status_to_errno(bio->bi_status); io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); bio_put(bio); } static int blkdev_cmd_discard(struct io_uring_cmd *cmd, struct block_device *bdev, uint64_t start, uint64_t len, bool nowait) { struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; sector_t sector = start >> SECTOR_SHIFT; sector_t nr_sects = len >> SECTOR_SHIFT; struct bio *prev = NULL, *bio; int err; if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) return -EBADF; if (bdev_read_only(bdev)) return -EPERM; err = blk_validate_byte_range(bdev, start, len); if (err) return err; err = filemap_invalidate_pages(bdev->bd_mapping, start, start + len - 1, nowait); if (err) return err; while (true) { bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp); if (!bio) break; if (nowait) { /* * Don't allow multi-bio non-blocking submissions as * subsequent bios may fail but we won't get a direct * indication of that. Normally, the caller should * retry from a blocking context. */ if (unlikely(nr_sects)) { bio_put(bio); return -EAGAIN; } bio->bi_opf |= REQ_NOWAIT; } prev = bio_chain_and_submit(prev, bio); } if (unlikely(!prev)) return -EAGAIN; if (unlikely(nr_sects)) bic->res = -EAGAIN; prev->bi_private = cmd; prev->bi_end_io = bio_cmd_bio_end_io; submit_bio(prev); return -EIOCBQUEUED; } int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); const struct io_uring_sqe *sqe = cmd->sqe; u32 cmd_op = cmd->cmd_op; uint64_t start, len; if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || sqe->rw_flags || sqe->file_index)) return -EINVAL; bic->res = 0; bic->nowait = issue_flags & IO_URING_F_NONBLOCK; start = READ_ONCE(sqe->addr); len = READ_ONCE(sqe->addr3); switch (cmd_op) { case BLOCK_URING_CMD_DISCARD: return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); } return -EINVAL; }
5 6 6 12 3 7 2 1 2 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 // SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173 * * Copyright (C)2003 USAGI/WIDE Project * * Author Mitsuru KANDA <mk@linux-ipv6.org> */ /* * [Memo] * * Outbound: * The compression of IP datagram MUST be done before AH/ESP processing, * fragmentation, and the addition of Hop-by-Hop/Routing header. * * Inbound: * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); __be32 spi; const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_comp_hdr *ipcomph = (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); struct xfrm_state *t = NULL; t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; atomic_set(&t->tunnel_users, 1); out: return t; error: t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); t = NULL; goto out; } static int ipcomp6_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, }; static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, }; static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit ipcomp6_fini(void) { if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); } module_init(ipcomp6_init); module_exit(ipcomp6_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
1 1 1 1 32 32 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <net/ipv6.h> #include <net/icmp.h> #include <net/udp.h> #include <net/tcp.h> #include <net/route.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; struct arppayload { unsigned char mac_src[ETH_ALEN]; unsigned char ip_src[4]; unsigned char mac_dst[ETH_ALEN]; unsigned char ip_dst[4]; }; /* Guard against containers flooding syslog. */ static bool nf_log_allowed(const struct net *net) { return net_eq(net, &init_net) || sysctl_nf_log_all_netns; } static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) { u16 vid; if (!skb_vlan_tag_present(skb)) return; vid = skb_vlan_tag_get(skb); nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid); } static void noinline_for_stack dump_arp_packet(struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int nhoff) { const struct arppayload *ap; struct arppayload _arpp; const struct arphdr *ah; unsigned int logflags; struct arphdr _arph; ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; } if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; if (logflags & NF_LOG_MACDECODE) { nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); nf_log_buf_add(m, "MACPROTO=%04x ", ntohs(eth_hdr(skb)->h_proto)); } nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, then log the ARP * payload. */ if (ah->ar_hrd != htons(ARPHRD_ETHER) || ah->ar_hln != ETH_ALEN || ah->ar_pln != sizeof(__be32)) return; ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); return; } nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); } static void nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix, struct net *net) { const struct net_device *physoutdev __maybe_unused; const struct net_device *physindev __maybe_unused; nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) physindev = nf_bridge_get_physindev(skb, net); if (physindev && in != physindev) nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); physoutdev = nf_bridge_get_physoutdev(skb); if (physoutdev && out != physoutdev) nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); #endif } static void nf_log_arp_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } static struct nf_logger nf_arp_logger __read_mostly = { .name = "nf_log_arp", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_arp_packet, .me = THIS_MODULE, }; static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, struct sock *sk) { if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) return; read_lock_bh(&sk->sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { const struct cred *cred = sk->sk_socket->file->f_cred; nf_log_buf_add(m, "UID=%u GID=%u ", from_kuid_munged(&init_user_ns, cred->fsuid), from_kgid_munged(&init_user_ns, cred->fsgid)); } read_unlock_bh(&sk->sk_callback_lock); } static noinline_for_stack int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, u8 proto, int fragment, unsigned int offset, unsigned int logflags) { struct tcphdr _tcph; const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ nf_log_buf_add(m, "PROTO=TCP "); if (fragment) return 0; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); if (!th) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); return 1; } /* Max length: 20 "SPT=65535 DPT=65535 " */ nf_log_buf_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ if (logflags & NF_LOG_TCPSEQ) { nf_log_buf_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); } /* Max length: 13 "WINDOW=65535 " */ nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); /* Max length: 9 "RES=0x3C " */ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ if (th->cwr) nf_log_buf_add(m, "CWR "); if (th->ece) nf_log_buf_add(m, "ECE "); if (th->urg) nf_log_buf_add(m, "URG "); if (th->ack) nf_log_buf_add(m, "ACK "); if (th->psh) nf_log_buf_add(m, "PSH "); if (th->rst) nf_log_buf_add(m, "RST "); if (th->syn) nf_log_buf_add(m, "SYN "); if (th->fin) nf_log_buf_add(m, "FIN "); /* Max length: 11 "URGP=65535 " */ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); if ((logflags & NF_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); u8 _opt[60 - sizeof(struct tcphdr)]; unsigned int i; const u8 *op; op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), optsize, _opt); if (!op) { nf_log_buf_add(m, "OPT (TRUNCATED)"); return 1; } /* Max length: 127 "OPT (" 15*4*2chars ") " */ nf_log_buf_add(m, "OPT ("); for (i = 0; i < optsize; i++) nf_log_buf_add(m, "%02X", op[i]); nf_log_buf_add(m, ") "); } return 0; } static noinline_for_stack int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb, u8 proto, int fragment, unsigned int offset) { struct udphdr _udph; const struct udphdr *uh; if (proto == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ nf_log_buf_add(m, "PROTO=UDP "); else /* Max length: 14 "PROTO=UDPLITE " */ nf_log_buf_add(m, "PROTO=UDPLITE "); if (fragment) goto out; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); if (!uh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); return 1; } /* Max length: 20 "SPT=65535 DPT=65535 " */ nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); out: return 0; } /* One level of recursion won't kill us */ static noinline_for_stack void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { const struct iphdr *ih; unsigned int logflags; struct iphdr _iph; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (!ih) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Important fields: * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. * Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK, ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); /* Max length: 6 "CE DF MF " */ if (ntohs(ih->frag_off) & IP_CE) nf_log_buf_add(m, "CE "); if (ntohs(ih->frag_off) & IP_DF) nf_log_buf_add(m, "DF "); if (ntohs(ih->frag_off) & IP_MF) nf_log_buf_add(m, "MF "); /* Max length: 11 "FRAG:65535 " */ if (ntohs(ih->frag_off) & IP_OFFSET) nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); if ((logflags & NF_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; const unsigned char *op; unsigned int i, optsize; optsize = ih->ihl * 4 - sizeof(struct iphdr); op = skb_header_pointer(skb, iphoff + sizeof(_iph), optsize, _opt); if (!op) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 127 "OPT (" 15*4*2chars ") " */ nf_log_buf_add(m, "OPT ("); for (i = 0; i < optsize; i++) nf_log_buf_add(m, "%02X", op[i]); nf_log_buf_add(m, ") "); } switch (ih->protocol) { case IPPROTO_TCP: if (nf_log_dump_tcp_header(m, skb, ih->protocol, ntohs(ih->frag_off) & IP_OFFSET, iphoff + ih->ihl * 4, logflags)) return; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: if (nf_log_dump_udp_header(m, skb, ih->protocol, ntohs(ih->frag_off) & IP_OFFSET, iphoff + ih->ihl * 4)) return; break; case IPPROTO_ICMP: { static const size_t required_len[NR_ICMP_TYPES + 1] = { [ICMP_ECHOREPLY] = 4, [ICMP_DEST_UNREACH] = 8 + sizeof(struct iphdr), [ICMP_SOURCE_QUENCH] = 8 + sizeof(struct iphdr), [ICMP_REDIRECT] = 8 + sizeof(struct iphdr), [ICMP_ECHO] = 4, [ICMP_TIME_EXCEEDED] = 8 + sizeof(struct iphdr), [ICMP_PARAMETERPROB] = 8 + sizeof(struct iphdr), [ICMP_TIMESTAMP] = 20, [ICMP_TIMESTAMPREPLY] = 20, [ICMP_ADDRESS] = 12, [ICMP_ADDRESSREPLY] = 12 }; const struct icmphdr *ich; struct icmphdr _icmph; /* Max length: 11 "PROTO=ICMP " */ nf_log_buf_add(m, "PROTO=ICMP "); if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_icmph), &_icmph); if (!ich) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Max length: 18 "TYPE=255 CODE=255 " */ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ if (ich->type <= NR_ICMP_TYPES && required_len[ich->type] && skb->len - iphoff - ih->ihl * 4 < required_len[ich->type]) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } switch (ich->type) { case ICMP_ECHOREPLY: case ICMP_ECHO: /* Max length: 19 "ID=65535 SEQ=65535 " */ nf_log_buf_add(m, "ID=%u SEQ=%u ", ntohs(ich->un.echo.id), ntohs(ich->un.echo.sequence)); break; case ICMP_PARAMETERPROB: /* Max length: 14 "PARAMETER=255 " */ nf_log_buf_add(m, "PARAMETER=%u ", ntohl(ich->un.gateway) >> 24); break; case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); fallthrough; case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_TIME_EXCEEDED: /* Max length: 3+maxlen */ if (!iphoff) { /* Only recurse once. */ nf_log_buf_add(m, "["); dump_ipv4_packet(net, m, info, skb, iphoff + ih->ihl * 4 + sizeof(_icmph)); nf_log_buf_add(m, "] "); } /* Max length: 10 "MTU=65535 " */ if (ich->type == ICMP_DEST_UNREACH && ich->code == ICMP_FRAG_NEEDED) { nf_log_buf_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); } } break; } /* Max Length */ case IPPROTO_AH: { const struct ip_auth_hdr *ah; struct ip_auth_hdr _ahdr; if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 9 "PROTO=AH " */ nf_log_buf_add(m, "PROTO=AH "); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ah = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_ahdr), &_ahdr); if (!ah) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Length: 15 "SPI=0xF1234567 " */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); break; } case IPPROTO_ESP: { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esph; /* Max length: 10 "PROTO=ESP " */ nf_log_buf_add(m, "PROTO=ESP "); if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ eh = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_esph), &_esph); if (!eh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Length: 15 "SPI=0xF1234567 " */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); break; } /* Max length: 10 "PROTO 255 " */ default: nf_log_buf_add(m, "PROTO=%u ", ih->protocol); } /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && !iphoff) nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) nf_log_buf_add(m, "MARK=0x%x ", skb->mark); /* Proto Max log string length */ /* IP: 40+46+6+11+127 = 230 */ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ /* UDP: 10+max(25,20) = 35 */ /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ /* ESP: 10+max(25)+15 = 50 */ /* AH: 9+max(25)+15 = 49 */ /* unknown: 10 */ /* (ICMP allows recursion one level deep) */ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ /* maxlen = 230+ 91 + 230 + 252 = 803 */ } static noinline_for_stack void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) { const struct ipv6hdr *ih; unsigned int hdrlen = 0; unsigned int logflags; struct ipv6hdr _ip6h; unsigned int ptr; u8 currenthdr; int fragment; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (!ih) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", ntohs(ih->payload_len) + sizeof(struct ipv6hdr), (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, ih->hop_limit, (ntohl(*(__be32 *)ih) & 0x000fffff)); fragment = 0; ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { struct ipv6_opt_hdr _hdr; const struct ipv6_opt_hdr *hp; hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (!hp) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 48 "OPT (...) " */ if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, "OPT ( "); switch (currenthdr) { case IPPROTO_FRAGMENT: { struct frag_hdr _fhdr; const struct frag_hdr *fh; nf_log_buf_add(m, "FRAG:"); fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), &_fhdr); if (!fh) { nf_log_buf_add(m, "TRUNCATED "); return; } /* Max length: 6 "65535 " */ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); /* Max length: 11 "INCOMPLETE " */ if (fh->frag_off & htons(0x0001)) nf_log_buf_add(m, "INCOMPLETE "); nf_log_buf_add(m, "ID:%08x ", ntohl(fh->identification)); if (ntohs(fh->frag_off) & 0xFFF8) fragment = 1; hdrlen = 8; break; } case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ")"); return; } hdrlen = ipv6_optlen(hp); break; /* Max Length */ case IPPROTO_AH: if (logflags & NF_LOG_IPOPT) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; /* Max length: 3 "AH " */ nf_log_buf_add(m, "AH "); if (fragment) { nf_log_buf_add(m, ")"); return; } ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), &_ahdr); if (!ah) { /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", skb->len - ptr); return; } /* Length: 15 "SPI=0xF1234567 */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); } hdrlen = ipv6_authlen(hp); break; case IPPROTO_ESP: if (logflags & NF_LOG_IPOPT) { struct ip_esp_hdr _esph; const struct ip_esp_hdr *eh; /* Max length: 4 "ESP " */ nf_log_buf_add(m, "ESP "); if (fragment) { nf_log_buf_add(m, ")"); return; } /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ eh = skb_header_pointer(skb, ptr, sizeof(_esph), &_esph); if (!eh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", skb->len - ptr); return; } /* Length: 16 "SPI=0xF1234567 )" */ nf_log_buf_add(m, "SPI=0x%x )", ntohl(eh->spi)); } return; default: /* Max length: 20 "Unknown Ext Hdr 255" */ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); return; } if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ") "); currenthdr = hp->nexthdr; ptr += hdrlen; } switch (currenthdr) { case IPPROTO_TCP: if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, ptr, logflags)) return; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) return; break; case IPPROTO_ICMPV6: { struct icmp6hdr _icmp6h; const struct icmp6hdr *ic; /* Max length: 13 "PROTO=ICMPv6 " */ nf_log_buf_add(m, "PROTO=ICMPv6 "); if (fragment) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); if (!ic) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); return; } /* Max length: 18 "TYPE=255 CODE=255 " */ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); switch (ic->icmp6_type) { case ICMPV6_ECHO_REQUEST: case ICMPV6_ECHO_REPLY: /* Max length: 19 "ID=65535 SEQ=65535 " */ nf_log_buf_add(m, "ID=%u SEQ=%u ", ntohs(ic->icmp6_identifier), ntohs(ic->icmp6_sequence)); break; case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: break; case ICMPV6_PARAMPROB: /* Max length: 17 "POINTER=ffffffff " */ nf_log_buf_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_PKT_TOOBIG: case ICMPV6_TIME_EXCEED: /* Max length: 3+maxlen */ if (recurse) { nf_log_buf_add(m, "["); dump_ipv6_packet(net, m, info, skb, ptr + sizeof(_icmp6h), 0); nf_log_buf_add(m, "] "); } /* Max length: 10 "MTU=65535 " */ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { nf_log_buf_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); } } break; } /* Max length: 10 "PROTO=255 " */ default: nf_log_buf_add(m, "PROTO=%u ", currenthdr); } /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && recurse) nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (recurse && skb->mark) nf_log_buf_add(m, "MARK=0x%x ", skb->mark); } static void dump_mac_header(struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb) { struct net_device *dev = skb->dev; unsigned int logflags = 0; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { case ARPHRD_ETHER: nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); nf_log_buf_add(m, "MACPROTO=%04x ", ntohs(eth_hdr(skb)->h_proto)); return; default: break; } fallback: nf_log_buf_add(m, "MAC="); if (dev->hard_header_len && skb->mac_header != skb->network_header) { const unsigned char *p = skb_mac_header(skb); unsigned int i; if (dev->type == ARPHRD_SIT) { p -= ETH_HLEN; if (p < skb->head) p = NULL; } if (p) { nf_log_buf_add(m, "%02x", *p++); for (i = 1; i < dev->hard_header_len; i++) nf_log_buf_add(m, ":%02x", *p++); } if (dev->type == ARPHRD_SIT) { const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb); nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr, &iph->daddr); } } nf_log_buf_add(m, " "); } static void nf_log_ip_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } static struct nf_logger nf_ip_logger __read_mostly = { .name = "nf_log_ipv4", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_ip_packet, .me = THIS_MODULE, }; static void nf_log_ip6_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); nf_log_buf_close(m); } static struct nf_logger nf_ip6_logger __read_mostly = { .name = "nf_log_ipv6", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_ip6_packet, .me = THIS_MODULE, }; static void nf_log_unknown_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); dump_mac_header(m, loginfo, skb); nf_log_buf_close(m); } static void nf_log_netdev_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { switch (skb->protocol) { case htons(ETH_P_IP): nf_log_ip_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; case htons(ETH_P_IPV6): nf_log_ip6_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; default: nf_log_unknown_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; } } static struct nf_logger nf_netdev_logger __read_mostly = { .name = "nf_log_netdev", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_netdev_packet, .me = THIS_MODULE, }; static struct nf_logger nf_bridge_logger __read_mostly = { .name = "nf_log_bridge", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_netdev_packet, .me = THIS_MODULE, }; static int __net_init nf_log_syslog_net_init(struct net *net) { int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); if (ret) return ret; ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); if (ret) goto err1; ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); if (ret) goto err2; ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); if (ret) goto err3; ret = nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); if (ret) goto err4; return 0; err4: nf_log_unset(net, &nf_netdev_logger); err3: nf_log_unset(net, &nf_ip6_logger); err2: nf_log_unset(net, &nf_arp_logger); err1: nf_log_unset(net, &nf_ip_logger); return ret; } static void __net_exit nf_log_syslog_net_exit(struct net *net) { nf_log_unset(net, &nf_ip_logger); nf_log_unset(net, &nf_arp_logger); nf_log_unset(net, &nf_ip6_logger); nf_log_unset(net, &nf_netdev_logger); nf_log_unset(net, &nf_bridge_logger); } static struct pernet_operations nf_log_syslog_net_ops = { .init = nf_log_syslog_net_init, .exit = nf_log_syslog_net_exit, }; static int __init nf_log_syslog_init(void) { int ret; ret = register_pernet_subsys(&nf_log_syslog_net_ops); if (ret < 0) return ret; ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); if (ret < 0) goto err1; ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); if (ret < 0) goto err2; ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); if (ret < 0) goto err3; ret = nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); if (ret < 0) goto err4; ret = nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); if (ret < 0) goto err5; return 0; err5: nf_log_unregister(&nf_netdev_logger); err4: nf_log_unregister(&nf_ip6_logger); err3: nf_log_unregister(&nf_arp_logger); err2: nf_log_unregister(&nf_ip_logger); err1: pr_err("failed to register logger\n"); unregister_pernet_subsys(&nf_log_syslog_net_ops); return ret; } static void __exit nf_log_syslog_exit(void) { unregister_pernet_subsys(&nf_log_syslog_net_ops); nf_log_unregister(&nf_ip_logger); nf_log_unregister(&nf_arp_logger); nf_log_unregister(&nf_ip6_logger); nf_log_unregister(&nf_netdev_logger); nf_log_unregister(&nf_bridge_logger); } module_init(nf_log_syslog_init); module_exit(nf_log_syslog_exit); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Netfilter syslog packet logging"); MODULE_LICENSE("GPL"); MODULE_ALIAS("nf_log_arp"); MODULE_ALIAS("nf_log_bridge"); MODULE_ALIAS("nf_log_ipv4"); MODULE_ALIAS("nf_log_ipv6"); MODULE_ALIAS("nf_log_netdev"); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); MODULE_ALIAS_NF_LOGGER(AF_INET, 0); MODULE_ALIAS_NF_LOGGER(3, 0); MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 /* * Header file for reservations for dma-buf and ttm * * Copyright(C) 2011 Linaro Limited. All rights reserved. * Copyright (C) 2012-2013 Canonical Ltd * Copyright (C) 2012 Texas Instruments * * Authors: * Rob Clark <robdclark@gmail.com> * Maarten Lankhorst <maarten.lankhorst@canonical.com> * Thomas Hellstrom <thellstrom-at-vmware-dot-com> * * Based on bo.c which bears the following copyright notice, * but is dual licensed: * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef _LINUX_RESERVATION_H #define _LINUX_RESERVATION_H #include <linux/ww_mutex.h> #include <linux/dma-fence.h> #include <linux/slab.h> #include <linux/seqlock.h> #include <linux/rcupdate.h> extern struct ww_class reservation_ww_class; struct dma_resv_list; /** * enum dma_resv_usage - how the fences from a dma_resv obj are used * * This enum describes the different use cases for a dma_resv object and * controls which fences are returned when queried. * * An important fact is that there is the order KERNEL<WRITE<READ<BOOKKEEP and * when the dma_resv object is asked for fences for one use case the fences * for the lower use case are returned as well. * * For example when asking for WRITE fences then the KERNEL fences are returned * as well. Similar when asked for READ fences then both WRITE and KERNEL * fences are returned as well. * * Already used fences can be promoted in the sense that a fence with * DMA_RESV_USAGE_BOOKKEEP could become DMA_RESV_USAGE_READ by adding it again * with this usage. But fences can never be degraded in the sense that a fence * with DMA_RESV_USAGE_WRITE could become DMA_RESV_USAGE_READ. */ enum dma_resv_usage { /** * @DMA_RESV_USAGE_KERNEL: For in kernel memory management only. * * This should only be used for things like copying or clearing memory * with a DMA hardware engine for the purpose of kernel memory * management. * * Drivers *always* must wait for those fences before accessing the * resource protected by the dma_resv object. The only exception for * that is when the resource is known to be locked down in place by * pinning it previously. */ DMA_RESV_USAGE_KERNEL, /** * @DMA_RESV_USAGE_WRITE: Implicit write synchronization. * * This should only be used for userspace command submissions which add * an implicit write dependency. */ DMA_RESV_USAGE_WRITE, /** * @DMA_RESV_USAGE_READ: Implicit read synchronization. * * This should only be used for userspace command submissions which add * an implicit read dependency. */ DMA_RESV_USAGE_READ, /** * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync. * * This should be used by submissions which don't want to participate in * any implicit synchronization. * * The most common cases are preemption fences, page table updates, TLB * flushes as well as explicitly synced user submissions. * * Explicitly synced user submissions can be promoted to * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using * dma_buf_import_sync_file() when implicit synchronization should * become necessary after initial adding of the fence. */ DMA_RESV_USAGE_BOOKKEEP }; /** * dma_resv_usage_rw - helper for implicit sync * @write: true if we create a new implicit sync write * * This returns the implicit synchronization usage for write or read accesses, * see enum dma_resv_usage and &dma_buf.resv. */ static inline enum dma_resv_usage dma_resv_usage_rw(bool write) { /* This looks confusing at first sight, but is indeed correct. * * The rational is that new write operations needs to wait for the * existing read and write operations to finish. * But a new read operation only needs to wait for the existing write * operations to finish. */ return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE; } /** * struct dma_resv - a reservation object manages fences for a buffer * * This is a container for dma_fence objects which needs to handle multiple use * cases. * * One use is to synchronize cross-driver access to a struct dma_buf, either for * dynamic buffer management or just to handle implicit synchronization between * different users of the buffer in userspace. See &dma_buf.resv for a more * in-depth discussion. * * The other major use is to manage access and locking within a driver in a * buffer based memory manager. struct ttm_buffer_object is the canonical * example here, since this is where reservation objects originated from. But * use in drivers is spreading and some drivers also manage struct * drm_gem_object with the same scheme. */ struct dma_resv { /** * @lock: * * Update side lock. Don't use directly, instead use the wrapper * functions like dma_resv_lock() and dma_resv_unlock(). * * Drivers which use the reservation object to manage memory dynamically * also use this lock to protect buffer object state like placement, * allocation policies or throughout command submission. */ struct ww_mutex lock; /** * @fences: * * Array of fences which where added to the dma_resv object * * A new fence is added by calling dma_resv_add_fence(). Since this * often needs to be done past the point of no return in command * submission it cannot fail, and therefore sufficient slots need to be * reserved by calling dma_resv_reserve_fences(). */ struct dma_resv_list __rcu *fences; }; /** * struct dma_resv_iter - current position into the dma_resv fences * * Don't touch this directly in the driver, use the accessor function instead. * * IMPORTANT * * When using the lockless iterators like dma_resv_iter_next_unlocked() or * dma_resv_for_each_fence_unlocked() beware that the iterator can be restarted. * Code which accumulates statistics or similar needs to check for this with * dma_resv_iter_is_restarted(). */ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ struct dma_resv *obj; /** @usage: Return fences with this usage or lower. */ enum dma_resv_usage usage; /** @fence: the currently handled fence */ struct dma_fence *fence; /** @fence_usage: the usage of the current fence */ enum dma_resv_usage fence_usage; /** @index: index into the shared fences */ unsigned int index; /** @fences: the shared fences; private, *MUST* not dereference */ struct dma_resv_list *fences; /** @num_fences: number of fences */ unsigned int num_fences; /** @is_restarted: true if this is the first returned fence */ bool is_restarted; }; struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor); struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor); struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor); struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor); /** * dma_resv_iter_begin - initialize a dma_resv_iter object * @cursor: The dma_resv_iter object to initialize * @obj: The dma_resv object which we want to iterate over * @usage: controls which fences to include, see enum dma_resv_usage. */ static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, struct dma_resv *obj, enum dma_resv_usage usage) { cursor->obj = obj; cursor->usage = usage; cursor->fence = NULL; } /** * dma_resv_iter_end - cleanup a dma_resv_iter object * @cursor: the dma_resv_iter object which should be cleaned up * * Make sure that the reference to the fence in the cursor is properly * dropped. */ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) { dma_fence_put(cursor->fence); } /** * dma_resv_iter_usage - Return the usage of the current fence * @cursor: the cursor of the current position * * Returns the usage of the currently processed fence. */ static inline enum dma_resv_usage dma_resv_iter_usage(struct dma_resv_iter *cursor) { return cursor->fence_usage; } /** * dma_resv_iter_is_restarted - test if this is the first fence after a restart * @cursor: the cursor with the current position * * Return true if this is the first fence in an iteration after a restart. */ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) { return cursor->is_restarted; } /** * dma_resv_for_each_fence_unlocked - unlocked fence iterator * @cursor: a struct dma_resv_iter pointer * @fence: the current fence * * Iterate over the fences in a struct dma_resv object without holding the * &dma_resv.lock and using RCU instead. The cursor needs to be initialized * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside * the iterator a reference to the dma_fence is held and the RCU lock dropped. * * Beware that the iterator can be restarted when the struct dma_resv for * @cursor is modified. Code which accumulates statistics or similar needs to * check for this with dma_resv_iter_is_restarted(). For this reason prefer the * lock iterator dma_resv_for_each_fence() whenever possible. */ #define dma_resv_for_each_fence_unlocked(cursor, fence) \ for (fence = dma_resv_iter_first_unlocked(cursor); \ fence; fence = dma_resv_iter_next_unlocked(cursor)) /** * dma_resv_for_each_fence - fence iterator * @cursor: a struct dma_resv_iter pointer * @obj: a dma_resv object pointer * @usage: controls which fences to return * @fence: the current fence * * Iterate over the fences in a struct dma_resv object while holding the * &dma_resv.lock. @all_fences controls if the shared fences are returned as * well. The cursor initialisation is part of the iterator and the fence stays * valid as long as the lock is held and so no extra reference to the fence is * taken. */ #define dma_resv_for_each_fence(cursor, obj, usage, fence) \ for (dma_resv_iter_begin(cursor, obj, usage), \ fence = dma_resv_iter_first(cursor); fence; \ fence = dma_resv_iter_next(cursor)) #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) #ifdef CONFIG_DEBUG_MUTEXES void dma_resv_reset_max_fences(struct dma_resv *obj); #else static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {} #endif /** * dma_resv_lock - lock the reservation object * @obj: the reservation object * @ctx: the locking context * * Locks the reservation object for exclusive access and modification. Note, * that the lock is only against other writers, readers will run concurrently * with a writer under RCU. The seqlock is used to notify readers if they * overlap with a writer. * * As the reservation object may be locked by multiple parties in an * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation * object may be locked by itself by passing NULL as @ctx. * * When a die situation is indicated by returning -EDEADLK all locks held by * @ctx must be unlocked and then dma_resv_lock_slow() called on @obj. * * Unlocked by calling dma_resv_unlock(). * * See also dma_resv_lock_interruptible() for the interruptible variant. */ static inline int dma_resv_lock(struct dma_resv *obj, struct ww_acquire_ctx *ctx) { return ww_mutex_lock(&obj->lock, ctx); } /** * dma_resv_lock_interruptible - lock the reservation object * @obj: the reservation object * @ctx: the locking context * * Locks the reservation object interruptible for exclusive access and * modification. Note, that the lock is only against other writers, readers * will run concurrently with a writer under RCU. The seqlock is used to * notify readers if they overlap with a writer. * * As the reservation object may be locked by multiple parties in an * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation * object may be locked by itself by passing NULL as @ctx. * * When a die situation is indicated by returning -EDEADLK all locks held by * @ctx must be unlocked and then dma_resv_lock_slow_interruptible() called on * @obj. * * Unlocked by calling dma_resv_unlock(). */ static inline int dma_resv_lock_interruptible(struct dma_resv *obj, struct ww_acquire_ctx *ctx) { return ww_mutex_lock_interruptible(&obj->lock, ctx); } /** * dma_resv_lock_slow - slowpath lock the reservation object * @obj: the reservation object * @ctx: the locking context * * Acquires the reservation object after a die case. This function * will sleep until the lock becomes available. See dma_resv_lock() as * well. * * See also dma_resv_lock_slow_interruptible() for the interruptible variant. */ static inline void dma_resv_lock_slow(struct dma_resv *obj, struct ww_acquire_ctx *ctx) { ww_mutex_lock_slow(&obj->lock, ctx); } /** * dma_resv_lock_slow_interruptible - slowpath lock the reservation * object, interruptible * @obj: the reservation object * @ctx: the locking context * * Acquires the reservation object interruptible after a die case. This function * will sleep until the lock becomes available. See * dma_resv_lock_interruptible() as well. */ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj, struct ww_acquire_ctx *ctx) { return ww_mutex_lock_slow_interruptible(&obj->lock, ctx); } /** * dma_resv_trylock - trylock the reservation object * @obj: the reservation object * * Tries to lock the reservation object for exclusive access and modification. * Note, that the lock is only against other writers, readers will run * concurrently with a writer under RCU. The seqlock is used to notify readers * if they overlap with a writer. * * Also note that since no context is provided, no deadlock protection is * possible, which is also not needed for a trylock. * * Returns true if the lock was acquired, false otherwise. */ static inline bool __must_check dma_resv_trylock(struct dma_resv *obj) { return ww_mutex_trylock(&obj->lock, NULL); } /** * dma_resv_is_locked - is the reservation object locked * @obj: the reservation object * * Returns true if the mutex is locked, false if unlocked. */ static inline bool dma_resv_is_locked(struct dma_resv *obj) { return ww_mutex_is_locked(&obj->lock); } /** * dma_resv_locking_ctx - returns the context used to lock the object * @obj: the reservation object * * Returns the context used to lock a reservation object or NULL if no context * was used or the object is not locked at all. * * WARNING: This interface is pretty horrible, but TTM needs it because it * doesn't pass the struct ww_acquire_ctx around in some very long callchains. * Everyone else just uses it to check whether they're holding a reservation or * not. */ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) { return READ_ONCE(obj->lock.ctx); } /** * dma_resv_unlock - unlock the reservation object * @obj: the reservation object * * Unlocks the reservation object following exclusive access. */ static inline void dma_resv_unlock(struct dma_resv *obj) { dma_resv_reset_max_fences(obj); ww_mutex_unlock(&obj->lock); } void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, enum dma_resv_usage usage); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, struct dma_fence *fence, enum dma_resv_usage usage); int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, bool intr, unsigned long timeout); void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage, ktime_t deadline); bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage); void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq); #endif /* _LINUX_RESERVATION_H */
10 10 11 8 11 4 4 11 6 1 5 17 5 17 3 1 2 9 10 12 8 14 14 10 7 6 11 17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 // SPDX-License-Identifier: GPL-2.0 /* * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum * * Copyright 2018 Google LLC */ /* * "NHPoly1305" is the main component of Adiantum hashing. * Specifically, it is the calculation * * H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L))) * * from the procedure in section 6.4 of the Adiantum paper [1]. It is an * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over * Z/(2^{128}Z), where the "∆" operation is addition. It hashes 1024-byte * chunks of the input with the NH hash function [2], reducing the input length * by 32x. The resulting NH digests are evaluated as a polynomial in * GF(2^{130}-5), like in the Poly1305 MAC [3]. Note that the polynomial * evaluation by itself would suffice to achieve the ε-∆U property; NH is used * for performance since it's over twice as fast as Poly1305. * * This is *not* a cryptographic hash function; do not use it as such! * * [1] Adiantum: length-preserving encryption for entry-level processors * (https://eprint.iacr.org/2018/720.pdf) * [2] UMAC: Fast and Secure Message Authentication * (https://fastcrypto.org/umac/umac_proc.pdf) * [3] The Poly1305-AES message-authentication code * (https://cr.yp.to/mac/poly1305-20050329.pdf) */ #include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> #include <crypto/nhpoly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> static void nh_generic(const u32 *key, const u8 *message, size_t message_len, __le64 hash[NH_NUM_PASSES]) { u64 sums[4] = { 0, 0, 0, 0 }; BUILD_BUG_ON(NH_PAIR_STRIDE != 2); BUILD_BUG_ON(NH_NUM_PASSES != 4); while (message_len) { u32 m0 = get_unaligned_le32(message + 0); u32 m1 = get_unaligned_le32(message + 4); u32 m2 = get_unaligned_le32(message + 8); u32 m3 = get_unaligned_le32(message + 12); sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]); sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]); sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]); sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]); sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]); sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]); sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]); sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]); key += NH_MESSAGE_UNIT / sizeof(key[0]); message += NH_MESSAGE_UNIT; message_len -= NH_MESSAGE_UNIT; } hash[0] = cpu_to_le64(sums[0]); hash[1] = cpu_to_le64(sums[1]); hash[2] = cpu_to_le64(sums[2]); hash[3] = cpu_to_le64(sums[3]); } /* Pass the next NH hash value through Poly1305 */ static void process_nh_hash_value(struct nhpoly1305_state *state, const struct nhpoly1305_key *key) { BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1); } /* * Feed the next portion of the source data, as a whole number of 16-byte * "NH message units", through NH and Poly1305. Each NH hash is taken over * 1024 bytes, except possibly the final one which is taken over a multiple of * 16 bytes up to 1024. Also, in the case where data is passed in misaligned * chunks, we combine partial hashes; the end result is the same either way. */ static void nhpoly1305_units(struct nhpoly1305_state *state, const struct nhpoly1305_key *key, const u8 *src, unsigned int srclen, nh_t nh_fn) { do { unsigned int bytes; if (state->nh_remaining == 0) { /* Starting a new NH message */ bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES); nh_fn(key->nh_key, src, bytes, state->nh_hash); state->nh_remaining = NH_MESSAGE_BYTES - bytes; } else { /* Continuing a previous NH message */ __le64 tmp_hash[NH_NUM_PASSES]; unsigned int pos; int i; pos = NH_MESSAGE_BYTES - state->nh_remaining; bytes = min(srclen, state->nh_remaining); nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash); for (i = 0; i < NH_NUM_PASSES; i++) le64_add_cpu(&state->nh_hash[i], le64_to_cpu(tmp_hash[i])); state->nh_remaining -= bytes; } if (state->nh_remaining == 0) process_nh_hash_value(state, key); src += bytes; srclen -= bytes; } while (srclen); } int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm); int i; if (keylen != NHPOLY1305_KEY_SIZE) return -EINVAL; poly1305_core_setkey(&ctx->poly_key, key); key += POLY1305_BLOCK_SIZE; for (i = 0; i < NH_KEY_WORDS; i++) ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32)); return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_setkey); int crypto_nhpoly1305_init(struct shash_desc *desc) { struct nhpoly1305_state *state = shash_desc_ctx(desc); poly1305_core_init(&state->poly_state); state->buflen = 0; state->nh_remaining = 0; return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_init); int crypto_nhpoly1305_update_helper(struct shash_desc *desc, const u8 *src, unsigned int srclen, nh_t nh_fn) { struct nhpoly1305_state *state = shash_desc_ctx(desc); const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); unsigned int bytes; if (state->buflen) { bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen); memcpy(&state->buffer[state->buflen], src, bytes); state->buflen += bytes; if (state->buflen < NH_MESSAGE_UNIT) return 0; nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, nh_fn); state->buflen = 0; src += bytes; srclen -= bytes; } if (srclen >= NH_MESSAGE_UNIT) { bytes = round_down(srclen, NH_MESSAGE_UNIT); nhpoly1305_units(state, key, src, bytes, nh_fn); src += bytes; srclen -= bytes; } if (srclen) { memcpy(state->buffer, src, srclen); state->buflen = srclen; } return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_update_helper); int crypto_nhpoly1305_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic); } EXPORT_SYMBOL(crypto_nhpoly1305_update); int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn) { struct nhpoly1305_state *state = shash_desc_ctx(desc); const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); if (state->buflen) { memset(&state->buffer[state->buflen], 0, NH_MESSAGE_UNIT - state->buflen); nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, nh_fn); } if (state->nh_remaining) process_nh_hash_value(state, key); poly1305_core_emit(&state->poly_state, NULL, dst); return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst) { return crypto_nhpoly1305_final_helper(desc, dst, nh_generic); } EXPORT_SYMBOL(crypto_nhpoly1305_final); static struct shash_alg nhpoly1305_alg = { .base.cra_name = "nhpoly1305", .base.cra_driver_name = "nhpoly1305-generic", .base.cra_priority = 100, .base.cra_ctxsize = sizeof(struct nhpoly1305_key), .base.cra_module = THIS_MODULE, .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_nhpoly1305_init, .update = crypto_nhpoly1305_update, .final = crypto_nhpoly1305_final, .setkey = crypto_nhpoly1305_setkey, .descsize = sizeof(struct nhpoly1305_state), }; static int __init nhpoly1305_mod_init(void) { return crypto_register_shash(&nhpoly1305_alg); } static void __exit nhpoly1305_mod_exit(void) { crypto_unregister_shash(&nhpoly1305_alg); } subsys_initcall(nhpoly1305_mod_init); module_exit(nhpoly1305_mod_exit); MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); MODULE_ALIAS_CRYPTO("nhpoly1305"); MODULE_ALIAS_CRYPTO("nhpoly1305-generic");
8 8 8 8 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (C) 2019 Netronome Systems, Inc. */ #ifndef __NET_TC_MPLS_H #define __NET_TC_MPLS_H #include <linux/tc_act/tc_mpls.h> #include <net/act_api.h> struct tcf_mpls_params { int tcfm_action; u32 tcfm_label; u8 tcfm_tc; u8 tcfm_ttl; u8 tcfm_bos; __be16 tcfm_proto; struct rcu_head rcu; }; #define ACT_MPLS_TC_NOT_SET 0xff #define ACT_MPLS_BOS_NOT_SET 0xff #define ACT_MPLS_LABEL_NOT_SET 0xffffffff struct tcf_mpls { struct tc_action common; struct tcf_mpls_params __rcu *mpls_p; }; #define to_mpls(a) ((struct tcf_mpls *)a) static inline bool is_tcf_mpls(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->id == TCA_ID_MPLS) return true; #endif return false; } static inline u32 tcf_mpls_action(const struct tc_action *a) { u32 tcfm_action; rcu_read_lock(); tcfm_action = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_action; rcu_read_unlock(); return tcfm_action; } static inline __be16 tcf_mpls_proto(const struct tc_action *a) { __be16 tcfm_proto; rcu_read_lock(); tcfm_proto = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_proto; rcu_read_unlock(); return tcfm_proto; } static inline u32 tcf_mpls_label(const struct tc_action *a) { u32 tcfm_label; rcu_read_lock(); tcfm_label = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_label; rcu_read_unlock(); return tcfm_label; } static inline u8 tcf_mpls_tc(const struct tc_action *a) { u8 tcfm_tc; rcu_read_lock(); tcfm_tc = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_tc; rcu_read_unlock(); return tcfm_tc; } static inline u8 tcf_mpls_bos(const struct tc_action *a) { u8 tcfm_bos; rcu_read_lock(); tcfm_bos = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_bos; rcu_read_unlock(); return tcfm_bos; } static inline u8 tcf_mpls_ttl(const struct tc_action *a) { u8 tcfm_ttl; rcu_read_lock(); tcfm_ttl = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_ttl; rcu_read_unlock(); return tcfm_ttl; } #endif /* __NET_TC_MPLS_H */
3 5 1 1 1 56 1 1 4 50 61 5 54 18 2 11 1 10 8 1 1 2 2 2 2 2 1 1 1 2 2 17 4 12 1 1 13 2 11 10 1 7 2 8 1 8 1 4 5 8 1 8 1 8 1 1 3 5 1 3 4 2 2 5 1 6 56 47 9 2 9 2 5 6 9 1 8 2 9 1 7 3 5 5 10 5 52 3 3 2 1 3 1 1 1 1 2 2 1 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs inode ops for the 9P2000.L protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" #include "xattr.h" #include "acl.h" static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev); /** * v9fs_get_fsgid_for_create - Helper function to get the gid for a new object * @dir_inode: The directory inode * * Helper function to get the gid for creating a * new file system object. This checks the S_ISGID to determine the owning * group of the new file system object. */ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) { BUG_ON(dir_inode == NULL); if (dir_inode->i_mode & S_ISGID) { /* set_gid bit is set.*/ return dir_inode->i_gid; } return current_fsgid(); } static int v9fs_test_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; /* don't match inode of different type */ if (inode_wrong_type(inode, st->st_mode)) return 0; if (inode->i_generation != st->st_gen) return 0; /* compare qid details */ if (memcmp(&v9inode->qid.version, &st->qid.version, sizeof(v9inode->qid.version))) return 0; if (v9inode->qid.type != st->qid.type) return 0; if (v9inode->qid.path != st->qid.path) return 0; return 1; } /* Always get a new inode */ static int v9fs_test_new_inode_dotl(struct inode *inode, void *data) { return 0; } static int v9fs_set_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); inode->i_generation = st->st_gen; return 0; } static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, struct p9_qid *qid, struct p9_fid *fid, struct p9_stat_dotl *st, int new) { int retval; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; int (*test)(struct inode *inode, void *data); if (new) test = v9fs_test_new_inode_dotl; else test = v9fs_test_inode_dotl; inode = iget5_locked(sb, QID2INO(qid), test, v9fs_set_inode_dotl, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; /* * initialize the inode with the stat info * FIXME!! we may need support for stale inodes * later. */ inode->i_ino = QID2INO(qid); retval = v9fs_init_inode(v9ses, inode, st->st_mode, new_decode_dev(st->st_rdev)); if (retval) goto error; v9fs_stat2inode_dotl(st, inode, 0); v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) goto error; unlock_new_inode(inode); return inode; error: iget_failed(inode); return ERR_PTR(retval); } struct inode * v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new) { struct p9_stat_dotl *st; struct inode *inode = NULL; st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN); if (IS_ERR(st)) return ERR_CAST(st); inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new); kfree(st); return inode; } struct dotl_openflag_map { int open_flag; int dotl_flag; }; static int v9fs_mapped_dotl_flags(int flags) { int i; int rflags = 0; struct dotl_openflag_map dotl_oflag_map[] = { { O_CREAT, P9_DOTL_CREATE }, { O_EXCL, P9_DOTL_EXCL }, { O_NOCTTY, P9_DOTL_NOCTTY }, { O_APPEND, P9_DOTL_APPEND }, { O_NONBLOCK, P9_DOTL_NONBLOCK }, { O_DSYNC, P9_DOTL_DSYNC }, { FASYNC, P9_DOTL_FASYNC }, { O_DIRECT, P9_DOTL_DIRECT }, { O_LARGEFILE, P9_DOTL_LARGEFILE }, { O_DIRECTORY, P9_DOTL_DIRECTORY }, { O_NOFOLLOW, P9_DOTL_NOFOLLOW }, { O_NOATIME, P9_DOTL_NOATIME }, { O_CLOEXEC, P9_DOTL_CLOEXEC }, { O_SYNC, P9_DOTL_SYNC}, }; for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { if (flags & dotl_oflag_map[i].open_flag) rflags |= dotl_oflag_map[i].dotl_flag; } return rflags; } /** * v9fs_open_to_dotl_flags- convert Linux specific open flags to * plan 9 open flag. * @flags: flags to convert */ int v9fs_open_to_dotl_flags(int flags) { int rflags = 0; /* * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY * and P9_DOTL_NOACCESS */ rflags |= flags & O_ACCMODE; rflags |= v9fs_mapped_dotl_flags(flags); return rflags; } /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @idmap: The user namespace of the mount * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @omode: create permissions * @excl: True if the file must not yet exist * */ static int v9fs_vfs_create_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, bool excl) { return v9fs_vfs_mknod_dotl(idmap, dir, dentry, omode, 0); } static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int flags, umode_t omode) { int err = 0; kgid_t gid; umode_t mode; int p9_omode = v9fs_open_to_dotl_flags(flags); const unsigned char *name = NULL; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; struct p9_fid *dfid = NULL, *ofid = NULL; struct v9fs_session_info *v9ses; struct posix_acl *pacl = NULL, *dacl = NULL; struct dentry *res = NULL; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); if (res) dentry = res; } /* Only creates */ if (!(flags & O_CREAT) || d_really_is_positive(dentry)) return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n", name, flags, omode); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto out; } /* clone a fid to use for creation */ ofid = clone_fid(dfid); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n", err); goto out; } if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n", err); goto out; } v9fs_invalidate_inode_attr(dir); /* instantiate inode and assign the unopened fid to the dentry */ fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto out; } /* Now set the ACL based on the default value */ v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); /* Since we are opening a file, assign the open fid to the file */ err = finish_open(file, dentry, generic_file_open); if (err) goto out; file->private_data = ofid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { struct v9fs_inode *v9inode = V9FS_I(inode); fscache_use_cookie(v9fs_inode_cookie(v9inode), file->f_mode & FMODE_WRITE); } #endif v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags); v9fs_open_fid_add(inode, &ofid); file->f_mode |= FMODE_CREATED; out: p9_fid_put(dfid); p9_fid_put(ofid); p9_fid_put(fid); v9fs_put_acl(dacl, pacl); dput(res); return err; } /** * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory * @idmap: The idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @omode: mode for new directory * */ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode) { int err; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; const unsigned char *name; umode_t mode; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); v9ses = v9fs_inode2v9ses(dir); omode |= S_IFDIR; if (dir->i_mode & S_ISGID) omode |= S_ISGID; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } /* instantiate inode and assign the unopened fid to the dentry */ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_fid_add(dentry, &fid); v9fs_set_create_acl(inode, fid, dacl, pacl); d_instantiate(dentry, inode); err = 0; inc_nlink(dir); v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); return err; } static int v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct inode *inode = d_inode(dentry); struct p9_stat_dotl *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache) { if (S_ISREG(inode->i_mode)) { int retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval); } } fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); /* Ask for all the fields in stat structure. Server will return * whatever it supports */ st = p9_client_getattr_dotl(fid, P9_STATS_ALL); p9_fid_put(fid); if (IS_ERR(st)) return PTR_ERR(st); v9fs_stat2inode_dotl(st, d_inode(dentry), 0); generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; kfree(st); return 0; } /* * Attribute flags. */ #define P9_ATTR_MODE (1 << 0) #define P9_ATTR_UID (1 << 1) #define P9_ATTR_GID (1 << 2) #define P9_ATTR_SIZE (1 << 3) #define P9_ATTR_ATIME (1 << 4) #define P9_ATTR_MTIME (1 << 5) #define P9_ATTR_CTIME (1 << 6) #define P9_ATTR_ATIME_SET (1 << 7) #define P9_ATTR_MTIME_SET (1 << 8) struct dotl_iattr_map { int iattr_valid; int p9_iattr_valid; }; static int v9fs_mapped_iattr_valid(int iattr_valid) { int i; int p9_iattr_valid = 0; struct dotl_iattr_map dotl_iattr_map[] = { { ATTR_MODE, P9_ATTR_MODE }, { ATTR_UID, P9_ATTR_UID }, { ATTR_GID, P9_ATTR_GID }, { ATTR_SIZE, P9_ATTR_SIZE }, { ATTR_ATIME, P9_ATTR_ATIME }, { ATTR_MTIME, P9_ATTR_MTIME }, { ATTR_CTIME, P9_ATTR_CTIME }, { ATTR_ATIME_SET, P9_ATTR_ATIME_SET }, { ATTR_MTIME_SET, P9_ATTR_MTIME_SET }, }; for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) { if (iattr_valid & dotl_iattr_map[i].iattr_valid) p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid; } return p9_iattr_valid; } /** * v9fs_vfs_setattr_dotl - set file metadata * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; struct inode *inode = d_inode(dentry); struct v9fs_session_info __maybe_unused *v9ses; struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr = { .uid = INVALID_UID, .gid = INVALID_GID, }; p9_debug(P9_DEBUG_VFS, "\n"); retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; v9ses = v9fs_dentry2v9ses(dentry); p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); if (iattr->ia_valid & ATTR_MODE) p9attr.mode = iattr->ia_mode; if (iattr->ia_valid & ATTR_UID) p9attr.uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) p9attr.gid = iattr->ia_gid; if (iattr->ia_valid & ATTR_SIZE) p9attr.size = iattr->ia_size; if (iattr->ia_valid & ATTR_ATIME_SET) { p9attr.atime_sec = iattr->ia_atime.tv_sec; p9attr.atime_nsec = iattr->ia_atime.tv_nsec; } if (iattr->ia_valid & ATTR_MTIME_SET) { p9attr.mtime_sec = iattr->ia_mtime.tv_sec; p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; } if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; WARN_ON(!fid); } if (!fid) { fid = v9fs_fid_lookup(dentry); use_dentry = 1; } if (IS_ERR(fid)) return PTR_ERR(fid); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { retval = filemap_fdatawrite(inode->i_mapping); if (retval < 0) p9_debug(P9_DEBUG_ERROR, "Flushing file prior to setattr failed: %d\n", retval); } retval = p9_client_setattr(fid, &p9attr); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) fscache_resize_cookie(v9fs_inode_cookie(V9FS_I(inode)), iattr->ia_size); #endif } v9fs_invalidate_inode_attr(inode); setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(inode, fid); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } } if (use_dentry) p9_fid_put(fid); return 0; } /** * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, unsigned int flags) { umode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; set_nlink(inode, stat->st_nlink); mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; v9inode->netfs.remote_i_size = stat->st_size; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; } else { if (stat->st_result_mask & P9_STATS_ATIME) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); } if (stat->st_result_mask & P9_STATS_MTIME) { inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); } if (stat->st_result_mask & P9_STATS_CTIME) { inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); } if (stat->st_result_mask & P9_STATS_UID) inode->i_uid = stat->st_uid; if (stat->st_result_mask & P9_STATS_GID) inode->i_gid = stat->st_gid; if (stat->st_result_mask & P9_STATS_NLINK) set_nlink(inode, stat->st_nlink); if (stat->st_result_mask & P9_STATS_MODE) { mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) { v9inode->netfs.remote_i_size = stat->st_size; v9fs_i_size_write(inode, stat->st_size); } if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } if (stat->st_result_mask & P9_STATS_GEN) inode->i_generation = stat->st_gen; /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION * because the inode structure does not have fields for them. */ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } static int v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err; kgid_t gid; const unsigned char *name; struct p9_qid qid; struct p9_fid *dfid; struct p9_fid *fid = NULL; name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return err; } gid = v9fs_get_fsgid_for_create(dir); /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ err = p9_client_symlink(dfid, name, symname, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); goto error; } v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); p9_fid_put(dfid); return err; } /** * v9fs_vfs_link_dotl - create a hardlink for dotl * @old_dentry: dentry for file to link to * @dir: inode destination for new link * @dentry: dentry for link * */ static int v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int err; struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n", dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); oldfid = v9fs_fid_lookup(old_dentry); if (IS_ERR(oldfid)) { p9_fid_put(dfid); return PTR_ERR(oldfid); } err = p9_client_link(dfid, oldfid, dentry->d_name.name); p9_fid_put(dfid); p9_fid_put(oldfid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; } v9fs_invalidate_inode_attr(dir); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { /* Get the latest stat info from server. */ struct p9_fid *fid; fid = v9fs_fid_lookup(old_dentry); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_refresh_inode_dotl(fid, d_inode(old_dentry)); p9_fid_put(fid); } ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); return err; } /** * v9fs_vfs_mknod_dotl - create a special file * @idmap: The idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @omode: mode for creation * @rdev: device associated with special file * */ static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev) { int err; kgid_t gid; const unsigned char *name; umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); if (err < 0) goto error; v9fs_invalidate_inode_attr(dir); fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); return err; } /** * v9fs_vfs_get_link_dotl - follow a symlink path * @dentry: dentry for symlink * @inode: inode for symlink * @done: destructor for return value */ static const char * v9fs_vfs_get_link_dotl(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct p9_fid *fid; char *target; int retval; if (!dentry) return ERR_PTR(-ECHILD); p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); p9_fid_put(fid); if (retval) return ERR_PTR(retval); set_delayed_call(done, kfree_link, target); return target; } int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { struct p9_stat_dotl *st; struct v9fs_session_info *v9ses; unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_getattr_dotl(fid, P9_STATS_ALL); if (IS_ERR(st)) return PTR_ERR(st); /* * Don't update inode if the file type is different */ if (inode_wrong_type(inode, st->st_mode)) goto out; /* * We don't want to refresh inode->i_size, * because we may have cached data */ flags = (v9ses->cache & CACHE_LOOSE) ? V9FS_STAT2INODE_KEEP_ISIZE : 0; v9fs_stat2inode_dotl(st, inode, flags); out: kfree(st); return 0; } const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create_dotl, .atomic_open = v9fs_vfs_atomic_open_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir_dotl, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_file_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_symlink_inode_operations_dotl = { .get_link = v9fs_vfs_get_link_dotl, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, };
1 4 69 1 3 65 3 3 9 115 1 1 1 1 1 1 607 607 6 41 18 39 1 525 120 118 23 20 11 13 11 27 37 30 30 38 38 62 3 57 9 112 112 112 3 109 9 9 9 1 9 8 1 42 3 1 38 1 29 9 30 8 40 74 75 27 75 8 8 1 5 2 7 1 62 25 37 51 11 57 6 19 44 2 61 17 17 13 4 37 37 11 18 18 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/errno.h> #include <linux/kmod.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/device.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include "tty.h" #undef LDISC_DEBUG_HANGUP #ifdef LDISC_DEBUG_HANGUP #define tty_ldisc_debug(tty, f, args...) tty_debug(tty, f, ##args) #else #define tty_ldisc_debug(tty, f, args...) #endif /* lockdep nested classes for tty->ldisc_sem */ enum { LDISC_SEM_NORMAL, LDISC_SEM_OTHER, }; /* * This guards the refcounted line discipline lists. The lock * must be taken with irqs off because there are hangup path * callers who will do ldisc lookups and cannot sleep. */ static DEFINE_RAW_SPINLOCK(tty_ldiscs_lock); /* Line disc dispatch table */ static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; /** * tty_register_ldisc - install a line discipline * @new_ldisc: pointer to the ldisc object * * Installs a new line discipline into the kernel. The discipline is set up as * unreferenced and then made available to the kernel from this point onwards. * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ int tty_register_ldisc(struct tty_ldisc_ops *new_ldisc) { unsigned long flags; if (new_ldisc->num < N_TTY || new_ldisc->num >= NR_LDISCS) return -EINVAL; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[new_ldisc->num] = new_ldisc; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return 0; } EXPORT_SYMBOL(tty_register_ldisc); /** * tty_unregister_ldisc - unload a line discipline * @ldisc: ldisc number * * Remove a line discipline from the kernel providing it is not currently in * use. * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[ldisc->num] = NULL; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } EXPORT_SYMBOL(tty_unregister_ldisc); static struct tty_ldisc_ops *get_ldops(int disc) { unsigned long flags; struct tty_ldisc_ops *ldops, *ret; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); ret = ERR_PTR(-EINVAL); ldops = tty_ldiscs[disc]; if (ldops) { ret = ERR_PTR(-EAGAIN); if (try_module_get(ldops->owner)) ret = ldops; } raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return ret; } static void put_ldops(struct tty_ldisc_ops *ldops) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); module_put(ldops->owner); raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD); /** * tty_ldisc_get - take a reference to an ldisc * @tty: tty device * @disc: ldisc number * * Takes a reference to a line discipline. Deals with refcounts and module * locking counts. If the discipline is not available, its module loaded, if * possible. * * Returns: * * -%EINVAL if the discipline index is not [%N_TTY .. %NR_LDISCS] or if the * discipline is not registered * * -%EAGAIN if request_module() failed to load or register the discipline * * -%ENOMEM if allocation failure * * Otherwise, returns a pointer to the discipline and bumps the ref count * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; if (disc < N_TTY || disc >= NR_LDISCS) return ERR_PTR(-EINVAL); /* * Get the ldisc ops - we may need to request them to be loaded * dynamically and try again. */ ldops = get_ldops(disc); if (IS_ERR(ldops)) { if (!capable(CAP_SYS_MODULE) && !tty_ldisc_autoload) return ERR_PTR(-EPERM); request_module("tty-ldisc-%d", disc); ldops = get_ldops(disc); if (IS_ERR(ldops)) return ERR_CAST(ldops); } /* * There is no way to handle allocation failure of only 16 bytes. * Let's simplify error handling and save more memory. */ ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL | __GFP_NOFAIL); ld->ops = ldops; ld->tty = tty; return ld; } /** * tty_ldisc_put - release the ldisc * @ld: lisdsc to release * * Complement of tty_ldisc_get(). */ static void tty_ldisc_put(struct tty_ldisc *ld) { if (WARN_ON_ONCE(!ld)) return; put_ldops(ld->ops); kfree(ld); } static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; } static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return (*pos < NR_LDISCS) ? pos : NULL; } static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) { } static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; struct tty_ldisc_ops *ldops; ldops = get_ldops(i); if (IS_ERR(ldops)) return 0; seq_printf(m, "%-10s %2d\n", ldops->name ? ldops->name : "???", i); put_ldops(ldops); return 0; } const struct seq_operations tty_ldiscs_seq_ops = { .start = tty_ldiscs_seq_start, .next = tty_ldiscs_seq_next, .stop = tty_ldiscs_seq_stop, .show = tty_ldiscs_seq_show, }; /** * tty_ldisc_ref_wait - wait for the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then wait patiently until it changes. * * Returns: %NULL if the tty has been hungup and not re-opened with a new file * descriptor, otherwise valid ldisc reference * * Note 1: Must not be called from an IRQ/timer context. The caller must also * be careful not to hold other locks that will deadlock against a discipline * change, such as an existing ldisc reference (which we check for). * * Note 2: a file_operations routine (read/poll/write) should use this function * to wait for any ldisc lifetime events to finish. */ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { struct tty_ldisc *ld; ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); /** * tty_ldisc_ref - get the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then return %NULL. Can be called from IRQ * and timer functions. */ struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { struct tty_ldisc *ld = NULL; if (ldsem_down_read_trylock(&tty->ldisc_sem)) { ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); } return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref); /** * tty_ldisc_deref - free a tty ldisc reference * @ld: reference to free up * * Undoes the effect of tty_ldisc_ref() or tty_ldisc_ref_wait(). May be called * in IRQ context. */ void tty_ldisc_deref(struct tty_ldisc *ld) { ldsem_up_read(&ld->tty->ldisc_sem); } EXPORT_SYMBOL_GPL(tty_ldisc_deref); static inline int __tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write(&tty->ldisc_sem, timeout); } static inline int __tty_ldisc_lock_nested(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write_nested(&tty->ldisc_sem, LDISC_SEM_OTHER, timeout); } static inline void __tty_ldisc_unlock(struct tty_struct *tty) { ldsem_up_write(&tty->ldisc_sem); } int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; /* Kindly asking blocked readers to release the read side */ set_bit(TTY_LDISC_CHANGING, &tty->flags); wake_up_interruptible_all(&tty->read_wait); wake_up_interruptible_all(&tty->write_wait); ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); return 0; } void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); /* Can be cleared here - ldisc_unlock will wake up writers firstly */ clear_bit(TTY_LDISC_CHANGING, &tty->flags); __tty_ldisc_unlock(tty); } static int tty_ldisc_lock_pair_timeout(struct tty_struct *tty, struct tty_struct *tty2, unsigned long timeout) { int ret; if (tty < tty2) { ret = __tty_ldisc_lock(tty, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty2, timeout); if (!ret) __tty_ldisc_unlock(tty); } } else { /* if this is possible, it has lots of implications */ WARN_ON_ONCE(tty == tty2); if (tty2 && tty != tty2) { ret = __tty_ldisc_lock(tty2, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty, timeout); if (!ret) __tty_ldisc_unlock(tty2); } } else ret = __tty_ldisc_lock(tty, timeout); } if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); if (tty2) set_bit(TTY_LDISC_HALTED, &tty2->flags); return 0; } static void tty_ldisc_lock_pair(struct tty_struct *tty, struct tty_struct *tty2) { tty_ldisc_lock_pair_timeout(tty, tty2, MAX_SCHEDULE_TIMEOUT); } static void tty_ldisc_unlock_pair(struct tty_struct *tty, struct tty_struct *tty2) { __tty_ldisc_unlock(tty); if (tty2) __tty_ldisc_unlock(tty2); } /** * tty_ldisc_flush - flush line discipline queue * @tty: tty to flush ldisc for * * Flush the line discipline queue (if any) and the tty flip buffers for this * @tty. */ void tty_ldisc_flush(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); tty_buffer_flush(tty, ld); if (ld) tty_ldisc_deref(ld); } EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure * @disc: line discipline number * * This is probably overkill for real world processors but they are not on hot * paths so a little discipline won't do any harm. * * The line discipline-related tty_struct fields are reset to prevent the ldisc * driver from re-using stale information for the new ldisc instance. * * Locking: takes termios_rwsem */ static void tty_set_termios_ldisc(struct tty_struct *tty, int disc) { down_write(&tty->termios_rwsem); tty->termios.c_line = disc; up_write(&tty->termios_rwsem); tty->disc_data = NULL; tty->receive_room = 0; } /** * tty_ldisc_open - open a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to open * * A helper opening method. Also a convenient debugging and check point. * * Locking: always called with BTM already held. */ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) { WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); if (ld->ops->open) { int ret; /* BTM here locks versus a hangup event */ ret = ld->ops->open(tty); if (ret) clear_bit(TTY_LDISC_OPEN, &tty->flags); tty_ldisc_debug(tty, "%p: opened\n", ld); return ret; } return 0; } /** * tty_ldisc_close - close a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to close * * A helper close method. Also a convenient debugging and check point. */ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) { lockdep_assert_held_write(&tty->ldisc_sem); WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); clear_bit(TTY_LDISC_OPEN, &tty->flags); if (ld->ops->close) ld->ops->close(tty); tty_ldisc_debug(tty, "%p: closed\n", ld); } /** * tty_ldisc_failto - helper for ldisc failback * @tty: tty to open the ldisc on * @ld: ldisc we are trying to fail back to * * Helper to try and recover a tty when switching back to the old ldisc fails * and we need something attached. */ static int tty_ldisc_failto(struct tty_struct *tty, int ld) { struct tty_ldisc *disc = tty_ldisc_get(tty, ld); int r; lockdep_assert_held_write(&tty->ldisc_sem); if (IS_ERR(disc)) return PTR_ERR(disc); tty->ldisc = disc; tty_set_termios_ldisc(tty, ld); r = tty_ldisc_open(tty, disc); if (r < 0) tty_ldisc_put(disc); return r; } /** * tty_ldisc_restore - helper for tty ldisc change * @tty: tty to recover * @old: previous ldisc * * Restore the previous line discipline or %N_TTY when a line discipline change * fails due to an open error */ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { /* There is an outstanding reference here so this is safe */ if (tty_ldisc_failto(tty, old->ops->num) < 0) { const char *name = tty_name(tty); pr_warn("Falling back ldisc for %s.\n", name); /* * The traditional behaviour is to fall back to N_TTY, we * want to avoid falling back to N_NULL unless we have no * choice to avoid the risk of breaking anything */ if (tty_ldisc_failto(tty, N_TTY) < 0 && tty_ldisc_failto(tty, N_NULL) < 0) panic("Couldn't open N_NULL ldisc for %s.", name); } } /** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @disc: the line discipline number * * Set the discipline of a tty line. Must be called from a process context. The * ldisc change logic has to protect itself against any overlapping ldisc * change (including on the other end of pty pairs), the close of one side of a * tty/pty pair, and eventually hangup. */ int tty_set_ldisc(struct tty_struct *tty, int disc) { int retval; struct tty_ldisc *old_ldisc, *new_ldisc; new_ldisc = tty_ldisc_get(tty, disc); if (IS_ERR(new_ldisc)) return PTR_ERR(new_ldisc); tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) goto err; if (!tty->ldisc) { retval = -EIO; goto out; } /* Check the no-op case */ if (tty->ldisc->ops->num == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { /* We were raced by hangup */ retval = -EIO; goto out; } if (tty->ops->ldisc_ok) { retval = tty->ops->ldisc_ok(tty, disc); if (retval) goto out; } old_ldisc = tty->ldisc; /* Shutdown the old discipline. */ tty_ldisc_close(tty, old_ldisc); /* Now set up the new line discipline. */ tty->ldisc = new_ldisc; tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ tty_ldisc_put(new_ldisc); tty_ldisc_restore(tty, old_ldisc); } if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } /* * At this point we hold a reference to the new ldisc and a * reference to the old ldisc, or we hold two references to * the old ldisc (if it was restored as part of error cleanup * above). In either case, releasing a single reference from * the old ldisc is correct. */ new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); /* * Restart the work queue in case no characters kick it off. Safe if * already running */ tty_buffer_restart_work(tty->port); err: tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } EXPORT_SYMBOL_GPL(tty_set_ldisc); /** * tty_ldisc_kill - teardown ldisc * @tty: tty being released * * Perform final close of the ldisc and reset @tty->ldisc */ static void tty_ldisc_kill(struct tty_struct *tty) { lockdep_assert_held_write(&tty->ldisc_sem); if (!tty->ldisc) return; /* * Now kill off the ldisc */ tty_ldisc_close(tty, tty->ldisc); tty_ldisc_put(tty->ldisc); /* Force an oops if we mess this up */ tty->ldisc = NULL; } /** * tty_reset_termios - reset terminal state * @tty: tty to reset * * Restore a terminal to the driver default state. */ static void tty_reset_termios(struct tty_struct *tty) { down_write(&tty->termios_rwsem); tty->termios = tty->driver->init_termios; tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios); tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios); up_write(&tty->termios_rwsem); } /** * tty_ldisc_reinit - reinitialise the tty ldisc * @tty: tty to reinit * @disc: line discipline to reinitialize * * Completely reinitialize the line discipline state, by closing the current * instance, if there is one, and opening a new instance. If an error occurs * opening the new non-%N_TTY instance, the instance is dropped and @tty->ldisc * reset to %NULL. The caller can then retry with %N_TTY instead. * * Returns: 0 if successful, otherwise error code < 0 */ int tty_ldisc_reinit(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; int retval; lockdep_assert_held_write(&tty->ldisc_sem); ld = tty_ldisc_get(tty, disc); if (IS_ERR(ld)) { BUG_ON(disc == N_TTY); return PTR_ERR(ld); } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); tty_ldisc_put(tty->ldisc); } /* switch the line discipline */ tty->ldisc = ld; tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; } return retval; } /** * tty_ldisc_hangup - hangup ldisc reset * @tty: tty being hung up * @reinit: whether to re-initialise the tty * * Some tty devices reset their termios when they receive a hangup event. In * that situation we must also switch back to %N_TTY properly before we reset * the termios data. * * Locking: We can take the ldisc mutex as the rest of the code is careful to * allow for this. * * In the pty pair case this occurs in the close() path of the tty itself so we * must be careful about locking rules. */ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit) { struct tty_ldisc *ld; tty_ldisc_debug(tty, "%p: hangup\n", tty->ldisc); ld = tty_ldisc_ref(tty); if (ld != NULL) { if (ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->ops->write_wakeup) ld->ops->write_wakeup(tty); if (ld->ops->hangup) ld->ops->hangup(tty); tty_ldisc_deref(ld); } wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); /* * Shutdown the current line discipline, and reset it to * N_TTY if need be. * * Avoid racing set_ldisc or tty_ldisc_release */ tty_ldisc_lock(tty, MAX_SCHEDULE_TIMEOUT); if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) tty_reset_termios(tty); if (tty->ldisc) { if (reinit) { if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 && tty_ldisc_reinit(tty, N_TTY) < 0) WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0); } else tty_ldisc_kill(tty); } tty_ldisc_unlock(tty); } /** * tty_ldisc_setup - open line discipline * @tty: tty being shut down * @o_tty: pair tty for pty/tty pairs * * Called during the initial open of a tty/pty pair in order to set up the line * disciplines and bind them to the @tty. This has no locking issues as the * device isn't yet active. */ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) { int retval = tty_ldisc_open(tty, tty->ldisc); if (retval) return retval; if (o_tty) { /* * Called without o_tty->ldisc_sem held, as o_tty has been * just allocated and no one has a reference to it. */ retval = tty_ldisc_open(o_tty, o_tty->ldisc); if (retval) { tty_ldisc_close(tty, tty->ldisc); return retval; } } return 0; } /** * tty_ldisc_release - release line discipline * @tty: tty being shut down (or one end of pty pair) * * Called during the final close of a tty or a pty pair in order to shut down * the line discpline layer. On exit, each tty's ldisc is %NULL. */ void tty_ldisc_release(struct tty_struct *tty) { struct tty_struct *o_tty = tty->link; /* * Shutdown this line discipline. As this is the final close, * it does not race with the set_ldisc code path. */ tty_ldisc_lock_pair(tty, o_tty); tty_ldisc_kill(tty); if (o_tty) tty_ldisc_kill(o_tty); tty_ldisc_unlock_pair(tty, o_tty); /* * And the memory resources remaining (buffers, termios) will be * disposed of when the kref hits zero */ tty_ldisc_debug(tty, "released\n"); } /** * tty_ldisc_init - ldisc setup for new tty * @tty: tty being allocated * * Set up the line discipline objects for a newly allocated tty. Note that the * tty structure is not completely set up when this call is made. */ int tty_ldisc_init(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_get(tty, N_TTY); if (IS_ERR(ld)) return PTR_ERR(ld); tty->ldisc = ld; return 0; } /** * tty_ldisc_deinit - ldisc cleanup for new tty * @tty: tty that was allocated recently * * The tty structure must not be completely set up (tty_ldisc_setup()) when * this call is made. */ void tty_ldisc_deinit(struct tty_struct *tty) { /* no ldisc_sem, tty is being destroyed */ if (tty->ldisc) tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; }
12 15 15 15 15 2 13 12 1 7 7 26 26 26 89 88 63 26 40 22 26 26 26 26 26 26 26 26 26 26 26 26 26 26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 // SPDX-License-Identifier: GPL-2.0 /* * event tracer * * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * * - Added format output of fields of the trace point. * This was based off of work by Tom Zanussi <tzanussi@gmail.com>. * */ #define pr_fmt(fmt) fmt #include <linux/workqueue.h> #include <linux/security.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/delay.h> #include <trace/events/sched.h> #include <trace/syscall.h> #include <asm/setup.h> #include "trace_output.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM "TRACE_SYSTEM" DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); static bool eventdir_initialized; static LIST_HEAD(module_strings); struct module_string { struct list_head next; struct module *module; char *str; }; #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; static inline int system_refcount(struct event_subsystem *system) { return system->ref_count; } static int system_refcount_inc(struct event_subsystem *system) { return system->ref_count++; } static int system_refcount_dec(struct event_subsystem *system) { return --system->ref_count; } /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ list_for_each_entry(file, &tr->events, list) #define do_for_each_event_file_safe(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ struct trace_event_file *___n; \ list_for_each_entry_safe(file, ___n, &tr->events, list) #define while_for_each_event_file() \ } static struct ftrace_event_field * __find_event_field(struct list_head *head, const char *name) { struct ftrace_event_field *field; list_for_each_entry(field, head, link) { if (!strcmp(field->name, name)) return field; } return NULL; } struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name) { struct ftrace_event_field *field; struct list_head *head; head = trace_get_fields(call); field = __find_event_field(head, name); if (field) return field; field = __find_event_field(&ftrace_generic_fields, name); if (field) return field; return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, int is_signed, int filter_type, int len, int need_test) { struct ftrace_event_field *field; field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) return -ENOMEM; field->name = name; field->type = type; if (filter_type == FILTER_OTHER) field->filter_type = filter_assign_type(type); else field->filter_type = filter_type; field->offset = offset; field->size = size; field->is_signed = is_signed; field->needs_test = need_test; field->len = len; list_add(&field->link, head); return 0; } int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { struct list_head *head; if (WARN_ON(!call->class)) return 0; head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, is_signed, filter_type, 0, 0); } EXPORT_SYMBOL_GPL(trace_define_field); static int trace_define_field_ext(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type, int len, int need_test) { struct list_head *head; if (WARN_ON(!call->class)) return 0; head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, is_signed, filter_type, len, need_test); } #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ filter_type, 0, 0); \ if (ret) \ return ret; #define __common_field(type, item) \ ret = __trace_define_field(&ftrace_common_fields, #type, \ "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ is_signed_type(type), FILTER_OTHER, \ 0, 0); \ if (ret) \ return ret; static int trace_define_generic_fields(void) { int ret; __generic_field(int, CPU, FILTER_CPU); __generic_field(int, cpu, FILTER_CPU); __generic_field(int, common_cpu, FILTER_CPU); __generic_field(char *, COMM, FILTER_COMM); __generic_field(char *, comm, FILTER_COMM); __generic_field(char *, stacktrace, FILTER_STACKTRACE); __generic_field(char *, STACKTRACE, FILTER_STACKTRACE); return ret; } static int trace_define_common_fields(void) { int ret; struct trace_entry ent; __common_field(unsigned short, type); __common_field(unsigned char, flags); /* Holds both preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); return ret; } static void trace_destroy_fields(struct trace_event_call *call) { struct ftrace_event_field *field, *next; struct list_head *head; head = trace_get_fields(call); list_for_each_entry_safe(field, next, head, link) { list_del(&field->link); kmem_cache_free(field_cachep, field); } } /* * run-time version of trace_event_get_offsets_<call>() that returns the last * accessible offset of trace fields excluding __dynamic_array bytes */ int trace_event_get_offsets(struct trace_event_call *call) { struct ftrace_event_field *tail; struct list_head *head; head = trace_get_fields(call); /* * head->next points to the last field with the largest offset, * since it was added last by trace_define_field() */ tail = list_first_entry(head, struct ftrace_event_field, link); return tail->offset + tail->size; } static struct trace_event_fields *find_event_field(const char *fmt, struct trace_event_call *call) { struct trace_event_fields *field = call->class->fields_array; const char *p = fmt; int len; if (!(len = str_has_prefix(fmt, "REC->"))) return NULL; fmt += len; for (p = fmt; *p; p++) { if (!isalnum(*p) && *p != '_') break; } len = p - fmt; for (; field->type; field++) { if (strncmp(field->name, fmt, len) || field->name[len]) continue; return field; } return NULL; } /* * Check if the referenced field is an array and return true, * as arrays are OK to dereference. */ static bool test_field(const char *fmt, struct trace_event_call *call) { struct trace_event_fields *field; field = find_event_field(fmt, call); if (!field) return false; /* This is an array and is OK to dereference. */ return strchr(field->type, '[') != NULL; } /* Look for a string within an argument */ static bool find_print_string(const char *arg, const char *str, const char *end) { const char *r; r = strstr(arg, str); return r && r < end; } /* Return true if the argument pointer is safe */ static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) { const char *r, *e, *a; e = fmt + len; /* Find the REC-> in the argument */ r = strstr(fmt, "REC->"); if (r && r < e) { /* * Addresses of events on the buffer, or an array on the buffer is * OK to dereference. There's ways to fool this, but * this is to catch common mistakes, not malicious code. */ a = strchr(fmt, '&'); if ((a && (a < r)) || test_field(r, call)) return true; } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { return true; } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { return true; } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { return true; } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { return true; } else if (find_print_string(fmt, "__get_sockaddr(", e)) { return true; } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) { return true; } return false; } /* Return true if the string is safe */ static bool process_string(const char *fmt, int len, struct trace_event_call *call) { struct trace_event_fields *field; const char *r, *e, *s; e = fmt + len; /* * There are several helper functions that return strings. * If the argument contains a function, then assume its field is valid. * It is considered that the argument has a function if it has: * alphanumeric or '_' before a parenthesis. */ s = fmt; do { r = strstr(s, "("); if (!r || r >= e) break; for (int i = 1; r - i >= s; i++) { char ch = *(r - i); if (isspace(ch)) continue; if (isalnum(ch) || ch == '_') return true; /* Anything else, this isn't a function */ break; } /* A function could be wrapped in parethesis, try the next one */ s = r + 1; } while (s < e); /* * If there's any strings in the argument consider this arg OK as it * could be: REC->field ? "foo" : "bar" and we don't want to get into * verifying that logic here. */ if (find_print_string(fmt, "\"", e)) return true; /* Dereferenced strings are also valid like any other pointer */ if (process_pointer(fmt, len, call)) return true; /* Make sure the field is found */ field = find_event_field(fmt, call); if (!field) return false; /* Test this field's string before printing the event */ call->flags |= TRACE_EVENT_FL_TEST_STR; field->needs_test = 1; return true; } /* * Examine the print fmt of the event looking for unsafe dereference * pointers using %p* that could be recorded in the trace event and * much later referenced after the pointer was freed. Dereferencing * pointers are OK, if it is dereferenced into the event itself. */ static void test_event_printk(struct trace_event_call *call) { u64 dereference_flags = 0; u64 string_flags = 0; bool first = true; const char *fmt; int parens = 0; char in_quote = 0; int start_arg = 0; int arg = 0; int i, e; fmt = call->print_fmt; if (!fmt) return; for (i = 0; fmt[i]; i++) { switch (fmt[i]) { case '\\': i++; if (!fmt[i]) return; continue; case '"': case '\'': /* * The print fmt starts with a string that * is processed first to find %p* usage, * then after the first string, the print fmt * contains arguments that are used to check * if the dereferenced %p* usage is safe. */ if (first) { if (fmt[i] == '\'') continue; if (in_quote) { arg = 0; first = false; /* * If there was no %p* uses * the fmt is OK. */ if (!dereference_flags) return; } } if (in_quote) { if (in_quote == fmt[i]) in_quote = 0; } else { in_quote = fmt[i]; } continue; case '%': if (!first || !in_quote) continue; i++; if (!fmt[i]) return; switch (fmt[i]) { case '%': continue; case 'p': /* Find dereferencing fields */ switch (fmt[i + 1]) { case 'B': case 'R': case 'r': case 'b': case 'M': case 'm': case 'I': case 'i': case 'E': case 'U': case 'V': case 'N': case 'a': case 'd': case 'D': case 'g': case 't': case 'C': case 'O': case 'f': if (WARN_ONCE(arg == 63, "Too many args for event: %s", trace_event_name(call))) return; dereference_flags |= 1ULL << arg; } break; default: { bool star = false; int j; /* Increment arg if %*s exists. */ for (j = 0; fmt[i + j]; j++) { if (isdigit(fmt[i + j]) || fmt[i + j] == '.') continue; if (fmt[i + j] == '*') { star = true; continue; } if ((fmt[i + j] == 's')) { if (star) arg++; if (WARN_ONCE(arg == 63, "Too many args for event: %s", trace_event_name(call))) return; dereference_flags |= 1ULL << arg; string_flags |= 1ULL << arg; } break; } break; } /* default */ } /* switch */ arg++; continue; case '(': if (in_quote) continue; parens++; continue; case ')': if (in_quote) continue; parens--; if (WARN_ONCE(parens < 0, "Paren mismatch for event: %s\narg='%s'\n%*s", trace_event_name(call), fmt + start_arg, (i - start_arg) + 5, "^")) return; continue; case ',': if (in_quote || parens) continue; e = i; i++; while (isspace(fmt[i])) i++; /* * If start_arg is zero, then this is the start of the * first argument. The processing of the argument happens * when the end of the argument is found, as it needs to * handle paranthesis and such. */ if (!start_arg) { start_arg = i; /* Balance out the i++ in the for loop */ i--; continue; } if (dereference_flags & (1ULL << arg)) { if (string_flags & (1ULL << arg)) { if (process_string(fmt + start_arg, e - start_arg, call)) dereference_flags &= ~(1ULL << arg); } else if (process_pointer(fmt + start_arg, e - start_arg, call)) dereference_flags &= ~(1ULL << arg); } start_arg = i; arg++; /* Balance out the i++ in the for loop */ i--; } } if (dereference_flags & (1ULL << arg)) { if (string_flags & (1ULL << arg)) { if (process_string(fmt + start_arg, i - start_arg, call)) dereference_flags &= ~(1ULL << arg); } else if (process_pointer(fmt + start_arg, i - start_arg, call)) dereference_flags &= ~(1ULL << arg); } /* * If you triggered the below warning, the trace event reported * uses an unsafe dereference pointer %p*. As the data stored * at the trace event time may no longer exist when the trace * event is printed, dereferencing to the original source is * unsafe. The source of the dereference must be copied into the * event itself, and the dereference must access the copy instead. */ if (WARN_ON_ONCE(dereference_flags)) { arg = 1; while (!(dereference_flags & 1)) { dereference_flags >>= 1; arg++; } pr_warn("event %s has unsafe dereference of argument %d\n", trace_event_name(call), arg); pr_warn("print_fmt: %s\n", fmt); } } int trace_event_raw_init(struct trace_event_call *call) { int id; id = register_trace_event(&call->event); if (!id) return -ENODEV; test_event_printk(call); return 0; } EXPORT_SYMBOL_GPL(trace_event_raw_init); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) { struct trace_array *tr = trace_file->tr; struct trace_array_cpu *data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; pid_list = rcu_dereference_raw(tr->filtered_pids); no_pid_list = rcu_dereference_raw(tr->filtered_no_pids); if (!pid_list && !no_pid_list) return false; data = this_cpu_ptr(tr->array_buffer.data); return data->ignore_pid; } EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) { struct trace_event_call *event_call = trace_file->event_call; if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && trace_event_ignore_this_pid(trace_file)) return NULL; /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ fbuffer->trace_ctx = tracing_gen_ctx_dec(); fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, fbuffer->trace_ctx); if (!fbuffer->event) return NULL; fbuffer->regs = NULL; fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); int trace_event_reg(struct trace_event_call *call, enum trace_reg type, void *data) { struct trace_event_file *file = data; WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { case TRACE_REG_REGISTER: return tracepoint_probe_register(call->tp, call->class->probe, file); case TRACE_REG_UNREGISTER: tracepoint_probe_unregister(call->tp, call->class->probe, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: return tracepoint_probe_register(call->tp, call->class->perf_probe, call); case TRACE_REG_PERF_UNREGISTER: tracepoint_probe_unregister(call->tp, call->class->perf_probe, call); return 0; case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: case TRACE_REG_PERF_DEL: return 0; #endif } return 0; } EXPORT_SYMBOL_GPL(trace_event_reg); void trace_event_enable_cmd_record(bool enable) { struct trace_event_file *file; struct trace_array *tr; lockdep_assert_held(&event_mutex); do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); } void trace_event_enable_tgid_record(bool enable) { struct trace_event_file *file; struct trace_array *tr; lockdep_assert_held(&event_mutex); do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_tgid_record(); set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } else { tracing_stop_tgid_record(); clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } } while_for_each_event_file(); } static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; int ret = 0; int disable; switch (enable) { case 0: /* * When soft_disable is set and enable is cleared, the sm_ref * reference counter is decremented. If it reaches 0, we want * to clear the SOFT_DISABLED flag but leave the event in the * state that it was. That is, if the event was enabled and * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED * is set we do not want the event to be enabled before we * clear the bit. * * When soft_disable is not set but the SOFT_MODE flag is, * we do nothing. Do not disable the tracepoint, otherwise * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. */ if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); /* Disable use of trace_buffered_event */ trace_buffered_event_disable(); } else disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { tracing_stop_tgid_record(); clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & EVENT_FILE_FL_SOFT_MODE) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* * When soft_disable is set and enable is set, we want to * register the tracepoint for the event, but leave the event * as is. That means, if the event was already enabled, we do * nothing (but set SOFT_MODE). If the event is disabled, we * set SOFT_DISABLED before enabling the event tracepoint, so * it still seems to be disabled. */ if (!soft_disable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); /* Enable use of trace_buffered_event */ trace_buffered_event_enable(); } if (!(file->flags & EVENT_FILE_FL_ENABLED)) { bool cmd = false, tgid = false; /* Keep the event disabled, when going to SOFT_MODE. */ if (soft_disable) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { cmd = true; tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { tgid = true; tracing_start_tgid_record(); set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { if (cmd) tracing_stop_cmdline_record(); if (tgid) tracing_stop_tgid_record(); pr_info("event trace: Could not enable event " "%s\n", trace_event_name(call)); break; } set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); } break; } return ret; } int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { return __ftrace_event_enable_disable(file, enable, soft_disable); } static int ftrace_event_enable_disable(struct trace_event_file *file, int enable) { return __ftrace_event_enable_disable(file, enable, 0); } static void ftrace_clear_events(struct trace_array *tr) { struct trace_event_file *file; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { ftrace_event_enable_disable(file, 0); } mutex_unlock(&event_mutex); } static void event_filter_pid_sched_process_exit(void *data, struct task_struct *task) { struct trace_pid_list *pid_list; struct trace_array *tr = data; pid_list = rcu_dereference_raw(tr->filtered_pids); trace_filter_add_remove_task(pid_list, NULL, task); pid_list = rcu_dereference_raw(tr->filtered_no_pids); trace_filter_add_remove_task(pid_list, NULL, task); } static void event_filter_pid_sched_process_fork(void *data, struct task_struct *self, struct task_struct *task) { struct trace_pid_list *pid_list; struct trace_array *tr = data; pid_list = rcu_dereference_sched(tr->filtered_pids); trace_filter_add_remove_task(pid_list, self, task); pid_list = rcu_dereference_sched(tr->filtered_no_pids); trace_filter_add_remove_task(pid_list, self, task); } void trace_event_follow_fork(struct trace_array *tr, bool enable) { if (enable) { register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, tr, INT_MIN); register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, tr, INT_MAX); } else { unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, tr); unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, tr); } } static void event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, struct task_struct *prev, struct task_struct *next, unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; bool ret; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); /* * Sched switch is funny, as we only want to ignore it * in the notrace case if both prev and next should be ignored. */ ret = trace_ignore_this_task(NULL, no_pid_list, prev) && trace_ignore_this_task(NULL, no_pid_list, next); this_cpu_write(tr->array_buffer.data->ignore_pid, ret || (trace_ignore_this_task(pid_list, NULL, prev) && trace_ignore_this_task(pid_list, NULL, next))); } static void event_filter_pid_sched_switch_probe_post(void *data, bool preempt, struct task_struct *prev, struct task_struct *next, unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, next)); } static void event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; /* Nothing to do if we are already tracing */ if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, task)); } static void event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; /* Nothing to do if we are not tracing */ if (this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); /* Set tracing if current is enabled */ this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, current)); } static void unregister_pid_events(struct trace_array *tr) { unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); } static void __ftrace_clear_event_pids(struct trace_array *tr, int type) { struct trace_pid_list *pid_list; struct trace_pid_list *no_pid_list; struct trace_event_file *file; int cpu; pid_list = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); /* Make sure there's something to do */ if (!pid_type_enabled(type, pid_list, no_pid_list)) return; if (!still_need_pid_events(type, pid_list, no_pid_list)) { unregister_pid_events(tr); list_for_each_entry(file, &tr->events, list) { clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); } for_each_possible_cpu(cpu) per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; } if (type & TRACE_PIDS) rcu_assign_pointer(tr->filtered_pids, NULL); if (type & TRACE_NO_PIDS) rcu_assign_pointer(tr->filtered_no_pids, NULL); /* Wait till all users are no longer using pid filtering */ tracepoint_synchronize_unregister(); if ((type & TRACE_PIDS) && pid_list) trace_pid_list_free(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) trace_pid_list_free(no_pid_list); } static void ftrace_clear_event_pids(struct trace_array *tr, int type) { mutex_lock(&event_mutex); __ftrace_clear_event_pids(tr, type); mutex_unlock(&event_mutex); } static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; WARN_ON_ONCE(system_refcount(system) == 0); if (system_refcount_dec(system)) return; list_del(&system->list); if (filter) { kfree(filter->filter_string); kfree(filter); } kfree_const(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { WARN_ON_ONCE(system_refcount(system) == 0); system_refcount_inc(system); } static void __get_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); dir->ref_count++; __get_system(dir->subsystem); } static void __put_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) kfree(dir); } static void put_system(struct trace_subsystem_dir *dir) { mutex_lock(&event_mutex); __put_system_dir(dir); mutex_unlock(&event_mutex); } static void remove_subsystem(struct trace_subsystem_dir *dir) { if (!dir) return; if (!--dir->nr_events) { eventfs_remove_dir(dir->ei); list_del(&dir->list); __put_system_dir(dir); } } void event_file_get(struct trace_event_file *file) { refcount_inc(&file->ref); } void event_file_put(struct trace_event_file *file) { if (WARN_ON_ONCE(!refcount_read(&file->ref))) { if (file->flags & EVENT_FILE_FL_FREED) kmem_cache_free(file_cachep, file); return; } if (refcount_dec_and_test(&file->ref)) { /* Count should only go to zero when it is freed */ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) return; kmem_cache_free(file_cachep, file); } } static void remove_event_file_dir(struct trace_event_file *file) { eventfs_remove_dir(file->ei); list_del(&file->list); remove_subsystem(file->system); free_event_filter(file->filter); file->flags |= EVENT_FILE_FL_FREED; event_file_put(file); } /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ static int __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { struct trace_event_file *file; struct trace_event_call *call; const char *name; int ret = -EINVAL; int eret = 0; list_for_each_entry(file, &tr->events, list) { call = file->event_call; name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; if (match && strcmp(match, name) != 0 && strcmp(match, call->class->system) != 0) continue; if (sub && strcmp(sub, call->class->system) != 0) continue; if (event && strcmp(event, name) != 0) continue; ret = ftrace_event_enable_disable(file, set); /* * Save the first error and return that. Some events * may still have been enabled, but let the user * know that something went wrong. */ if (ret && !eret) eret = ret; ret = eret; } return ret; } static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { int ret; mutex_lock(&event_mutex); ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; } int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; int ret; if (!tr) return -ENOENT; /* * The buf format can be <subsystem>:<event-name> * *:<event-name> means any event by that name. * :<event-name> is the same. * * <subsystem>:* means all events in that subsystem * <subsystem>: means the same. * * <name> (no ':') means all events in a subsystem with * the name <name> or any event that matches <name> */ match = strsep(&buf, ":"); if (buf) { sub = match; event = buf; match = NULL; if (!strlen(sub) || strcmp(sub, "*") == 0) sub = NULL; if (!strlen(event) || strcmp(event, "*") == 0) event = NULL; } ret = __ftrace_set_clr_event(tr, match, sub, event, set); /* Put back the colon to allow this to be called again */ if (buf) *(buf - 1) = ':'; return ret; } /** * trace_set_clr_event - enable or disable an event * @system: system name to match (NULL for any system) * @event: event name to match (NULL for all events, within system) * @set: 1 to enable, 0 to disable * * This is a way for other parts of the kernel to enable or disable * event recording. * * Returns 0 on success, -EINVAL if the parameters do not match any * registered events. */ int trace_set_clr_event(const char *system, const char *event, int set) { struct trace_array *tr = top_trace_array(); if (!tr) return -ENODEV; return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_set_clr_event); /** * trace_array_set_clr_event - enable or disable an event for a trace array. * @tr: concerned trace array. * @system: system name to match (NULL for any system) * @event: event name to match (NULL for all events, within system) * @enable: true to enable, false to disable * * This is a way for other parts of the kernel to enable or disable * event recording. * * Returns 0 on success, -EINVAL if the parameters do not match any * registered events. */ int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable) { int set; if (!tr) return -ENOENT; set = (enable == true) ? 1 : 0; return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_array_set_clr_event); /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 static ssize_t ftrace_event_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_parser parser; struct seq_file *m = file->private_data; struct trace_array *tr = m->private; ssize_t read, ret; if (!cnt) return 0; ret = tracing_update_buffers(tr); if (ret < 0) return ret; if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) return -ENOMEM; read = trace_get_user(&parser, ubuf, cnt, ppos); if (read >= 0 && trace_parser_loaded((&parser))) { int set = 1; if (*parser.buffer == '!') set = 0; ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); if (ret) goto out_put; } ret = read; out_put: trace_parser_put(&parser); return ret; } static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_file *file = v; struct trace_event_call *call; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { call = file->event_call; /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ if (call->class && call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } return NULL; } static void *t_start(struct seq_file *m, loff_t *pos) { struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = t_next(m, file, &l); if (!file) break; } return file; } static void * s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_file *file = v; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { if (file->flags & EVENT_FILE_FL_ENABLED) return file; } return NULL; } static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = s_next(m, file, &l); if (!file) break; } return file; } static int t_show(struct seq_file *m, void *v) { struct trace_event_file *file = v; struct trace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); seq_printf(m, "%s\n", trace_event_name(call)); return 0; } static void t_stop(struct seq_file *m, void *p) { mutex_unlock(&event_mutex); } static void * __next(struct seq_file *m, void *v, loff_t *pos, int type) { struct trace_array *tr = m->private; struct trace_pid_list *pid_list; if (type == TRACE_PIDS) pid_list = rcu_dereference_sched(tr->filtered_pids); else pid_list = rcu_dereference_sched(tr->filtered_no_pids); return trace_pid_next(pid_list, v, pos); } static void * p_next(struct seq_file *m, void *v, loff_t *pos) { return __next(m, v, pos, TRACE_PIDS); } static void * np_next(struct seq_file *m, void *v, loff_t *pos) { return __next(m, v, pos, TRACE_NO_PIDS); } static void *__start(struct seq_file *m, loff_t *pos, int type) __acquires(RCU) { struct trace_pid_list *pid_list; struct trace_array *tr = m->private; /* * Grab the mutex, to keep calls to p_next() having the same * tr->filtered_pids as p_start() has. * If we just passed the tr->filtered_pids around, then RCU would * have been enough, but doing that makes things more complex. */ mutex_lock(&event_mutex); rcu_read_lock_sched(); if (type == TRACE_PIDS) pid_list = rcu_dereference_sched(tr->filtered_pids); else pid_list = rcu_dereference_sched(tr->filtered_no_pids); if (!pid_list) return NULL; return trace_pid_start(pid_list, pos); } static void *p_start(struct seq_file *m, loff_t *pos) __acquires(RCU) { return __start(m, pos, TRACE_PIDS); } static void *np_start(struct seq_file *m, loff_t *pos) __acquires(RCU) { return __start(m, pos, TRACE_NO_PIDS); } static void p_stop(struct seq_file *m, void *p) __releases(RCU) { rcu_read_unlock_sched(); mutex_unlock(&event_mutex); } static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; unsigned long flags; char buf[4] = "0"; mutex_lock(&event_mutex); file = event_file_file(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); if (flags & EVENT_FILE_FL_SOFT_DISABLED || flags & EVENT_FILE_FL_SOFT_MODE) strcat(buf, "*"); strcat(buf, "\n"); return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; switch (val) { case 0: case 1: ret = -ENODEV; mutex_lock(&event_mutex); file = event_file_file(filp); if (likely(file)) { ret = tracing_update_buffers(file->tr); if (ret < 0) { mutex_unlock(&event_mutex); return ret; } ret = ftrace_event_enable_disable(file, val); } mutex_unlock(&event_mutex); break; default: return -EINVAL; } *ppos += cnt; return ret ? ret : cnt; } static ssize_t system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_event_call *call; struct trace_event_file *file; struct trace_array *tr = dir->tr; char buf[2]; int set = 0; int ret; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || !trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) continue; /* * We need to find out if all the events are set * or if all events or cleared, or if we have * a mixture. */ set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); /* * If we have a mixture, no need to look further. */ if (set == 3) break; } mutex_unlock(&event_mutex); buf[0] = set_to_char[set]; buf[1] = '\n'; ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); return ret; } static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; ssize_t ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; ret = tracing_update_buffers(dir->tr); if (ret < 0) return ret; if (val != 0 && val != 1) return -EINVAL; /* * Opening of "enable" adds a ref count to system, * so the name is safe to use. */ if (system) name = system->name; ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); if (ret) goto out; ret = cnt; out: *ppos += cnt; return ret; } enum { FORMAT_HEADER = 1, FORMAT_FIELD_SEPERATOR = 2, FORMAT_PRINTFMT = 3, }; static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_file *file = event_file_data(m->private); struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: node = common_head; break; case FORMAT_FIELD_SEPERATOR: node = head; break; case FORMAT_PRINTFMT: /* all done */ return NULL; } node = node->prev; if (node == common_head) return (void *)FORMAT_FIELD_SEPERATOR; else if (node == head) return (void *)FORMAT_PRINTFMT; else return node; } static int f_show(struct seq_file *m, void *v) { struct trace_event_file *file = event_file_data(m->private); struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; switch ((unsigned long)v) { case FORMAT_HEADER: seq_printf(m, "name: %s\n", trace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_puts(m, "format:\n"); return 0; case FORMAT_FIELD_SEPERATOR: seq_putc(m, '\n'); return 0; case FORMAT_PRINTFMT: seq_printf(m, "\nprint fmt: %s\n", call->print_fmt); return 0; } field = list_entry(v, struct ftrace_event_field, link); /* * Smartly shows the array type(except dynamic array). * Normal: * field:TYPE VAR * If TYPE := TYPE[LEN], it is shown: * field:TYPE VAR[LEN] */ array_descriptor = strchr(field->type, '['); if (str_has_prefix(field->type, "__data_loc")) array_descriptor = NULL; if (!array_descriptor) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); else if (field->len) seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, field->len, field->offset, field->size, !!field->is_signed); else seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, field->offset, field->size, !!field->is_signed); return 0; } static void *f_start(struct seq_file *m, loff_t *pos) { struct trace_event_file *file; void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); file = event_file_file(m->private); if (!file) return ERR_PTR(-ENODEV); while (l < *pos && p) p = f_next(m, p, &l); return p; } static void f_stop(struct seq_file *m, void *p) { mutex_unlock(&event_mutex); } static const struct seq_operations trace_format_seq_ops = { .start = f_start, .next = f_next, .stop = f_stop, .show = f_show, }; static int trace_format_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; /* Do we want to hide event format files on tracefs lockdown? */ ret = seq_open(file, &trace_format_seq_ops); if (ret < 0) return ret; m = file->private_data; m->private = file; return 0; } #ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { int id = (long)event_file_data(filp); char buf[32]; int len; if (unlikely(!id)) return -ENODEV; len = sprintf(buf, "%d\n", id); return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } #endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; struct trace_seq *s; int r = -ENODEV; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); mutex_lock(&event_mutex); file = event_file_file(filp); if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); if (file) r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; char *buf; int err = -ENODEV; if (cnt >= PAGE_SIZE) return -EINVAL; buf = memdup_user_nul(ubuf, cnt); if (IS_ERR(buf)) return PTR_ERR(buf); mutex_lock(&event_mutex); file = event_file_file(filp); if (file) { if (file->flags & EVENT_FILE_FL_FREED) err = -ENODEV; else err = apply_event_filter(file, buf); } mutex_unlock(&event_mutex); kfree(buf); if (err < 0) return err; *ppos += cnt; return cnt; } static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { struct trace_subsystem_dir *dir = NULL, *iter_dir; struct trace_array *tr = NULL, *iter_tr; struct event_subsystem *system = NULL; int ret; if (tracing_is_disabled()) return -ENODEV; /* Make sure the system still exists */ mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { list_for_each_entry(iter_dir, &iter_tr->systems, list) { if (iter_dir == inode->i_private) { /* Don't open systems with no events */ tr = iter_tr; dir = iter_dir; if (dir->nr_events) { __get_system_dir(dir); system = dir->subsystem; } goto exit_loop; } } } exit_loop: mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); if (!system) return -ENODEV; /* Still need to increment the ref count of the system */ if (trace_array_get(tr) < 0) { put_system(dir); return -ENODEV; } ret = tracing_open_generic(inode, filp); if (ret < 0) { trace_array_put(tr); put_system(dir); } return ret; } static int system_tr_open(struct inode *inode, struct file *filp) { struct trace_subsystem_dir *dir; struct trace_array *tr = inode->i_private; int ret; /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return -ENOMEM; ret = tracing_open_generic_tr(inode, filp); if (ret < 0) { kfree(dir); return ret; } dir->tr = tr; filp->private_data = dir; return 0; } static int subsystem_release(struct inode *inode, struct file *file) { struct trace_subsystem_dir *dir = file->private_data; trace_array_put(dir->tr); /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable * all subsystems. */ if (dir->subsystem) put_system(dir); else kfree(dir); return 0; } static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); print_subsystem_event_filter(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; char *buf; int err; if (cnt >= PAGE_SIZE) return -EINVAL; buf = memdup_user_nul(ubuf, cnt); if (IS_ERR(buf)) return PTR_ERR(buf); err = apply_subsystem_event_filter(dir, buf); kfree(buf); if (err < 0) return err; *ppos += cnt; return cnt; } static ssize_t show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; struct trace_seq *s; int r; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); ring_buffer_print_page_header(tr->array_buffer.buffer, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static ssize_t show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_seq *s; int r; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); ring_buffer_print_entry_header(s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static void ignore_task_cpu(void *data) { struct trace_array *tr = data; struct trace_pid_list *pid_list; struct trace_pid_list *no_pid_list; /* * This function is called by on_each_cpu() while the * event_mutex is held. */ pid_list = rcu_dereference_protected(tr->filtered_pids, mutex_is_locked(&event_mutex)); no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, mutex_is_locked(&event_mutex)); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, current)); } static void register_pid_events(struct trace_array *tr) { /* * Register a probe that is called before all other probes * to set ignore_pid if next or prev do not match. * Register a probe this is called after all other probes * to only keep ignore_pid set if next pid matches. */ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, tr, INT_MAX); register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, tr, 0); register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr, 0); register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr, 0); register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr, 0); } static ssize_t event_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, int type) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; struct trace_pid_list *filtered_pids = NULL; struct trace_pid_list *other_pids = NULL; struct trace_pid_list *pid_list; struct trace_event_file *file; ssize_t ret; if (!cnt) return 0; ret = tracing_update_buffers(tr); if (ret < 0) return ret; mutex_lock(&event_mutex); if (type == TRACE_PIDS) { filtered_pids = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); other_pids = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); } else { filtered_pids = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); other_pids = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); if (ret < 0) goto out; if (type == TRACE_PIDS) rcu_assign_pointer(tr->filtered_pids, pid_list); else rcu_assign_pointer(tr->filtered_no_pids, pid_list); list_for_each_entry(file, &tr->events, list) { set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); } if (filtered_pids) { tracepoint_synchronize_unregister(); trace_pid_list_free(filtered_pids); } else if (pid_list && !other_pids) { register_pid_events(tr); } /* * Ignoring of pids is done at task switch. But we have to * check for those tasks that are currently running. * Always do this in case a pid was appended or removed. */ on_each_cpu(ignore_task_cpu, tr, 1); out: mutex_unlock(&event_mutex); if (ret > 0) *ppos += ret; return ret; } static ssize_t ftrace_event_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); } static ssize_t ftrace_event_npid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); } static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, .show = t_show, .stop = t_stop, }; static const struct seq_operations show_set_event_seq_ops = { .start = s_start, .next = s_next, .show = t_show, .stop = t_stop, }; static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, .show = trace_pid_show, .stop = p_stop, }; static const struct seq_operations show_set_no_pid_seq_ops = { .start = np_start, .next = np_next, .show = trace_pid_show, .stop = p_stop, }; static const struct file_operations ftrace_avail_fops = { .open = ftrace_event_avail_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_set_open, .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, .write = ftrace_event_pid_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_set_event_notrace_pid_fops = { .open = ftrace_event_set_npid_open, .read = seq_read, .write = ftrace_event_npid_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { .open = tracing_open_file_tr, .read = event_enable_read, .write = event_enable_write, .release = tracing_release_file_tr, .llseek = default_llseek, }; static const struct file_operations ftrace_event_format_fops = { .open = trace_format_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; #ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; #endif static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, .read = event_filter_read, .write = event_filter_write, .release = tracing_release_file_tr, .llseek = default_llseek, }; static const struct file_operations ftrace_subsystem_filter_fops = { .open = subsystem_open, .read = subsystem_filter_read, .write = subsystem_filter_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_system_enable_fops = { .open = subsystem_open, .read = system_enable_read, .write = system_enable_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_tr_enable_fops = { .open = system_tr_open, .read = system_enable_read, .write = system_enable_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_show_header_page_fops = { .open = tracing_open_generic_tr, .read = show_header_page_file, .llseek = default_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations ftrace_show_header_event_fops = { .open = tracing_open_generic_tr, .read = show_header_event_file, .llseek = default_llseek, .release = tracing_release_generic_tr, }; static int ftrace_event_open(struct inode *inode, struct file *file, const struct seq_operations *seq_ops) { struct seq_file *m; int ret; ret = security_locked_down(LOCKDOWN_TRACEFS); if (ret) return ret; ret = seq_open(file, seq_ops); if (ret < 0) return ret; m = file->private_data; /* copy tr over to seq ops */ m->private = inode->i_private; return ret; } static int ftrace_event_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); return seq_release(inode, file); } static int ftrace_event_avail_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_event_seq_ops; /* Checks for tracefs lockdown */ return ftrace_event_open(inode, file, seq_ops); } static int ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_pid_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_event_pids(tr, TRACE_PIDS); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static int ftrace_event_set_npid_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_event_pids(tr, TRACE_NO_PIDS); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static struct event_subsystem * create_new_subsystem(const char *name) { struct event_subsystem *system; /* need to create new entry */ system = kmalloc(sizeof(*system), GFP_KERNEL); if (!system) return NULL; system->ref_count = 1; /* Only allocate if dynamic (kprobes and modules) */ system->name = kstrdup_const(name, GFP_KERNEL); if (!system->name) goto out_free; system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); if (!system->filter) goto out_free; list_add(&system->list, &event_subsystems); return system; out_free: kfree_const(system->name); kfree(system); return NULL; } static int system_callback(const char *name, umode_t *mode, void **data, const struct file_operations **fops) { if (strcmp(name, "filter") == 0) *fops = &ftrace_subsystem_filter_fops; else if (strcmp(name, "enable") == 0) *fops = &ftrace_system_enable_fops; else return 0; *mode = TRACE_MODE_WRITE; return 1; } static struct eventfs_inode * event_subsystem_dir(struct trace_array *tr, const char *name, struct trace_event_file *file, struct eventfs_inode *parent) { struct event_subsystem *system, *iter; struct trace_subsystem_dir *dir; struct eventfs_inode *ei; int nr_entries; static struct eventfs_entry system_entries[] = { { .name = "filter", .callback = system_callback, }, { .name = "enable", .callback = system_callback, } }; /* First see if we did not already create this dir */ list_for_each_entry(dir, &tr->systems, list) { system = dir->subsystem; if (strcmp(system->name, name) == 0) { dir->nr_events++; file->system = dir; return dir->ei; } } /* Now see if the system itself exists. */ system = NULL; list_for_each_entry(iter, &event_subsystems, list) { if (strcmp(iter->name, name) == 0) { system = iter; break; } } dir = kmalloc(sizeof(*dir), GFP_KERNEL); if (!dir) goto out_fail; if (!system) { system = create_new_subsystem(name); if (!system) goto out_free; } else __get_system(system); /* ftrace only has directories no files */ if (strcmp(name, "ftrace") == 0) nr_entries = 0; else nr_entries = ARRAY_SIZE(system_entries); ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir); if (IS_ERR(ei)) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } dir->ei = ei; dir->tr = tr; dir->ref_count = 1; dir->nr_events = 1; dir->subsystem = system; file->system = dir; list_add(&dir->list, &tr->systems); return dir->ei; out_free: kfree(dir); out_fail: /* Only print this message if failed on memory allocation */ if (!dir || !system) pr_warn("No memory to create event subsystem %s\n", name); return NULL; } static int event_define_fields(struct trace_event_call *call) { struct list_head *head; int ret = 0; /* * Other events may have the same class. Only update * the fields if they are not already defined. */ head = trace_get_fields(call); if (list_empty(head)) { struct trace_event_fields *field = call->class->fields_array; unsigned int offset = sizeof(struct trace_entry); for (; field->type; field++) { if (field->type == TRACE_FUNCTION_TYPE) { field->define_fields(call); break; } offset = ALIGN(offset, field->align); ret = trace_define_field_ext(call, field->type, field->name, offset, field->size, field->is_signed, field->filter_type, field->len, field->needs_test); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; } offset += field->size; } } return ret; } static int event_callback(const char *name, umode_t *mode, void **data, const struct file_operations **fops) { struct trace_event_file *file = *data; struct trace_event_call *call = file->event_call; if (strcmp(name, "format") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_event_format_fops; return 1; } /* * Only event directories that can be enabled should have * triggers or filters, with the exception of the "print" * event that can have a "trigger" file. */ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { if (call->class->reg && strcmp(name, "enable") == 0) { *mode = TRACE_MODE_WRITE; *fops = &ftrace_enable_fops; return 1; } if (strcmp(name, "filter") == 0) { *mode = TRACE_MODE_WRITE; *fops = &ftrace_event_filter_fops; return 1; } } if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || strcmp(trace_event_name(call), "print") == 0) { if (strcmp(name, "trigger") == 0) { *mode = TRACE_MODE_WRITE; *fops = &event_trigger_fops; return 1; } } #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg && strcmp(name, "id") == 0) { *mode = TRACE_MODE_READ; *data = (void *)(long)call->event.type; *fops = &ftrace_event_id_fops; return 1; } #endif #ifdef CONFIG_HIST_TRIGGERS if (strcmp(name, "hist") == 0) { *mode = TRACE_MODE_READ; *fops = &event_hist_fops; return 1; } #endif #ifdef CONFIG_HIST_TRIGGERS_DEBUG if (strcmp(name, "hist_debug") == 0) { *mode = TRACE_MODE_READ; *fops = &event_hist_debug_fops; return 1; } #endif #ifdef CONFIG_TRACE_EVENT_INJECT if (call->event.type && call->class->reg && strcmp(name, "inject") == 0) { *mode = 0200; *fops = &event_inject_fops; return 1; } #endif return 0; } /* The file is incremented on creation and freeing the enable file decrements it */ static void event_release(const char *name, void *data) { struct trace_event_file *file = data; event_file_put(file); } static int event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; struct eventfs_inode *e_events; struct eventfs_inode *ei; const char *name; int nr_entries; int ret; static struct eventfs_entry event_entries[] = { { .name = "enable", .callback = event_callback, .release = event_release, }, { .name = "filter", .callback = event_callback, }, { .name = "trigger", .callback = event_callback, }, { .name = "format", .callback = event_callback, }, #ifdef CONFIG_PERF_EVENTS { .name = "id", .callback = event_callback, }, #endif #ifdef CONFIG_HIST_TRIGGERS { .name = "hist", .callback = event_callback, }, #endif #ifdef CONFIG_HIST_TRIGGERS_DEBUG { .name = "hist_debug", .callback = event_callback, }, #endif #ifdef CONFIG_TRACE_EVENT_INJECT { .name = "inject", .callback = event_callback, }, #endif }; /* * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". This should * never happen. */ if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0)) return -ENODEV; e_events = event_subsystem_dir(tr, call->class->system, file, parent); if (!e_events) return -ENOMEM; nr_entries = ARRAY_SIZE(event_entries); name = trace_event_name(call); ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file); if (IS_ERR(ei)) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } file->ei = ei; ret = event_define_fields(call); if (ret < 0) { pr_warn("Could not initialize trace point events/%s\n", name); return ret; } /* Gets decremented on freeing of the "enable" file */ event_file_get(file); return 0; } static void remove_event_from_tracers(struct trace_event_call *call) { struct trace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { if (file->event_call != call) continue; remove_event_file_dir(file); /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); } static void event_remove(struct trace_event_call *call) { struct trace_array *tr; struct trace_event_file *file; do_for_each_event_file(tr, file) { if (file->event_call != call) continue; if (file->flags & EVENT_FILE_FL_WAS_ENABLED) tr->clear_trace = true; ftrace_event_enable_disable(file, 0); /* * The do_for_each_event_file() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); if (call->event.funcs) __unregister_trace_event(&call->event); remove_event_from_tracers(call); list_del(&call->list); } static int event_init(struct trace_event_call *call) { int ret = 0; const char *name; name = trace_event_name(call); if (WARN_ON(!name)) return -EINVAL; if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) pr_warn("Could not initialize trace events/%s\n", name); } return ret; } static int __register_event(struct trace_event_call *call, struct module *mod) { int ret; ret = event_init(call); if (ret < 0) return ret; list_add(&call->list, &ftrace_events); if (call->flags & TRACE_EVENT_FL_DYNAMIC) atomic_set(&call->refcnt, 0); else call->module = mod; return 0; } static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) { int rlen; int elen; /* Find the length of the eval value as a string */ elen = snprintf(ptr, 0, "%ld", map->eval_value); /* Make sure there's enough room to replace the string with the value */ if (len < elen) return NULL; snprintf(ptr, elen + 1, "%ld", map->eval_value); /* Get the rest of the string of ptr */ rlen = strlen(ptr + len); memmove(ptr + elen, ptr + len, rlen); /* Make sure we end the new string */ ptr[elen + rlen] = 0; return ptr + elen; } static void update_event_printk(struct trace_event_call *call, struct trace_eval_map *map) { char *ptr; int quote = 0; int len = strlen(map->eval_string); for (ptr = call->print_fmt; *ptr; ptr++) { if (*ptr == '\\') { ptr++; /* paranoid */ if (!*ptr) break; continue; } if (*ptr == '"') { quote ^= 1; continue; } if (quote) continue; if (isdigit(*ptr)) { /* skip numbers */ do { ptr++; /* Check for alpha chars like ULL */ } while (isalnum(*ptr)); if (!*ptr) break; /* * A number must have some kind of delimiter after * it, and we can ignore that too. */ continue; } if (isalpha(*ptr) || *ptr == '_') { if (strncmp(map->eval_string, ptr, len) == 0 && !isalnum(ptr[len]) && ptr[len] != '_') { ptr = eval_replace(ptr, map, len); /* enum/sizeof string smaller than value */ if (WARN_ON_ONCE(!ptr)) return; /* * No need to decrement here, as eval_replace() * returns the pointer to the character passed * the eval, and two evals can not be placed * back to back without something in between. * We can skip that something in between. */ continue; } skip_more: do { ptr++; } while (isalnum(*ptr) || *ptr == '_'); if (!*ptr) break; /* * If what comes after this variable is a '.' or * '->' then we can continue to ignore that string. */ if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { ptr += *ptr == '.' ? 1 : 2; if (!*ptr) break; goto skip_more; } /* * Once again, we can skip the delimiter that came * after the string. */ continue; } } } static void add_str_to_module(struct module *module, char *str) { struct module_string *modstr; modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); /* * If we failed to allocate memory here, then we'll just * let the str memory leak when the module is removed. * If this fails to allocate, there's worse problems than * a leaked string on module removal. */ if (WARN_ON_ONCE(!modstr)) return; modstr->module = module; modstr->str = str; list_add(&modstr->next, &module_strings); } static void update_event_fields(struct trace_event_call *call, struct trace_eval_map *map) { struct ftrace_event_field *field; struct list_head *head; char *ptr; char *str; int len = strlen(map->eval_string); /* Dynamic events should never have field maps */ if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) return; head = trace_get_fields(call); list_for_each_entry(field, head, link) { ptr = strchr(field->type, '['); if (!ptr) continue; ptr++; if (!isalpha(*ptr) && *ptr != '_') continue; if (strncmp(map->eval_string, ptr, len) != 0) continue; str = kstrdup(field->type, GFP_KERNEL); if (WARN_ON_ONCE(!str)) return; ptr = str + (ptr - field->type); ptr = eval_replace(ptr, map, len); /* enum/sizeof string smaller than value */ if (WARN_ON_ONCE(!ptr)) { kfree(str); continue; } /* * If the event is part of a module, then we need to free the string * when the module is removed. Otherwise, it will stay allocated * until a reboot. */ if (call->module) add_str_to_module(call->module, str); field->type = str; } } void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; bool first = false; int last_i; int i; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { first = true; last_i = 0; last_system = call->class->system; } /* * Since calls are grouped by systems, the likelihood that the * next call in the iteration belongs to the same system as the * previous call is high. As an optimization, we skip searching * for a map[] that matches the call's system if the last call * was from the same system. That's what last_i is for. If the * call has the same system as the previous call, then last_i * will be the index of the first map[] that has a matching * system. */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ if (first) { last_i = i; first = false; } update_event_printk(call, map[i]); update_event_fields(call, map[i]); } } cond_resched(); } up_write(&trace_event_sem); } static bool event_in_systems(struct trace_event_call *call, const char *systems) { const char *system; const char *p; if (!systems) return true; system = call->class->system; p = strstr(systems, system); if (!p) return false; if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',') return false; p += strlen(system); return !*p || isspace(*p) || *p == ','; } static struct trace_event_file * trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; struct trace_event_file *file; unsigned int first; if (!event_in_systems(call, tr->system_names)) return NULL; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) return ERR_PTR(-ENOMEM); pid_list = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); if (!trace_pid_list_first(pid_list, &first) || !trace_pid_list_first(no_pid_list, &first)) file->flags |= EVENT_FILE_FL_PID_FILTER; file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); refcount_set(&file->ref, 1); return file; } #define MAX_BOOT_TRIGGERS 32 static struct boot_triggers { const char *event; char *trigger; } bootup_triggers[MAX_BOOT_TRIGGERS]; static char bootup_trigger_buf[COMMAND_LINE_SIZE]; static int nr_boot_triggers; static __init int setup_trace_triggers(char *str) { char *trigger; char *buf; int i; strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event triggers"); buf = bootup_trigger_buf; for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { trigger = strsep(&buf, ","); if (!trigger) break; bootup_triggers[i].event = strsep(&trigger, "."); bootup_triggers[i].trigger = trigger; if (!bootup_triggers[i].trigger) break; } nr_boot_triggers = i; return 1; } __setup("trace_trigger=", setup_trace_triggers); /* Add an event to a trace directory */ static int __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; file = trace_create_new_event(call, tr); /* * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed * allocation, or NULL if the event is not part of the tr->system_names. * When the event is not part of the tr->system_names, return zero, not * an error. */ if (!file) return 0; if (IS_ERR(file)) return PTR_ERR(file); if (eventdir_initialized) return event_create_dir(tr->event_dir, file); else return event_define_fields(call); } static void trace_early_triggers(struct trace_event_file *file, const char *name) { int ret; int i; for (i = 0; i < nr_boot_triggers; i++) { if (strcmp(name, bootup_triggers[i].event)) continue; mutex_lock(&event_mutex); ret = trigger_process_regex(file, bootup_triggers[i].trigger); mutex_unlock(&event_mutex); if (ret) pr_err("Failed to register trigger '%s' on event %s\n", bootup_triggers[i].trigger, bootup_triggers[i].event); } } /* * Just create a descriptor for early init. A descriptor is required * for enabling events at boot. We want to enable events before * the filesystem is initialized. */ static int __trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; int ret; file = trace_create_new_event(call, tr); /* * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed * allocation, or NULL if the event is not part of the tr->system_names. * When the event is not part of the tr->system_names, return zero, not * an error. */ if (!file) return 0; if (IS_ERR(file)) return PTR_ERR(file); ret = event_define_fields(call); if (ret) return ret; trace_early_triggers(file, trace_event_name(call)); return 0; } struct ftrace_module_file_ops; static void __add_event_to_tracers(struct trace_event_call *call); /* Add an additional event_call dynamically */ int trace_add_event_call(struct trace_event_call *call) { int ret; lockdep_assert_held(&event_mutex); mutex_lock(&trace_types_lock); ret = __register_event(call, NULL); if (ret >= 0) __add_event_to_tracers(call); mutex_unlock(&trace_types_lock); return ret; } EXPORT_SYMBOL_GPL(trace_add_event_call); /* * Must be called under locking of trace_types_lock, event_mutex and * trace_event_sem. */ static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); } static int probe_remove_event_call(struct trace_event_call *call) { struct trace_array *tr; struct trace_event_file *file; #ifdef CONFIG_PERF_EVENTS if (call->perf_refcount) return -EBUSY; #endif do_for_each_event_file(tr, file) { if (file->event_call != call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) goto busy; if (file->flags & EVENT_FILE_FL_WAS_ENABLED) tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); __trace_remove_event_call(call); return 0; busy: /* No need to clear the trace now */ list_for_each_entry(tr, &ftrace_trace_arrays, list) { tr->clear_trace = false; } return -EBUSY; } /* Remove an event_call */ int trace_remove_event_call(struct trace_event_call *call) { int ret; lockdep_assert_held(&event_mutex); mutex_lock(&trace_types_lock); down_write(&trace_event_sem); ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&trace_types_lock); return ret; } EXPORT_SYMBOL_GPL(trace_remove_event_call); #define for_each_event(event, start, end) \ for (event = start; \ (unsigned long)event < (unsigned long)end; \ event++) #ifdef CONFIG_MODULES static void trace_module_add_events(struct module *mod) { struct trace_event_call **call, **start, **end; if (!mod->num_trace_events) return; /* Don't add infrastructure for mods without tracepoints */ if (trace_module_has_bad_taint(mod)) { pr_err("%s: module has bad taint, not creating trace events\n", mod->name); return; } start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; for_each_event(call, start, end) { __register_event(*call, mod); __add_event_to_tracers(*call); } } static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; struct module_string *modstr, *m; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) continue; if (call->module == mod) __trace_remove_event_call(call); } /* Check for any strings allocade for this module */ list_for_each_entry_safe(modstr, m, &module_strings, next) { if (modstr->module != mod) continue; list_del(&modstr->next); kfree(modstr->str); kfree(modstr); } up_write(&trace_event_sem); /* * It is safest to reset the ring buffer if the module being unloaded * registered any events that were used. The only worry is if * a new module gets loaded, and takes on the same id as the events * of this module. When printing out the buffer, traced events left * over from this module may be passed to the new module events and * unexpected results may occur. */ tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); break; case MODULE_STATE_GOING: trace_module_remove_events(mod); break; } mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); return NOTIFY_OK; } static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 1, /* higher than trace.c module notify */ }; #endif /* CONFIG_MODULES */ /* Create a new event directory structure for a trace directory. */ static void __trace_add_event_dirs(struct trace_array *tr) { struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create directory for event %s\n", trace_event_name(call)); } } /* Returns any file that matches the system and event */ struct trace_event_file * __find_event_file(struct trace_array *tr, const char *system, const char *event) { struct trace_event_file *file; struct trace_event_call *call; const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; name = trace_event_name(call); if (!name || !call->class) continue; if (strcmp(event, name) == 0 && strcmp(system, call->class->system) == 0) return file; } return NULL; } /* Returns valid trace event files that match system and event */ struct trace_event_file * find_event_file(struct trace_array *tr, const char *system, const char *event) { struct trace_event_file *file; file = __find_event_file(tr, system, event); if (!file || !file->event_call->class->reg || file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) return NULL; return file; } /** * trace_get_event_file - Find and return a trace event file * @instance: The name of the trace instance containing the event * @system: The name of the system containing the event * @event: The name of the event * * Return a trace event file given the trace instance name, trace * system, and trace event name. If the instance name is NULL, it * refers to the top-level trace array. * * This function will look it up and return it if found, after calling * trace_array_get() to prevent the instance from going away, and * increment the event's module refcount to prevent it from being * removed. * * To release the file, call trace_put_event_file(), which will call * trace_array_put() and decrement the event's module refcount. * * Return: The trace event on success, ERR_PTR otherwise. */ struct trace_event_file *trace_get_event_file(const char *instance, const char *system, const char *event) { struct trace_array *tr = top_trace_array(); struct trace_event_file *file = NULL; int ret = -EINVAL; if (instance) { tr = trace_array_find_get(instance); if (!tr) return ERR_PTR(-ENOENT); } else { ret = trace_array_get(tr); if (ret) return ERR_PTR(ret); } mutex_lock(&event_mutex); file = find_event_file(tr, system, event); if (!file) { trace_array_put(tr); ret = -EINVAL; goto out; } /* Don't let event modules unload while in use */ ret = trace_event_try_get_ref(file->event_call); if (!ret) { trace_array_put(tr); ret = -EBUSY; goto out; } ret = 0; out: mutex_unlock(&event_mutex); if (ret) file = ERR_PTR(ret); return file; } EXPORT_SYMBOL_GPL(trace_get_event_file); /** * trace_put_event_file - Release a file from trace_get_event_file() * @file: The trace event file * * If a file was retrieved using trace_get_event_file(), this should * be called when it's no longer needed. It will cancel the previous * trace_array_get() called by that function, and decrement the * event's module refcount. */ void trace_put_event_file(struct trace_event_file *file) { mutex_lock(&event_mutex); trace_event_put_ref(file->event_call); mutex_unlock(&event_mutex); trace_array_put(file->tr); } EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" struct event_probe_data { struct trace_event_file *file; unsigned long count; int ref; bool enable; }; static void update_event_probe(struct event_probe_data *data) { if (data->enable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); else set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); } static void event_enable_probe(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (!pdata || !*pdata) return; edata = *pdata; update_event_probe(edata); } static void event_enable_count_probe(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (!pdata || !*pdata) return; edata = *pdata; if (!edata->count) return; /* Skip if the event is in a state we want to switch to */ if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (edata->count != -1) (edata->count)--; update_event_probe(edata); } static int event_enable_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (WARN_ON_ONCE(!pdata || !*pdata)) return 0; edata = *pdata; seq_printf(m, "%ps:", (void *)ip); seq_printf(m, "%s:%s:%s", edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, edata->file->event_call->class->system, trace_event_name(edata->file->event_call)); if (edata->count == -1) seq_puts(m, ":unlimited\n"); else seq_printf(m, ":count=%ld\n", edata->count); return 0; } static int event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data) { struct ftrace_func_mapper *mapper = *data; struct event_probe_data *edata = init_data; int ret; if (!mapper) { mapper = allocate_ftrace_func_mapper(); if (!mapper) return -ENODEV; *data = mapper; } ret = ftrace_func_mapper_add_ip(mapper, ip, edata); if (ret < 0) return ret; edata->ref++; return 0; } static int free_probe_data(void *data) { struct event_probe_data *edata = data; edata->ref--; if (!edata->ref) { /* Remove the SOFT_MODE flag */ __ftrace_event_enable_disable(edata->file, 0, 1); trace_event_put_ref(edata->file->event_call); kfree(edata); } return 0; } static void event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; if (!ip) { if (!mapper) return; free_ftrace_func_mapper(mapper, free_probe_data); return; } edata = ftrace_func_mapper_remove_ip(mapper, ip); if (WARN_ON_ONCE(!edata)) return; if (WARN_ON_ONCE(edata->ref <= 0)) return; free_probe_data(edata); } static struct ftrace_probe_ops event_enable_probe_ops = { .func = event_enable_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_enable_count_probe_ops = { .func = event_enable_count_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_disable_probe_ops = { .func = event_enable_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_disable_count_probe_ops = { .func = event_enable_count_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static int event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enabled) { struct trace_event_file *file; struct ftrace_probe_ops *ops; struct event_probe_data *data; const char *system; const char *event; char *number; bool enable; int ret; if (!tr) return -ENODEV; /* hash funcs only work with set_ftrace_filter */ if (!enabled || !param) return -EINVAL; system = strsep(&param, ":"); if (!param) return -EINVAL; event = strsep(&param, ":"); mutex_lock(&event_mutex); ret = -EINVAL; file = find_event_file(tr, system, event); if (!file) goto out; enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; if (enable) ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; else ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; if (glob[0] == '!') { ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); goto out; } ret = -ENOMEM; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; data->enable = enable; data->count = -1; data->file = file; if (!param) goto out_reg; number = strsep(&param, ":"); ret = -EINVAL; if (!strlen(number)) goto out_free; /* * We use the callback data field (which is a pointer) * as our counter. */ ret = kstrtoul(number, 0, &data->count); if (ret) goto out_free; out_reg: /* Don't let event modules unload while probe registered */ ret = trace_event_try_get_ref(file->event_call); if (!ret) { ret = -EBUSY; goto out_free; } ret = __ftrace_event_enable_disable(file, 1, 1); if (ret < 0) goto out_put; ret = register_ftrace_function_probe(glob, tr, ops, data); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. */ if (!ret) { ret = -ENOENT; goto out_disable; } else if (ret < 0) goto out_disable; /* Just return zero, not the number of enabled functions */ ret = 0; out: mutex_unlock(&event_mutex); return ret; out_disable: __ftrace_event_enable_disable(file, 0, 1); out_put: trace_event_put_ref(file->event_call); out_free: kfree(data); goto out; } static struct ftrace_func_command event_enable_cmd = { .name = ENABLE_EVENT_STR, .func = event_enable_func, }; static struct ftrace_func_command event_disable_cmd = { .name = DISABLE_EVENT_STR, .func = event_enable_func, }; static __init int register_event_cmds(void) { int ret; ret = register_ftrace_command(&event_enable_cmd); if (WARN_ON(ret < 0)) return ret; ret = register_ftrace_command(&event_disable_cmd); if (WARN_ON(ret < 0)) unregister_ftrace_command(&event_enable_cmd); return ret; } #else static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* * The top level array and trace arrays created by boot-time tracing * have already had its trace_event_file descriptors created in order * to allow for early events to be recorded. * This function is called after the tracefs has been initialized, * and we now have to create the files associated to the events. */ static void __trace_early_add_event_dirs(struct trace_array *tr) { struct trace_event_file *file; int ret; list_for_each_entry(file, &tr->events, list) { ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warn("Could not create directory for event %s\n", trace_event_name(file->event_call)); } } /* * For early boot up, the top trace array and the trace arrays created * by boot-time tracing require to have a list of events that can be * enabled. This must be done before the filesystem is set up in order * to allow events to be traced early. */ void __trace_early_add_events(struct trace_array *tr) { struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { /* Early boot up should not have any modules loaded */ if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) && WARN_ON_ONCE(call->module)) continue; ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create early event %s\n", trace_event_name(call)); } } /* Remove the event directory structure for a trace directory. */ static void __trace_remove_event_dirs(struct trace_array *tr) { struct trace_event_file *file, *next; list_for_each_entry_safe(file, next, &tr->events, list) remove_event_file_dir(file); } static void __add_event_to_tracers(struct trace_event_call *call) { struct trace_array *tr; list_for_each_entry(tr, &ftrace_trace_arrays, list) __trace_add_new_event(call, tr); } extern struct trace_event_call *__start_ftrace_events[]; extern struct trace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event tracing"); return 1; } __setup("trace_event=", setup_trace_event); static int events_callback(const char *name, umode_t *mode, void **data, const struct file_operations **fops) { if (strcmp(name, "enable") == 0) { *mode = TRACE_MODE_WRITE; *fops = &ftrace_tr_enable_fops; return 1; } if (strcmp(name, "header_page") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_show_header_page_fops; } else if (strcmp(name, "header_event") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_show_header_event_fops; } else return 0; return 1; } /* Expects to have event_mutex held when called */ static int create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) { struct eventfs_inode *e_events; struct dentry *entry; int nr_entries; static struct eventfs_entry events_entries[] = { { .name = "enable", .callback = events_callback, }, { .name = "header_page", .callback = events_callback, }, { .name = "header_event", .callback = events_callback, }, }; entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_fops); if (!entry) return -ENOMEM; nr_entries = ARRAY_SIZE(events_entries); e_events = eventfs_create_events_dir("events", parent, events_entries, nr_entries, tr); if (IS_ERR(e_events)) { pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } /* There are not as crucial, just warn if they are not created */ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_pid_fops); trace_create_file("set_event_notrace_pid", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_notrace_pid_fops); tr->event_dir = e_events; return 0; } /** * event_trace_add_tracer - add a instance of a trace_array to events * @parent: The parent dentry to place the files/directories for events in * @tr: The trace array associated with these events * * When a new instance is created, it needs to set up its events * directory, as well as other files associated with events. It also * creates the event hierarchy in the @parent/events directory. * * Returns 0 on success. * * Must be called with event_mutex held. */ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) goto out; down_write(&trace_event_sem); /* If tr already has the event list, it is initialized in early boot. */ if (unlikely(!list_empty(&tr->events))) __trace_early_add_event_dirs(tr); else __trace_add_event_dirs(tr); up_write(&trace_event_sem); out: return ret; } /* * The top trace array already had its file descriptors created. * Now the files themselves need to be created. */ static __init int early_event_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; mutex_lock(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) goto out_unlock; down_write(&trace_event_sem); __trace_early_add_event_dirs(tr); up_write(&trace_event_sem); out_unlock: mutex_unlock(&event_mutex); return ret; } /* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { lockdep_assert_held(&event_mutex); /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); /* Clear the pid list */ __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); /* Disable any running events */ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); /* Make sure no more events are being executed */ tracepoint_synchronize_unregister(); down_write(&trace_event_sem); __trace_remove_event_dirs(tr); eventfs_remove_events_dir(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; return 0; } static __init int event_trace_memsetup(void) { field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); return 0; } __init void early_enable_events(struct trace_array *tr, char *buf, bool disable_first) { char *token; int ret; while (true) { token = strsep(&buf, ","); if (!token) break; if (*token) { /* Restarting syscalls requires that we stop them first */ if (disable_first) ftrace_set_clr_event(tr, token, 0); ret = ftrace_set_clr_event(tr, token, 1); if (ret) pr_warn("Failed to enable trace event: %s\n", token); } /* Put back the comma to allow this to be called again */ if (buf) *(buf - 1) = ','; } } static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); struct trace_event_call **iter, *call; int ret; if (!tr) return -ENODEV; for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { call = *iter; ret = event_init(call); if (!ret) list_add(&call->list, &ftrace_events); } register_trigger_cmds(); /* * We need the top trace array to have a working set of trace * points at early init, before the debug files and directories * are created. Create the file entries now, and attach them * to the actual file dentries later. */ __trace_early_add_events(tr); early_enable_events(tr, bootup_event_buf, false); trace_printk_start_comm(); register_event_cmds(); return 0; } /* * event_trace_enable() is called from trace_event_init() first to * initialize events and perhaps start any events that are on the * command line. Unfortunately, there are some events that will not * start this early, like the system call tracepoints that need * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But * event_trace_enable() is called before pid 1 starts, and this flag * is never set, making the syscall tracepoint never get reached, but * the event is enabled regardless (and not doing anything). */ static __init int event_trace_enable_again(void) { struct trace_array *tr; tr = top_trace_array(); if (!tr) return -ENODEV; early_enable_events(tr, bootup_event_buf, true); return 0; } early_initcall(event_trace_enable_again); /* Init fields which doesn't related to the tracefs */ static __init int event_trace_init_fields(void) { if (trace_define_generic_fields()) pr_warn("tracing: Failed to allocated generic fields"); if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); return 0; } __init int event_trace_init(void) { struct trace_array *tr; int ret; tr = top_trace_array(); if (!tr) return -ENODEV; trace_create_file("available_events", TRACE_MODE_READ, NULL, tr, &ftrace_avail_fops); ret = early_event_add_tracer(NULL, tr); if (ret) return ret; #ifdef CONFIG_MODULES ret = register_module_notifier(&trace_module_nb); if (ret) pr_warn("Failed to register trace events module notifier\n"); #endif eventdir_initialized = true; return 0; } void __init trace_event_init(void) { event_trace_memsetup(); init_ftrace_syscalls(); event_trace_enable(); event_trace_init_fields(); } #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST static DEFINE_SPINLOCK(test_spinlock); static DEFINE_SPINLOCK(test_spinlock_irq); static DEFINE_MUTEX(test_mutex); static __init void test_work(struct work_struct *dummy) { spin_lock(&test_spinlock); spin_lock_irq(&test_spinlock_irq); udelay(1); spin_unlock_irq(&test_spinlock_irq); spin_unlock(&test_spinlock); mutex_lock(&test_mutex); msleep(1); mutex_unlock(&test_mutex); } static __init int event_test_thread(void *unused) { void *test_malloc; test_malloc = kmalloc(1234, GFP_KERNEL); if (!test_malloc) pr_info("failed to kmalloc\n"); schedule_on_each_cpu(test_work); kfree(test_malloc); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { schedule(); set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); return 0; } /* * Do various things that may trigger events. */ static __init void event_test_stuff(void) { struct task_struct *test_thread; test_thread = kthread_run(event_test_thread, NULL, "test-events"); msleep(1); kthread_stop(test_thread); } /* * For every trace event defined, we will test each trace point separately, * and then by groups, and finally all trace points. */ static __init void event_trace_self_tests(void) { struct trace_subsystem_dir *dir; struct trace_event_file *file; struct trace_event_call *call; struct event_subsystem *system; struct trace_array *tr; int ret; tr = top_trace_array(); if (!tr) return; pr_info("Running tests on trace events:\n"); list_for_each_entry(file, &tr->events, list) { call = file->event_call; /* Only test those that have a probe */ if (!call->class || !call->class->probe) continue; /* * Testing syscall events here is pretty useless, but * we still do it if configured. But this is time consuming. * What we really need is a user thread to perform the * syscalls as we test. */ #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS if (call->class->system && strcmp(call->class->system, "syscalls") == 0) continue; #endif pr_info("Testing event %s: ", trace_event_name(call)); /* * If an event is already enabled, someone is using * it and the self test should not be on. */ if (file->flags & EVENT_FILE_FL_ENABLED) { pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; } ftrace_event_enable_disable(file, 1); event_test_stuff(); ftrace_event_enable_disable(file, 0); pr_cont("OK\n"); } /* Now test at the sub system level */ pr_info("Running tests on trace event systems:\n"); list_for_each_entry(dir, &tr->systems, list) { system = dir->subsystem; /* the ftrace system is special, skip it */ if (strcmp(system->name, "ftrace") == 0) continue; pr_info("Testing event system %s: ", system->name); ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); if (WARN_ON_ONCE(ret)) { pr_warn("error enabling system %s\n", system->name); continue; } event_test_stuff(); ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); if (WARN_ON_ONCE(ret)) { pr_warn("error disabling system %s\n", system->name); continue; } pr_cont("OK\n"); } /* Test with all events enabled */ pr_info("Running tests on all trace events:\n"); pr_info("Testing all events: "); ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); if (WARN_ON_ONCE(ret)) { pr_warn("error enabling all events\n"); return; } event_test_stuff(); /* reset sysname */ ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); if (WARN_ON_ONCE(ret)) { pr_warn("error disabling all events\n"); return; } pr_cont("OK\n"); } #ifdef CONFIG_FUNCTION_TRACER static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static struct trace_event_file event_trace_file __initdata; static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *regs) { struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; unsigned int trace_ctx; long disabled; int cpu; trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); if (disabled != 1) goto out; event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); } static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, }; static __init void event_trace_self_test_with_function(void) { int ret; event_trace_file.tr = top_trace_array(); if (WARN_ON(!event_trace_file.tr)) return; ret = register_ftrace_function(&trace_ops); if (WARN_ON(ret < 0)) { pr_info("Failed to enable function tracer for event tests\n"); return; } pr_info("Running tests again, along with the function tracer\n"); event_trace_self_tests(); unregister_ftrace_function(&trace_ops); } #else static __init void event_trace_self_test_with_function(void) { } #endif static __init int event_trace_self_tests_init(void) { if (!tracing_selftest_disabled) { event_trace_self_tests(); event_trace_self_test_with_function(); } return 0; } late_initcall(event_trace_self_tests_init); #endif
7 5 5 3 2 2 7 7 5 2 4 1 3 3 3 3 3 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. */ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/pkt_sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_bonding.h> #include <linux/if_vlan.h> #include <linux/in.h> #include <net/arp.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <asm/byteorder.h> #include <net/bonding.h> #include <net/bond_alb.h> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 }; static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; #pragma pack(1) struct learning_pkt { u8 mac_dst[ETH_ALEN]; u8 mac_src[ETH_ALEN]; __be16 type; u8 padding[ETH_ZLEN - ETH_HLEN]; }; struct arp_pkt { __be16 hw_addr_space; __be16 prot_addr_space; u8 hw_addr_len; u8 prot_addr_len; __be16 op_code; u8 mac_src[ETH_ALEN]; /* sender hardware address */ __be32 ip_src; /* sender IP address */ u8 mac_dst[ETH_ALEN]; /* target hardware address */ __be32 ip_dst; /* target IP address */ }; #pragma pack() /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], bool strict_match); static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); static void rlb_src_unlink(struct bonding *bond, u32 index); static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash); static inline u8 _simple_hash(const u8 *hash_start, int hash_size) { int i; u8 hash = 0; for (i = 0; i < hash_size; i++) hash ^= hash_start[i]; return hash; } /*********************** tlb specific functions ***************************/ static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) { if (save_load) { entry->load_history = 1 + entry->tx_bytes / BOND_TLB_REBALANCE_INTERVAL; entry->tx_bytes = 0; } entry->tx_slave = NULL; entry->next = TLB_NULL_INDEX; entry->prev = TLB_NULL_INDEX; } static inline void tlb_init_slave(struct slave *slave) { SLAVE_TLB_INFO(slave).load = 0; SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; } static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { struct tlb_client_info *tx_hash_table; u32 index; /* clear slave from tx_hashtbl */ tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; /* skip this if we've already freed the tx hash table */ if (tx_hash_table) { index = SLAVE_TLB_INFO(slave).head; while (index != TLB_NULL_INDEX) { u32 next_index = tx_hash_table[index].next; tlb_init_table_entry(&tx_hash_table[index], save_load); index = next_index; } } tlb_init_slave(slave); } static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { spin_lock_bh(&bond->mode_lock); __tlb_clear_slave(bond, slave, save_load); spin_unlock_bh(&bond->mode_lock); } /* Must be called before starting the monitor timer */ static int tlb_initialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); struct tlb_client_info *new_hashtbl; int i; new_hashtbl = kzalloc(size, GFP_KERNEL); if (!new_hashtbl) return -ENOMEM; spin_lock_bh(&bond->mode_lock); bond_info->tx_hashtbl = new_hashtbl; for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); spin_unlock_bh(&bond->mode_lock); return 0; } /* Must be called only after all slaves have been released */ static void tlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); spin_lock_bh(&bond->mode_lock); kfree(bond_info->tx_hashtbl); bond_info->tx_hashtbl = NULL; spin_unlock_bh(&bond->mode_lock); } static long long compute_gap(struct slave *slave) { return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ } static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) { struct slave *slave, *least_loaded; struct list_head *iter; long long max_gap; least_loaded = NULL; max_gap = LLONG_MIN; /* Find the slave with the largest gap */ bond_for_each_slave_rcu(bond, slave, iter) { if (bond_slave_can_tx(slave)) { long long gap = compute_gap(slave); if (max_gap < gap) { least_loaded = slave; max_gap = gap; } } } return least_loaded; } static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct tlb_client_info *hash_table; struct slave *assigned_slave; hash_table = bond_info->tx_hashtbl; assigned_slave = hash_table[hash_index].tx_slave; if (!assigned_slave) { assigned_slave = tlb_get_least_loaded_slave(bond); if (assigned_slave) { struct tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(assigned_slave)); u32 next_index = slave_info->head; hash_table[hash_index].tx_slave = assigned_slave; hash_table[hash_index].next = next_index; hash_table[hash_index].prev = TLB_NULL_INDEX; if (next_index != TLB_NULL_INDEX) hash_table[next_index].prev = hash_index; slave_info->head = hash_index; slave_info->load += hash_table[hash_index].load_history; } } if (assigned_slave) hash_table[hash_index].tx_bytes += skb_len; return assigned_slave; } static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) { struct slave *tx_slave; /* We don't need to disable softirq here, because * tlb_choose_channel() is only called by bond_alb_xmit() * which already has softirq disabled. */ spin_lock(&bond->mode_lock); tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); spin_unlock(&bond->mode_lock); return tx_slave; } /*********************** rlb specific functions ***************************/ /* when an ARP REPLY is received from a client update its info * in the rx_hashtbl */ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; u32 hash_index; spin_lock_bh(&bond->mode_lock); hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->assigned) && (client_info->ip_src == arp->ip_dst) && (client_info->ip_dst == arp->ip_src) && (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { /* update the clients MAC address */ ether_addr_copy(client_info->mac_dst, arp->mac_src); client_info->ntt = 1; bond_info->rx_ntt = 1; } spin_unlock_bh(&bond->mode_lock); } static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct arp_pkt *arp, _arp; if (skb->protocol != cpu_to_be16(ETH_P_ARP)) goto out; arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); if (!arp) goto out; /* We received an ARP from arp->ip_src. * We might have used this IP address previously (on the bonding host * itself or on a system that is bridged together with the bond). * However, if arp->mac_src is different than what is stored in * rx_hashtbl, some other host is now using the IP and we must prevent * sending out client updates with this IP address and the old MAC * address. * Clean up all hash table entries that have this address as ip_src but * have a different mac_src. */ rlb_purge_src_ip(bond, arp); if (arp->op_code == htons(ARPOP_REPLY)) { /* update rx hash table for this ARP */ rlb_update_entry_from_arp(bond, arp); slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n"); } out: return RX_HANDLER_ANOTHER; } /* Caller must hold rcu_read_lock() */ static struct slave *__rlb_next_rx_slave(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *before = NULL, *rx_slave = NULL, *slave; struct list_head *iter; bool found = false; bond_for_each_slave_rcu(bond, slave, iter) { if (!bond_slave_can_tx(slave)) continue; if (!found) { if (!before || before->speed < slave->speed) before = slave; } else { if (!rx_slave || rx_slave->speed < slave->speed) rx_slave = slave; } if (slave == bond_info->rx_slave) found = true; } /* we didn't find anything after the current or we have something * better before and up to the current slave */ if (!rx_slave || (before && rx_slave->speed < before->speed)) rx_slave = before; if (rx_slave) bond_info->rx_slave = rx_slave; return rx_slave; } /* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */ static struct slave *rlb_next_rx_slave(struct bonding *bond) { struct slave *rx_slave; ASSERT_RTNL(); rcu_read_lock(); rx_slave = __rlb_next_rx_slave(bond); rcu_read_unlock(); return rx_slave; } /* teach the switch the mac of a disabled slave * on the primary for fault tolerance * * Caller must hold RTNL */ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, const u8 addr[]) { struct slave *curr_active = rtnl_dereference(bond->curr_active_slave); if (!curr_active) return; if (!bond->alb_info.primary_is_promisc) { if (!dev_set_promiscuity(curr_active->dev, 1)) bond->alb_info.primary_is_promisc = 1; else bond->alb_info.primary_is_promisc = 0; } bond->alb_info.rlb_promisc_timeout_counter = 0; alb_send_learning_packets(curr_active, addr, true); } /* slave being removed should not be active at this point * * Caller must hold rtnl. */ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *rx_hash_table; u32 index, next_index; /* clear slave from rx_hashtbl */ spin_lock_bh(&bond->mode_lock); rx_hash_table = bond_info->rx_hashtbl; index = bond_info->rx_hashtbl_used_head; for (; index != RLB_NULL_INDEX; index = next_index) { next_index = rx_hash_table[index].used_next; if (rx_hash_table[index].slave == slave) { struct slave *assigned_slave = rlb_next_rx_slave(bond); if (assigned_slave) { rx_hash_table[index].slave = assigned_slave; if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) { bond_info->rx_hashtbl[index].ntt = 1; bond_info->rx_ntt = 1; /* A slave has been removed from the * table because it is either disabled * or being released. We must retry the * update to avoid clients from not * being updated & disconnecting when * there is stress */ bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } } else { /* there is no active slave */ rx_hash_table[index].slave = NULL; } } } spin_unlock_bh(&bond->mode_lock); if (slave != rtnl_dereference(bond->curr_active_slave)) rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); } static void rlb_update_client(struct rlb_client_info *client_info) { int i; if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) return; for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { struct sk_buff *skb; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, client_info->ip_dst, client_info->slave->dev, client_info->ip_src, client_info->mac_dst, client_info->slave->dev->dev_addr, client_info->mac_dst); if (!skb) { slave_err(client_info->slave->bond->dev, client_info->slave->dev, "failed to create an ARP packet\n"); continue; } skb->dev = client_info->slave->dev; if (client_info->vlan_id) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); } arp_xmit(skb); } } /* sends ARP REPLIES that update the clients that need updating */ static void rlb_update_rx_clients(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; u32 hash_index; spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if (client_info->ntt) { rlb_update_client(client_info); if (bond_info->rlb_update_retry_counter == 0) client_info->ntt = 0; } } /* do not update the entries again until this counter is zero so that * not to confuse the clients. */ bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; spin_unlock_bh(&bond->mode_lock); } /* The slave was assigned a new mac address - update the clients */ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; int ntt = 0; u32 hash_index; spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->slave == slave) && is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; ntt = 1; } } /* update the team's flag only after the whole iteration */ if (ntt) { bond_info->rx_ntt = 1; /* fasten the change */ bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } spin_unlock_bh(&bond->mode_lock); } /* mark all clients using src_ip to be updated */ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; u32 hash_index; spin_lock(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if (!client_info->slave) { netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); continue; } /* update all clients using this src_ip, that are not assigned * to the team's address (curr_active_slave) and have a known * unicast mac address. */ if ((client_info->ip_src == src_ip) && !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, bond->dev->dev_addr) && is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; bond_info->rx_ntt = 1; } } spin_unlock(&bond->mode_lock); } static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond, const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; spin_lock(&bond->mode_lock); curr_active_slave = rcu_dereference(bond->curr_active_slave); hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); client_info = &(bond_info->rx_hashtbl[hash_index]); if (client_info->assigned) { if ((client_info->ip_src == arp->ip_src) && (client_info->ip_dst == arp->ip_dst)) { /* the entry is already assigned to this client */ if (!is_broadcast_ether_addr(arp->mac_dst)) { /* update mac address from arp */ ether_addr_copy(client_info->mac_dst, arp->mac_dst); } ether_addr_copy(client_info->mac_src, arp->mac_src); assigned_slave = client_info->slave; if (assigned_slave) { spin_unlock(&bond->mode_lock); return assigned_slave; } } else { /* the entry is already assigned to some other client, * move the old client to primary (curr_active_slave) so * that the new client can be assigned to this entry. */ if (curr_active_slave && client_info->slave != curr_active_slave) { client_info->slave = curr_active_slave; rlb_update_client(client_info); } } } /* assign a new slave */ assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave) { if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) { /* ip_src is going to be updated, * fix the src hash list */ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src)); rlb_src_unlink(bond, hash_index); rlb_src_link(bond, hash_src, hash_index); } client_info->ip_src = arp->ip_src; client_info->ip_dst = arp->ip_dst; /* arp->mac_dst is broadcast for arp requests. * will be updated with clients actual unicast mac address * upon receiving an arp reply. */ ether_addr_copy(client_info->mac_dst, arp->mac_dst); ether_addr_copy(client_info->mac_src, arp->mac_src); client_info->slave = assigned_slave; if (is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; bond->alb_info.rx_ntt = 1; } else { client_info->ntt = 0; } if (vlan_get_tag(skb, &client_info->vlan_id)) client_info->vlan_id = 0; if (!client_info->assigned) { u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; bond_info->rx_hashtbl_used_head = hash_index; client_info->used_next = prev_tbl_head; if (prev_tbl_head != RLB_NULL_INDEX) { bond_info->rx_hashtbl[prev_tbl_head].used_prev = hash_index; } client_info->assigned = 1; } } spin_unlock(&bond->mode_lock); return assigned_slave; } /* chooses (and returns) transmit channel for arp reply * does not choose channel for other arp types since they are * sent on the curr_active_slave */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { struct slave *tx_slave = NULL; struct net_device *dev; struct arp_pkt *arp; if (!pskb_network_may_pull(skb, sizeof(*arp))) return NULL; arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate * from the bond itself or a VLAN directly above the bond. */ if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; dev = ip_dev_find(dev_net(bond->dev), arp->ip_src); if (dev) { if (netif_is_any_bridge_master(dev)) { dev_put(dev); return NULL; } dev_put(dev); } if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n", tx_slave ? tx_slave->dev->name : "NULL"); } else if (arp->op_code == htons(ARPOP_REQUEST)) { /* Create an entry in the rx_hashtbl for this client as a * place holder. * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ tx_slave = rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. */ bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; /* arp requests are broadcast and are sent on the primary * the arp request will collapse all clients on the subnet to * the primary slave. We must register these clients to be * updated with their assigned mac. */ rlb_req_update_subnet_clients(bond, arp->ip_src); netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n", tx_slave ? tx_slave->dev->name : "NULL"); } return tx_slave; } static void rlb_rebalance(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *assigned_slave; struct rlb_client_info *client_info; int ntt; u32 hash_index; spin_lock_bh(&bond->mode_lock); ntt = 0; hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave && (client_info->slave != assigned_slave)) { client_info->slave = assigned_slave; if (!is_zero_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; ntt = 1; } } } /* update the team's flag only after the whole iteration */ if (ntt) bond_info->rx_ntt = 1; spin_unlock_bh(&bond->mode_lock); } /* Caller must hold mode_lock */ static void rlb_init_table_entry_dst(struct rlb_client_info *entry) { entry->used_next = RLB_NULL_INDEX; entry->used_prev = RLB_NULL_INDEX; entry->assigned = 0; entry->slave = NULL; entry->vlan_id = 0; } static void rlb_init_table_entry_src(struct rlb_client_info *entry) { entry->src_first = RLB_NULL_INDEX; entry->src_prev = RLB_NULL_INDEX; entry->src_next = RLB_NULL_INDEX; } static void rlb_init_table_entry(struct rlb_client_info *entry) { memset(entry, 0, sizeof(struct rlb_client_info)); rlb_init_table_entry_dst(entry); rlb_init_table_entry_src(entry); } static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 next_index = bond_info->rx_hashtbl[index].used_next; u32 prev_index = bond_info->rx_hashtbl[index].used_prev; if (index == bond_info->rx_hashtbl_used_head) bond_info->rx_hashtbl_used_head = next_index; if (prev_index != RLB_NULL_INDEX) bond_info->rx_hashtbl[prev_index].used_next = next_index; if (next_index != RLB_NULL_INDEX) bond_info->rx_hashtbl[next_index].used_prev = prev_index; } /* unlink a rlb hash table entry from the src list */ static void rlb_src_unlink(struct bonding *bond, u32 index) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 next_index = bond_info->rx_hashtbl[index].src_next; u32 prev_index = bond_info->rx_hashtbl[index].src_prev; bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; if (next_index != RLB_NULL_INDEX) bond_info->rx_hashtbl[next_index].src_prev = prev_index; if (prev_index == RLB_NULL_INDEX) return; /* is prev_index pointing to the head of this list? */ if (bond_info->rx_hashtbl[prev_index].src_first == index) bond_info->rx_hashtbl[prev_index].src_first = next_index; else bond_info->rx_hashtbl[prev_index].src_next = next_index; } static void rlb_delete_table_entry(struct bonding *bond, u32 index) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); rlb_delete_table_entry_dst(bond, index); rlb_init_table_entry_dst(entry); rlb_src_unlink(bond, index); } /* add the rx_hashtbl[ip_dst_hash] entry to the list * of entries with identical ip_src_hash */ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 next; bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; next = bond_info->rx_hashtbl[ip_src_hash].src_first; bond_info->rx_hashtbl[ip_dst_hash].src_next = next; if (next != RLB_NULL_INDEX) bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; } /* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does * not match arp->mac_src */ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); u32 index; spin_lock_bh(&bond->mode_lock); index = bond_info->rx_hashtbl[ip_src_hash].src_first; while (index != RLB_NULL_INDEX) { struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); u32 next_index = entry->src_next; if (entry->ip_src == arp->ip_src && !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) rlb_delete_table_entry(bond, index); index = next_index; } spin_unlock_bh(&bond->mode_lock); } static int rlb_initialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *new_hashtbl; int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); int i; new_hashtbl = kmalloc(size, GFP_KERNEL); if (!new_hashtbl) return -1; spin_lock_bh(&bond->mode_lock); bond_info->rx_hashtbl = new_hashtbl; bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) rlb_init_table_entry(bond_info->rx_hashtbl + i); spin_unlock_bh(&bond->mode_lock); /* register to receive ARPs */ bond->recv_probe = rlb_arp_recv; return 0; } static void rlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); spin_lock_bh(&bond->mode_lock); kfree(bond_info->rx_hashtbl); bond_info->rx_hashtbl = NULL; bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; spin_unlock_bh(&bond->mode_lock); } static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 curr_index; spin_lock_bh(&bond->mode_lock); curr_index = bond_info->rx_hashtbl_used_head; while (curr_index != RLB_NULL_INDEX) { struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; if (curr->vlan_id == vlan_id) rlb_delete_table_entry(bond, curr_index); curr_index = next_index; } spin_unlock_bh(&bond->mode_lock); } /*********************** tlb/rlb shared functions *********************/ static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[], __be16 vlan_proto, u16 vid) { struct learning_pkt pkt; struct sk_buff *skb; int size = sizeof(struct learning_pkt); memset(&pkt, 0, size); ether_addr_copy(pkt.mac_dst, mac_addr); ether_addr_copy(pkt.mac_src, mac_addr); pkt.type = cpu_to_be16(ETH_P_LOOPBACK); skb = dev_alloc_skb(size); if (!skb) return; skb_put_data(skb, &pkt, size); skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = pkt.type; skb->priority = TC_PRIO_CONTROL; skb->dev = slave->dev; slave_dbg(slave->bond->dev, slave->dev, "Send learning packet: mac %pM vlan %d\n", mac_addr, vid); if (vid) __vlan_hwaccel_put_tag(skb, vlan_proto, vid); dev_queue_xmit(skb); } struct alb_walk_data { struct bonding *bond; struct slave *slave; const u8 *mac_addr; bool strict_match; }; static int alb_upper_dev_walk(struct net_device *upper, struct netdev_nested_priv *priv) { struct alb_walk_data *data = (struct alb_walk_data *)priv->data; bool strict_match = data->strict_match; const u8 *mac_addr = data->mac_addr; struct bonding *bond = data->bond; struct slave *slave = data->slave; struct bond_vlan_tag *tags; if (is_vlan_dev(upper) && bond->dev->lower_level == upper->lower_level - 1) { if (upper->addr_assign_type == NET_ADDR_STOLEN) { alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); } else { alb_send_lp_vid(slave, upper->dev_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); } } /* If this is a macvlan device, then only send updates * when strict_match is turned off. */ if (netif_is_macvlan(upper) && !strict_match) { tags = bond_verify_device_path(bond->dev, upper, 0); if (IS_ERR_OR_NULL(tags)) return -ENOMEM; alb_send_lp_vid(slave, upper->dev_addr, tags[0].vlan_proto, tags[0].vlan_id); kfree(tags); } return 0; } static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); struct netdev_nested_priv priv; struct alb_walk_data data = { .strict_match = strict_match, .mac_addr = mac_addr, .slave = slave, .bond = bond, }; priv.data = (void *)&data; /* send untagged */ alb_send_lp_vid(slave, mac_addr, 0, 0); /* loop through all devices and see if we need to send a packet * for that device. */ rcu_read_lock(); netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv); rcu_read_unlock(); } static int alb_set_slave_mac_addr(struct slave *slave, const u8 addr[], unsigned int len) { struct net_device *dev = slave->dev; struct sockaddr_storage ss; if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { __dev_addr_set(dev, addr, len); return 0; } /* for rlb each slave must have a unique hw mac addresses so that * each slave will receive packets destined to a different mac */ memcpy(ss.__data, addr, len); ss.ss_family = dev->type; if (dev_set_mac_address(dev, (struct sockaddr *)&ss, NULL)) { slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n"); return -EOPNOTSUPP; } return 0; } /* Swap MAC addresses between two slaves. * * Called with RTNL held, and no other locks. */ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) { u8 tmp_mac_addr[MAX_ADDR_LEN]; bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr, slave1->dev->addr_len); alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, slave2->dev->addr_len); alb_set_slave_mac_addr(slave2, tmp_mac_addr, slave1->dev->addr_len); } /* Send learning packets after MAC address swap. * * Called with RTNL and no other locks */ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, struct slave *slave2) { int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); struct slave *disabled_slave = NULL; ASSERT_RTNL(); /* fasten the change in the switch */ if (bond_slave_can_tx(slave1)) { alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed */ rlb_req_update_slave_clients(bond, slave1); } } else { disabled_slave = slave1; } if (bond_slave_can_tx(slave2)) { alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed */ rlb_req_update_slave_clients(bond, slave2); } } else { disabled_slave = slave2; } if (bond->alb_info.rlb_enabled && slaves_state_differ) { /* A disabled slave was assigned an active mac addr */ rlb_teach_disabled_mac_on_primary(bond, disabled_slave->dev->dev_addr); } } /** * alb_change_hw_addr_on_detach * @bond: bonding we're working on * @slave: the slave that was just detached * * We assume that @slave was already detached from the slave list. * * If @slave's permanent hw address is different both from its current * address and from @bond's address, then somewhere in the bond there's * a slave that has @slave's permanet address as its current address. * We'll make sure that slave no longer uses @slave's permanent address. * * Caller must hold RTNL and no other locks */ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) { int perm_curr_diff; int perm_bond_diff; struct slave *found_slave; perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, slave->dev->dev_addr); perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr); if (perm_curr_diff && perm_bond_diff) { found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); if (found_slave) { alb_swap_mac_addr(slave, found_slave); alb_fasten_mac_swap(bond, slave, found_slave); } } } /** * alb_handle_addr_collision_on_attach * @bond: bonding we're working on * @slave: the slave that was just attached * * checks uniqueness of slave's mac address and handles the case the * new slave uses the bonds mac address. * * If the permanent hw address of @slave is @bond's hw address, we need to * find a different hw address to give @slave, that isn't in use by any other * slave in the bond. This address must be, of course, one of the permanent * addresses of the other slaves. * * We go over the slave list, and for each slave there we compare its * permanent hw address with the current address of all the other slaves. * If no match was found, then we've found a slave with a permanent address * that isn't used by any other slave in the bond, so we can assign it to * @slave. * * assumption: this function is called before @slave is attached to the * bond slave list. */ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) { struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave); struct slave *tmp_slave1, *free_mac_slave = NULL; struct list_head *iter; if (!bond_has_slaves(bond)) { /* this is the first slave */ return 0; } /* if slave's mac address differs from bond's mac address * check uniqueness of slave's mac address against the other * slaves in the bond. */ if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { if (!bond_slave_has_mac(bond, slave->dev->dev_addr)) return 0; /* Try setting slave mac to bond address and fall-through * to code handling that situation below... */ alb_set_slave_mac_addr(slave, bond->dev->dev_addr, bond->dev->addr_len); } /* The slave's address is equal to the address of the bond. * Search for a spare address in the bond for this slave. */ bond_for_each_slave(bond, tmp_slave1, iter) { if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { /* no slave has tmp_slave1's perm addr * as its curr addr */ free_mac_slave = tmp_slave1; break; } if (!has_bond_addr) { if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr, bond->dev->dev_addr)) { has_bond_addr = tmp_slave1; } } } if (free_mac_slave) { alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, free_mac_slave->dev->addr_len); slave_warn(bond->dev, slave->dev, "the slave hw address is in use by the bond; giving it the hw address of %s\n", free_mac_slave->dev->name); } else if (has_bond_addr) { slave_err(bond->dev, slave->dev, "the slave hw address is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n"); return -EFAULT; } return 0; } /** * alb_set_mac_address * @bond: bonding we're working on * @addr: MAC address to set * * In TLB mode all slaves are configured to the bond's hw address, but set * their dev_addr field to different addresses (based on their permanent hw * addresses). * * For each slave, this function sets the interface to the new address and then * changes its dev_addr field to its previous value. * * Unwinding assumes bond's mac address has not yet changed. */ static int alb_set_mac_address(struct bonding *bond, void *addr) { struct slave *slave, *rollback_slave; struct list_head *iter; struct sockaddr_storage ss; char tmp_addr[MAX_ADDR_LEN]; int res; if (bond->alb_info.rlb_enabled) return 0; bond_for_each_slave(bond, slave, iter) { /* save net_device's current hw address */ bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr, slave->dev->addr_len); res = dev_set_mac_address(slave->dev, addr, NULL); /* restore net_device's hw address */ dev_addr_set(slave->dev, tmp_addr); if (res) goto unwind; } return 0; unwind: memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); ss.ss_family = bond->dev->type; /* unwind from head to the slave that failed */ bond_for_each_slave(bond, rollback_slave, iter) { if (rollback_slave == slave) break; bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, rollback_slave->dev->addr_len); dev_set_mac_address(rollback_slave->dev, (struct sockaddr *)&ss, NULL); dev_addr_set(rollback_slave->dev, tmp_addr); } return res; } /* determine if the packet is NA or NS */ static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond) { struct ipv6hdr *ip6hdr; struct icmp6hdr *hdr; if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) return true; ip6hdr = ipv6_hdr(skb); if (ip6hdr->nexthdr != IPPROTO_ICMPV6) return false; if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr))) return true; hdr = icmp6_hdr(skb); return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT || hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION; } /************************ exported alb functions ************************/ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) { int res; res = tlb_initialize(bond); if (res) return res; if (rlb_enabled) { res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } return 0; } void bond_alb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); tlb_deinitialize(bond); if (bond_info->rlb_enabled) rlb_deinitialize(bond); } static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, struct slave *tx_slave) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct ethhdr *eth_data = eth_hdr(skb); if (!tx_slave) { /* unbalanced or unassigned, send through primary */ tx_slave = rcu_dereference(bond->curr_active_slave); if (bond->params.tlb_dynamic_lb) bond_info->unbalanced_load += skb->len; } if (tx_slave && bond_slave_can_tx(tx_slave)) { if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) { ether_addr_copy(eth_data->h_source, tx_slave->dev->dev_addr); } return bond_dev_queue_xmit(bond, skb, tx_slave->dev); } if (tx_slave && bond->params.tlb_dynamic_lb) { spin_lock(&bond->mode_lock); __tlb_clear_slave(bond, tx_slave, 0); spin_unlock(&bond->mode_lock); } /* no suitable interface, frame not sent */ return bond_tx_drop(bond->dev, skb); } struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, struct sk_buff *skb) { struct slave *tx_slave = NULL; struct ethhdr *eth_data; u32 hash_index; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); /* Do not TX balance any multicast or broadcast */ if (!is_multicast_ether_addr(eth_data->h_dest)) { switch (skb->protocol) { case htons(ETH_P_IPV6): if (alb_determine_nd(skb, bond)) break; fallthrough; case htons(ETH_P_IP): hash_index = bond_xmit_hash(bond, skb); if (bond->params.tlb_dynamic_lb) { tx_slave = tlb_choose_channel(bond, hash_index & 0xFF, skb->len); } else { struct bond_up_slave *slaves; unsigned int count; slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[hash_index % count]; } break; } } return tx_slave; } netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct slave *tx_slave; tx_slave = bond_xmit_tlb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } struct slave *bond_xmit_alb_slave_get(struct bonding *bond, struct sk_buff *skb) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); static const __be32 ip_bcast = htonl(0xffffffff); struct slave *tx_slave = NULL; const u8 *hash_start = NULL; bool do_tx_balance = true; struct ethhdr *eth_data; u32 hash_index = 0; int hash_size = 0; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { const struct iphdr *iph; if (is_broadcast_ether_addr(eth_data->h_dest) || !pskb_network_may_pull(skb, sizeof(*iph))) { do_tx_balance = false; break; } iph = ip_hdr(skb); if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); break; } case ETH_P_IPV6: { const struct ipv6hdr *ip6hdr; /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ if (is_broadcast_ether_addr(eth_data->h_dest)) { do_tx_balance = false; break; } /* IPv6 uses all-nodes multicast as an equivalent to * broadcasts in IPv4. */ if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { do_tx_balance = false; break; } if (alb_determine_nd(skb, bond)) { do_tx_balance = false; break; } /* The IPv6 header is pulled by alb_determine_nd */ /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. */ ip6hdr = ipv6_hdr(skb); if (ipv6_addr_any(&ip6hdr->saddr)) { do_tx_balance = false; break; } hash_start = (char *)&ip6hdr->daddr; hash_size = sizeof(ip6hdr->daddr); break; } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) tx_slave = rlb_arp_xmit(skb, bond); break; default: do_tx_balance = false; break; } if (do_tx_balance) { if (bond->params.tlb_dynamic_lb) { hash_index = _simple_hash(hash_start, hash_size); tx_slave = tlb_choose_channel(bond, hash_index, skb->len); } else { /* * do_tx_balance means we are free to select the tx_slave * So we do exactly what tlb would do for hash selection */ struct bond_up_slave *slaves; unsigned int count; slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; } } return tx_slave; } netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct slave *tx_slave = NULL; tx_slave = bond_xmit_alb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } void bond_alb_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, alb_work.work); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct list_head *iter; struct slave *slave; if (!bond_has_slaves(bond)) { atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } rcu_read_lock(); atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { bool strict_match; bond_for_each_slave_rcu(bond, slave, iter) { /* If updating current_active, use all currently * user mac addresses (!strict_match). Otherwise, only * use mac of the slave device. * In RLB mode, we always use strict matches. */ strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) || bond_info->rlb_enabled); alb_send_learning_packets(slave, slave->dev->dev_addr, strict_match); } bond_info->lp_counter = 0; } /* rebalance tx traffic */ if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { SLAVE_TLB_INFO(slave).load = bond_info->unbalanced_load / BOND_TLB_REBALANCE_INTERVAL; bond_info->unbalanced_load = 0; } } atomic_set(&bond_info->tx_rebalance_counter, 0); } if (bond_info->rlb_enabled) { if (bond_info->primary_is_promisc && (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { /* dev_set_promiscuity requires rtnl and * nothing else. Avoid race with bond_close. */ rcu_read_unlock(); if (!rtnl_trylock()) goto re_arm; bond_info->rlb_promisc_timeout_counter = 0; /* If the primary was set to promiscuous mode * because a slave was disabled then * it can now leave promiscuous mode. */ dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev, -1); bond_info->primary_is_promisc = 0; rtnl_unlock(); rcu_read_lock(); } if (bond_info->rlb_rebalance) { bond_info->rlb_rebalance = 0; rlb_rebalance(bond); } /* check if clients need updating */ if (bond_info->rx_ntt) { if (bond_info->rlb_update_delay_counter) { --bond_info->rlb_update_delay_counter; } else { rlb_update_rx_clients(bond); if (bond_info->rlb_update_retry_counter) --bond_info->rlb_update_retry_counter; else bond_info->rx_ntt = 0; } } } rcu_read_unlock(); re_arm: queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); } /* assumption: called before the slave is attached to the bond * and not locked by the bond lock */ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) { int res; res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, slave->dev->addr_len); if (res) return res; res = alb_handle_addr_collision_on_attach(bond, slave); if (res) return res; tlb_init_slave(slave); /* order a rebalance ASAP */ atomic_set(&bond->alb_info.tx_rebalance_counter, BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; return 0; } /* Remove slave from tlb and rlb hash tables, and fix up MAC addresses * if necessary. * * Caller must hold RTNL and no other locks */ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) { if (bond_has_slaves(bond)) alb_change_hw_addr_on_detach(bond, slave); tlb_clear_slave(bond, slave, 0); if (bond->alb_info.rlb_enabled) { bond->alb_info.rx_slave = NULL; rlb_clear_slave(bond, slave); } } void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); if (link == BOND_LINK_DOWN) { tlb_clear_slave(bond, slave, 0); if (bond->alb_info.rlb_enabled) rlb_clear_slave(bond, slave); } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ atomic_set(&bond_info->tx_rebalance_counter, BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the * forwarding delay of the switch the rebalance will * not work because the rebalance arp replies will * not be forwarded to the clients.. */ } } if (bond_is_nondyn_tlb(bond)) { if (bond_update_slave_arr(bond, NULL)) pr_err("Failed to build slave-array for TLB mode.\n"); } } /** * bond_alb_handle_active_change - assign new curr_active_slave * @bond: our bonding struct * @new_slave: new slave to assign * * Set the bond->curr_active_slave to @new_slave and handle * mac address swapping and promiscuity changes as needed. * * Caller must hold RTNL */ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) { struct slave *swap_slave; struct slave *curr_active; curr_active = rtnl_dereference(bond->curr_active_slave); if (curr_active == new_slave) return; if (curr_active && bond->alb_info.primary_is_promisc) { dev_set_promiscuity(curr_active->dev, -1); bond->alb_info.primary_is_promisc = 0; bond->alb_info.rlb_promisc_timeout_counter = 0; } swap_slave = curr_active; rcu_assign_pointer(bond->curr_active_slave, new_slave); if (!new_slave || !bond_has_slaves(bond)) return; /* set the new curr_active_slave to the bonds mac address * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave */ if (!swap_slave) swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); /* Arrange for swap_slave and new_slave to temporarily be * ignored so we can mess with their MAC addresses without * fear of interference from transmit activity. */ if (swap_slave) tlb_clear_slave(bond, swap_slave, 1); tlb_clear_slave(bond, new_slave, 1); /* in TLB mode, the slave might flip down/up with the old dev_addr, * and thus filter bond->dev_addr's packets, so force bond's mac */ if (BOND_MODE(bond) == BOND_MODE_TLB) { struct sockaddr_storage ss; u8 tmp_addr[MAX_ADDR_LEN]; bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr, new_slave->dev->addr_len); bond_hw_addr_copy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); ss.ss_family = bond->dev->type; /* we don't care if it can't change its mac, best effort */ dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss, NULL); dev_addr_set(new_slave->dev, tmp_addr); } /* curr_active_slave must be set before calling alb_swap_mac_addr */ if (swap_slave) { /* swap mac address */ alb_swap_mac_addr(swap_slave, new_slave); alb_fasten_mac_swap(bond, swap_slave, new_slave); } else { /* set the new_slave to the bond mac address */ alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, bond->dev->addr_len); alb_send_learning_packets(new_slave, bond->dev->dev_addr, false); } } /* Called with RTNL */ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) { struct bonding *bond = netdev_priv(bond_dev); struct sockaddr_storage *ss = addr; struct slave *curr_active; struct slave *swap_slave; int res; if (!is_valid_ether_addr(ss->__data)) return -EADDRNOTAVAIL; res = alb_set_mac_address(bond, addr); if (res) return res; dev_addr_set(bond_dev, ss->__data); /* If there is no curr_active_slave there is nothing else to do. * Otherwise we'll need to pass the new address to it and handle * duplications. */ curr_active = rtnl_dereference(bond->curr_active_slave); if (!curr_active) return 0; swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); if (swap_slave) { alb_swap_mac_addr(swap_slave, curr_active); alb_fasten_mac_swap(bond, swap_slave, curr_active); } else { alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr, bond_dev->addr_len); alb_send_learning_packets(curr_active, bond_dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform clients mac address has changed */ rlb_req_update_slave_clients(bond, curr_active); } } return 0; } void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) { if (bond->alb_info.rlb_enabled) rlb_clear_vlan(bond, vlan_id); }
4 4 2 2 2 2 2 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 // SPDX-License-Identifier: LGPL-2.1-or-later /* * dvbdev.c * * Copyright (C) 2000 Ralph Metzler <ralph@convergence.de> * & Marcus Metzler <marcus@convergence.de> * for convergence integrated media GmbH */ #define pr_fmt(fmt) "dvbdev: " fmt #include <linux/types.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/i2c.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/mutex.h> #include <media/dvbdev.h> /* Due to enum tuner_pad_index */ #include <media/tuner.h> static DEFINE_MUTEX(dvbdev_mutex); static LIST_HEAD(dvbdevfops_list); static int dvbdev_debug; module_param(dvbdev_debug, int, 0644); MODULE_PARM_DESC(dvbdev_debug, "Turn on/off device debugging (default:off)."); #define dprintk(fmt, arg...) do { \ if (dvbdev_debug) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), \ __func__, ##arg); \ } while (0) static LIST_HEAD(dvb_adapter_list); static DEFINE_MUTEX(dvbdev_register_lock); static const char * const dnames[] = { [DVB_DEVICE_VIDEO] = "video", [DVB_DEVICE_AUDIO] = "audio", [DVB_DEVICE_SEC] = "sec", [DVB_DEVICE_FRONTEND] = "frontend", [DVB_DEVICE_DEMUX] = "demux", [DVB_DEVICE_DVR] = "dvr", [DVB_DEVICE_CA] = "ca", [DVB_DEVICE_NET] = "net", [DVB_DEVICE_OSD] = "osd" }; #ifdef CONFIG_DVB_DYNAMIC_MINORS #define MAX_DVB_MINORS 256 #define DVB_MAX_IDS MAX_DVB_MINORS #else #define DVB_MAX_IDS 4 static const u8 minor_type[] = { [DVB_DEVICE_VIDEO] = 0, [DVB_DEVICE_AUDIO] = 1, [DVB_DEVICE_SEC] = 2, [DVB_DEVICE_FRONTEND] = 3, [DVB_DEVICE_DEMUX] = 4, [DVB_DEVICE_DVR] = 5, [DVB_DEVICE_CA] = 6, [DVB_DEVICE_NET] = 7, [DVB_DEVICE_OSD] = 8, }; #define nums2minor(num, type, id) \ (((num) << 6) | ((id) << 4) | minor_type[type]) #define MAX_DVB_MINORS (DVB_MAX_ADAPTERS * 64) #endif static struct class *dvb_class; static struct dvb_device *dvb_minors[MAX_DVB_MINORS]; static DECLARE_RWSEM(minor_rwsem); static int dvb_device_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev; unsigned int minor = iminor(inode); if (minor >= MAX_DVB_MINORS) return -ENODEV; mutex_lock(&dvbdev_mutex); down_read(&minor_rwsem); dvbdev = dvb_minors[minor]; if (dvbdev && dvbdev->fops) { int err = 0; const struct file_operations *new_fops; new_fops = fops_get(dvbdev->fops); if (!new_fops) goto fail; file->private_data = dvb_device_get(dvbdev); replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); up_read(&minor_rwsem); mutex_unlock(&dvbdev_mutex); if (err) dvb_device_put(dvbdev); return err; } fail: up_read(&minor_rwsem); mutex_unlock(&dvbdev_mutex); return -ENODEV; } static const struct file_operations dvb_device_fops = { .owner = THIS_MODULE, .open = dvb_device_open, .llseek = noop_llseek, }; static struct cdev dvb_device_cdev; int dvb_generic_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if (!dvbdev->users) return -EBUSY; if ((file->f_flags & O_ACCMODE) == O_RDONLY) { if (!dvbdev->readers) return -EBUSY; dvbdev->readers--; } else { if (!dvbdev->writers) return -EBUSY; dvbdev->writers--; } dvbdev->users--; return 0; } EXPORT_SYMBOL(dvb_generic_open); int dvb_generic_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if ((file->f_flags & O_ACCMODE) == O_RDONLY) dvbdev->readers++; else dvbdev->writers++; dvbdev->users++; dvb_device_put(dvbdev); return 0; } EXPORT_SYMBOL(dvb_generic_release); long dvb_generic_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if (!dvbdev->kernel_ioctl) return -EINVAL; return dvb_usercopy(file, cmd, arg, dvbdev->kernel_ioctl); } EXPORT_SYMBOL(dvb_generic_ioctl); static int dvbdev_get_free_id(struct dvb_adapter *adap, int type) { u32 id = 0; while (id < DVB_MAX_IDS) { struct dvb_device *dev; list_for_each_entry(dev, &adap->device_list, list_head) if (dev->type == type && dev->id == id) goto skip; return id; skip: id++; } return -ENFILE; } static void dvb_media_device_free(struct dvb_device *dvbdev) { #if defined(CONFIG_MEDIA_CONTROLLER_DVB) if (dvbdev->entity) { media_device_unregister_entity(dvbdev->entity); kfree(dvbdev->entity); kfree(dvbdev->pads); dvbdev->entity = NULL; dvbdev->pads = NULL; } if (dvbdev->tsout_entity) { int i; for (i = 0; i < dvbdev->tsout_num_entities; i++) { media_device_unregister_entity(&dvbdev->tsout_entity[i]); kfree(dvbdev->tsout_entity[i].name); } kfree(dvbdev->tsout_entity); kfree(dvbdev->tsout_pads); dvbdev->tsout_entity = NULL; dvbdev->tsout_pads = NULL; dvbdev->tsout_num_entities = 0; } if (dvbdev->intf_devnode) { media_devnode_remove(dvbdev->intf_devnode); dvbdev->intf_devnode = NULL; } if (dvbdev->adapter->conn) { media_device_unregister_entity(dvbdev->adapter->conn); kfree(dvbdev->adapter->conn); dvbdev->adapter->conn = NULL; kfree(dvbdev->adapter->conn_pads); dvbdev->adapter->conn_pads = NULL; } #endif } #if defined(CONFIG_MEDIA_CONTROLLER_DVB) static int dvb_create_tsout_entity(struct dvb_device *dvbdev, const char *name, int npads) { int i; dvbdev->tsout_pads = kcalloc(npads, sizeof(*dvbdev->tsout_pads), GFP_KERNEL); if (!dvbdev->tsout_pads) return -ENOMEM; dvbdev->tsout_entity = kcalloc(npads, sizeof(*dvbdev->tsout_entity), GFP_KERNEL); if (!dvbdev->tsout_entity) return -ENOMEM; dvbdev->tsout_num_entities = npads; for (i = 0; i < npads; i++) { struct media_pad *pads = &dvbdev->tsout_pads[i]; struct media_entity *entity = &dvbdev->tsout_entity[i]; int ret; entity->name = kasprintf(GFP_KERNEL, "%s #%d", name, i); if (!entity->name) return -ENOMEM; entity->function = MEDIA_ENT_F_IO_DTV; pads->flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(entity, 1, pads); if (ret < 0) return ret; ret = media_device_register_entity(dvbdev->adapter->mdev, entity); if (ret < 0) return ret; } return 0; } #define DEMUX_TSOUT "demux-tsout" #define DVR_TSOUT "dvr-tsout" static int dvb_create_media_entity(struct dvb_device *dvbdev, int type, int demux_sink_pads) { int i, ret, npads; switch (type) { case DVB_DEVICE_FRONTEND: npads = 2; break; case DVB_DEVICE_DVR: ret = dvb_create_tsout_entity(dvbdev, DVR_TSOUT, demux_sink_pads); return ret; case DVB_DEVICE_DEMUX: npads = 1 + demux_sink_pads; ret = dvb_create_tsout_entity(dvbdev, DEMUX_TSOUT, demux_sink_pads); if (ret < 0) return ret; break; case DVB_DEVICE_CA: npads = 2; break; case DVB_DEVICE_NET: /* * We should be creating entities for the MPE/ULE * decapsulation hardware (or software implementation). * * However, the number of for the MPE/ULE decaps may not be * fixed. As we don't have yet dynamic support for PADs at * the Media Controller, let's not create the decap * entities yet. */ return 0; default: return 0; } dvbdev->entity = kzalloc(sizeof(*dvbdev->entity), GFP_KERNEL); if (!dvbdev->entity) return -ENOMEM; dvbdev->entity->name = dvbdev->name; if (npads) { dvbdev->pads = kcalloc(npads, sizeof(*dvbdev->pads), GFP_KERNEL); if (!dvbdev->pads) { kfree(dvbdev->entity); dvbdev->entity = NULL; return -ENOMEM; } } switch (type) { case DVB_DEVICE_FRONTEND: dvbdev->entity->function = MEDIA_ENT_F_DTV_DEMOD; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; dvbdev->pads[1].flags = MEDIA_PAD_FL_SOURCE; break; case DVB_DEVICE_DEMUX: dvbdev->entity->function = MEDIA_ENT_F_TS_DEMUX; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; for (i = 1; i < npads; i++) dvbdev->pads[i].flags = MEDIA_PAD_FL_SOURCE; break; case DVB_DEVICE_CA: dvbdev->entity->function = MEDIA_ENT_F_DTV_CA; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; dvbdev->pads[1].flags = MEDIA_PAD_FL_SOURCE; break; default: /* Should never happen, as the first switch prevents it */ kfree(dvbdev->entity); kfree(dvbdev->pads); dvbdev->entity = NULL; dvbdev->pads = NULL; return 0; } if (npads) { ret = media_entity_pads_init(dvbdev->entity, npads, dvbdev->pads); if (ret) return ret; } ret = media_device_register_entity(dvbdev->adapter->mdev, dvbdev->entity); if (ret) return ret; pr_info("%s: media entity '%s' registered.\n", __func__, dvbdev->entity->name); return 0; } #endif static int dvb_register_media_device(struct dvb_device *dvbdev, int type, int minor, unsigned int demux_sink_pads) { #if defined(CONFIG_MEDIA_CONTROLLER_DVB) struct media_link *link; u32 intf_type; int ret; if (!dvbdev->adapter->mdev) return 0; ret = dvb_create_media_entity(dvbdev, type, demux_sink_pads); if (ret) return ret; switch (type) { case DVB_DEVICE_FRONTEND: intf_type = MEDIA_INTF_T_DVB_FE; break; case DVB_DEVICE_DEMUX: intf_type = MEDIA_INTF_T_DVB_DEMUX; break; case DVB_DEVICE_DVR: intf_type = MEDIA_INTF_T_DVB_DVR; break; case DVB_DEVICE_CA: intf_type = MEDIA_INTF_T_DVB_CA; break; case DVB_DEVICE_NET: intf_type = MEDIA_INTF_T_DVB_NET; break; default: return 0; } dvbdev->intf_devnode = media_devnode_create(dvbdev->adapter->mdev, intf_type, 0, DVB_MAJOR, minor); if (!dvbdev->intf_devnode) return -ENOMEM; /* * Create the "obvious" link, e. g. the ones that represent * a direct association between an interface and an entity. * Other links should be created elsewhere, like: * DVB FE intf -> tuner * DVB demux intf -> dvr */ if (!dvbdev->entity) return 0; link = media_create_intf_link(dvbdev->entity, &dvbdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; #endif return 0; } int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, const struct dvb_device *template, void *priv, enum dvb_device_type type, int demux_sink_pads) { struct dvb_device *dvbdev; struct file_operations *dvbdevfops = NULL; struct dvbdevfops_node *node = NULL, *new_node = NULL; struct device *clsdev; int minor; int id, ret; mutex_lock(&dvbdev_register_lock); id = dvbdev_get_free_id(adap, type); if (id < 0) { mutex_unlock(&dvbdev_register_lock); *pdvbdev = NULL; pr_err("%s: couldn't find free device id\n", __func__); return -ENFILE; } *pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL); if (!dvbdev) { mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } /* * When a device of the same type is probe()d more than once, * the first allocated fops are used. This prevents memory leaks * that can occur when the same device is probe()d repeatedly. */ list_for_each_entry(node, &dvbdevfops_list, list_head) { if (node->fops->owner == adap->module && node->type == type && node->template == template) { dvbdevfops = node->fops; break; } } if (!dvbdevfops) { dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); if (!dvbdevfops) { kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); if (!new_node) { kfree(dvbdevfops); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } new_node->fops = dvbdevfops; new_node->type = type; new_node->template = template; list_add_tail(&new_node->list_head, &dvbdevfops_list); } memcpy(dvbdev, template, sizeof(struct dvb_device)); kref_init(&dvbdev->ref); dvbdev->type = type; dvbdev->id = id; dvbdev->adapter = adap; dvbdev->priv = priv; dvbdev->fops = dvbdevfops; init_waitqueue_head(&dvbdev->wait_queue); dvbdevfops->owner = adap->module; list_add_tail(&dvbdev->list_head, &adap->device_list); down_write(&minor_rwsem); #ifdef CONFIG_DVB_DYNAMIC_MINORS for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (!dvb_minors[minor]) break; #else minor = nums2minor(adap->num, type, id); #endif if (minor >= MAX_DVB_MINORS) { if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; } dvbdev->minor = minor; dvb_minors[minor] = dvb_device_get(dvbdev); up_write(&minor_rwsem); ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads); if (ret) { pr_err("%s: dvb_register_media_device failed to create the mediagraph\n", __func__); if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } dvb_media_device_free(dvbdev); list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return ret; } clsdev = device_create(dvb_class, adap->device, MKDEV(DVB_MAJOR, minor), dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id); if (IS_ERR(clsdev)) { pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } dvb_media_device_free(dvbdev); list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n", adap->num, dnames[type], id, minor, minor); mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_register_device); void dvb_remove_device(struct dvb_device *dvbdev) { if (!dvbdev) return; down_write(&minor_rwsem); dvb_minors[dvbdev->minor] = NULL; dvb_device_put(dvbdev); up_write(&minor_rwsem); dvb_media_device_free(dvbdev); device_destroy(dvb_class, MKDEV(DVB_MAJOR, dvbdev->minor)); list_del(&dvbdev->list_head); } EXPORT_SYMBOL(dvb_remove_device); static void dvb_free_device(struct kref *ref) { struct dvb_device *dvbdev = container_of(ref, struct dvb_device, ref); kfree(dvbdev); } struct dvb_device *dvb_device_get(struct dvb_device *dvbdev) { kref_get(&dvbdev->ref); return dvbdev; } EXPORT_SYMBOL(dvb_device_get); void dvb_device_put(struct dvb_device *dvbdev) { if (dvbdev) kref_put(&dvbdev->ref, dvb_free_device); } void dvb_unregister_device(struct dvb_device *dvbdev) { dvb_remove_device(dvbdev); dvb_device_put(dvbdev); } EXPORT_SYMBOL(dvb_unregister_device); #ifdef CONFIG_MEDIA_CONTROLLER_DVB static int dvb_create_io_intf_links(struct dvb_adapter *adap, struct media_interface *intf, char *name) { struct media_device *mdev = adap->mdev; struct media_entity *entity; struct media_link *link; media_device_for_each_entity(entity, mdev) { if (entity->function == MEDIA_ENT_F_IO_DTV) { if (strncmp(entity->name, name, strlen(name))) continue; link = media_create_intf_link(entity, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } } return 0; } int dvb_create_media_graph(struct dvb_adapter *adap, bool create_rf_connector) { struct media_device *mdev = adap->mdev; struct media_entity *entity, *tuner = NULL, *demod = NULL, *conn; struct media_entity *demux = NULL, *ca = NULL; struct media_link *link; struct media_interface *intf; unsigned int demux_pad = 0; unsigned int dvr_pad = 0; unsigned int ntuner = 0, ndemod = 0; int ret, pad_source, pad_sink; static const char *connector_name = "Television"; if (!mdev) return 0; media_device_for_each_entity(entity, mdev) { switch (entity->function) { case MEDIA_ENT_F_TUNER: tuner = entity; ntuner++; break; case MEDIA_ENT_F_DTV_DEMOD: demod = entity; ndemod++; break; case MEDIA_ENT_F_TS_DEMUX: demux = entity; break; case MEDIA_ENT_F_DTV_CA: ca = entity; break; } } /* * Prepare to signalize to media_create_pad_links() that multiple * entities of the same type exists and a 1:n or n:1 links need to be * created. * NOTE: if both tuner and demod have multiple instances, it is up * to the caller driver to create such links. */ if (ntuner > 1) tuner = NULL; if (ndemod > 1) demod = NULL; if (create_rf_connector) { conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) return -ENOMEM; adap->conn = conn; adap->conn_pads = kzalloc(sizeof(*adap->conn_pads), GFP_KERNEL); if (!adap->conn_pads) return -ENOMEM; conn->flags = MEDIA_ENT_FL_CONNECTOR; conn->function = MEDIA_ENT_F_CONN_RF; conn->name = connector_name; adap->conn_pads->flags = MEDIA_PAD_FL_SOURCE; ret = media_entity_pads_init(conn, 1, adap->conn_pads); if (ret) return ret; ret = media_device_register_entity(mdev, conn); if (ret) return ret; if (!ntuner) { ret = media_create_pad_links(mdev, MEDIA_ENT_F_CONN_RF, conn, 0, MEDIA_ENT_F_DTV_DEMOD, demod, 0, MEDIA_LNK_FL_ENABLED, false); } else { pad_sink = media_get_pad_index(tuner, MEDIA_PAD_FL_SINK, PAD_SIGNAL_ANALOG); if (pad_sink < 0) return -EINVAL; ret = media_create_pad_links(mdev, MEDIA_ENT_F_CONN_RF, conn, 0, MEDIA_ENT_F_TUNER, tuner, pad_sink, MEDIA_LNK_FL_ENABLED, false); } if (ret) return ret; } if (ntuner && ndemod) { /* NOTE: first found tuner source pad presumed correct */ pad_source = media_get_pad_index(tuner, MEDIA_PAD_FL_SOURCE, PAD_SIGNAL_ANALOG); if (pad_source < 0) return -EINVAL; ret = media_create_pad_links(mdev, MEDIA_ENT_F_TUNER, tuner, pad_source, MEDIA_ENT_F_DTV_DEMOD, demod, 0, MEDIA_LNK_FL_ENABLED, false); if (ret) return ret; } if (ndemod && demux) { ret = media_create_pad_links(mdev, MEDIA_ENT_F_DTV_DEMOD, demod, 1, MEDIA_ENT_F_TS_DEMUX, demux, 0, MEDIA_LNK_FL_ENABLED, false); if (ret) return ret; } if (demux && ca) { ret = media_create_pad_link(demux, 1, ca, 0, MEDIA_LNK_FL_ENABLED); if (ret) return ret; } /* Create demux links for each ringbuffer/pad */ if (demux) { media_device_for_each_entity(entity, mdev) { if (entity->function == MEDIA_ENT_F_IO_DTV) { if (!strncmp(entity->name, DVR_TSOUT, strlen(DVR_TSOUT))) { ret = media_create_pad_link(demux, ++dvr_pad, entity, 0, 0); if (ret) return ret; } if (!strncmp(entity->name, DEMUX_TSOUT, strlen(DEMUX_TSOUT))) { ret = media_create_pad_link(demux, ++demux_pad, entity, 0, 0); if (ret) return ret; } } } } /* Create interface links for FE->tuner, DVR->demux and CA->ca */ media_device_for_each_intf(intf, mdev) { if (intf->type == MEDIA_INTF_T_DVB_CA && ca) { link = media_create_intf_link(ca, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } if (intf->type == MEDIA_INTF_T_DVB_FE && tuner) { link = media_create_intf_link(tuner, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } #if 0 /* * Indirect link - let's not create yet, as we don't know how * to handle indirect links, nor if this will * actually be needed. */ if (intf->type == MEDIA_INTF_T_DVB_DVR && demux) { link = media_create_intf_link(demux, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } #endif if (intf->type == MEDIA_INTF_T_DVB_DVR) { ret = dvb_create_io_intf_links(adap, intf, DVR_TSOUT); if (ret) return ret; } if (intf->type == MEDIA_INTF_T_DVB_DEMUX) { ret = dvb_create_io_intf_links(adap, intf, DEMUX_TSOUT); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(dvb_create_media_graph); #endif static int dvbdev_check_free_adapter_num(int num) { struct list_head *entry; list_for_each(entry, &dvb_adapter_list) { struct dvb_adapter *adap; adap = list_entry(entry, struct dvb_adapter, list_head); if (adap->num == num) return 0; } return 1; } static int dvbdev_get_free_adapter_num(void) { int num = 0; while (num < DVB_MAX_ADAPTERS) { if (dvbdev_check_free_adapter_num(num)) return num; num++; } return -ENFILE; } int dvb_register_adapter(struct dvb_adapter *adap, const char *name, struct module *module, struct device *device, short *adapter_nums) { int i, num; mutex_lock(&dvbdev_register_lock); for (i = 0; i < DVB_MAX_ADAPTERS; ++i) { num = adapter_nums[i]; if (num >= 0 && num < DVB_MAX_ADAPTERS) { /* use the one the driver asked for */ if (dvbdev_check_free_adapter_num(num)) break; } else { num = dvbdev_get_free_adapter_num(); break; } num = -1; } if (num < 0) { mutex_unlock(&dvbdev_register_lock); return -ENFILE; } memset(adap, 0, sizeof(struct dvb_adapter)); INIT_LIST_HEAD(&adap->device_list); pr_info("DVB: registering new adapter (%s)\n", name); adap->num = num; adap->name = name; adap->module = module; adap->device = device; adap->mfe_shared = 0; adap->mfe_dvbdev = NULL; mutex_init(&adap->mfe_lock); #ifdef CONFIG_MEDIA_CONTROLLER_DVB mutex_init(&adap->mdev_lock); #endif list_add_tail(&adap->list_head, &dvb_adapter_list); mutex_unlock(&dvbdev_register_lock); return num; } EXPORT_SYMBOL(dvb_register_adapter); int dvb_unregister_adapter(struct dvb_adapter *adap) { mutex_lock(&dvbdev_register_lock); list_del(&adap->list_head); mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_unregister_adapter); /* * if the miracle happens and "generic_usercopy()" is included into * the kernel, then this can vanish. please don't make the mistake and * define this as video_usercopy(). this will introduce a dependency * to the v4l "videodev.o" module, which is unnecessary for some * cards (ie. the budget dvb-cards don't need the v4l module...) */ int dvb_usercopy(struct file *file, unsigned int cmd, unsigned long arg, int (*func)(struct file *file, unsigned int cmd, void *arg)) { char sbuf[128] = {}; void *mbuf = NULL; void *parg = NULL; int err = -EINVAL; /* Copy arguments into temp kernel buffer */ switch (_IOC_DIR(cmd)) { case _IOC_NONE: /* * For this command, the pointer is actually an integer * argument. */ parg = (void *)arg; break; case _IOC_READ: /* some v4l ioctls are marked wrong ... */ case _IOC_WRITE: case (_IOC_WRITE | _IOC_READ): if (_IOC_SIZE(cmd) <= sizeof(sbuf)) { parg = sbuf; } else { /* too big to allocate from stack */ mbuf = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL); if (!mbuf) return -ENOMEM; parg = mbuf; } err = -EFAULT; if (copy_from_user(parg, (void __user *)arg, _IOC_SIZE(cmd))) goto out; break; } /* call driver */ err = func(file, cmd, parg); if (err == -ENOIOCTLCMD) err = -ENOTTY; if (err < 0) goto out; /* Copy results into user buffer */ switch (_IOC_DIR(cmd)) { case _IOC_READ: case (_IOC_WRITE | _IOC_READ): if (copy_to_user((void __user *)arg, parg, _IOC_SIZE(cmd))) err = -EFAULT; break; } out: kfree(mbuf); return err; } #if IS_ENABLED(CONFIG_I2C) struct i2c_client *dvb_module_probe(const char *module_name, const char *name, struct i2c_adapter *adap, unsigned char addr, void *platform_data) { struct i2c_client *client; struct i2c_board_info *board_info; board_info = kzalloc(sizeof(*board_info), GFP_KERNEL); if (!board_info) return NULL; if (name) strscpy(board_info->type, name, I2C_NAME_SIZE); else strscpy(board_info->type, module_name, I2C_NAME_SIZE); board_info->addr = addr; board_info->platform_data = platform_data; request_module(module_name); client = i2c_new_client_device(adap, board_info); if (!i2c_client_has_driver(client)) { kfree(board_info); return NULL; } if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); client = NULL; } kfree(board_info); return client; } EXPORT_SYMBOL_GPL(dvb_module_probe); void dvb_module_release(struct i2c_client *client) { if (!client) return; module_put(client->dev.driver->owner); i2c_unregister_device(client); } EXPORT_SYMBOL_GPL(dvb_module_release); #endif static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct dvb_device *dvbdev = dev_get_drvdata(dev); add_uevent_var(env, "DVB_ADAPTER_NUM=%d", dvbdev->adapter->num); add_uevent_var(env, "DVB_DEVICE_TYPE=%s", dnames[dvbdev->type]); add_uevent_var(env, "DVB_DEVICE_NUM=%d", dvbdev->id); return 0; } static char *dvb_devnode(const struct device *dev, umode_t *mode) { const struct dvb_device *dvbdev = dev_get_drvdata(dev); return kasprintf(GFP_KERNEL, "dvb/adapter%d/%s%d", dvbdev->adapter->num, dnames[dvbdev->type], dvbdev->id); } static int __init init_dvbdev(void) { int retval; dev_t dev = MKDEV(DVB_MAJOR, 0); retval = register_chrdev_region(dev, MAX_DVB_MINORS, "DVB"); if (retval != 0) { pr_err("dvb-core: unable to get major %d\n", DVB_MAJOR); return retval; } cdev_init(&dvb_device_cdev, &dvb_device_fops); retval = cdev_add(&dvb_device_cdev, dev, MAX_DVB_MINORS); if (retval != 0) { pr_err("dvb-core: unable register character device\n"); goto error; } dvb_class = class_create("dvb"); if (IS_ERR(dvb_class)) { retval = PTR_ERR(dvb_class); goto error; } dvb_class->dev_uevent = dvb_uevent; dvb_class->devnode = dvb_devnode; return 0; error: cdev_del(&dvb_device_cdev); unregister_chrdev_region(dev, MAX_DVB_MINORS); return retval; } static void __exit exit_dvbdev(void) { struct dvbdevfops_node *node, *next; class_destroy(dvb_class); cdev_del(&dvb_device_cdev); unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS); list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) { list_del(&node->list_head); kfree(node->fops); kfree(node); } } subsys_initcall(init_dvbdev); module_exit(exit_dvbdev); MODULE_DESCRIPTION("DVB Core Driver"); MODULE_AUTHOR("Marcus Metzler, Ralph Metzler, Holger Waechtler"); MODULE_LICENSE("GPL");
2 2 7 1 6 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 // SPDX-License-Identifier: GPL-2.0-only /* * test/set flag bits stored in conntrack extension area. * * (C) 2013 Astaro GmbH & Co KG */ #include <linux/export.h> #include <linux/types.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; do { old = *address; tmp = (old & mask) ^ new; if (old == tmp) return 0; } while (cmpxchg(address, old, tmp) != old); return 1; } int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words32) { struct nf_conn_labels *labels; unsigned int size, i; int changed = 0; u32 *dst; labels = nf_ct_labels_find(ct); if (!labels) return -ENOSPC; size = sizeof(labels->bits); if (size < (words32 * sizeof(u32))) words32 = size / sizeof(u32); dst = (u32 *) labels->bits; for (i = 0; i < words32; i++) changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); size /= sizeof(u32); for (i = words32; i < size; i++) /* pad */ replace_u32(&dst[i], 0, 0); if (changed) nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_replace); int nf_connlabels_get(struct net *net, unsigned int bits) { int v; if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); v = atomic_inc_return_relaxed(&net->ct.labels_used); WARN_ON_ONCE(v <= 0); return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_get); void nf_connlabels_put(struct net *net) { int v = atomic_dec_return_relaxed(&net->ct.labels_used); WARN_ON_ONCE(v < 0); } EXPORT_SYMBOL_GPL(nf_connlabels_put);
2 3 11 11 4 1 17 15 1 1 18 17 17 17 5 5 18 18 17 17 1 1 2 2 2 5 5 2 2 2 2 2 5 5 5 5 5 39 39 39 18 18 18 32 10 22 1 1 1 1 10 9 22 21 1 2 11 23 1 1 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 // SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * * Copyright (C) 2011 Texas Instruments, Inc. * Copyright (C) 2014 Marvell International Ltd. * * Written by Ilan Elias <ilane@ti.com> * * Acknowledgements: * This file is based on hci_core.c, which was written * by Maxim Krasnyansky. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/kcov.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> struct core_conn_create_data { int length; struct nci_core_conn_create_cmd *cmd; }; static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id) { struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->conn_id == conn_id) return conn_info; } return NULL; } int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, const struct dest_spec_params *params) { const struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->dest_type == dest_type) { if (!params) return conn_info->conn_id; if (params->id == conn_info->dest_params->id && params->protocol == conn_info->dest_params->protocol) return conn_info->conn_id; } } return -EINVAL; } EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params); /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = result; ndev->req_status = NCI_REQ_DONE; complete(&ndev->req_completion); } } EXPORT_SYMBOL(nci_req_complete); static void nci_req_cancel(struct nci_dev *ndev, int err) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = err; ndev->req_status = NCI_REQ_CANCELED; complete(&ndev->req_completion); } } /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc = 0; long completion_rc; ndev->req_status = NCI_REQ_PEND; reinit_completion(&ndev->req_completion); req(ndev, opt); completion_rc = wait_for_completion_interruptible_timeout(&ndev->req_completion, timeout); pr_debug("wait_for_completion return %ld\n", completion_rc); if (completion_rc > 0) { switch (ndev->req_status) { case NCI_REQ_DONE: rc = nci_to_errno(ndev->req_result); break; case NCI_REQ_CANCELED: rc = -ndev->req_result; break; default: rc = -ETIMEDOUT; break; } } else { pr_err("wait_for_completion_interruptible_timeout failed %ld\n", completion_rc); rc = ((completion_rc == 0) ? (-ETIMEDOUT) : (completion_rc)); } ndev->req_status = ndev->req_result = 0; return rc; } inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc; /* Serialize all requests */ mutex_lock(&ndev->req_lock); /* check the state after obtaing the lock against any races * from nci_close_device when the device gets removed. */ if (test_bit(NCI_UP, &ndev->flags)) rc = __nci_request(ndev, req, opt, timeout); else rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; } static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; cmd.reset_type = NCI_RESET_TYPE_RESET_CONFIG; nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; __u8 *num = &cmd.num_mapping_configs; int i; /* set rf mapping configurations */ *num = 0; /* by default mapping is set to NCI_RF_INTERFACE_FRAME */ for (i = 0; i < ndev->num_supported_rf_interfaces; i++) { if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_ISO_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_ISO_DEP; (*num)++; } else if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_NFC_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_NFC_DEP; (*num)++; } if (*num == NCI_MAX_NUM_MAPPING_CONFIGS) break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_MAP_CMD, (1 + ((*num) * sizeof(struct disc_map_config))), &cmd); } struct nci_set_config_param { __u8 id; size_t len; const __u8 *val; }; static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); cmd.num_params = 1; cmd.param.id = param->id; cmd.param.len = param->len; memcpy(cmd.param.val, param->val, param->len); nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd); } struct nci_rf_discover_param { __u32 im_protocols; __u32 tm_protocols; }; static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_JEWEL_MASK || param->im_protocols & NFC_PROTO_MIFARE_MASK || param->im_protocols & NFC_PROTO_ISO14443_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO14443_B_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_B_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_FELICA_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO15693_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_V_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS - 1) && (param->tm_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_CMD, (1 + (cmd.num_disc_configs * sizeof(struct disc_config))), &cmd); } struct nci_rf_discover_select_param { __u8 rf_discovery_id; __u8 rf_protocol; }; static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; cmd.rf_protocol = param->rf_protocol; switch (cmd.rf_protocol) { case NCI_RF_PROTOCOL_ISO_DEP: cmd.rf_interface = NCI_RF_INTERFACE_ISO_DEP; break; case NCI_RF_PROTOCOL_NFC_DEP: cmd.rf_interface = NCI_RF_INTERFACE_NFC_DEP; break; default: cmd.rf_interface = NCI_RF_INTERFACE_FRAME; break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_SELECT_CMD, sizeof(struct nci_rf_discover_select_cmd), &cmd); } static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); } struct nci_cmd_param { __u16 opcode; size_t len; const __u8 *payload; }; static void nci_generic_req(struct nci_dev *ndev, const void *opt) { const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = opcode; param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); struct nci_loopback_data { u8 conn_id; struct sk_buff *data; }; static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) { nci_req_complete(ndev, NCI_STATUS_REJECTED); return; } conn_info->rx_skb = skb; nci_req_complete(ndev, NCI_STATUS_OK); } int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp) { int r; struct nci_loopback_data loopback_data; struct nci_conn_info *conn_info; struct sk_buff *skb; int conn_id = nci_get_conn_info_by_dest_type_params(ndev, NCI_DESTINATION_NFCC_LOOPBACK, NULL); if (conn_id < 0) { r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK, 0, 0, NULL); if (r != NCI_STATUS_OK) return r; conn_id = nci_get_conn_info_by_dest_type_params(ndev, NCI_DESTINATION_NFCC_LOOPBACK, NULL); } conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) return -EPROTO; /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = nci_nfcc_loopback_cb; conn_info->data_exchange_cb_context = ndev; skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE); skb_put_data(skb, data, data_len); loopback_data.conn_id = conn_id; loopback_data.data = skb; ndev->cur_conn_id = conn_id; r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; return r; } EXPORT_SYMBOL(nci_nfcc_loopback); static int nci_open_device(struct nci_dev *ndev) { int rc = 0; mutex_lock(&ndev->req_lock); if (test_bit(NCI_UNREG, &ndev->flags)) { rc = -ENODEV; goto done; } if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; } if (ndev->ops->open(ndev)) { rc = -EIO; goto done; } atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); if (ndev->ops->init) rc = ndev->ops->init(ndev); if (!rc) { rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } if (!rc && ndev->ops->setup) { rc = ndev->ops->setup(ndev); } if (!rc) { struct nci_core_init_v2_cmd nci_init_v2_cmd = { .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } if (!rc && ndev->ops->post_setup) rc = ndev->ops->post_setup(ndev); if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } clear_bit(NCI_INIT, &ndev->flags); if (!rc) { set_bit(NCI_UP, &ndev->flags); nci_clear_target_list(ndev); atomic_set(&ndev->state, NCI_IDLE); } else { /* Init failed, cleanup */ skb_queue_purge(&ndev->cmd_q); skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); ndev->flags &= BIT(NCI_UNREG); } done: mutex_unlock(&ndev->req_lock); return rc; } static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); /* This mutex needs to be held as a barrier for * caller nci_unregister_device */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { /* Need to flush the cmd wq in case * there is a queued/running cmd_work */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); return 0; } /* Drop RX and TX queues */ skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); /* Flush RX and TX wq */ flush_workqueue(ndev->rx_wq); flush_workqueue(ndev->tx_wq); /* Reset device */ skb_queue_purge(&ndev->cmd_q); atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty * and no works are scheduled. */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); return 0; } /* NCI command timer function */ static void nci_cmd_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, cmd_timer); atomic_set(&ndev->cmd_cnt, 1); queue_work(ndev->cmd_wq, &ndev->cmd_work); } /* NCI data exchange timer function */ static void nci_data_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, data_timer); set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); queue_work(ndev->rx_wq, &ndev->rx_work); } static int nci_dev_up(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_open_device(ndev); } static int nci_dev_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_close_device(ndev); } int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) { struct nci_set_config_param param; if (!val || !len) return 0; param.id = id; param.len = len; param.val = val; return __nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; __u8 action = (unsigned long)opt; cmd.discovery_action = action; nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); } int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { unsigned long opt = action; return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); } int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) { struct nci_nfcee_mode_set_cmd cmd; cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, const struct core_conn_create_dest_spec_params *params) { int r; struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; data.cmd = cmd; if (params) { memcpy(cmd->params, params, params_len); if (params->length > 0) memcpy(&ndev->cur_params, &params->value[DEST_SPEC_PARAMS_ID_INDEX], sizeof(struct dest_spec_params)); else ndev->cur_params.id = 0; } else { ndev->cur_params.id = 0; } ndev->cur_dest_type = destination_type; r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { unsigned long opt = conn_id; ndev->cur_conn_id = conn_id; return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); static void nci_set_target_ats(struct nfc_target *target, struct nci_dev *ndev) { if (ndev->target_ats_len > 0) { target->ats_len = ndev->target_ats_len; memcpy(target->ats, ndev->target_ats, target->ats_len); } } static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_set_config_param param; int rc; param.val = nfc_get_local_general_bytes(nfc_dev, &param.len); if ((param.val == NULL) || (param.len == 0)) return 0; if (param.len > NFC_MAX_GT_LEN) return -EINVAL; param.id = NCI_PN_ATR_REQ_GEN_BYTES; rc = nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; return nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } static int nci_set_listen_parameters(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; __u8 val; val = NCI_LA_SEL_INFO_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LA_SEL_INFO, 1, &val); if (rc) return rc; val = NCI_LF_PROTOCOL_TYPE_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LF_PROTOCOL_TYPE, 1, &val); if (rc) return rc; val = NCI_LF_CON_BITR_F_212 | NCI_LF_CON_BITR_F_424; return nci_set_config(ndev, NCI_LF_CON_BITR_F, 1, &val); } static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_param param; int rc; if ((atomic_read(&ndev->state) == NCI_DISCOVERY) || (atomic_read(&ndev->state) == NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to start poll, since poll is already active\n"); return -EBUSY; } if (ndev->target_active_prot) { pr_err("there is an active target\n"); return -EBUSY; } if ((atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) || (atomic_read(&ndev->state) == NCI_POLL_ACTIVE)) { pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; } if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_local_general_bytes(nfc_dev); if (rc) { pr_err("failed to set local general bytes\n"); return rc; } } if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_listen_parameters(nfc_dev); if (rc) pr_err("failed to set listen parameters\n"); } param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; rc = nci_request(ndev, nci_rf_discover_req, &param, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) ndev->poll_prots = im_protocols; return rc; } static void nci_stop_poll(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if ((atomic_read(&ndev->state) != NCI_DISCOVERY) && (atomic_read(&ndev->state) != NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to stop poll, since poll is not active\n"); return; } nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } static int nci_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_select_param param; const struct nfc_target *nci_target = NULL; int i; int rc = 0; pr_debug("target_idx %d, protocol 0x%x\n", target->idx, protocol); if ((atomic_read(&ndev->state) != NCI_W4_HOST_SELECT) && (atomic_read(&ndev->state) != NCI_POLL_ACTIVE)) { pr_err("there is no available target to activate\n"); return -EINVAL; } if (ndev->target_active_prot) { pr_err("there is already an active target\n"); return -EBUSY; } for (i = 0; i < ndev->n_targets; i++) { if (ndev->targets[i].idx == target->idx) { nci_target = &ndev->targets[i]; break; } } if (!nci_target) { pr_err("unable to find the selected target\n"); return -EINVAL; } if (protocol >= NFC_PROTO_MAX) { pr_err("the requested nfc protocol is invalid\n"); return -EINVAL; } if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); return -EINVAL; } if (atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) { param.rf_discovery_id = nci_target->logical_idx; if (protocol == NFC_PROTO_JEWEL) param.rf_protocol = NCI_RF_PROTOCOL_T1T; else if (protocol == NFC_PROTO_MIFARE) param.rf_protocol = NCI_RF_PROTOCOL_T2T; else if (protocol == NFC_PROTO_FELICA) param.rf_protocol = NCI_RF_PROTOCOL_T3T; else if (protocol == NFC_PROTO_ISO14443 || protocol == NFC_PROTO_ISO14443_B) param.rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; rc = nci_request(ndev, nci_rf_discover_select_req, &param, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } if (!rc) { ndev->target_active_prot = protocol; if (protocol == NFC_PROTO_ISO14443) nci_set_target_ats(target, ndev); } return rc; } static void nci_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; } ndev->target_active_prot = 0; switch (mode) { case NFC_TARGET_MODE_SLEEP: nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE; break; } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode); rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP); if (rc) return rc; rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb, ndev->remote_gb_len); if (!rc) rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE, NFC_RF_INITIATOR); return rc; } static int nci_dep_link_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { nci_request(ndev, nci_rf_deactivate_req, (void *)0, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); if (rc) pr_err("error when signaling tm deactivation\n"); } return 0; } static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; if (!conn_info) return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); if (!ndev->target_active_prot) { pr_err("unable to exchange data, no active target\n"); return -EINVAL; } if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) return -EBUSY; /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = cb; conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); return rc; } static int nci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) pr_err("unable to send data\n"); return rc; } static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->enable_se) return ndev->ops->enable_se(ndev, se_idx); return 0; } static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->disable_se) return ndev->ops->disable_se(ndev, se_idx); return 0; } static int nci_discover_se(struct nfc_dev *nfc_dev) { int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->discover_se) { r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); if (r != NCI_STATUS_OK) return -EPROTO; return ndev->ops->discover_se(ndev); } return 0; } static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->se_io) return ndev->ops->se_io(ndev, se_idx, apdu, apdu_length, cb, cb_context); return 0; } static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (!ndev->ops->fw_download) return -ENOTSUPP; return ndev->ops->fw_download(ndev, firmware_name); } static const struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, .stop_poll = nci_stop_poll, .dep_link_up = nci_dep_link_up, .dep_link_down = nci_dep_link_down, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, .tm_send = nci_tm_send, .enable_se = nci_enable_se, .disable_se = nci_disable_se, .discover_se = nci_discover_se, .se_io = nci_se_io, .fw_download = nci_fw_download, }; /* ---- Interface to NCI drivers ---- */ /** * nci_allocate_device - allocate a new nci device * * @ops: device operations * @supported_protocols: NFC protocols supported by the device * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; pr_debug("supported_protocols 0x%x\n", supported_protocols); if (!ops->open || !ops->close || !ops->send) return NULL; if (!supported_protocols) return NULL; ndev = kzalloc(sizeof(struct nci_dev), GFP_KERNEL); if (!ndev) return NULL; ndev->ops = ops; if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); goto free_nci; } ndev->tx_headroom = tx_headroom; ndev->tx_tailroom = tx_tailroom; init_completion(&ndev->req_completion); ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) goto free_nci; ndev->hci_dev = nci_hci_allocate(ndev); if (!ndev->hci_dev) goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; free_nfc: nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; } EXPORT_SYMBOL(nci_allocate_device); /** * nci_free_device - deallocate nci device * * @ndev: The nci device to deallocate */ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); /* drop partial rx data packet if present */ if (ndev->rx_data_reassembly) kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); /** * nci_register_device - register a nci device in the nfc subsystem * * @ndev: The nci device to register */ int nci_register_device(struct nci_dev *ndev) { int rc; struct device *dev = &ndev->nfc_dev->dev; char name[32]; ndev->flags = 0; INIT_WORK(&ndev->cmd_work, nci_cmd_work); snprintf(name, sizeof(name), "%s_nci_cmd_wq", dev_name(dev)); ndev->cmd_wq = create_singlethread_workqueue(name); if (!ndev->cmd_wq) { rc = -ENOMEM; goto exit; } INIT_WORK(&ndev->rx_work, nci_rx_work); snprintf(name, sizeof(name), "%s_nci_rx_wq", dev_name(dev)); ndev->rx_wq = create_singlethread_workqueue(name); if (!ndev->rx_wq) { rc = -ENOMEM; goto destroy_cmd_wq_exit; } INIT_WORK(&ndev->tx_work, nci_tx_work); snprintf(name, sizeof(name), "%s_nci_tx_wq", dev_name(dev)); ndev->tx_wq = create_singlethread_workqueue(name); if (!ndev->tx_wq) { rc = -ENOMEM; goto destroy_rx_wq_exit; } skb_queue_head_init(&ndev->cmd_q); skb_queue_head_init(&ndev->rx_q); skb_queue_head_init(&ndev->tx_q); timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0); timer_setup(&ndev->data_timer, nci_data_timer, 0); mutex_init(&ndev->req_lock); INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) goto destroy_tx_wq_exit; goto exit; destroy_tx_wq_exit: destroy_workqueue(ndev->tx_wq); destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); destroy_cmd_wq_exit: destroy_workqueue(ndev->cmd_wq); exit: return rc; } EXPORT_SYMBOL(nci_register_device); /** * nci_unregister_device - unregister a nci device in the nfc subsystem * * @ndev: The nci device to unregister */ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; /* This set_bit is not protected with specialized barrier, * However, it is fine because the mutex_lock(&ndev->req_lock); * in nci_close_device() will help to emit one. */ set_bit(NCI_UNREG, &ndev->flags); nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { list_del(&conn_info->list); /* conn_info is allocated with devm_kzalloc */ } nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * * @ndev: The nci device * @skb: The sk_buff to receive */ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && !test_bit(NCI_INIT, &ndev->flags))) { kfree_skb(skb); return -ENXIO; } /* Queue frame for rx worker thread */ skb_queue_tail(&ndev->rx_q, skb); queue_work(ndev->rx_wq, &ndev->rx_work); return 0; } EXPORT_SYMBOL(nci_recv_frame); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev) { kfree_skb(skb); return -ENODEV; } /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); return ndev->ops->send(ndev, skb); } EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; pr_debug("opcode 0x%x, plen %d\n", opcode, plen); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + plen), GFP_KERNEL); if (!skb) { pr_err("no memory for command\n"); return -ENOMEM; } hdr = skb_put(skb, NCI_CTRL_HDR_SIZE); hdr->gid = nci_opcode_gid(opcode); hdr->oid = nci_opcode_oid(opcode); hdr->plen = plen; nci_mt_set((__u8 *)hdr, NCI_MT_CMD_PKT); nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); if (plen) skb_put_data(skb, payload, plen); skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); return 0; } EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, size_t n_ops, __u16 opcode) { size_t i; const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; for (i = 0; i < n_ops; i++) { op = &ops[i]; if (op->opcode == opcode) return op; } return NULL; } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || !op->rsp) return -ENOTSUPP; return op->rsp(ndev, skb); } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) return -ENOTSUPP; return op->ntf(ndev, skb); } int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } static bool nci_valid_size(struct sk_buff *skb) { BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); unsigned int hdr_size = NCI_CTRL_HDR_SIZE; if (skb->len < hdr_size || !nci_plen(skb->data) || skb->len < hdr_size + nci_plen(skb->data)) { return false; } return true; } /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); struct nci_conn_info *conn_info; struct sk_buff *skb; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) return; pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Check if data flow control is used */ if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), nci_conn_id(skb->data), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); kcov_remote_stop(); } } /* ----- NCI RX worker thread (data & control) ----- */ static void nci_rx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, rx_work); struct sk_buff *skb; for (; (skb = skb_dequeue(&ndev->rx_q)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); if (!nci_valid_size(skb)) { kfree_skb(skb); continue; } /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: nci_rsp_packet(ndev, skb); break; case NCI_MT_NTF_PKT: nci_ntf_packet(ndev, skb); break; case NCI_MT_DATA_PKT: nci_rx_data_packet(ndev, skb); break; default: pr_err("unknown MT 0x%x\n", nci_mt(skb->data)); kfree_skb(skb); break; } } /* check if a data exchange timeout has occurred */ if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id, -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } } /* ----- NCI TX CMD worker thread ----- */ static void nci_cmd_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, cmd_work); struct sk_buff *skb; pr_debug("cmd_cnt %d\n", atomic_read(&ndev->cmd_cnt)); /* Send queued command */ if (atomic_read(&ndev->cmd_cnt)) { skb = skb_dequeue(&ndev->cmd_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); atomic_dec(&ndev->cmd_cnt); pr_debug("NCI TX: MT=cmd, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", nci_pbf(skb->data), nci_opcode_gid(nci_opcode(skb->data)), nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); kcov_remote_stop(); } } MODULE_DESCRIPTION("NFC Controller Interface"); MODULE_LICENSE("GPL");
2 1 1 4 3 1 13 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 // SPDX-License-Identifier: GPL-2.0 /* * Zoned block device handling * * Copyright (c) 2015, Hannes Reinecke * Copyright (c) 2015, SUSE Linux GmbH * * Copyright (c) 2016, Damien Le Moal * Copyright (c) 2016, Western Digital * Copyright (c) 2024, Western Digital Corporation or its affiliates. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/mempool.h> #include "blk.h" #include "blk-mq-sched.h" #include "blk-mq-debugfs.h" #define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name static const char *const zone_cond_name[] = { ZONE_COND_NAME(NOT_WP), ZONE_COND_NAME(EMPTY), ZONE_COND_NAME(IMP_OPEN), ZONE_COND_NAME(EXP_OPEN), ZONE_COND_NAME(CLOSED), ZONE_COND_NAME(READONLY), ZONE_COND_NAME(FULL), ZONE_COND_NAME(OFFLINE), }; #undef ZONE_COND_NAME /* * Per-zone write plug. * @node: hlist_node structure for managing the plug using a hash table. * @ref: Zone write plug reference counter. A zone write plug reference is * always at least 1 when the plug is hashed in the disk plug hash table. * The reference is incremented whenever a new BIO needing plugging is * submitted and when a function needs to manipulate a plug. The * reference count is decremented whenever a plugged BIO completes and * when a function that referenced the plug returns. The initial * reference is dropped whenever the zone of the zone write plug is reset, * finished and when the zone becomes full (last write BIO to the zone * completes). * @lock: Spinlock to atomically manipulate the plug. * @flags: Flags indicating the plug state. * @zone_no: The number of the zone the plug is managing. * @wp_offset: The zone write pointer location relative to the start of the zone * as a number of 512B sectors. * @bio_list: The list of BIOs that are currently plugged. * @bio_work: Work struct to handle issuing of plugged BIOs * @rcu_head: RCU head to free zone write plugs with an RCU grace period. * @disk: The gendisk the plug belongs to. */ struct blk_zone_wplug { struct hlist_node node; refcount_t ref; spinlock_t lock; unsigned int flags; unsigned int zone_no; unsigned int wp_offset; struct bio_list bio_list; struct work_struct bio_work; struct rcu_head rcu_head; struct gendisk *disk; }; /* * Zone write plug flags bits: * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged, * that is, that write BIOs are being throttled due to a write BIO already * being executed or the zone write plug bio list is not empty. * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone * write pointer offset and need to update it. * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed * from the disk hash table and that the initial reference to the zone * write plug set when the plug was first added to the hash table has been * dropped. This flag is set when a zone is reset, finished or become full, * to prevent new references to the zone write plug to be taken for * newly incoming BIOs. A zone write plug flagged with this flag will be * freed once all remaining references from BIOs or functions are dropped. */ #define BLK_ZONE_WPLUG_PLUGGED (1U << 0) #define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1) #define BLK_ZONE_WPLUG_UNHASHED (1U << 2) /** * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. * @zone_cond: BLK_ZONE_COND_XXX. * * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX * into string format. Useful in the debugging and tracing zone conditions. For * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) { static const char *zone_cond_str = "UNKNOWN"; if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond]) zone_cond_str = zone_cond_name[zone_cond]; return zone_cond_str; } EXPORT_SYMBOL_GPL(blk_zone_cond_str); struct disk_report_zones_cb_args { struct gendisk *disk; report_zones_cb user_cb; void *user_data; }; static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, struct blk_zone *zone); static int disk_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct disk_report_zones_cb_args *args = data; struct gendisk *disk = args->disk; if (disk->zone_wplugs_hash) disk_zone_wplug_sync_wp_offset(disk, zone); if (!args->user_cb) return 0; return args->user_cb(zone, idx, args->user_data); } /** * blkdev_report_zones - Get zones information * @bdev: Target block device * @sector: Sector from which to report zones * @nr_zones: Maximum number of zones to report * @cb: Callback function called for each reported zone * @data: Private data for the callback * * Description: * Get zone information starting from the zone containing @sector for at most * @nr_zones, and call @cb for each zone reported by the device. * To report all zones in a device starting from @sector, the BLK_ALL_ZONES * constant can be passed to @nr_zones. * Returns the number of zones reported by the device, or a negative errno * value in case of failure. * * Note: The caller must use memalloc_noXX_save/restore() calls to control * memory allocations done within this function. */ int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); struct disk_report_zones_cb_args args = { .disk = disk, .user_cb = cb, .user_data = data, }; if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; if (!nr_zones || sector >= capacity) return 0; return disk->fops->report_zones(disk, sector, nr_zones, disk_report_zones_cb, &args); } EXPORT_SYMBOL_GPL(blkdev_report_zones); static int blkdev_zone_reset_all(struct block_device *bdev) { struct bio bio; bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC); return submit_bio_wait(&bio); } /** * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bdev: Target block device * @op: Operation to be performed on the zones * @sector: Start sector of the first zone to operate on * @nr_sectors: Number of sectors, should be at least the length of one zone and * must be zone size aligned. * * Description: * Perform the specified operation on the range of zones specified by * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. * The operation to execute on each zone can be a zone reset, open, close * or finish request. */ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sector, sector_t nr_sectors) { sector_t zone_sectors = bdev_zone_sectors(bdev); sector_t capacity = bdev_nr_sectors(bdev); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; int ret = 0; if (!bdev_is_zoned(bdev)) return -EOPNOTSUPP; if (bdev_read_only(bdev)) return -EPERM; if (!op_is_zone_mgmt(op)) return -EOPNOTSUPP; if (end_sector <= sector || end_sector > capacity) /* Out of range */ return -EINVAL; /* Check alignment (handle eventual smaller last zone) */ if (!bdev_is_zone_start(bdev, sector)) return -EINVAL; if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity) return -EINVAL; /* * In the case of a zone reset operation over all zones, use * REQ_OP_ZONE_RESET_ALL. */ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) return blkdev_zone_reset_all(bdev); while (sector < end_sector) { bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL); bio->bi_iter.bi_sector = sector; sector += zone_sectors; /* This may take a while, so be nice to others */ cond_resched(); } ret = submit_bio_wait(bio); bio_put(bio); return ret; } EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); struct zone_report_args { struct blk_zone __user *zones; }; static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, void *data) { struct zone_report_args *args = data; if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) return -EFAULT; return 0; } /* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. */ int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct zone_report_args args; struct blk_zone_report rep; int ret; if (!argp) return -EINVAL; if (!bdev_is_zoned(bdev)) return -ENOTTY; if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) return -EFAULT; if (!rep.nr_zones) return -EINVAL; args.zones = argp + sizeof(struct blk_zone_report); ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, blkdev_copy_zone_to_user, &args); if (ret < 0) return ret; rep.nr_zones = ret; rep.flags = BLK_ZONE_REP_CAPACITY; if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) return -EFAULT; return 0; } static int blkdev_truncate_zone_range(struct block_device *bdev, blk_mode_t mode, const struct blk_zone_range *zrange) { loff_t start, end; if (zrange->sector + zrange->nr_sectors <= zrange->sector || zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) /* Out of range */ return -EINVAL; start = zrange->sector << SECTOR_SHIFT; end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; return truncate_bdev_range(bdev, mode, start, end); } /* * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. */ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct blk_zone_range zrange; enum req_op op; int ret; if (!argp) return -EINVAL; if (!bdev_is_zoned(bdev)) return -ENOTTY; if (!(mode & BLK_OPEN_WRITE)) return -EBADF; if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) return -EFAULT; switch (cmd) { case BLKRESETZONE: op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. */ filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) goto fail; break; case BLKOPENZONE: op = REQ_OP_ZONE_OPEN; break; case BLKCLOSEZONE: op = REQ_OP_ZONE_CLOSE; break; case BLKFINISHZONE: op = REQ_OP_ZONE_FINISH; break; default: return -ENOTTY; } ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: if (cmd == BLKRESETZONE) filemap_invalidate_unlock(bdev->bd_mapping); return ret; } static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone) { return zone->start + zone->len >= get_capacity(disk); } static bool disk_zone_is_full(struct gendisk *disk, unsigned int zno, unsigned int offset_in_zone) { if (zno < disk->nr_zones - 1) return offset_in_zone >= disk->zone_capacity; return offset_in_zone >= disk->last_zone_capacity; } static bool disk_zone_wplug_is_full(struct gendisk *disk, struct blk_zone_wplug *zwplug) { return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset); } static bool disk_insert_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { struct blk_zone_wplug *zwplg; unsigned long flags; unsigned int idx = hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits); /* * Add the new zone write plug to the hash table, but carefully as we * are racing with other submission context, so we may already have a * zone write plug for the same zone. */ spin_lock_irqsave(&disk->zone_wplugs_lock, flags); hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) { if (zwplg->zone_no == zwplug->zone_no) { spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); return false; } } hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); return true; } static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, sector_t sector) { unsigned int zno = disk_zone_no(disk, sector); unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); struct blk_zone_wplug *zwplug; rcu_read_lock(); hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) { if (zwplug->zone_no == zno && refcount_inc_not_zero(&zwplug->ref)) { rcu_read_unlock(); return zwplug; } } rcu_read_unlock(); return NULL; } static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) { struct blk_zone_wplug *zwplug = container_of(rcu_head, struct blk_zone_wplug, rcu_head); mempool_free(zwplug, zwplug->disk->zone_wplugs_pool); } static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug) { if (refcount_dec_and_test(&zwplug->ref)) { WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list)); WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)); call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu); } } static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { /* If the zone write plug was already removed, we are done. */ if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) return false; /* If the zone write plug is still plugged, it cannot be removed. */ if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) return false; /* * Completions of BIOs with blk_zone_write_plug_bio_endio() may * happen after handling a request completion with * blk_zone_write_plug_finish_request() (e.g. with split BIOs * that are chained). In such case, disk_zone_wplug_unplug_bio() * should not attempt to remove the zone write plug until all BIO * completions are seen. Check by looking at the zone write plug * reference count, which is 2 when the plug is unused (one reference * taken when the plug was allocated and another reference taken by the * caller context). */ if (refcount_read(&zwplug->ref) > 2) return false; /* We can remove zone write plugs for zones that are empty or full. */ return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug); } static void disk_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { unsigned long flags; /* If the zone write plug was already removed, we have nothing to do. */ if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) return; /* * Mark the zone write plug as unhashed and drop the extra reference we * took when the plug was inserted in the hash table. */ zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; spin_lock_irqsave(&disk->zone_wplugs_lock, flags); hlist_del_init_rcu(&zwplug->node); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); disk_put_zone_wplug(zwplug); } static void blk_zone_wplug_bio_work(struct work_struct *work); /* * Get a reference on the write plug for the zone containing @sector. * If the plug does not exist, it is allocated and hashed. * Return a pointer to the zone write plug with the plug spinlock held. */ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk, sector_t sector, gfp_t gfp_mask, unsigned long *flags) { unsigned int zno = disk_zone_no(disk, sector); struct blk_zone_wplug *zwplug; again: zwplug = disk_get_zone_wplug(disk, sector); if (zwplug) { /* * Check that a BIO completion or a zone reset or finish * operation has not already removed the zone write plug from * the hash table and dropped its reference count. In such case, * we need to get a new plug so start over from the beginning. */ spin_lock_irqsave(&zwplug->lock, *flags); if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) { spin_unlock_irqrestore(&zwplug->lock, *flags); disk_put_zone_wplug(zwplug); goto again; } return zwplug; } /* * Allocate and initialize a zone write plug with an extra reference * so that it is not freed when the zone write plug becomes idle without * the zone being full. */ zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask); if (!zwplug) return NULL; INIT_HLIST_NODE(&zwplug->node); refcount_set(&zwplug->ref, 2); spin_lock_init(&zwplug->lock); zwplug->flags = 0; zwplug->zone_no = zno; zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector); bio_list_init(&zwplug->bio_list); INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work); zwplug->disk = disk; spin_lock_irqsave(&zwplug->lock, *flags); /* * Insert the new zone write plug in the hash table. This can fail only * if another context already inserted a plug. Retry from the beginning * in such case. */ if (!disk_insert_zone_wplug(disk, zwplug)) { spin_unlock_irqrestore(&zwplug->lock, *flags); mempool_free(zwplug, disk->zone_wplugs_pool); goto again; } return zwplug; } static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug, struct bio *bio) { struct request_queue *q = zwplug->disk->queue; bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); bio_io_error(bio); disk_put_zone_wplug(zwplug); blk_queue_exit(q); } /* * Abort (fail) all plugged BIOs of a zone write plug. */ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { struct bio *bio; while ((bio = bio_list_pop(&zwplug->bio_list))) blk_zone_wplug_bio_io_error(zwplug, bio); } /* * Set a zone write plug write pointer offset to the specified value. * This aborts all plugged BIOs, which is fine as this function is called for * a zone reset operation, a zone finish operation or if the zone needs a wp * update from a report zone after a write error. */ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk, struct blk_zone_wplug *zwplug, unsigned int wp_offset) { lockdep_assert_held(&zwplug->lock); /* Update the zone write pointer and abort all plugged BIOs. */ zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE; zwplug->wp_offset = wp_offset; disk_zone_wplug_abort(zwplug); /* * The zone write plug now has no BIO plugged: remove it from the * hash table so that it cannot be seen. The plug will be freed * when the last reference is dropped. */ if (disk_should_remove_zone_wplug(disk, zwplug)) disk_remove_zone_wplug(disk, zwplug); } static unsigned int blk_zone_wp_offset(struct blk_zone *zone) { switch (zone->cond) { case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: case BLK_ZONE_COND_CLOSED: return zone->wp - zone->start; case BLK_ZONE_COND_FULL: return zone->len; case BLK_ZONE_COND_EMPTY: return 0; case BLK_ZONE_COND_NOT_WP: case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: default: /* * Conventional, offline and read-only zones do not have a valid * write pointer. */ return UINT_MAX; } } static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, struct blk_zone *zone) { struct blk_zone_wplug *zwplug; unsigned long flags; zwplug = disk_get_zone_wplug(disk, zone->start); if (!zwplug) return; spin_lock_irqsave(&zwplug->lock, flags); if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) disk_zone_wplug_set_wp_offset(disk, zwplug, blk_zone_wp_offset(zone)); spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } static int disk_zone_sync_wp_offset(struct gendisk *disk, sector_t sector) { struct disk_report_zones_cb_args args = { .disk = disk, }; return disk->fops->report_zones(disk, sector, 1, disk_report_zones_cb, &args); } static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, unsigned int wp_offset) { struct gendisk *disk = bio->bi_bdev->bd_disk; sector_t sector = bio->bi_iter.bi_sector; struct blk_zone_wplug *zwplug; unsigned long flags; /* Conventional zones cannot be reset nor finished. */ if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { bio_io_error(bio); return true; } /* * No-wait reset or finish BIOs do not make much sense as the callers * issue these as blocking operations in most cases. To avoid issues * the BIO execution potentially failing with BLK_STS_AGAIN, warn about * REQ_NOWAIT being set and ignore that flag. */ if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) bio->bi_opf &= ~REQ_NOWAIT; /* * If we have a zone write plug, set its write pointer offset to 0 * (reset case) or to the zone size (finish case). This will abort all * BIOs plugged for the target zone. It is fine as resetting or * finishing zones while writes are still in-flight will result in the * writes failing anyway. */ zwplug = disk_get_zone_wplug(disk, sector); if (zwplug) { spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset); spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } return false; } static bool blk_zone_wplug_handle_reset_all(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; struct blk_zone_wplug *zwplug; unsigned long flags; sector_t sector; /* * Set the write pointer offset of all zone write plugs to 0. This will * abort all plugged BIOs. It is fine as resetting zones while writes * are still in-flight will result in the writes failing anyway. */ for (sector = 0; sector < get_capacity(disk); sector += disk->queue->limits.chunk_sectors) { zwplug = disk_get_zone_wplug(disk, sector); if (zwplug) { spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_set_wp_offset(disk, zwplug, 0); spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } } return false; } static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, struct blk_zone_wplug *zwplug) { /* * Take a reference on the zone write plug and schedule the submission * of the next plugged BIO. blk_zone_wplug_bio_work() will release the * reference we take here. */ WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); refcount_inc(&zwplug->ref); queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); } static inline void disk_zone_wplug_add_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug, struct bio *bio, unsigned int nr_segs) { bool schedule_bio_work = false; /* * Grab an extra reference on the BIO request queue usage counter. * This reference will be reused to submit a request for the BIO for * blk-mq devices and dropped when the BIO is failed and after * it is issued in the case of BIO-based devices. */ percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter); /* * The BIO is being plugged and thus will have to wait for the on-going * write and for all other writes already plugged. So polling makes * no sense. */ bio_clear_polled(bio); /* * REQ_NOWAIT BIOs are always handled using the zone write plug BIO * work, which can block. So clear the REQ_NOWAIT flag and schedule the * work if this is the first BIO we are plugging. */ if (bio->bi_opf & REQ_NOWAIT) { schedule_bio_work = !(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); bio->bi_opf &= ~REQ_NOWAIT; } /* * Reuse the poll cookie field to store the number of segments when * split to the hardware limits. */ bio->__bi_nr_segments = nr_segs; /* * We always receive BIOs after they are split and ready to be issued. * The block layer passes the parts of a split BIO in order, and the * user must also issue write sequentially. So simply add the new BIO * at the tail of the list to preserve the sequential write order. */ bio_list_add(&zwplug->bio_list, bio); zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; if (schedule_bio_work) disk_zone_wplug_schedule_bio_work(disk, zwplug); } /* * Called from bio_attempt_back_merge() when a BIO was merged with a request. */ void blk_zone_write_plug_bio_merged(struct bio *bio) { struct blk_zone_wplug *zwplug; unsigned long flags; /* * If the BIO was already plugged, then we were called through * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge(). * For this case, we already hold a reference on the zone write plug for * the BIO and blk_zone_write_plug_init_request() will handle the * zone write pointer offset update. */ if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING)) return; bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING); /* * Get a reference on the zone write plug of the target zone and advance * the zone write pointer offset. Given that this is a merge, we already * have at least one request and one BIO referencing the zone write * plug. So this should not fail. */ zwplug = disk_get_zone_wplug(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); if (WARN_ON_ONCE(!zwplug)) return; spin_lock_irqsave(&zwplug->lock, flags); zwplug->wp_offset += bio_sectors(bio); spin_unlock_irqrestore(&zwplug->lock, flags); } /* * Attempt to merge plugged BIOs with a newly prepared request for a BIO that * already went through zone write plugging (either a new BIO or one that was * unplugged). */ void blk_zone_write_plug_init_request(struct request *req) { sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req); struct request_queue *q = req->q; struct gendisk *disk = q->disk; struct blk_zone_wplug *zwplug = disk_get_zone_wplug(disk, blk_rq_pos(req)); unsigned long flags; struct bio *bio; if (WARN_ON_ONCE(!zwplug)) return; /* * Indicate that completion of this request needs to be handled with * blk_zone_write_plug_finish_request(), which will drop the reference * on the zone write plug we took above on entry to this function. */ req->rq_flags |= RQF_ZONE_WRITE_PLUGGING; if (blk_queue_nomerges(q)) return; /* * Walk through the list of plugged BIOs to check if they can be merged * into the back of the request. */ spin_lock_irqsave(&zwplug->lock, flags); while (!disk_zone_wplug_is_full(disk, zwplug)) { bio = bio_list_peek(&zwplug->bio_list); if (!bio) break; if (bio->bi_iter.bi_sector != req_back_sector || !blk_rq_merge_ok(req, bio)) break; WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE_ZEROES && !bio->__bi_nr_segments); bio_list_pop(&zwplug->bio_list); if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) != BIO_MERGE_OK) { bio_list_add_head(&zwplug->bio_list, bio); break; } /* * Drop the extra reference on the queue usage we got when * plugging the BIO and advance the write pointer offset. */ blk_queue_exit(q); zwplug->wp_offset += bio_sectors(bio); req_back_sector += bio_sectors(bio); } spin_unlock_irqrestore(&zwplug->lock, flags); } /* * Check and prepare a BIO for submission by incrementing the write pointer * offset of its zone write plug and changing zone append operations into * regular write when zone append emulation is needed. */ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; /* * If we lost track of the zone write pointer due to a write error, * the user must either execute a report zones, reset the zone or finish * the to recover a reliable write pointer position. Fail BIOs if the * user did not do that as we cannot handle emulated zone append * otherwise. */ if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) return false; /* * Check that the user is not attempting to write to a full zone. * We know such BIO will fail, and that would potentially overflow our * write pointer offset beyond the end of the zone. */ if (disk_zone_wplug_is_full(disk, zwplug)) return false; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { /* * Use a regular write starting at the current write pointer. * Similarly to native zone append operations, do not allow * merging. */ bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE; bio->bi_iter.bi_sector += zwplug->wp_offset; /* * Remember that this BIO is in fact a zone append operation * so that we can restore its operation code on completion. */ bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND); } else { /* * Check for non-sequential writes early as we know that BIOs * with a start sector not unaligned to the zone write pointer * will fail. */ if (bio_offset_from_zone_start(bio) != zwplug->wp_offset) return false; } /* Advance the zone write pointer offset. */ zwplug->wp_offset += bio_sectors(bio); return true; } static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) { struct gendisk *disk = bio->bi_bdev->bd_disk; sector_t sector = bio->bi_iter.bi_sector; struct blk_zone_wplug *zwplug; gfp_t gfp_mask = GFP_NOIO; unsigned long flags; /* * BIOs must be fully contained within a zone so that we use the correct * zone write plug for the entire BIO. For blk-mq devices, the block * layer should already have done any splitting required to ensure this * and this BIO should thus not be straddling zone boundaries. For * BIO-based devices, it is the responsibility of the driver to split * the bio before submitting it. */ if (WARN_ON_ONCE(bio_straddles_zones(bio))) { bio_io_error(bio); return true; } /* Conventional zones do not need write plugging. */ if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { /* Zone append to conventional zones is not allowed. */ if (bio_op(bio) == REQ_OP_ZONE_APPEND) { bio_io_error(bio); return true; } return false; } if (bio->bi_opf & REQ_NOWAIT) gfp_mask = GFP_NOWAIT; zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags); if (!zwplug) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else bio_io_error(bio); return true; } /* Indicate that this BIO is being handled using zone write plugging. */ bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING); /* * If the zone is already plugged, add the BIO to the plug BIO list. * Do the same for REQ_NOWAIT BIOs to ensure that we will not see a * BLK_STS_AGAIN failure if we let the BIO execute. * Otherwise, plug and let the BIO execute. */ if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) || (bio->bi_opf & REQ_NOWAIT)) goto plug; if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { spin_unlock_irqrestore(&zwplug->lock, flags); bio_io_error(bio); return true; } zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; spin_unlock_irqrestore(&zwplug->lock, flags); return false; plug: disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs); spin_unlock_irqrestore(&zwplug->lock, flags); return true; } /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted * @nr_segs: The number of physical segments of @bio * * Handle write, write zeroes and zone append operations requiring emulation * using zone write plugging. * * Return true whenever @bio execution needs to be delayed through the zone * write plug. Otherwise, return false to let the submission path process * @bio normally. */ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) { struct block_device *bdev = bio->bi_bdev; if (!bdev->bd_disk->zone_wplugs_hash) return false; /* * If the BIO already has the plugging flag set, then it was already * handled through this path and this is a submission from the zone * plug bio submit work. */ if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING)) return false; /* * We do not need to do anything special for empty flush BIOs, e.g * BIOs such as issued by blkdev_issue_flush(). The is because it is * the responsibility of the user to first wait for the completion of * write operations for flush to have any effect on the persistence of * the written data. */ if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; /* * Regular writes and write zeroes need to be handled through the target * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH * which may need to go through the flush machinery depending on the * target device capabilities. Plugging such writes is fine as the flush * machinery operates at the request level, below the plug, and * completion of the flush sequence will go through the regular BIO * completion, which will handle zone write plugging. * Zone append operations for devices that requested emulation must * also be plugged so that these BIOs can be changed into regular * write BIOs. * Zone reset, reset all and finish commands need special treatment * to correctly track the write pointer offset of zones. These commands * are not plugged as we do not need serialization with write * operations. It is the responsibility of the user to not issue reset * and finish commands when write operations are in flight. */ switch (bio_op(bio)) { case REQ_OP_ZONE_APPEND: if (!bdev_emulates_zone_append(bdev)) return false; fallthrough; case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: return blk_zone_wplug_handle_write(bio, nr_segs); case REQ_OP_ZONE_RESET: return blk_zone_wplug_handle_reset_or_finish(bio, 0); case REQ_OP_ZONE_FINISH: return blk_zone_wplug_handle_reset_or_finish(bio, bdev_zone_sectors(bdev)); case REQ_OP_ZONE_RESET_ALL: return blk_zone_wplug_handle_reset_all(bio); default: return false; } return false; } EXPORT_SYMBOL_GPL(blk_zone_plug_bio); static void disk_zone_wplug_unplug_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug) { unsigned long flags; spin_lock_irqsave(&zwplug->lock, flags); /* Schedule submission of the next plugged BIO if we have one. */ if (!bio_list_empty(&zwplug->bio_list)) { disk_zone_wplug_schedule_bio_work(disk, zwplug); spin_unlock_irqrestore(&zwplug->lock, flags); return; } zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; /* * If the zone is full (it was fully written or finished, or empty * (it was reset), remove its zone write plug from the hash table. */ if (disk_should_remove_zone_wplug(disk, zwplug)) disk_remove_zone_wplug(disk, zwplug); spin_unlock_irqrestore(&zwplug->lock, flags); } void blk_zone_write_plug_bio_endio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; struct blk_zone_wplug *zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); unsigned long flags; if (WARN_ON_ONCE(!zwplug)) return; /* Make sure we do not see this BIO again by clearing the plug flag. */ bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); /* * If this is a regular write emulating a zone append operation, * restore the original operation code. */ if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_ZONE_APPEND; } /* * If the BIO failed, abort all plugged BIOs and mark the plug as * needing a write pointer update. */ if (bio->bi_status != BLK_STS_OK) { spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_abort(zwplug); zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE; spin_unlock_irqrestore(&zwplug->lock, flags); } /* Drop the reference we took when the BIO was issued. */ disk_put_zone_wplug(zwplug); /* * For BIO-based devices, blk_zone_write_plug_finish_request() * is not called. So we need to schedule execution of the next * plugged BIO here. */ if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) disk_zone_wplug_unplug_bio(disk, zwplug); /* Drop the reference we took when entering this function. */ disk_put_zone_wplug(zwplug); } void blk_zone_write_plug_finish_request(struct request *req) { struct gendisk *disk = req->q->disk; struct blk_zone_wplug *zwplug; zwplug = disk_get_zone_wplug(disk, req->__sector); if (WARN_ON_ONCE(!zwplug)) return; req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING; /* * Drop the reference we took when the request was initialized in * blk_zone_write_plug_init_request(). */ disk_put_zone_wplug(zwplug); disk_zone_wplug_unplug_bio(disk, zwplug); /* Drop the reference we took when entering this function. */ disk_put_zone_wplug(zwplug); } static void blk_zone_wplug_bio_work(struct work_struct *work) { struct blk_zone_wplug *zwplug = container_of(work, struct blk_zone_wplug, bio_work); struct block_device *bdev; unsigned long flags; struct bio *bio; /* * Submit the next plugged BIO. If we do not have any, clear * the plugged flag. */ spin_lock_irqsave(&zwplug->lock, flags); again: bio = bio_list_pop(&zwplug->bio_list); if (!bio) { zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; spin_unlock_irqrestore(&zwplug->lock, flags); goto put_zwplug; } if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { blk_zone_wplug_bio_io_error(zwplug, bio); goto again; } spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue * usage counter we took when the BIO was plugged, but the submission * path for BIO-based devices will not do that. So drop this extra * reference here. */ if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) blk_queue_exit(bdev->bd_disk->queue); put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ disk_put_zone_wplug(zwplug); } static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) { return 1U << disk->zone_wplugs_hash_bits; } void disk_init_zone_resources(struct gendisk *disk) { spin_lock_init(&disk->zone_wplugs_lock); } /* * For the size of a disk zone write plug hash table, use the size of the * zone write plug mempool, which is the maximum of the disk open zones and * active zones limits. But do not exceed 4KB (512 hlist head entries), that is, * 9 bits. For a disk that has no limits, mempool size defaults to 128. */ #define BLK_ZONE_WPLUG_MAX_HASH_BITS 9 #define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE 128 static int disk_alloc_zone_resources(struct gendisk *disk, unsigned int pool_size) { unsigned int i; disk->zone_wplugs_hash_bits = min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); disk->zone_wplugs_hash = kcalloc(disk_zone_wplugs_hash_size(disk), sizeof(struct hlist_head), GFP_KERNEL); if (!disk->zone_wplugs_hash) return -ENOMEM; for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]); disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size, sizeof(struct blk_zone_wplug)); if (!disk->zone_wplugs_pool) goto free_hash; disk->zone_wplugs_wq = alloc_workqueue("%s_zwplugs", WQ_MEM_RECLAIM | WQ_HIGHPRI, pool_size, disk->disk_name); if (!disk->zone_wplugs_wq) goto destroy_pool; return 0; destroy_pool: mempool_destroy(disk->zone_wplugs_pool); disk->zone_wplugs_pool = NULL; free_hash: kfree(disk->zone_wplugs_hash); disk->zone_wplugs_hash = NULL; disk->zone_wplugs_hash_bits = 0; return -ENOMEM; } static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) { struct blk_zone_wplug *zwplug; unsigned int i; if (!disk->zone_wplugs_hash) return; /* Free all the zone write plugs we have. */ for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { while (!hlist_empty(&disk->zone_wplugs_hash[i])) { zwplug = hlist_entry(disk->zone_wplugs_hash[i].first, struct blk_zone_wplug, node); refcount_inc(&zwplug->ref); disk_remove_zone_wplug(disk, zwplug); disk_put_zone_wplug(zwplug); } } kfree(disk->zone_wplugs_hash); disk->zone_wplugs_hash = NULL; disk->zone_wplugs_hash_bits = 0; } static unsigned int disk_set_conv_zones_bitmap(struct gendisk *disk, unsigned long *bitmap) { unsigned int nr_conv_zones = 0; unsigned long flags; spin_lock_irqsave(&disk->zone_wplugs_lock, flags); if (bitmap) nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones); bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap, lockdep_is_held(&disk->zone_wplugs_lock)); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); kfree_rcu_mightsleep(bitmap); return nr_conv_zones; } void disk_free_zone_resources(struct gendisk *disk) { if (!disk->zone_wplugs_pool) return; if (disk->zone_wplugs_wq) { destroy_workqueue(disk->zone_wplugs_wq); disk->zone_wplugs_wq = NULL; } disk_destroy_zone_wplugs_hash_table(disk); /* * Wait for the zone write plugs to be RCU-freed before * destorying the mempool. */ rcu_barrier(); mempool_destroy(disk->zone_wplugs_pool); disk->zone_wplugs_pool = NULL; disk_set_conv_zones_bitmap(disk, NULL); disk->zone_capacity = 0; disk->last_zone_capacity = 0; disk->nr_zones = 0; } static inline bool disk_need_zone_resources(struct gendisk *disk) { /* * All mq zoned devices need zone resources so that the block layer * can automatically handle write BIO plugging. BIO-based device drivers * (e.g. DM devices) are normally responsible for handling zone write * ordering and do not need zone resources, unless the driver requires * zone append emulation. */ return queue_is_mq(disk->queue) || queue_emulates_zone_append(disk->queue); } static int disk_revalidate_zone_resources(struct gendisk *disk, unsigned int nr_zones) { struct queue_limits *lim = &disk->queue->limits; unsigned int pool_size; if (!disk_need_zone_resources(disk)) return 0; /* * If the device has no limit on the maximum number of open and active * zones, use BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE. */ pool_size = max(lim->max_open_zones, lim->max_active_zones); if (!pool_size) pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_zones); if (!disk->zone_wplugs_hash) return disk_alloc_zone_resources(disk, pool_size); return 0; } struct blk_revalidate_zone_args { struct gendisk *disk; unsigned long *conv_zones_bitmap; unsigned int nr_zones; unsigned int zone_capacity; unsigned int last_zone_capacity; sector_t sector; }; /* * Update the disk zone resources information and device queue limits. * The disk queue is frozen when this is executed. */ static int disk_update_zone_resources(struct gendisk *disk, struct blk_revalidate_zone_args *args) { struct request_queue *q = disk->queue; unsigned int nr_seq_zones, nr_conv_zones; unsigned int pool_size; struct queue_limits lim; int ret; disk->nr_zones = args->nr_zones; disk->zone_capacity = args->zone_capacity; disk->last_zone_capacity = args->last_zone_capacity; nr_conv_zones = disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap); if (nr_conv_zones >= disk->nr_zones) { pr_warn("%s: Invalid number of conventional zones %u / %u\n", disk->disk_name, nr_conv_zones, disk->nr_zones); return -ENODEV; } lim = queue_limits_start_update(q); /* * Some devices can advertize zone resource limits that are larger than * the number of sequential zones of the zoned block device, e.g. a * small ZNS namespace. For such case, assume that the zoned device has * no zone resource limits. */ nr_seq_zones = disk->nr_zones - nr_conv_zones; if (lim.max_open_zones >= nr_seq_zones) lim.max_open_zones = 0; if (lim.max_active_zones >= nr_seq_zones) lim.max_active_zones = 0; if (!disk->zone_wplugs_pool) goto commit; /* * If the device has no limit on the maximum number of open and active * zones, set its max open zone limit to the mempool size to indicate * to the user that there is a potential performance impact due to * dynamic zone write plug allocation when simultaneously writing to * more zones than the size of the mempool. */ pool_size = max(lim.max_open_zones, lim.max_active_zones); if (!pool_size) pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones); mempool_resize(disk->zone_wplugs_pool, pool_size); if (!lim.max_open_zones && !lim.max_active_zones) { if (pool_size < nr_seq_zones) lim.max_open_zones = pool_size; else lim.max_open_zones = 0; } commit: blk_mq_freeze_queue(q); ret = queue_limits_commit_update(q, &lim); blk_mq_unfreeze_queue(q); return ret; } static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, struct blk_revalidate_zone_args *args) { struct gendisk *disk = args->disk; if (zone->capacity != zone->len) { pr_warn("%s: Invalid conventional zone capacity\n", disk->disk_name); return -ENODEV; } if (disk_zone_is_last(disk, zone)) args->last_zone_capacity = zone->capacity; if (!disk_need_zone_resources(disk)) return 0; if (!args->conv_zones_bitmap) { args->conv_zones_bitmap = bitmap_zalloc(args->nr_zones, GFP_NOIO); if (!args->conv_zones_bitmap) return -ENOMEM; } set_bit(idx, args->conv_zones_bitmap); return 0; } static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, struct blk_revalidate_zone_args *args) { struct gendisk *disk = args->disk; struct blk_zone_wplug *zwplug; unsigned int wp_offset; unsigned long flags; /* * Remember the capacity of the first sequential zone and check * if it is constant for all zones, ignoring the last zone as it can be * smaller. */ if (!args->zone_capacity) args->zone_capacity = zone->capacity; if (disk_zone_is_last(disk, zone)) { args->last_zone_capacity = zone->capacity; } else if (zone->capacity != args->zone_capacity) { pr_warn("%s: Invalid variable zone capacity\n", disk->disk_name); return -ENODEV; } /* * We need to track the write pointer of all zones that are not * empty nor full. So make sure we have a zone write plug for * such zone if the device has a zone write plug hash table. */ if (!disk->zone_wplugs_hash) return 0; disk_zone_wplug_sync_wp_offset(disk, zone); wp_offset = blk_zone_wp_offset(zone); if (!wp_offset || wp_offset >= zone->capacity) return 0; zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags); if (!zwplug) return -ENOMEM; spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); return 0; } /* * Helper function to check the validity of zones of a zoned block device. */ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_revalidate_zone_args *args = data; struct gendisk *disk = args->disk; sector_t zone_sectors = disk->queue->limits.chunk_sectors; int ret; /* Check for bad zones and holes in the zone report */ if (zone->start != args->sector) { pr_warn("%s: Zone gap at sectors %llu..%llu\n", disk->disk_name, args->sector, zone->start); return -ENODEV; } if (zone->start >= get_capacity(disk) || !zone->len) { pr_warn("%s: Invalid zone start %llu, length %llu\n", disk->disk_name, zone->start, zone->len); return -ENODEV; } /* * All zones must have the same size, with the exception on an eventual * smaller last zone. */ if (!disk_zone_is_last(disk, zone)) { if (zone->len != zone_sectors) { pr_warn("%s: Invalid zoned device with non constant zone size\n", disk->disk_name); return -ENODEV; } } else if (zone->len > zone_sectors) { pr_warn("%s: Invalid zoned device with larger last zone size\n", disk->disk_name); return -ENODEV; } if (!zone->capacity || zone->capacity > zone->len) { pr_warn("%s: Invalid zone capacity\n", disk->disk_name); return -ENODEV; } /* Check zone type */ switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: ret = blk_revalidate_conv_zone(zone, idx, args); break; case BLK_ZONE_TYPE_SEQWRITE_REQ: ret = blk_revalidate_seq_zone(zone, idx, args); break; case BLK_ZONE_TYPE_SEQWRITE_PREF: default: pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", disk->disk_name, (int)zone->type, zone->start); ret = -ENODEV; } if (!ret) args->sector += zone->len; return ret; } /** * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs * @disk: Target disk * * Helper function for low-level device drivers to check, (re) allocate and * initialize resources used for managing zoned disks. This function should * normally be called by blk-mq based drivers when a zoned gendisk is probed * and when the zone configuration of the gendisk changes (e.g. after a format). * Before calling this function, the device driver must already have set the * device zone size (chunk_sector limit) and the max zone append limit. * BIO based drivers can also use this function as long as the device queue * can be safely frozen. */ int blk_revalidate_disk_zones(struct gendisk *disk) { struct request_queue *q = disk->queue; sector_t zone_sectors = q->limits.chunk_sectors; sector_t capacity = get_capacity(disk); struct blk_revalidate_zone_args args = { }; unsigned int noio_flag; int ret = -ENOMEM; if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) return -EIO; if (!capacity) return -ENODEV; /* * Checks that the device driver indicated a valid zone size and that * the max zone append limit is set. */ if (!zone_sectors || !is_power_of_2(zone_sectors)) { pr_warn("%s: Invalid non power of two zone size (%llu)\n", disk->disk_name, zone_sectors); return -ENODEV; } /* * Ensure that all memory allocations in this context are done as if * GFP_NOIO was specified. */ args.disk = disk; args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); noio_flag = memalloc_noio_save(); ret = disk_revalidate_zone_resources(disk, args.nr_zones); if (ret) { memalloc_noio_restore(noio_flag); return ret; } ret = disk->fops->report_zones(disk, 0, UINT_MAX, blk_revalidate_zone_cb, &args); if (!ret) { pr_warn("%s: No zones reported\n", disk->disk_name); ret = -ENODEV; } memalloc_noio_restore(noio_flag); /* * If zones where reported, make sure that the entire disk capacity * has been checked. */ if (ret > 0 && args.sector != capacity) { pr_warn("%s: Missing zones from sector %llu\n", disk->disk_name, args.sector); ret = -ENODEV; } /* * Set the new disk zone parameters only once the queue is frozen and * all I/Os are completed. */ if (ret > 0) ret = disk_update_zone_resources(disk, &args); else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); if (ret) { blk_mq_freeze_queue(q); disk_free_zone_resources(disk); blk_mq_unfreeze_queue(q); } return ret; } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); /** * blk_zone_issue_zeroout - zero-fill a block range in a zone * @bdev: blockdev to write * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) * * Description: * Zero-fill a block range in a zone (@sector must be equal to the zone write * pointer), handling potential errors due to the (initially unknown) lack of * hardware offload (See blkdev_issue_zeroout()). */ int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { int ret; if (WARN_ON_ONCE(!bdev_is_zoned(bdev))) return -EIO; ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, BLKDEV_ZERO_NOFALLBACK); if (ret != -EOPNOTSUPP) return ret; /* * The failed call to blkdev_issue_zeroout() advanced the zone write * pointer. Undo this using a report zone to update the zone write * pointer to the correct current value. */ ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector); if (ret != 1) return ret < 0 ? ret : -EIO; /* * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a * regular write with zero-pages. */ return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0); } EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout); #ifdef CONFIG_BLK_DEBUG_FS int queue_zone_wplugs_show(void *data, struct seq_file *m) { struct request_queue *q = data; struct gendisk *disk = q->disk; struct blk_zone_wplug *zwplug; unsigned int zwp_wp_offset, zwp_flags; unsigned int zwp_zone_no, zwp_ref; unsigned int zwp_bio_list_size, i; unsigned long flags; if (!disk->zone_wplugs_hash) return 0; rcu_read_lock(); for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i], node) { spin_lock_irqsave(&zwplug->lock, flags); zwp_zone_no = zwplug->zone_no; zwp_flags = zwplug->flags; zwp_ref = refcount_read(&zwplug->ref); zwp_wp_offset = zwplug->wp_offset; zwp_bio_list_size = bio_list_size(&zwplug->bio_list); spin_unlock_irqrestore(&zwplug->lock, flags); seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset, zwp_bio_list_size); } } rcu_read_unlock(); return 0; } #endif
3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SECCOMP_H #define _LINUX_SECCOMP_H #include <uapi/linux/seccomp.h> #include <linux/seccomp_types.h> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ SECCOMP_FILTER_FLAG_NEW_LISTENER | \ SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) /* sizeof() the first published struct seccomp_notif_addfd */ #define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24 #define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0 #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> #include <linux/atomic.h> #include <asm/seccomp.h> #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } #else extern void secure_computing_strict(int this_syscall); static inline int __secure_computing(const struct seccomp_data *sd) { secure_computing_strict(sd->nr); return 0; } #endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { return s->mode; } #else /* CONFIG_SECCOMP */ #include <linux/errno.h> struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER static inline int secure_computing(void) { return 0; } static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif static inline long prctl_get_seccomp(void) { return -EINVAL; } static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) { return -EINVAL; } static inline int seccomp_mode(struct seccomp *s) { return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ #ifdef CONFIG_SECCOMP_FILTER extern void seccomp_filter_release(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); #else /* CONFIG_SECCOMP_FILTER */ static inline void seccomp_filter_release(struct task_struct *tsk) { return; } static inline void get_seccomp_filter(struct task_struct *tsk) { return; } #endif /* CONFIG_SECCOMP_FILTER */ #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) extern long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, void __user *data); extern long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data); #else static inline long seccomp_get_filter(struct task_struct *task, unsigned long n, void __user *data) { return -EINVAL; } static inline long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data) { return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; struct pid_namespace; struct pid; int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); #endif #endif /* _LINUX_SECCOMP_H */
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 // SPDX-License-Identifier: GPL-2.0 /* * thermal_helpers.c - helper functions to handle thermal devices * * Copyright (C) 2016 Eduardo Valentin <edubezval@gmail.com> * * Highly based on original thermal_core.c * Copyright (C) 2008 Intel Corp * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/sysfs.h> #include "thermal_core.h" #include "thermal_trace.h" int get_tz_trend(struct thermal_zone_device *tz, const struct thermal_trip *trip) { enum thermal_trend trend; if (tz->emul_temperature || !tz->ops.get_trend || tz->ops.get_trend(tz, trip, &trend)) { if (tz->temperature > tz->last_temperature) trend = THERMAL_TREND_RAISING; else if (tz->temperature < tz->last_temperature) trend = THERMAL_TREND_DROPPING; else trend = THERMAL_TREND_STABLE; } return trend; } static bool thermal_instance_present(struct thermal_zone_device *tz, struct thermal_cooling_device *cdev, const struct thermal_trip *trip) { const struct thermal_trip_desc *td = trip_to_trip_desc(trip); struct thermal_instance *ti; list_for_each_entry(ti, &td->thermal_instances, trip_node) { if (ti->cdev == cdev) return true; } return false; } bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz, const struct thermal_trip *trip, struct thermal_cooling_device *cdev) { guard(thermal_zone)(tz); guard(cooling_dev)(cdev); return thermal_instance_present(tz, cdev, trip); } EXPORT_SYMBOL_GPL(thermal_trip_is_bound_to_cdev); /** * __thermal_zone_get_temp() - returns the temperature of a thermal zone * @tz: a valid pointer to a struct thermal_zone_device * @temp: a valid pointer to where to store the resulting temperature. * * When a valid thermal zone reference is passed, it will fetch its * temperature and fill @temp. * * Both tz and tz->ops must be valid pointers when calling this function, * and the tz->ops.get_temp callback must be provided. * The function must be called under tz->lock. * * Return: On success returns 0, an error code otherwise */ int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { const struct thermal_trip_desc *td; int crit_temp = INT_MAX; int ret = -EINVAL; lockdep_assert_held(&tz->lock); ret = tz->ops.get_temp(tz, temp); if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) { for_each_trip_desc(tz, td) { const struct thermal_trip *trip = &td->trip; if (trip->type == THERMAL_TRIP_CRITICAL) { crit_temp = trip->temperature; break; } } /* * Only allow emulating a temperature when the real temperature * is below the critical temperature so that the emulation code * cannot hide critical conditions. */ if (!ret && *temp < crit_temp) *temp = tz->emul_temperature; } if (ret) dev_dbg(&tz->device, "Failed to get temperature: %d\n", ret); return ret; } /** * thermal_zone_get_temp() - returns the temperature of a thermal zone * @tz: a valid pointer to a struct thermal_zone_device * @temp: a valid pointer to where to store the resulting temperature. * * When a valid thermal zone reference is passed, it will fetch its * temperature and fill @temp. * * Return: On success returns 0, an error code otherwise */ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { int ret; if (IS_ERR_OR_NULL(tz)) return -EINVAL; guard(thermal_zone)(tz); if (!tz->ops.get_temp) return -EINVAL; ret = __thermal_zone_get_temp(tz, temp); if (!ret && *temp <= THERMAL_TEMP_INVALID) return -ENODATA; return ret; } EXPORT_SYMBOL_GPL(thermal_zone_get_temp); static int thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, int state) { int ret; /* * No check is needed for the ops->set_cur_state as the * registering function checked the ops are correctly set */ ret = cdev->ops->set_cur_state(cdev, state); if (ret) return ret; thermal_notify_cdev_state_update(cdev, state); thermal_cooling_device_stats_update(cdev, state); thermal_debug_cdev_state_update(cdev, state); return 0; } void __thermal_cdev_update(struct thermal_cooling_device *cdev) { struct thermal_instance *instance; unsigned long target = 0; /* Make sure cdev enters the deepest cooling state */ list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { if (instance->target == THERMAL_NO_TARGET) continue; if (instance->target > target) target = instance->target; } thermal_cdev_set_cur_state(cdev, target); trace_cdev_update(cdev, target); dev_dbg(&cdev->device, "set to state %lu\n", target); } /** * thermal_cdev_update - update cooling device state if needed * @cdev: pointer to struct thermal_cooling_device * * Update the cooling device state if there is a need. */ void thermal_cdev_update(struct thermal_cooling_device *cdev) { guard(cooling_dev)(cdev); if (!cdev->updated) { __thermal_cdev_update(cdev); cdev->updated = true; } } /** * thermal_cdev_update_nocheck() - Unconditionally update cooling device state * @cdev: Target cooling device. */ void thermal_cdev_update_nocheck(struct thermal_cooling_device *cdev) { guard(cooling_dev)(cdev); __thermal_cdev_update(cdev); } /** * thermal_zone_get_slope - return the slope attribute of the thermal zone * @tz: thermal zone device with the slope attribute * * Return: If the thermal zone device has a slope attribute, return it, else * return 1. */ int thermal_zone_get_slope(struct thermal_zone_device *tz) { if (tz && tz->tzp) return tz->tzp->slope; return 1; } EXPORT_SYMBOL_GPL(thermal_zone_get_slope); /** * thermal_zone_get_offset - return the offset attribute of the thermal zone * @tz: thermal zone device with the offset attribute * * Return: If the thermal zone device has a offset attribute, return it, else * return 0. */ int thermal_zone_get_offset(struct thermal_zone_device *tz) { if (tz && tz->tzp) return tz->tzp->offset; return 0; } EXPORT_SYMBOL_GPL(thermal_zone_get_offset);
1 6 6 6 9 9 8 1 9 9 2 2 3 3 1 2 10 11 2 9 2 2 7 3 3 1 2 24 1 22 11 6 5 11 11 11 2 1 8 1 1 11 1 1 24 24 1 13 2 1 1 1 1 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth HCI UART driver * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/signal.h> #include <linux/ioctl.h> #include <linux/skbuff.h> #include <linux/firmware.h> #include <linux/serdev.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "btintel.h" #include "btbcm.h" #include "hci_uart.h" #define VERSION "2.3" static const struct hci_uart_proto *hup[HCI_UART_MAX_PROTO]; int hci_uart_register_proto(const struct hci_uart_proto *p) { if (p->id >= HCI_UART_MAX_PROTO) return -EINVAL; if (hup[p->id]) return -EEXIST; hup[p->id] = p; BT_INFO("HCI UART protocol %s registered", p->name); return 0; } int hci_uart_unregister_proto(const struct hci_uart_proto *p) { if (p->id >= HCI_UART_MAX_PROTO) return -EINVAL; if (!hup[p->id]) return -EINVAL; hup[p->id] = NULL; return 0; } static const struct hci_uart_proto *hci_uart_get_proto(unsigned int id) { if (id >= HCI_UART_MAX_PROTO) return NULL; return hup[id]; } static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type) { struct hci_dev *hdev = hu->hdev; /* Update HCI stat counters */ switch (pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: hdev->stat.sco_tx++; break; } } static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) { struct sk_buff *skb = hu->tx_skb; if (!skb) { percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) skb = hu->proto->dequeue(hu); percpu_up_read(&hu->proto_lock); } else { hu->tx_skb = NULL; } return skb; } int hci_uart_tx_wakeup(struct hci_uart *hu) { /* This may be called in an IRQ context, so we can't sleep. Therefore * we try to acquire the lock only, and if that fails we assume the * tty is being closed because that is the only time the write lock is * acquired. If, however, at some point in the future the write lock * is also acquired in other situations, then this must be revisited. */ if (!percpu_down_read_trylock(&hu->proto_lock)) return 0; if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) goto no_schedule; BT_DBG(""); schedule_work(&hu->write_work); no_schedule: percpu_up_read(&hu->proto_lock); return 0; } EXPORT_SYMBOL_GPL(hci_uart_tx_wakeup); static void hci_uart_write_work(struct work_struct *work) { struct hci_uart *hu = container_of(work, struct hci_uart, write_work); struct tty_struct *tty = hu->tty; struct hci_dev *hdev = hu->hdev; struct sk_buff *skb; /* REVISIT: should we cope with bad skbs or ->write() returning * and error value ? */ restart: clear_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); while ((skb = hci_uart_dequeue(hu))) { int len; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); len = tty->ops->write(tty, skb->data, skb->len); hdev->stat.byte_tx += len; skb_pull(skb, len); if (skb->len) { hu->tx_skb = skb; break; } hci_uart_tx_complete(hu, hci_skb_pkt_type(skb)); kfree_skb(skb); } clear_bit(HCI_UART_SENDING, &hu->tx_state); if (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)) goto restart; wake_up_bit(&hu->tx_state, HCI_UART_SENDING); } void hci_uart_init_work(struct work_struct *work) { struct hci_uart *hu = container_of(work, struct hci_uart, init_ready); int err; struct hci_dev *hdev; if (!test_and_clear_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return; err = hci_register_dev(hu->hdev); if (err < 0) { BT_ERR("Can't register HCI device"); clear_bit(HCI_UART_PROTO_READY, &hu->flags); hu->proto->close(hu); hdev = hu->hdev; hu->hdev = NULL; hci_free_dev(hdev); return; } set_bit(HCI_UART_REGISTERED, &hu->flags); } int hci_uart_init_ready(struct hci_uart *hu) { if (!test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return -EALREADY; schedule_work(&hu->init_ready); return 0; } int hci_uart_wait_until_sent(struct hci_uart *hu) { return wait_on_bit_timeout(&hu->tx_state, HCI_UART_SENDING, TASK_INTERRUPTIBLE, msecs_to_jiffies(2000)); } /* ------- Interface to HCI layer ------ */ /* Reset device */ static int hci_uart_flush(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct tty_struct *tty = hu->tty; BT_DBG("hdev %p tty %p", hdev, tty); if (hu->tx_skb) { kfree_skb(hu->tx_skb); hu->tx_skb = NULL; } /* Flush any pending characters in the driver and discipline. */ tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hu->proto->flush(hu); percpu_up_read(&hu->proto_lock); return 0; } /* Initialize device */ static int hci_uart_open(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); /* Undo clearing this from hci_uart_close() */ hdev->flush = hci_uart_flush; return 0; } /* Close device */ static int hci_uart_close(struct hci_dev *hdev) { BT_DBG("hdev %p", hdev); hci_uart_flush(hdev); hdev->flush = NULL; return 0; } /* Send frames from HCI layer */ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb), skb->len); percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_up_read(&hu->proto_lock); return -EUNATCH; } hu->proto->enqueue(hu, skb); percpu_up_read(&hu->proto_lock); hci_uart_tx_wakeup(hu); return 0; } /* Check the underlying device or tty has flow control support */ bool hci_uart_has_flow_control(struct hci_uart *hu) { /* serdev nodes check if the needed operations are present */ if (hu->serdev) return true; if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) return true; return false; } /* Flow control or un-flow control the device */ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) { struct tty_struct *tty = hu->tty; struct ktermios ktermios; int status; unsigned int set = 0; unsigned int clear = 0; if (hu->serdev) { serdev_device_set_flow_control(hu->serdev, !enable); serdev_device_set_rts(hu->serdev, !enable); return; } if (enable) { /* Disable hardware flow control */ ktermios = tty->termios; ktermios.c_cflag &= ~CRTSCTS; tty_set_termios(tty, &ktermios); BT_DBG("Disabling hardware flow control: %s", (tty->termios.c_cflag & CRTSCTS) ? "failed" : "success"); /* Clear RTS to prevent the device from sending */ /* Most UARTs need OUT2 to enable interrupts */ status = tty->driver->ops->tiocmget(tty); BT_DBG("Current tiocm 0x%x", status); set &= ~(TIOCM_OUT2 | TIOCM_RTS); clear = ~set; set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; status = tty->driver->ops->tiocmset(tty, set, clear); BT_DBG("Clearing RTS: %s", status ? "failed" : "success"); } else { /* Set RTS to allow the device to send again */ status = tty->driver->ops->tiocmget(tty); BT_DBG("Current tiocm 0x%x", status); set |= (TIOCM_OUT2 | TIOCM_RTS); clear = ~set; set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; status = tty->driver->ops->tiocmset(tty, set, clear); BT_DBG("Setting RTS: %s", status ? "failed" : "success"); /* Re-enable hardware flow control */ ktermios = tty->termios; ktermios.c_cflag |= CRTSCTS; tty_set_termios(tty, &ktermios); BT_DBG("Enabling hardware flow control: %s", !(tty->termios.c_cflag & CRTSCTS) ? "failed" : "success"); } } void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, unsigned int oper_speed) { hu->init_speed = init_speed; hu->oper_speed = oper_speed; } void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed) { struct tty_struct *tty = hu->tty; struct ktermios ktermios; ktermios = tty->termios; ktermios.c_cflag &= ~CBAUD; tty_termios_encode_baud_rate(&ktermios, speed, speed); /* tty_set_termios() return not checked as it is always 0 */ tty_set_termios(tty, &ktermios); BT_DBG("%s: New tty speeds: %d/%d", hu->hdev->name, tty->termios.c_ispeed, tty->termios.c_ospeed); } static int hci_uart_setup(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct hci_rp_read_local_version *ver; struct sk_buff *skb; unsigned int speed; int err; /* Init speed if any */ if (hu->init_speed) speed = hu->init_speed; else if (hu->proto->init_speed) speed = hu->proto->init_speed; else speed = 0; if (speed) hci_uart_set_baudrate(hu, speed); /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; else if (hu->proto->oper_speed) speed = hu->proto->oper_speed; else speed = 0; if (hu->proto->set_baudrate && speed) { err = hu->proto->set_baudrate(hu, speed); if (!err) hci_uart_set_baudrate(hu, speed); } if (hu->proto->setup) return hu->proto->setup(hu); if (!test_bit(HCI_UART_VND_DETECT, &hu->hdev_flags)) return 0; skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { BT_ERR("%s: Reading local version information failed (%ld)", hdev->name, PTR_ERR(skb)); return 0; } if (skb->len != sizeof(*ver)) { BT_ERR("%s: Event length mismatch for version information", hdev->name); goto done; } ver = (struct hci_rp_read_local_version *)skb->data; switch (le16_to_cpu(ver->manufacturer)) { #ifdef CONFIG_BT_HCIUART_INTEL case 2: hdev->set_bdaddr = btintel_set_bdaddr; btintel_check_bdaddr(hdev); break; #endif #ifdef CONFIG_BT_HCIUART_BCM case 15: hdev->set_bdaddr = btbcm_set_bdaddr; btbcm_check_bdaddr(hdev); break; #endif default: break; } done: kfree_skb(skb); return 0; } /* ------ LDISC part ------ */ /* hci_uart_tty_open * * Called when line discipline changed to HCI_UART. * * Arguments: * tty pointer to tty info structure * Return Value: * 0 if success, otherwise error code */ static int hci_uart_tty_open(struct tty_struct *tty) { struct hci_uart *hu; BT_DBG("tty %p", tty); if (!capable(CAP_NET_ADMIN)) return -EPERM; /* Error if the tty has no write op instead of leaving an exploitable * hole */ if (tty->ops->write == NULL) return -EOPNOTSUPP; hu = kzalloc(sizeof(*hu), GFP_KERNEL); if (!hu) { BT_ERR("Can't allocate control structure"); return -ENFILE; } if (percpu_init_rwsem(&hu->proto_lock)) { BT_ERR("Can't allocate semaphore structure"); kfree(hu); return -ENOMEM; } tty->disc_data = hu; hu->tty = tty; tty->receive_room = 65536; /* disable alignment support by default */ hu->alignment = 1; hu->padding = 0; /* Use serial port speed as oper_speed */ hu->oper_speed = tty->termios.c_ospeed; INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); return 0; } /* hci_uart_tty_close() * * Called when the line discipline is changed to something * else, the tty is closed, or the tty detects a hangup. */ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; BT_DBG("tty %p", tty); /* Detach from the tty */ tty->disc_data = NULL; if (!hu) return; hdev = hu->hdev; if (hdev) hci_uart_close(hdev); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); percpu_up_write(&hu->proto_lock); cancel_work_sync(&hu->init_ready); cancel_work_sync(&hu->write_work); if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) hci_unregister_dev(hdev); hci_free_dev(hdev); } hu->proto->close(hu); } clear_bit(HCI_UART_PROTO_SET, &hu->flags); percpu_free_rwsem(&hu->proto_lock); kfree(hu); } /* hci_uart_tty_wakeup() * * Callback for transmit wakeup. Called when low level * device driver can accept more send data. * * Arguments: tty pointer to associated tty instance data * Return Value: None */ static void hci_uart_tty_wakeup(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; BT_DBG(""); if (!hu) return; clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (tty != hu->tty) return; if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hci_uart_tx_wakeup(hu); } /* hci_uart_tty_receive() * * Called by tty low level driver when receive data is * available. * * Arguments: tty pointer to tty instance data * data pointer to received data * flags pointer to flags for data * count count of received data in bytes * * Return Value: None */ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, const u8 *flags, size_t count) { struct hci_uart *hu = tty->disc_data; if (!hu || tty != hu->tty) return; percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_up_read(&hu->proto_lock); return; } /* It does not need a lock here as it is already protected by a mutex in * tty caller */ hu->proto->recv(hu, data, count); percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; tty_unthrottle(tty); } static int hci_uart_register_dev(struct hci_uart *hu) { struct hci_dev *hdev; int err; BT_DBG(""); /* Initialize and register HCI device */ hdev = hci_alloc_dev(); if (!hdev) { BT_ERR("Can't allocate HCI device"); return -ENOMEM; } hu->hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, hu); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. This avoids filling in the * value for Ericsson when nothing is specified. */ if (hu->proto->setup) hdev->manufacturer = hu->proto->manufacturer; hdev->open = hci_uart_open; hdev->close = hci_uart_close; hdev->flush = hci_uart_flush; hdev->send = hci_uart_send_frame; hdev->setup = hci_uart_setup; SET_HCIDEV_DEV(hdev, hu->tty->dev); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); /* Only call open() for the protocol after hdev is fully initialized as * open() (or a timer/workqueue it starts) may attempt to reference it. */ err = hu->proto->open(hu); if (err) { hu->hdev = NULL; hci_free_dev(hdev); return err; } if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); return -ENODEV; } set_bit(HCI_UART_REGISTERED, &hu->flags); return 0; } static int hci_uart_set_proto(struct hci_uart *hu, int id) { const struct hci_uart_proto *p; int err; p = hci_uart_get_proto(id); if (!p) return -EPROTONOSUPPORT; hu->proto = p; err = hci_uart_register_dev(hu); if (err) { return err; } set_bit(HCI_UART_PROTO_READY, &hu->flags); return 0; } static int hci_uart_set_flags(struct hci_uart *hu, unsigned long flags) { unsigned long valid_flags = BIT(HCI_UART_RAW_DEVICE) | BIT(HCI_UART_RESET_ON_INIT) | BIT(HCI_UART_INIT_PENDING) | BIT(HCI_UART_EXT_CONFIG) | BIT(HCI_UART_VND_DETECT); if (flags & ~valid_flags) return -EINVAL; hu->hdev_flags = flags; return 0; } /* hci_uart_tty_ioctl() * * Process IOCTL system call for the tty device. * * Arguments: * * tty pointer to tty instance data * cmd IOCTL command code * arg argument for IOCTL call (cmd dependent) * * Return Value: Command dependent */ static int hci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct hci_uart *hu = tty->disc_data; int err = 0; BT_DBG(""); /* Verify the status of the device */ if (!hu) return -EBADF; switch (cmd) { case HCIUARTSETPROTO: if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { err = hci_uart_set_proto(hu, arg); if (err) clear_bit(HCI_UART_PROTO_SET, &hu->flags); } else err = -EBUSY; break; case HCIUARTGETPROTO: if (test_bit(HCI_UART_PROTO_SET, &hu->flags) && test_bit(HCI_UART_PROTO_READY, &hu->flags)) err = hu->proto->id; else err = -EUNATCH; break; case HCIUARTGETDEVICE: if (test_bit(HCI_UART_REGISTERED, &hu->flags)) err = hu->hdev->id; else err = -EUNATCH; break; case HCIUARTSETFLAGS: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) err = -EBUSY; else err = hci_uart_set_flags(hu, arg); break; case HCIUARTGETFLAGS: err = hu->hdev_flags; break; default: err = n_tty_ioctl_helper(tty, cmd, arg); break; } return err; } /* * We don't provide read/write/poll interface for user space. */ static ssize_t hci_uart_tty_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t nr, void **cookie, unsigned long offset) { return 0; } static ssize_t hci_uart_tty_write(struct tty_struct *tty, struct file *file, const u8 *data, size_t count) { return 0; } static struct tty_ldisc_ops hci_uart_ldisc = { .owner = THIS_MODULE, .num = N_HCI, .name = "n_hci", .open = hci_uart_tty_open, .close = hci_uart_tty_close, .read = hci_uart_tty_read, .write = hci_uart_tty_write, .ioctl = hci_uart_tty_ioctl, .compat_ioctl = hci_uart_tty_ioctl, .receive_buf = hci_uart_tty_receive, .write_wakeup = hci_uart_tty_wakeup, }; static int __init hci_uart_init(void) { int err; BT_INFO("HCI UART driver ver %s", VERSION); /* Register the tty discipline */ err = tty_register_ldisc(&hci_uart_ldisc); if (err) { BT_ERR("HCI line discipline registration failed. (%d)", err); return err; } #ifdef CONFIG_BT_HCIUART_H4 h4_init(); #endif #ifdef CONFIG_BT_HCIUART_BCSP bcsp_init(); #endif #ifdef CONFIG_BT_HCIUART_LL ll_init(); #endif #ifdef CONFIG_BT_HCIUART_ATH3K ath_init(); #endif #ifdef CONFIG_BT_HCIUART_3WIRE h5_init(); #endif #ifdef CONFIG_BT_HCIUART_INTEL intel_init(); #endif #ifdef CONFIG_BT_HCIUART_BCM bcm_init(); #endif #ifdef CONFIG_BT_HCIUART_QCA qca_init(); #endif #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif #ifdef CONFIG_BT_HCIUART_MRVL mrvl_init(); #endif #ifdef CONFIG_BT_HCIUART_AML aml_init(); #endif return 0; } static void __exit hci_uart_exit(void) { #ifdef CONFIG_BT_HCIUART_H4 h4_deinit(); #endif #ifdef CONFIG_BT_HCIUART_BCSP bcsp_deinit(); #endif #ifdef CONFIG_BT_HCIUART_LL ll_deinit(); #endif #ifdef CONFIG_BT_HCIUART_ATH3K ath_deinit(); #endif #ifdef CONFIG_BT_HCIUART_3WIRE h5_deinit(); #endif #ifdef CONFIG_BT_HCIUART_INTEL intel_deinit(); #endif #ifdef CONFIG_BT_HCIUART_BCM bcm_deinit(); #endif #ifdef CONFIG_BT_HCIUART_QCA qca_deinit(); #endif #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif #ifdef CONFIG_BT_HCIUART_MRVL mrvl_deinit(); #endif #ifdef CONFIG_BT_HCIUART_AML aml_deinit(); #endif tty_unregister_ldisc(&hci_uart_ldisc); } module_init(hci_uart_init); module_exit(hci_uart_exit); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth HCI UART driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_HCI);
32 32 4 4 3 1 5 5 1 6 5 6 1 5 5 7 7 7 7 3 3 3 3 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/module.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cfcnfg.h> #include <net/caif/cfctrl.h> #include <net/caif/cfmuxl.h> #include <net/caif/cffrml.h> #include <net/caif/cfserl.h> #include <net/caif/cfsrvl.h> #include <net/caif/caif_dev.h> #define container_obj(layr) container_of(layr, struct cfcnfg, layer) /* Information about CAIF physical interfaces held by Config Module in order * to manage physical interfaces */ struct cfcnfg_phyinfo { struct list_head node; bool up; /* Pointer to the layer below the MUX (framing layer) */ struct cflayer *frm_layer; /* Pointer to the lowest actual physical layer */ struct cflayer *phy_layer; /* Unique identifier of the physical interface */ unsigned int id; /* Preference of the physical in interface */ enum cfcnfg_phy_preference pref; /* Information about the physical device */ struct dev_info dev_info; /* Interface index */ int ifindex; /* Protocol head room added for CAIF link layer */ int head_room; /* Use Start of frame checksum */ bool use_fcs; }; struct cfcnfg { struct cflayer layer; struct cflayer *ctrl; struct cflayer *mux; struct list_head phys; struct mutex lock; }; static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer); static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); struct cfcnfg *cfcnfg_create(void) { struct cfcnfg *this; struct cfctrl_rsp *resp; might_sleep(); /* Initiate this layer */ this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); if (!this) return NULL; this->mux = cfmuxl_create(); if (!this->mux) goto out_of_mem; this->ctrl = cfctrl_create(); if (!this->ctrl) goto out_of_mem; /* Initiate response functions */ resp = cfctrl_get_respfuncs(this->ctrl); resp->enum_rsp = cfctrl_enum_resp; resp->linkerror_ind = cfctrl_resp_func; resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp; resp->sleep_rsp = cfctrl_resp_func; resp->wake_rsp = cfctrl_resp_func; resp->restart_rsp = cfctrl_resp_func; resp->radioset_rsp = cfctrl_resp_func; resp->linksetup_rsp = cfcnfg_linkup_rsp; resp->reject_rsp = cfcnfg_reject_rsp; INIT_LIST_HEAD(&this->phys); cfmuxl_set_uplayer(this->mux, this->ctrl, 0); layer_set_dn(this->ctrl, this->mux); layer_set_up(this->ctrl, this); mutex_init(&this->lock); return this; out_of_mem: synchronize_rcu(); kfree(this->mux); kfree(this->ctrl); kfree(this); return NULL; } void cfcnfg_remove(struct cfcnfg *cfg) { might_sleep(); if (cfg) { synchronize_rcu(); kfree(cfg->mux); cfctrl_remove(cfg->ctrl); kfree(cfg); } } static void cfctrl_resp_func(void) { } static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, u8 phyid) { struct cfcnfg_phyinfo *phy; list_for_each_entry_rcu(phy, &cnfg->phys, node) if (phy->id == phyid) return phy; return NULL; } static void cfctrl_enum_resp(void) { } static struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg, enum cfcnfg_phy_preference phy_pref) { /* Try to match with specified preference */ struct cfcnfg_phyinfo *phy; list_for_each_entry_rcu(phy, &cnfg->phys, node) { if (phy->up && phy->pref == phy_pref && phy->frm_layer != NULL) return &phy->dev_info; } /* Otherwise just return something */ list_for_each_entry_rcu(phy, &cnfg->phys, node) if (phy->up) return &phy->dev_info; return NULL; } static int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi) { struct cfcnfg_phyinfo *phy; list_for_each_entry_rcu(phy, &cnfg->phys, node) if (phy->ifindex == ifi && phy->up) return phy->id; return -ENODEV; } int caif_disconnect_client(struct net *net, struct cflayer *adap_layer) { u8 channel_id; struct cfcnfg *cfg = get_cfcnfg(net); caif_assert(adap_layer != NULL); cfctrl_cancel_req(cfg->ctrl, adap_layer); channel_id = adap_layer->id; if (channel_id != 0) { struct cflayer *servl; servl = cfmuxl_remove_uplayer(cfg->mux, channel_id); cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer); if (servl != NULL) layer_set_up(servl, NULL); } else pr_debug("nothing to disconnect\n"); /* Do RCU sync before initiating cleanup */ synchronize_rcu(); if (adap_layer->ctrlcmd != NULL) adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0); return 0; } EXPORT_SYMBOL(caif_disconnect_client); static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) { } static const int protohead[CFCTRL_SRV_MASK] = { [CFCTRL_SRV_VEI] = 4, [CFCTRL_SRV_DATAGRAM] = 7, [CFCTRL_SRV_UTIL] = 4, [CFCTRL_SRV_RFM] = 3, [CFCTRL_SRV_DBG] = 3, }; static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, struct caif_connect_request *s, struct cfctrl_link_param *l) { struct dev_info *dev_info; enum cfcnfg_phy_preference pref; int res; memset(l, 0, sizeof(*l)); /* In caif protocol low value is high priority */ l->priority = CAIF_PRIO_MAX - s->priority + 1; if (s->ifindex != 0) { res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex); if (res < 0) return res; l->phyid = res; } else { switch (s->link_selector) { case CAIF_LINK_HIGH_BANDW: pref = CFPHYPREF_HIGH_BW; break; case CAIF_LINK_LOW_LATENCY: pref = CFPHYPREF_LOW_LAT; break; default: return -EINVAL; } dev_info = cfcnfg_get_phyid(cnfg, pref); if (dev_info == NULL) return -ENODEV; l->phyid = dev_info->id; } switch (s->protocol) { case CAIFPROTO_AT: l->linktype = CFCTRL_SRV_VEI; l->endpoint = (s->sockaddr.u.at.type >> 2) & 0x3; l->chtype = s->sockaddr.u.at.type & 0x3; break; case CAIFPROTO_DATAGRAM: l->linktype = CFCTRL_SRV_DATAGRAM; l->chtype = 0x00; l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; break; case CAIFPROTO_DATAGRAM_LOOP: l->linktype = CFCTRL_SRV_DATAGRAM; l->chtype = 0x03; l->endpoint = 0x00; l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; break; case CAIFPROTO_RFM: l->linktype = CFCTRL_SRV_RFM; l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; strscpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, sizeof(l->u.rfm.volume)); break; case CAIFPROTO_UTIL: l->linktype = CFCTRL_SRV_UTIL; l->endpoint = 0x00; l->chtype = 0x00; strscpy(l->u.utility.name, s->sockaddr.u.util.service, sizeof(l->u.utility.name)); caif_assert(sizeof(l->u.utility.name) > 10); l->u.utility.paramlen = s->param.size; if (l->u.utility.paramlen > sizeof(l->u.utility.params)) l->u.utility.paramlen = sizeof(l->u.utility.params); memcpy(l->u.utility.params, s->param.data, l->u.utility.paramlen); break; case CAIFPROTO_DEBUG: l->linktype = CFCTRL_SRV_DBG; l->endpoint = s->sockaddr.u.dbg.service; l->chtype = s->sockaddr.u.dbg.type; break; default: return -EINVAL; } return 0; } int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, struct cflayer *adap_layer, int *ifindex, int *proto_head, int *proto_tail) { struct cflayer *frml; struct cfcnfg_phyinfo *phy; int err; struct cfctrl_link_param param; struct cfcnfg *cfg = get_cfcnfg(net); rcu_read_lock(); err = caif_connect_req_to_link_param(cfg, conn_req, &param); if (err) goto unlock; phy = cfcnfg_get_phyinfo_rcu(cfg, param.phyid); if (!phy) { err = -ENODEV; goto unlock; } err = -EINVAL; if (adap_layer == NULL) { pr_err("adap_layer is zero\n"); goto unlock; } if (adap_layer->receive == NULL) { pr_err("adap_layer->receive is NULL\n"); goto unlock; } if (adap_layer->ctrlcmd == NULL) { pr_err("adap_layer->ctrlcmd == NULL\n"); goto unlock; } err = -ENODEV; frml = phy->frm_layer; if (frml == NULL) { pr_err("Specified PHY type does not exist!\n"); goto unlock; } caif_assert(param.phyid == phy->id); caif_assert(phy->frm_layer->id == param.phyid); caif_assert(phy->phy_layer->id == param.phyid); *ifindex = phy->ifindex; *proto_tail = 2; *proto_head = protohead[param.linktype] + phy->head_room; rcu_read_unlock(); /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ cfctrl_enum_req(cfg->ctrl, param.phyid); return cfctrl_linkup_request(cfg->ctrl, &param, adap_layer); unlock: rcu_read_unlock(); return err; } EXPORT_SYMBOL(caif_connect_client); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) adapt_layer->ctrlcmd(adapt_layer, CAIF_CTRLCMD_INIT_FAIL_RSP, 0); } static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer) { struct cfcnfg *cnfg = container_obj(layer); struct cflayer *servicel = NULL; struct cfcnfg_phyinfo *phyinfo; struct net_device *netdev; if (channel_id == 0) { pr_warn("received channel_id zero\n"); if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) adapt_layer->ctrlcmd(adapt_layer, CAIF_CTRLCMD_INIT_FAIL_RSP, 0); return; } rcu_read_lock(); if (adapt_layer == NULL) { pr_debug("link setup response but no client exist, send linkdown back\n"); cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); goto unlock; } caif_assert(cnfg != NULL); caif_assert(phyid != 0); phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); if (phyinfo == NULL) { pr_err("ERROR: Link Layer Device disappeared while connecting\n"); goto unlock; } caif_assert(phyinfo != NULL); caif_assert(phyinfo->id == phyid); caif_assert(phyinfo->phy_layer != NULL); caif_assert(phyinfo->phy_layer->id == phyid); adapt_layer->id = channel_id; switch (serv) { case CFCTRL_SRV_VEI: servicel = cfvei_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DATAGRAM: servicel = cfdgml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_RFM: netdev = phyinfo->dev_info.dev; servicel = cfrfml_create(channel_id, &phyinfo->dev_info, netdev->mtu); break; case CFCTRL_SRV_UTIL: servicel = cfutill_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_VIDEO: servicel = cfvidl_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DBG: servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); break; default: pr_err("Protocol error. Link setup response - unknown channel type\n"); goto unlock; } if (!servicel) goto unlock; layer_set_dn(servicel, cnfg->mux); cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); layer_set_dn(adapt_layer, servicel); rcu_read_unlock(); servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); return; unlock: rcu_read_unlock(); } int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, struct cflayer *link_support, bool fcs, int head_room) { struct cflayer *frml; struct cfcnfg_phyinfo *phyinfo = NULL; int i, res = 0; u8 phyid; mutex_lock(&cnfg->lock); /* CAIF protocol allow maximum 6 link-layers */ for (i = 0; i < 7; i++) { phyid = (dev->ifindex + i) & 0x7; if (phyid == 0) continue; if (cfcnfg_get_phyinfo_rcu(cnfg, phyid) == NULL) goto got_phyid; } pr_warn("Too many CAIF Link Layers (max 6)\n"); res = -EEXIST; goto out; got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); if (!phyinfo) { res = -ENOMEM; goto out; } phy_layer->id = phyid; phyinfo->pref = pref; phyinfo->id = phyid; phyinfo->dev_info.id = phyid; phyinfo->dev_info.dev = dev; phyinfo->phy_layer = phy_layer; phyinfo->ifindex = dev->ifindex; phyinfo->head_room = head_room; phyinfo->use_fcs = fcs; frml = cffrml_create(phyid, fcs); if (!frml) { res = -ENOMEM; goto out_err; } phyinfo->frm_layer = frml; layer_set_up(frml, cnfg->mux); if (link_support != NULL) { link_support->id = phyid; layer_set_dn(frml, link_support); layer_set_up(link_support, frml); layer_set_dn(link_support, phy_layer); layer_set_up(phy_layer, link_support); } else { layer_set_dn(frml, phy_layer); layer_set_up(phy_layer, frml); } list_add_rcu(&phyinfo->node, &cnfg->phys); out: mutex_unlock(&cnfg->lock); return res; out_err: kfree(phyinfo); mutex_unlock(&cnfg->lock); return res; } EXPORT_SYMBOL(cfcnfg_add_phy_layer); int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, bool up) { struct cfcnfg_phyinfo *phyinfo; rcu_read_lock(); phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phy_layer->id); if (phyinfo == NULL) { rcu_read_unlock(); return -ENODEV; } if (phyinfo->up == up) { rcu_read_unlock(); return 0; } phyinfo->up = up; if (up) { cffrml_hold(phyinfo->frm_layer); cfmuxl_set_dnlayer(cnfg->mux, phyinfo->frm_layer, phy_layer->id); } else { cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id); cffrml_put(phyinfo->frm_layer); } rcu_read_unlock(); return 0; } EXPORT_SYMBOL(cfcnfg_set_phy_state); int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer) { struct cflayer *frml, *frml_dn; u16 phyid; struct cfcnfg_phyinfo *phyinfo; might_sleep(); mutex_lock(&cnfg->lock); phyid = phy_layer->id; phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); if (phyinfo == NULL) { mutex_unlock(&cnfg->lock); return 0; } caif_assert(phyid == phyinfo->id); caif_assert(phy_layer == phyinfo->phy_layer); caif_assert(phy_layer->id == phyid); caif_assert(phyinfo->frm_layer->id == phyid); list_del_rcu(&phyinfo->node); synchronize_rcu(); /* Fail if reference count is not zero */ if (cffrml_refcnt_read(phyinfo->frm_layer) != 0) { pr_info("Wait for device inuse\n"); list_add_rcu(&phyinfo->node, &cnfg->phys); mutex_unlock(&cnfg->lock); return -EAGAIN; } frml = phyinfo->frm_layer; frml_dn = frml->dn; cffrml_set_uplayer(frml, NULL); cffrml_set_dnlayer(frml, NULL); if (phy_layer != frml_dn) { layer_set_up(frml_dn, NULL); layer_set_dn(frml_dn, NULL); } layer_set_up(phy_layer, NULL); if (phyinfo->phy_layer != frml_dn) kfree(frml_dn); cffrml_free(frml); kfree(phyinfo); mutex_unlock(&cnfg->lock); return 0; } EXPORT_SYMBOL(cfcnfg_del_phy_layer);
33 10 28 4 9 9 8 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 #ifndef __LINUX_MROUTE_BASE_H #define __LINUX_MROUTE_BASE_H #include <linux/netdevice.h> #include <linux/rhashtable-types.h> #include <linux/spinlock.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/fib_notifier.h> #include <net/ip_fib.h> /** * struct vif_device - interface representor for multicast routing * @dev: network device being used * @dev_tracker: refcount tracker for @dev reference * @bytes_in: statistic; bytes ingressing * @bytes_out: statistic; bytes egresing * @pkt_in: statistic; packets ingressing * @pkt_out: statistic; packets egressing * @rate_limit: Traffic shaping (NI) * @threshold: TTL threshold * @flags: Control flags * @link: Physical interface index * @dev_parent_id: device parent id * @local: Local address * @remote: Remote address for tunnels */ struct vif_device { struct net_device __rcu *dev; netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; unsigned long rate_limit; unsigned char threshold; unsigned short flags; int link; /* Currently only used by ipmr */ struct netdev_phys_item_id dev_parent_id; __be32 local, remote; }; struct vif_entry_notifier_info { struct fib_notifier_info info; struct net_device *dev; unsigned short vif_index; unsigned short vif_flags; u32 tb_id; }; static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, struct netlink_ext_ack *extack) { struct vif_entry_notifier_info info = { .info = { .family = family, .extack = extack, }, .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, }; return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_vif_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type,