// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018-2019 HUAWEI, Inc.
* https://www.huawei.com/ */ #include "internal.h" #include <linux/unaligned.h> #include <trace/events/erofs.h> struct z_erofs_maprecorder { struct inode *inode; struct erofs_map_blocks *map; unsigned long lcn; /* compression extent information gathered */ u8 type, headtype; u16 clusterofs; u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; bool partialref; }; static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize) + lcn * sizeof(struct z_erofs_lcluster_index); struct z_erofs_lcluster_index *di; unsigned int advise; di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, EROFS_KMAP); if (IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); advise = le16_to_cpu(di->di_advise); m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << vi->z_logical_clusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); if (m->clusterofs >= 1 << vi->z_logical_clusterbits) { DBG_BUGON(1); return -EFSCORRUPTED; } m->pblk = le32_to_cpu(di->di_u.blkaddr); } return 0; } static unsigned int decode_compactedbits(unsigned int lobits, u8 *in, unsigned int pos, u8 *type) { const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); const unsigned int lo = v & ((1 << lobits) - 1); *type = (v >> lobits) & 3; return lo; } static int get_compacted_la_distance(unsigned int lobits, unsigned int encodebits, unsigned int vcnt, u8 *in, int i) { unsigned int lo, d1 = 0; u8 type; DBG_BUGON(i >= vcnt); do { lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) return d1; ++d1; } while (++i < vcnt); /* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */ if (!(lo & Z_EROFS_LI_D0_CBLKCNT)) d1 += lo - 1; return d1; } static int unpack_compacted_index(struct z_erofs_maprecorder *m, unsigned int amortizedshift, erofs_off_t pos, bool lookahead) { struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; bool big_pcluster; u8 *in, type; int i; if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits <= 12) vcnt = 16; else return -EOPNOTSUPP; in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, EROFS_KMAP); if (IS_ERR(in)) return PTR_ERR(in); /* it doesn't equal to round_up(..) 
*/ m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; bytes = pos & ((vcnt << amortizedshift) - 1); in -= bytes; i = bytes >> amortizedshift; lo = decode_compactedbits(lobits, in, encodebits * i, &type); m->type = type; if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; /* figure out lookahead_distance: delta[1] if needed */ if (lookahead) m->delta[1] = get_compacted_la_distance(lobits, encodebits, vcnt, in, i); if (lo & Z_EROFS_LI_D0_CBLKCNT) { if (!big_pcluster) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; return 0; } else if (i + 1 != (int)vcnt) { m->delta[0] = lo; return 0; } /* * since the last lcluster in the pack is special, * of which lo saves delta[1] rather than delta[0]. * Hence, get delta[0] by the previous lcluster indirectly. */ lo = decode_compactedbits(lobits, in, encodebits * (i - 1), &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) lo = 0; else if (lo & Z_EROFS_LI_D0_CBLKCNT) lo = 1; m->delta[0] = lo + 1; return 0; } m->clusterofs = lo; m->delta[0] = 0; /* figout out blkaddr (pblk) for HEAD lclusters */ if (!big_pcluster) { nblk = 1; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) i -= lo; if (i >= 0) ++nblk; } } else { nblk = 0; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { if (lo & Z_EROFS_LI_D0_CBLKCNT) { --i; nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT; continue; } /* bigpcluster shouldn't have plain d0 == 1 */ if (lo <= 1) { DBG_BUGON(1); return -EFSCORRUPTED; } i -= lo - 2; continue; } ++nblk; } } in += (vcnt << amortizedshift) - sizeof(__le32); m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; return 0; } static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn, bool lookahead) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); unsigned int totalidx = erofs_iblks(inode); unsigned int compacted_4b_initial, compacted_2b; unsigned int amortizedshift; erofs_off_t pos; if (lcn >= totalidx) return -EINVAL; m->lcn = lcn; /* used to align to 32-byte (compacted_2b) alignment */ compacted_4b_initial = (32 - ebase % 32) / 4; if (compacted_4b_initial == 32 / 4) compacted_4b_initial = 0; if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && compacted_4b_initial < totalidx) compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); else compacted_2b = 0; pos = ebase; if (lcn < compacted_4b_initial) { amortizedshift = 2; goto out; } pos += compacted_4b_initial * 4; lcn -= compacted_4b_initial; if (lcn < compacted_2b) { amortizedshift = 1; goto out; } pos += compacted_2b * 2; lcn -= compacted_2b; amortizedshift = 2; out: pos += lcn * (1 << amortizedshift); return unpack_compacted_index(m, amortizedshift, pos, lookahead); } static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { switch (EROFS_I(m->inode)->datalayout) { case EROFS_INODE_COMPRESSED_FULL: return z_erofs_load_full_lcluster(m, lcn); case EROFS_INODE_COMPRESSED_COMPACT: return z_erofs_load_compact_lcluster(m, lcn, 
lookahead); default: return -EINVAL; } } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; while (m->lcn >= lookback_distance) { unsigned long lcn = m->lcn - lookback_distance; int err; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; switch (m->type) { case Z_EROFS_LCLUSTER_TYPE_NONHEAD: lookback_distance = m->delta[0]; if (!lookback_distance) goto err_bogus; continue; case Z_EROFS_LCLUSTER_TYPE_PLAIN: case Z_EROFS_LCLUSTER_TYPE_HEAD1: case Z_EROFS_LCLUSTER_TYPE_HEAD2: m->headtype = m->type; m->map->m_la = (lcn << lclusterbits) | m->clusterofs; return 0; default: erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", m->type, lcn, vi->nid); DBG_BUGON(1); return -EOPNOTSUPP; } } err_bogus: erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, unsigned int initial_lcn) { struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); struct erofs_map_blocks *const map = m->map; const unsigned int lclusterbits = vi->z_logical_clusterbits; unsigned long lcn; int err; DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN && m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 && m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2); DBG_BUGON(m->type != m->headtype); if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { map->m_plen = 1ULL << lclusterbits; return 0; } lcn = m->lcn + 1; if (m->compressedblks) goto out; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; /* * If the 1st NONHEAD lcluster has already been handled initially w/o * valid compressedblks, which means at least it mustn't be CBLKCNT, or * an internal implemenatation error is detected. * * The following code can also handle it properly anyway, but let's * BUG_ON in the debugging mode only for developers to notice that. */ DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); switch (m->type) { case Z_EROFS_LCLUSTER_TYPE_PLAIN: case Z_EROFS_LCLUSTER_TYPE_HEAD1: case Z_EROFS_LCLUSTER_TYPE_HEAD2: /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. 
*/ m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits); break; case Z_EROFS_LCLUSTER_TYPE_NONHEAD: if (m->delta[0] != 1) goto err_bonus_cblkcnt; if (m->compressedblks) break; fallthrough; default: erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } out: map->m_plen = erofs_pos(sb, m->compressedblks); return 0; err_bonus_cblkcnt: erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) { struct inode *inode = m->inode; struct erofs_inode *vi = EROFS_I(inode); struct erofs_map_blocks *map = m->map; unsigned int lclusterbits = vi->z_logical_clusterbits; u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits; int err; do { /* handle the last EOF pcluster (no next HEAD lcluster) */ if ((lcn << lclusterbits) >= inode->i_size) { map->m_llen = inode->i_size - map->m_la; return 0; } err = z_erofs_load_lcluster_from_disk(m, lcn, true); if (err) return err; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { DBG_BUGON(!m->delta[1] && m->clusterofs != 1 << lclusterbits); } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN || m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 || m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) { /* go on until the next HEAD lcluster */ if (lcn != headlcn) break; m->delta[1] = 1; } else { erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu", m->type, lcn, vi->nid); DBG_BUGON(1); return -EOPNOTSUPP; } lcn += m->delta[1]; } while (m->delta[1]); map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la; return 0; } static int z_erofs_do_map_blocks(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *const vi = EROFS_I(inode); bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; struct z_erofs_maprecorder m = { .inode = inode, .map = map, }; int err = 0; unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; lclusterbits = vi->z_logical_clusterbits; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la; initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false); if (err) goto unmap_out; if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL)) vi->z_idataoff = m.nextpackoff; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; switch (m.type) { case Z_EROFS_LCLUSTER_TYPE_PLAIN: case Z_EROFS_LCLUSTER_TYPE_HEAD1: case Z_EROFS_LCLUSTER_TYPE_HEAD2: if (endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; /* * For ztailpacking files, in order to inline data more * effectively, special EOF lclusters are now supported * which can have three parts at most. 
*/ if (ztailpacking && end > inode->i_size) end = inode->i_size; break; } /* m.lcn should be >= 1 if endoff < m.clusterofs */ if (!m.lcn) { erofs_err(inode->i_sb, "invalid logical cluster 0 at nid %llu", vi->nid); err = -EFSCORRUPTED; goto unmap_out; } end = (m.lcn << lclusterbits) | m.clusterofs; map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; fallthrough; case Z_EROFS_LCLUSTER_TYPE_NONHEAD: /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; break; default: erofs_err(inode->i_sb, "unknown type %u @ offset %llu of nid %llu", m.type, ofs, vi->nid); err = -EOPNOTSUPP; goto unmap_out; } if (m.partialref) map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_llen = end - map->m_la; if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; /* for non-compact indexes, fragmentoff is 64 bits */ if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL) vi->z_fragmentoff |= (u64)m.pblk << 32; } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_META; map->m_pa = vi->z_idataoff; map->m_plen = vi->z_idata_size; } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(inode->i_sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) goto unmap_out; } if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) { if (map->m_llen > map->m_plen) { DBG_BUGON(1); err = -EFSCORRUPTED; goto unmap_out; } afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? Z_EROFS_COMPRESSION_INTERLACED : Z_EROFS_COMPRESSION_SHIFTED; } else { afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ? vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", afmt, vi->nid); err = -EFSCORRUPTED; goto unmap_out; } } map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA || map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE || map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) && map->m_llen >= i_blocksize(inode))) { err = z_erofs_get_extent_decompressedlen(&m); if (!err) map->m_flags |= EROFS_MAP_FULL_MAPPED; } unmap_out: erofs_unmap_metabuf(&m.map->buf); return err; } static int z_erofs_fill_inode_lazy(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; int err, headnr; erofs_off_t pos; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct z_erofs_map_header *h; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { /* * paired with smp_mb() at the end of the function to ensure * fields will only be observed after the bit is set. */ smp_mb(); return 0; } if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) return -ERESTARTSYS; err = 0; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); h = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; } /* * if the highest bit of the 8-byte map header is set, the whole file * is stored in the packed inode. The rest bits keeps z_fragmentoff. 
*/ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); vi->z_tailextent_headlcn = 0; goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; headnr = 0; if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX || vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) { erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; goto out_put_metabuf; } vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7); if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_put_metabuf; } if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_put_metabuf; } if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { struct erofs_map_blocks map = { .buf = __EROFS_BUF_INITIALIZER }; vi->z_idata_size = le16_to_cpu(h->h_idata_size); err = z_erofs_do_map_blocks(inode, &map, EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (!map.m_plen || erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) { erofs_err(sb, "invalid tail-packing pclustersize %llu", map.m_plen); err = -EFSCORRUPTED; } if (err < 0) goto out_put_metabuf; } if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { struct erofs_map_blocks map = { .buf = __EROFS_BUF_INITIALIZER }; vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); err = z_erofs_do_map_blocks(inode, &map, EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (err < 0) goto out_put_metabuf; } done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); out_put_metabuf: erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; } int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *const vi = EROFS_I(inode); int err = 0; trace_erofs_map_blocks_enter(inode, map, flags); if (map->m_la >= inode->i_size) { /* post-EOF unmapped extent */ map->m_llen = map->m_la + 1 - inode->i_size; map->m_la = inode->i_size; map->m_flags = 0; } else { err = z_erofs_fill_inode_lazy(inode); if (!err) { if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; } else { err = z_erofs_do_map_blocks(inode, map, flags); } } if (!err && (map->m_flags & EROFS_MAP_ENCODED) && unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) err = -EOPNOTSUPP; if (err) map->m_llen = 0; } trace_erofs_map_blocks_exit(inode, map, flags, err); return err; } static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; struct erofs_map_blocks map = { .m_la = offset }; ret = 
	      z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
	erofs_put_metabuf(&map.buf);
	if (ret < 0)
		return ret;

	iomap->bdev = inode->i_sb->s_bdev;
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	if (map.m_flags & EROFS_MAP_MAPPED) {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
			      IOMAP_NULL_ADDR : map.m_pa;
	} else {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		/*
		 * No strict rule on how to describe extents for post EOF, yet
		 * we need to do like below. Otherwise, iomap itself will get
		 * into an endless loop on post EOF.
		 *
		 * Calculate the effective offset by subtracting extent start
		 * (map.m_la) from the requested offset, and add it to length.
		 * (NB: offset >= map.m_la always)
		 */
		if (iomap->offset >= inode->i_size)
			iomap->length = length + offset - map.m_la;
	}
	iomap->flags = 0;
	return 0;
}

const struct iomap_ops z_erofs_iomap_report_ops = {
	.iomap_begin = z_erofs_iomap_begin_report,
};
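/*
 * Standalone userspace sketch (illustration only, not part of the kernel
 * build): it round-trips the bit extraction performed by decode_compactedbits()
 * earlier in this file. The packing helper and the "demo_" names are
 * hypothetical and only show that each field stores "lo" in the low lobits
 * with a 2-bit type above it, read at a bit offset of encodebits * i; it makes
 * no claim about the complete on-disk compacted index layout.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t demo_get_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* mirrors decode_compactedbits(): lobits of "lo" plus a 2-bit type */
static unsigned int demo_decode(unsigned int lobits, const uint8_t *in,
				unsigned int pos, uint8_t *type)
{
	const unsigned int v = demo_get_le32(in + pos / 8) >> (pos & 7);

	*type = (v >> lobits) & 3;
	return v & ((1U << lobits) - 1);
}

/* pack (type, lo) at bit offset pos, LSB first; for this demo only */
static void demo_pack(unsigned int lobits, uint8_t *out, unsigned int pos,
		      uint8_t type, unsigned int lo)
{
	uint64_t field = ((uint64_t)type << lobits) | lo;
	unsigned int i;

	for (i = 0; i < lobits + 2; i++, pos++)
		if (field & (1ULL << i))
			out[pos / 8] |= 1U << (pos & 7);
}

int main(void)
{
	uint8_t buf[16] = { 0 };
	const unsigned int lobits = 12, encodebits = 14; /* demo stride */
	uint8_t type;
	unsigned int lo;

	demo_pack(lobits, buf, 0 * encodebits, 1, 0x123);
	demo_pack(lobits, buf, 1 * encodebits, 0, 0x456);

	lo = demo_decode(lobits, buf, 0 * encodebits, &type);
	printf("field 0: type=%u lo=0x%x\n", (unsigned int)type, lo); /* 1, 0x123 */
	lo = demo_decode(lobits, buf, 1 * encodebits, &type);
	printf("field 1: type=%u lo=0x%x\n", (unsigned int)type, lo); /* 0, 0x456 */
	return 0;
}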
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/export.h>
#include <linux/limits.h>
#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/types.h>

#include <linux/reciprocal_div.h>

/*
 * For a description of the algorithm please have a look at
 * include/linux/reciprocal_div.h
 */

struct reciprocal_value reciprocal_value(u32 d)
{
	struct reciprocal_value R;
	u64 m;
	int l;

	l = fls(d - 1);
	m = ((1ULL << 32) * ((1ULL << l) - d));
	do_div(m, d);
	++m;
	R.m = (u32)m;
	R.sh1 = min(l, 1);
	R.sh2 = max(l - 1, 0);

	return R;
}
EXPORT_SYMBOL(reciprocal_value);

struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
{
	struct reciprocal_value_adv R;
	u32 l, post_shift;
	u64 mhigh, mlow;

	/* ceil(log2(d)) */
	l = fls(d - 1);
	/* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
	 * be handled before calling "reciprocal_value_adv", please see the
	 * comment at include/linux/reciprocal_div.h.
	 */
	WARN(l == 32,
	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
	     d, __func__);
	post_shift = l;
	mlow = 1ULL << (32 + l);
	do_div(mlow, d);
	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
	do_div(mhigh, d);

	for (; post_shift > 0; post_shift--) {
		u64 lo = mlow >> 1, hi = mhigh >> 1;

		if (lo >= hi)
			break;

		mlow = lo;
		mhigh = hi;
	}

	R.m = (u32)mhigh;
	R.sh = post_shift;
	R.exp = l;
	R.is_wide_m = mhigh > U32_MAX;

	return R;
}
EXPORT_SYMBOL(reciprocal_value_adv);
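/*
 * Usage sketch (hypothetical caller, for illustration only): precompute the
 * reciprocal once for a divisor that is reused many times, then divide with a
 * multiply and shifts via reciprocal_divide() from <linux/reciprocal_div.h>.
 * "example_bucket" and its parameters are made up for this example;
 * nr_buckets must be nonzero.
 */
#include <linux/reciprocal_div.h>

static u32 example_bucket(u32 hash, u32 nr_buckets)
{
	/* in a real user this would be cached, e.g. per hash-table resize */
	struct reciprocal_value nr_rcp = reciprocal_value(nr_buckets);

	/* hash % nr_buckets, computed without a hardware divide */
	return hash - reciprocal_divide(hash, nr_rcp) * nr_buckets;
}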
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_MISC_H
#define BTRFS_MISC_H

#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/math64.h>
#include <linux/rbtree.h>

/*
 * Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
 */
#define ENUM_BIT(name)                                  \
	__ ## name ## _BIT,                             \
	name = (1U << __ ## name ## _BIT),              \
	__ ## name ## _SEQ = __ ## name ## _BIT

static inline void cond_wake_up(struct wait_queue_head *wq)
{
	/*
	 * This implies a full smp_mb barrier, see comments for
	 * waitqueue_active why.
	 */
	if (wq_has_sleeper(wq))
		wake_up(wq);
}

static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
{
	/*
	 * Special case for conditional wakeup where the barrier required for
	 * waitqueue_active is implied by some of the preceding code, e.g. one
	 * of such atomic operations (atomic_dec_and_return, ...), or an
	 * unlock/lock sequence, etc.
	 */
	if (waitqueue_active(wq))
		wake_up(wq);
}

static inline u64 mult_perc(u64 num, u32 percent)
{
	return div_u64(num * percent, 100);
}

/* Copy of is_power_of_two that is 64bit safe */
static inline bool is_power_of_two_u64(u64 n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

static inline bool has_single_bit_set(u64 n)
{
	return is_power_of_two_u64(n);
}

/*
 * Simple bytenr-based rb_tree related structures
 *
 * Any structure that wants to use bytenr as its single search index should
 * start with these members.
 */
struct rb_simple_node {
	struct rb_node rb_node;
	u64 bytenr;
};

static inline struct rb_node *rb_simple_search(const struct rb_root *root,
					       u64 bytenr)
{
	struct rb_node *node = root->rb_node;
	struct rb_simple_node *entry;

	while (node) {
		entry = rb_entry(node, struct rb_simple_node, rb_node);

		if (bytenr < entry->bytenr)
			node = node->rb_left;
		else if (bytenr > entry->bytenr)
			node = node->rb_right;
		else
			return node;
	}
	return NULL;
}

/*
 * Search @root from an entry that starts or comes after @bytenr.
 *
 * @root:	the root to search.
 * @bytenr:	bytenr to search from.
 *
 * Return the rb_node that starts at or after @bytenr. If there is no entry at
 * or after @bytenr, return NULL.
 */
static inline struct rb_node *rb_simple_search_first(const struct rb_root *root,
						     u64 bytenr)
{
	struct rb_node *node = root->rb_node, *ret = NULL;
	struct rb_simple_node *entry, *ret_entry = NULL;

	while (node) {
		entry = rb_entry(node, struct rb_simple_node, rb_node);

		if (bytenr < entry->bytenr) {
			if (!ret || entry->bytenr < ret_entry->bytenr) {
				ret = node;
				ret_entry = entry;
			}

			node = node->rb_left;
		} else if (bytenr > entry->bytenr) {
			node = node->rb_right;
		} else {
			return node;
		}
	}

	return ret;
}

static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
					       struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct rb_simple_node *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct rb_simple_node, rb_node);

		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static inline bool bitmap_test_range_all_set(const unsigned long *addr,
					     unsigned long start,
					     unsigned long nbits)
{
	unsigned long found_zero;

	found_zero = find_next_zero_bit(addr, start + nbits, start);
	return (found_zero == start + nbits);
}

static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
					      unsigned long start,
					      unsigned long nbits)
{
	unsigned long found_set;

	found_set = find_next_bit(addr, start + nbits, start);
	return (found_set == start + nbits);
}

#endif
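/*
 * Usage sketch (hypothetical types, not taken from fs/btrfs): ENUM_BIT()
 * turns consecutive enumerators into single-bit flags, and any structure that
 * embeds struct rb_simple_node as its first member can be indexed by bytenr
 * with the rb_simple_*() helpers above. The "demo_" names are made up for
 * this example.
 */
enum {
	ENUM_BIT(DEMO_FLAG_A),	/* 1U << 0 */
	ENUM_BIT(DEMO_FLAG_B),	/* 1U << 1 */
	ENUM_BIT(DEMO_FLAG_C),	/* 1U << 2 */
};

struct demo_extent {
	struct rb_simple_node node;	/* rb_node + bytenr, must come first */
	u64 length;
};

/* Returns an existing entry with the same bytenr, or NULL on success. */
static inline struct demo_extent *demo_insert(struct rb_root *root,
					      struct demo_extent *de)
{
	struct rb_node *exist = rb_simple_insert(root, de->node.bytenr,
						 &de->node.rb_node);

	return exist ? rb_entry(exist, struct demo_extent, node.rb_node) : NULL;
}

static inline struct demo_extent *demo_lookup(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n = rb_simple_search(root, bytenr);

	return n ? rb_entry(n, struct demo_extent, node.rb_node) : NULL;
}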
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef DRIVERS_PCI_H
#define DRIVERS_PCI_H

#include <linux/pci.h>

/* Number of possible devfns: 0.0 to 1f.7 inclusive */
#define MAX_NR_DEVFNS 256

#define PCI_FIND_CAP_TTL	48
#define PCI_VSEC_ID_INTEL_TBT	0x1234	/* Thunderbolt */

#define PCIE_LINK_RETRAIN_TIMEOUT_MS	1000

/*
 * Power stable to PERST# inactive.
 *
 * See the "Power Sequencing and Reset Signal Timings" table of the PCI Express
 * Card Electromechanical Specification, Revision 5.1, Section 2.9.2, Symbol
 * "T_PVPERL".
 */
#define PCIE_T_PVPERL_MS	100

/*
 * REFCLK stable before PERST# inactive.
 *
 * See the "Power Sequencing and Reset Signal Timings" table of the PCI Express
 * Card Electromechanical Specification, Revision 5.1, Section 2.9.2, Symbol
 * "T_PERST-CLK".
 */
#define PCIE_T_PERST_CLK_US	100

/*
 * End of conventional reset (PERST# de-asserted) to first configuration
 * request (device able to respond with a "Request Retry Status" completion),
 * from PCIe r6.0, sec 6.6.1.
 */
#define PCIE_T_RRS_READY_MS	100

/*
 * PCIe r6.0, sec 5.3.3.2.1 <PME Synchronization>
 * Recommends 1ms to 10ms timeout to check L2 ready.
 */
#define PCIE_PME_TO_L2_TIMEOUT_US	10000

/*
 * PCIe r6.0, sec 6.6.1 <Conventional Reset>
 *
 * - "With a Downstream Port that does not support Link speeds greater
 *   than 5.0 GT/s, software must wait a minimum of 100 ms following exit
 *   from a Conventional Reset before sending a Configuration Request to
 *   the device immediately below that Port."
 *
 * - "With a Downstream Port that supports Link speeds greater than
 *   5.0 GT/s, software must wait a minimum of 100 ms after Link training
 *   completes before sending a Configuration Request to the device
 *   immediately below that Port."
*/ #define PCIE_RESET_CONFIG_DEVICE_WAIT_MS 100 /* Message Routing (r[2:0]); PCIe r6.0, sec 2.2.8 */ #define PCIE_MSG_TYPE_R_RC 0 #define PCIE_MSG_TYPE_R_ADDR 1 #define PCIE_MSG_TYPE_R_ID 2 #define PCIE_MSG_TYPE_R_BC 3 #define PCIE_MSG_TYPE_R_LOCAL 4 #define PCIE_MSG_TYPE_R_GATHER 5 /* Power Management Messages; PCIe r6.0, sec 2.2.8.2 */ #define PCIE_MSG_CODE_PME_TURN_OFF 0x19 /* INTx Mechanism Messages; PCIe r6.0, sec 2.2.8.1 */ #define PCIE_MSG_CODE_ASSERT_INTA 0x20 #define PCIE_MSG_CODE_ASSERT_INTB 0x21 #define PCIE_MSG_CODE_ASSERT_INTC 0x22 #define PCIE_MSG_CODE_ASSERT_INTD 0x23 #define PCIE_MSG_CODE_DEASSERT_INTA 0x24 #define PCIE_MSG_CODE_DEASSERT_INTB 0x25 #define PCIE_MSG_CODE_DEASSERT_INTC 0x26 #define PCIE_MSG_CODE_DEASSERT_INTD 0x27 extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); bool pcie_cap_has_lnkctl2(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ #ifdef CONFIG_DMI extern const struct attribute_group pci_dev_smbios_attr_group; #endif enum pci_mmap_api { PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */ PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */ }; int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai, enum pci_mmap_api mmap_api); bool pci_reset_supported(struct pci_dev *dev); void pci_init_reset_methods(struct pci_dev *dev); int pci_bridge_secondary_bus_reset(struct pci_dev *dev); int pci_bus_error_reset(struct pci_dev *dev); struct pci_cap_saved_data { u16 cap_nr; bool cap_extended; unsigned int size; u32 data[]; }; struct pci_cap_saved_state { struct hlist_node next; struct pci_cap_saved_data cap; }; void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size); int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size); struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap); struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, u16 cap); #define PCI_PM_D2_DELAY 200 /* usec; see PCIe r4.0, sec 5.9.1 */ #define PCI_PM_D3HOT_WAIT 10 /* msec */ #define PCI_PM_D3COLD_WAIT 100 /* msec */ void pci_update_current_state(struct pci_dev *dev, pci_power_t state); void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_pme_restore(struct pci_dev *dev); bool pci_dev_need_resume(struct pci_dev *dev); void pci_dev_adjust_pme(struct pci_dev *dev); void pci_dev_complete_resume(struct pci_dev *pci_dev); void pci_config_pm_runtime_get(struct pci_dev *dev); void pci_config_pm_runtime_put(struct pci_dev *dev); void pci_pm_init(struct pci_dev *dev); void pci_ea_init(struct pci_dev *dev); void pci_msi_init(struct pci_dev *dev); void pci_msix_init(struct pci_dev *dev); bool pci_bridge_d3_possible(struct pci_dev *dev); void pci_bridge_d3_update(struct pci_dev *dev); int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type); static inline bool pci_bus_rrs_vendor_id(u32 l) { return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG; } static inline void 
pci_wakeup_event(struct pci_dev *dev) { /* Wait 100 ms before the system can be put into a sleep state. */ pm_wakeup_event(&dev->dev, 100); } static inline bool pci_has_subordinate(struct pci_dev *pci_dev) { return !!(pci_dev->subordinate); } static inline bool pci_power_manageable(struct pci_dev *pci_dev) { /* * Currently we allow normal PCI devices and PCI bridges transition * into D3 if their bridge_d3 is set. */ return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3; } static inline bool pcie_downstream_port(const struct pci_dev *dev) { int type = pci_pcie_type(dev); return type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_PCIE_BRIDGE; } void pci_vpd_init(struct pci_dev *dev); extern const struct attribute_group pci_dev_vpd_attr_group; /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); void pci_restore_vc_state(struct pci_dev *dev); void pci_allocate_vc_save_buffers(struct pci_dev *dev); /* PCI /proc functions */ #ifdef CONFIG_PROC_FS int pci_proc_attach_device(struct pci_dev *dev); int pci_proc_detach_device(struct pci_dev *dev); int pci_proc_detach_bus(struct pci_bus *bus); #else static inline int pci_proc_attach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } #endif /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); #if defined(CONFIG_SYSFS) && defined(HAVE_PCI_LEGACY) void pci_create_legacy_files(struct pci_bus *bus); void pci_remove_legacy_files(struct pci_bus *bus); #else static inline void pci_create_legacy_files(struct pci_bus *bus) { } static inline void pci_remove_legacy_files(struct pci_bus *bus) { } #endif /* Lock for read/write access to pci device and bus lists */ extern struct rw_semaphore pci_bus_sem; extern struct mutex pci_slot_mutex; extern raw_spinlock_t pci_lock; extern unsigned int pci_pm_d3hot_delay; #ifdef CONFIG_PCI_MSI void pci_no_msi(void); #else static inline void pci_no_msi(void) { } #endif void pci_realloc_get_opt(char *); static inline int pci_no_d1d2(struct pci_dev *dev) { unsigned int parent_dstates = 0; if (dev->bus->self) parent_dstates = dev->bus->self->no_d1d2; return (dev->no_d1d2 || parent_dstates); } #ifdef CONFIG_SYSFS int pci_create_sysfs_dev_files(struct pci_dev *pdev); void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern const struct attribute_group *pci_dev_groups[]; extern const struct attribute_group *pci_dev_attr_groups[]; extern const struct attribute_group *pcibus_groups[]; extern const struct attribute_group *pci_bus_groups[]; #else static inline int pci_create_sysfs_dev_files(struct pci_dev *pdev) { return 0; } static inline void pci_remove_sysfs_dev_files(struct pci_dev *pdev) { } #define pci_dev_groups NULL #define pci_dev_attr_groups NULL #define pcibus_groups NULL #define pci_bus_groups NULL #endif extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mmio_size; extern unsigned long pci_hotplug_mmio_pref_size; extern unsigned long pci_hotplug_bus_size; /** * pci_match_one_device - Tell if a PCI device structure has a matching * PCI device id structure * @id: single PCI device id structure to match * @dev: the PCI device structure to match against * * Returns the matching pci_device_id structure or %NULL if there is no match. 
*/ static inline const struct pci_device_id * pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && (id->device == PCI_ANY_ID || id->device == dev->device) && (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && !((id->class ^ dev->class) & id->class_mask)) return id; return NULL; } /* PCI slot sysfs helper code */ #define to_pci_slot(s) container_of(s, struct pci_slot, kobj) extern struct kset *pci_slots_kset; struct pci_slot_attribute { struct attribute attr; ssize_t (*show)(struct pci_slot *, char *); ssize_t (*store)(struct pci_slot *, const char *, size_t); }; #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) enum pci_bar_type { pci_bar_unknown, /* Standard PCI BAR probe */ pci_bar_io, /* An I/O port BAR */ pci_bar_mem32, /* A 32-bit memory BAR */ pci_bar_mem64, /* A 64-bit memory BAR */ }; struct device *pci_get_host_bridge_device(struct pci_dev *dev); void pci_put_host_bridge_device(struct device *dev); int pci_configure_extended_tags(struct pci_dev *dev, void *ign); bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); int pci_setup_device(struct pci_dev *dev); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head); bool pci_bus_clip_resource(struct pci_dev *dev, int idx); const char *pci_resource_name(struct pci_dev *dev, unsigned int i); void pci_reassigndev_resource_alignment(struct pci_dev *dev); void pci_disable_bridge_window(struct pci_dev *dev); struct pci_bus *pci_bus_get(struct pci_bus *bus); void pci_bus_put(struct pci_bus *bus); /* PCIe link information from Link Capabilities 2 */ #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \ (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ (speed) == PCIE_SPEED_5_0GT ? 5000*8/10 : \ (speed) == PCIE_SPEED_2_5GT ? 
2500*8/10 : \ 0) static inline int pcie_dev_speed_mbps(enum pci_bus_speed speed) { switch (speed) { case PCIE_SPEED_2_5GT: return 2500; case PCIE_SPEED_5_0GT: return 5000; case PCIE_SPEED_8_0GT: return 8000; case PCIE_SPEED_16_0GT: return 16000; case PCIE_SPEED_32_0GT: return 32000; case PCIE_SPEED_64_0GT: return 64000; default: break; } return -EINVAL; } const char *pci_speed_string(enum pci_bus_speed speed); enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); void pcie_update_link_speed(struct pci_bus *bus, u16 link_status); /* Single Root I/O Virtualization */ struct pci_sriov { int pos; /* Capability position */ int nres; /* Number of resources */ u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total_VFs; /* Total VFs associated with the PF */ u16 initial_VFs; /* Initial VFs associated with the PF */ u16 num_VFs; /* Number of VFs available */ u16 offset; /* First VF Routing ID offset */ u16 stride; /* Following VF stride */ u16 vf_device; /* VF device ID */ u32 pgsz; /* Page size for BAR alignment */ u8 link; /* Function Dependency Link */ u8 max_VF_buses; /* Max buses consumed by VFs */ u16 driver_max_VFs; /* Max num VFs driver supports */ struct pci_dev *dev; /* Lowest numbered PF */ struct pci_dev *self; /* This PF */ u32 class; /* VF device */ u8 hdr_type; /* VF header type */ u16 subsystem_vendor; /* VF subsystem vendor */ u16 subsystem_device; /* VF subsystem device */ resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ bool drivers_autoprobe; /* Auto probing of VFs by driver */ }; #ifdef CONFIG_PCI_DOE void pci_doe_init(struct pci_dev *pdev); void pci_doe_destroy(struct pci_dev *pdev); void pci_doe_disconnected(struct pci_dev *pdev); #else static inline void pci_doe_init(struct pci_dev *pdev) { } static inline void pci_doe_destroy(struct pci_dev *pdev) { } static inline void pci_doe_disconnected(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_NPEM void pci_npem_create(struct pci_dev *dev); void pci_npem_remove(struct pci_dev *dev); #else static inline void pci_npem_create(struct pci_dev *dev) { } static inline void pci_npem_remove(struct pci_dev *dev) { } #endif /** * pci_dev_set_io_state - Set the new error state if possible. * * @dev: PCI device to set new error_state * @new: the state we want dev to be in * * If the device is experiencing perm_failure, it has to remain in that state. * Any other transition is allowed. * * Returns true if state has been changed to the requested state. 
*/ static inline bool pci_dev_set_io_state(struct pci_dev *dev, pci_channel_state_t new) { pci_channel_state_t old; switch (new) { case pci_channel_io_perm_failure: xchg(&dev->error_state, pci_channel_io_perm_failure); return true; case pci_channel_io_frozen: old = cmpxchg(&dev->error_state, pci_channel_io_normal, pci_channel_io_frozen); return old != pci_channel_io_perm_failure; case pci_channel_io_normal: old = cmpxchg(&dev->error_state, pci_channel_io_frozen, pci_channel_io_normal); return old != pci_channel_io_perm_failure; default: return false; } } static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { pci_dev_set_io_state(dev, pci_channel_io_perm_failure); pci_doe_disconnected(dev); return 0; } /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); } static inline bool pci_dev_is_added(const struct pci_dev *dev) { return test_bit(PCI_DEV_ADDED, &dev->priv_flags); } #ifdef CONFIG_PCIEAER #include <linux/aer.h> #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; unsigned int id:16; unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ unsigned int __pad1:5; unsigned int multi_error_valid:1; unsigned int first_error:5; unsigned int __pad2:2; unsigned int tlp_header_valid:1; unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ struct pcie_tlp_log tlp; /* TLP Header */ }; int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIEPORTBUS /* Cached RCEC Endpoint Association */ struct rcec_ea { u8 nextbusn; u8 lastbusn; u32 bitmap; }; #endif #ifdef CONFIG_PCIE_DPC void pci_save_dpc_state(struct pci_dev *dev); void pci_restore_dpc_state(struct pci_dev *dev); void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); bool pci_dpc_recovered(struct pci_dev *pdev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) { } static inline void pci_restore_dpc_state(struct pci_dev *dev) { } static inline void pci_dpc_init(struct pci_dev *pdev) { } static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCIEPORTBUS void pci_rcec_init(struct pci_dev *dev); void pci_rcec_exit(struct pci_dev *dev); void pcie_link_rcec(struct pci_dev *rcec); void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), void *userdata); #else static inline void pci_rcec_init(struct pci_dev *dev) { } static inline void pci_rcec_exit(struct pci_dev *dev) { } static inline void pcie_link_rcec(struct pci_dev *rcec) { } static inline void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), void *userdata) { } #endif #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); void pci_restore_ats_state(struct pci_dev *dev); #else static inline void pci_ats_init(struct pci_dev *d) { } static inline void pci_restore_ats_state(struct pci_dev *dev) { } #endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI void pci_pri_init(struct pci_dev *dev); void pci_restore_pri_state(struct pci_dev *pdev); #else static inline void pci_pri_init(struct pci_dev 
*dev) { } static inline void pci_restore_pri_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_PASID void pci_pasid_init(struct pci_dev *dev); void pci_restore_pasid_state(struct pci_dev *pdev); #else static inline void pci_pasid_init(struct pci_dev *dev) { } static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); void pci_iov_remove(struct pci_dev *dev); void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); extern const struct attribute_group sriov_pf_dev_attr_group; extern const struct attribute_group sriov_vf_dev_attr_group; #else static inline int pci_iov_init(struct pci_dev *dev) { return -ENODEV; } static inline void pci_iov_release(struct pci_dev *dev) { } static inline void pci_iov_remove(struct pci_dev *dev) { } static inline void pci_restore_iov_state(struct pci_dev *dev) { } static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } #endif /* CONFIG_PCI_IOV */ #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); void pci_save_ptm_state(struct pci_dev *dev); void pci_restore_ptm_state(struct pci_dev *dev); void pci_suspend_ptm(struct pci_dev *dev); void pci_resume_ptm(struct pci_dev *dev); #else static inline void pci_ptm_init(struct pci_dev *dev) { } static inline void pci_save_ptm_state(struct pci_dev *dev) { } static inline void pci_restore_ptm_state(struct pci_dev *dev) { } static inline void pci_suspend_ptm(struct pci_dev *dev) { } static inline void pci_resume_ptm(struct pci_dev *dev) { } #endif unsigned long pci_cardbus_resource_alignment(struct resource *); static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { #ifdef CONFIG_PCI_IOV int resno = res - dev->resource; if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) return pci_sriov_resource_alignment(dev, resno); #endif if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS) return pci_cardbus_resource_alignment(res); return resource_alignment(res); } void pci_acs_init(struct pci_dev *dev); #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); int pci_dev_specific_enable_acs(struct pci_dev *dev); int pci_dev_specific_disable_acs_redir(struct pci_dev *dev); int pcie_failed_link_retrain(struct pci_dev *dev); #else static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) { return -ENOTTY; } static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) { return -ENOTTY; } static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) { return -ENOTTY; } static inline int pcie_failed_link_retrain(struct pci_dev *dev) { return -ENOTTY; } #endif /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_channel_state_t state, pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); /* ASPM-related functionality we need even without CONFIG_PCIEASPM */ void pci_save_ltr_state(struct pci_dev *dev); void pci_restore_ltr_state(struct pci_dev *dev); void pci_configure_aspm_l1ss(struct pci_dev *dev); void pci_save_aspm_l1ss_state(struct pci_dev *dev); void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #ifdef 
CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pci_configure_ltr(struct pci_dev *pdev); void pci_bridge_reconfigure_ltr(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } static inline void pci_configure_ltr(struct pci_dev *pdev) { } static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCIE_ECRC void pcie_set_ecrc_checking(struct pci_dev *dev); void pcie_ecrc_get_policy(char *str); #else static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif struct pci_dev_reset_methods { u16 vendor; u16 device; int (*reset)(struct pci_dev *dev, bool probe); }; struct pci_reset_fn_method { int (*reset_fn)(struct pci_dev *pdev, bool probe); char *name; }; #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_reset(struct pci_dev *dev, bool probe); #else static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe) { return -ENOTTY; } #endif #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64) int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res); #else static inline int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res) { return -ENODEV; } #endif int pci_rebar_get_current_size(struct pci_dev *pdev, int bar); int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size); static inline u64 pci_rebar_size_to_bytes(int size) { return 1ULL << (size + 20); } struct device_node; #ifdef CONFIG_OF int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); int of_pci_get_max_link_speed(struct device_node *node); u32 of_pci_get_slot_power_limit(struct device_node *node, u8 *slot_power_limit_value, u8 *slot_power_limit_scale); bool of_pci_preserve_config(struct device_node *node); int pci_set_of_node(struct pci_dev *dev); void pci_release_of_node(struct pci_dev *dev); void pci_set_bus_of_node(struct pci_bus *bus); void pci_release_bus_of_node(struct pci_bus *bus); int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge); #else static inline int of_pci_parse_bus_range(struct device_node *node, struct resource *res) { return -EINVAL; } static inline int of_get_pci_domain_nr(struct device_node *node) { return -1; } static inline int of_pci_get_max_link_speed(struct device_node *node) { return -EINVAL; } static inline u32 of_pci_get_slot_power_limit(struct device_node *node, u8 *slot_power_limit_value, u8 *slot_power_limit_scale) { if (slot_power_limit_value) *slot_power_limit_value = 0; if (slot_power_limit_scale) *slot_power_limit_scale = 0; return 0; } static inline bool of_pci_preserve_config(struct device_node *node) { return false; } static inline int pci_set_of_node(struct pci_dev *dev) { return 0; } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } static inline int 
devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge) { return 0; } #endif /* CONFIG_OF */ struct of_changeset; #ifdef CONFIG_PCI_DYNAMIC_OF_NODES void of_pci_make_dev_node(struct pci_dev *pdev); void of_pci_remove_node(struct pci_dev *pdev); int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs, struct device_node *np); #else static inline void of_pci_make_dev_node(struct pci_dev *pdev) { } static inline void of_pci_remove_node(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCIEAER void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); int pci_aer_clear_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); void pci_save_aer_state(struct pci_dev *dev); void pci_restore_aer_state(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline void pci_save_aer_state(struct pci_dev *dev) { } static inline void pci_restore_aer_state(struct pci_dev *dev) { } #endif #ifdef CONFIG_ACPI bool pci_acpi_preserve_config(struct pci_host_bridge *bridge); int pci_acpi_program_hp_params(struct pci_dev *dev); extern const struct attribute_group pci_dev_acpi_attr_group; void pci_set_acpi_fwnode(struct pci_dev *dev); int pci_dev_acpi_reset(struct pci_dev *dev, bool probe); bool acpi_pci_power_manageable(struct pci_dev *dev); bool acpi_pci_bridge_d3(struct pci_dev *dev); int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t acpi_pci_get_power_state(struct pci_dev *dev); void acpi_pci_refresh_power_state(struct pci_dev *dev); int acpi_pci_wakeup(struct pci_dev *dev, bool enable); bool acpi_pci_need_resume(struct pci_dev *dev); pci_power_t acpi_pci_choose_state(struct pci_dev *pdev); #else static inline bool pci_acpi_preserve_config(struct pci_host_bridge *bridge) { return false; } static inline int pci_dev_acpi_reset(struct pci_dev *dev, bool probe) { return -ENOTTY; } static inline void pci_set_acpi_fwnode(struct pci_dev *dev) { } static inline int pci_acpi_program_hp_params(struct pci_dev *dev) { return -ENODEV; } static inline bool acpi_pci_power_manageable(struct pci_dev *dev) { return false; } static inline bool acpi_pci_bridge_d3(struct pci_dev *dev) { return false; } static inline int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return -ENODEV; } static inline pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) { return PCI_UNKNOWN; } static inline void acpi_pci_refresh_power_state(struct pci_dev *dev) { } static inline int acpi_pci_wakeup(struct pci_dev *dev, bool enable) { return -ENODEV; } static inline bool acpi_pci_need_resume(struct pci_dev *dev) { return false; } static inline pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { return PCI_POWER_ERROR; } #endif #ifdef CONFIG_PCIEASPM extern const struct attribute_group aspm_ctrl_attr_group; #endif extern const struct attribute_group pci_dev_reset_method_attr_group; #ifdef CONFIG_X86_INTEL_MID bool pci_use_mid_pm(void); int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); pci_power_t 
mid_pci_get_power_state(struct pci_dev *pdev); #else static inline bool pci_use_mid_pm(void) { return false; } static inline int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) { return -ENODEV; } static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) { return PCI_UNKNOWN; } #endif int pcim_intx(struct pci_dev *dev, int enable); int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name); void pcim_release_region(struct pci_dev *pdev, int bar); /* * Config Address for PCI Configuration Mechanism #1 * * See PCI Local Bus Specification, Revision 3.0, * Section 3.2.2.3.2, Figure 3-2, p. 50. */ #define PCI_CONF1_BUS_SHIFT 16 /* Bus number */ #define PCI_CONF1_DEV_SHIFT 11 /* Device number */ #define PCI_CONF1_FUNC_SHIFT 8 /* Function number */ #define PCI_CONF1_BUS_MASK 0xff #define PCI_CONF1_DEV_MASK 0x1f #define PCI_CONF1_FUNC_MASK 0x7 #define PCI_CONF1_REG_MASK 0xfc /* Limit aligned offset to a maximum of 256B */ #define PCI_CONF1_ENABLE BIT(31) #define PCI_CONF1_BUS(x) (((x) & PCI_CONF1_BUS_MASK) << PCI_CONF1_BUS_SHIFT) #define PCI_CONF1_DEV(x) (((x) & PCI_CONF1_DEV_MASK) << PCI_CONF1_DEV_SHIFT) #define PCI_CONF1_FUNC(x) (((x) & PCI_CONF1_FUNC_MASK) << PCI_CONF1_FUNC_SHIFT) #define PCI_CONF1_REG(x) ((x) & PCI_CONF1_REG_MASK) #define PCI_CONF1_ADDRESS(bus, dev, func, reg) \ (PCI_CONF1_ENABLE | \ PCI_CONF1_BUS(bus) | \ PCI_CONF1_DEV(dev) | \ PCI_CONF1_FUNC(func) | \ PCI_CONF1_REG(reg)) /* * Extension of PCI Config Address for accessing extended PCIe registers * * No standardized specification, but used on a lot of non-ECAM-compliant ARM SoCs * and on AMD Barcelona and newer CPUs. Reserved bits [27:24] of PCI Config Address * are used for specifying the 4 additional high bits of the PCI Express register. */ #define PCI_CONF1_EXT_REG_SHIFT 16 #define PCI_CONF1_EXT_REG_MASK 0xf00 #define PCI_CONF1_EXT_REG(x) (((x) & PCI_CONF1_EXT_REG_MASK) << PCI_CONF1_EXT_REG_SHIFT) #define PCI_CONF1_EXT_ADDRESS(bus, dev, func, reg) \ (PCI_CONF1_ADDRESS(bus, dev, func, reg) | \ PCI_CONF1_EXT_REG(reg)) #endif /* DRIVERS_PCI_H */
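/*
 * Illustrative sketch, not part of drivers/pci/pci.h: a standalone userspace
 * program that re-declares the Configuration Mechanism #1 helpers above as
 * DEMO_* macros (BIT() and the kernel header are not available in userspace)
 * and prints the 32-bit value that would be written to the classic 0xCF8
 * address port for one example device. The bus/device/function/register
 * numbers are made up purely for the demonstration.
 */
#include <stdio.h>

#define DEMO_CONF1_ENABLE	(1u << 31)
#define DEMO_CONF1_BUS(x)	(((x) & 0xffu) << 16)
#define DEMO_CONF1_DEV(x)	(((x) & 0x1fu) << 11)
#define DEMO_CONF1_FUNC(x)	(((x) & 0x7u) << 8)
#define DEMO_CONF1_REG(x)	((x) & 0xfcu)	/* dword-aligned, max 256B */

#define DEMO_CONF1_ADDRESS(bus, dev, func, reg)		\
	(DEMO_CONF1_ENABLE | DEMO_CONF1_BUS(bus) |	\
	 DEMO_CONF1_DEV(dev) | DEMO_CONF1_FUNC(func) |	\
	 DEMO_CONF1_REG(reg))

/* Non-standard extension: bits [11:8] of the register land in bits [27:24]. */
#define DEMO_CONF1_EXT_REG(x)	(((x) & 0xf00u) << 16)
#define DEMO_CONF1_EXT_ADDRESS(bus, dev, func, reg)	\
	(DEMO_CONF1_ADDRESS(bus, dev, func, reg) | DEMO_CONF1_EXT_REG(reg))

int main(void)
{
	/* bus 0x12, device 0x05, function 3, register 0x44 */
	unsigned int addr = DEMO_CONF1_ADDRESS(0x12, 0x05, 0x3, 0x44);
	/* same device, extended register 0x144 */
	unsigned int ext  = DEMO_CONF1_EXT_ADDRESS(0x12, 0x05, 0x3, 0x144);

	/* 0x80000000 | 0x00120000 | 0x2800 | 0x300 | 0x44 = 0x80122b44 */
	printf("CONF1 address:     0x%08x\n", addr);
	/* 0x80122b44 | 0x01000000 = 0x81122b44 */
	printf("CONF1 ext address: 0x%08x\n", ext);
	return 0;
}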
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 SGI. * All rights reserved. */ #include "utf8n.h" int utf8version_is_supported(const struct unicode_map *um, unsigned int version) { int i = um->tables->utf8agetab_size - 1; while (i >= 0 && um->tables->utf8agetab[i] != 0) { if (version == um->tables->utf8agetab[i]) return 1; i--; } return 0; } /* * UTF-8 valid ranges. * * The UTF-8 encoding spreads the bits of a 32bit word over several * bytes. This table gives the ranges that can be held and how they'd * be represented. * * 0x00000000 0x0000007F: 0xxxxxxx * 0x00000000 0x000007FF: 110xxxxx 10xxxxxx * 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx * 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * * There is an additional requirement on UTF-8, in that only the * shortest representation of a 32bit value is to be used. A decoder * must not decode sequences that do not satisfy this requirement. * Thus the allowed ranges have a lower bound.
* * 0x00000000 0x0000007F: 0xxxxxxx * 0x00000080 0x000007FF: 110xxxxx 10xxxxxx * 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx * 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * * Actual unicode characters are limited to the range 0x0 - 0x10FFFF, * 17 planes of 65536 values. This limits the sequences actually seen * even more, to just the following. * * 0 - 0x7F: 0 - 0x7F * 0x80 - 0x7FF: 0xC2 0x80 - 0xDF 0xBF * 0x800 - 0xFFFF: 0xE0 0xA0 0x80 - 0xEF 0xBF 0xBF * 0x10000 - 0x10FFFF: 0xF0 0x90 0x80 0x80 - 0xF4 0x8F 0xBF 0xBF * * Within those ranges the surrogates 0xD800 - 0xDFFF are not allowed. * * Note that the longest sequence seen with valid usage is 4 bytes, * the same a single UTF-32 character. This makes the UTF-8 * representation of Unicode strictly smaller than UTF-32. * * The shortest sequence requirement was introduced by: * Corrigendum #1: UTF-8 Shortest Form * It can be found here: * http://www.unicode.org/versions/corrigendum1.html * */ /* * Return the number of bytes used by the current UTF-8 sequence. * Assumes the input points to the first byte of a valid UTF-8 * sequence. */ static inline int utf8clen(const char *s) { unsigned char c = *s; return 1 + (c >= 0xC0) + (c >= 0xE0) + (c >= 0xF0); } /* * Decode a 3-byte UTF-8 sequence. */ static unsigned int utf8decode3(const char *str) { unsigned int uc; uc = *str++ & 0x0F; uc <<= 6; uc |= *str++ & 0x3F; uc <<= 6; uc |= *str++ & 0x3F; return uc; } /* * Encode a 3-byte UTF-8 sequence. */ static int utf8encode3(char *str, unsigned int val) { str[2] = (val & 0x3F) | 0x80; val >>= 6; str[1] = (val & 0x3F) | 0x80; val >>= 6; str[0] = val | 0xE0; return 3; } /* * utf8trie_t * * A compact binary tree, used to decode UTF-8 characters. * * Internal nodes are one byte for the node itself, and up to three * bytes for an offset into the tree. The first byte contains the * following information: * NEXTBYTE - flag - advance to next byte if set * BITNUM - 3 bit field - the bit number to tested * OFFLEN - 2 bit field - number of bytes in the offset * if offlen == 0 (non-branching node) * RIGHTPATH - 1 bit field - set if the following node is for the * right-hand path (tested bit is set) * TRIENODE - 1 bit field - set if the following node is an internal * node, otherwise it is a leaf node * if offlen != 0 (branching node) * LEFTNODE - 1 bit field - set if the left-hand node is internal * RIGHTNODE - 1 bit field - set if the right-hand node is internal * * Due to the way utf8 works, there cannot be branching nodes with * NEXTBYTE set, and moreover those nodes always have a righthand * descendant. */ typedef const unsigned char utf8trie_t; #define BITNUM 0x07 #define NEXTBYTE 0x08 #define OFFLEN 0x30 #define OFFLEN_SHIFT 4 #define RIGHTPATH 0x40 #define TRIENODE 0x80 #define RIGHTNODE 0x40 #define LEFTNODE 0x80 /* * utf8leaf_t * * The leaves of the trie are embedded in the trie, and so the same * underlying datatype: unsigned char. * * leaf[0]: The unicode version, stored as a generation number that is * an index into ->utf8agetab[]. With this we can filter code * points based on the unicode version in which they were * defined. The CCC of a non-defined code point is 0. * leaf[1]: Canonical Combining Class. 
During normalization, we need * to do a stable sort into ascending order of all characters * with a non-zero CCC that occur between two characters with * a CCC of 0, or at the begin or end of a string. * The unicode standard guarantees that all CCC values are * between 0 and 254 inclusive, which leaves 255 available as * a special value. * Code points with CCC 0 are known as stoppers. * leaf[2]: Decomposition. If leaf[1] == 255, then leaf[2] is the * start of a NUL-terminated string that is the decomposition * of the character. * The CCC of a decomposable character is the same as the CCC * of the first character of its decomposition. * Some characters decompose as the empty string: these are * characters with the Default_Ignorable_Code_Point property. * These do affect normalization, as they all have CCC 0. * * The decompositions in the trie have been fully expanded, with the * exception of Hangul syllables, which are decomposed algorithmically. * * Casefolding, if applicable, is also done using decompositions. * * The trie is constructed in such a way that leaves exist for all * UTF-8 sequences that match the criteria from the "UTF-8 valid * ranges" comment above, and only for those sequences. Therefore a * lookup in the trie can be used to validate the UTF-8 input. */ typedef const unsigned char utf8leaf_t; #define LEAF_GEN(LEAF) ((LEAF)[0]) #define LEAF_CCC(LEAF) ((LEAF)[1]) #define LEAF_STR(LEAF) ((const char *)((LEAF) + 2)) #define MINCCC (0) #define MAXCCC (254) #define STOPPER (0) #define DECOMPOSE (255) /* Marker for hangul syllable decomposition. */ #define HANGUL ((char)(255)) /* Size of the synthesized leaf used for Hangul syllable decomposition. */ #define UTF8HANGULLEAF (12) /* * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0) * * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;; * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;; * * SBase = 0xAC00 * LBase = 0x1100 * VBase = 0x1161 * TBase = 0x11A7 * LCount = 19 * VCount = 21 * TCount = 28 * NCount = 588 (VCount * TCount) * SCount = 11172 (LCount * NCount) * * Decomposition: * SIndex = s - SBase * * LV (Canonical/Full) * LIndex = SIndex / NCount * VIndex = (Sindex % NCount) / TCount * LPart = LBase + LIndex * VPart = VBase + VIndex * * LVT (Canonical) * LVIndex = (SIndex / TCount) * TCount * TIndex = (Sindex % TCount) * LVPart = SBase + LVIndex * TPart = TBase + TIndex * * LVT (Full) * LIndex = SIndex / NCount * VIndex = (Sindex % NCount) / TCount * TIndex = (Sindex % TCount) * LPart = LBase + LIndex * VPart = VBase + VIndex * if (TIndex == 0) { * d = <LPart, VPart> * } else { * TPart = TBase + TIndex * d = <LPart, TPart, VPart> * } */ /* Constants */ #define SB (0xAC00) #define LB (0x1100) #define VB (0x1161) #define TB (0x11A7) #define LC (19) #define VC (21) #define TC (28) #define NC (VC * TC) #define SC (LC * NC) /* Algorithmic decomposition of hangul syllable. */ static utf8leaf_t * utf8hangul(const char *str, unsigned char *hangul) { unsigned int si; unsigned int li; unsigned int vi; unsigned int ti; unsigned char *h; /* Calculate the SI, LI, VI, and TI values. */ si = utf8decode3(str) - SB; li = si / NC; vi = (si % NC) / TC; ti = si % TC; /* Fill in base of leaf. */ h = hangul; LEAF_GEN(h) = 2; LEAF_CCC(h) = DECOMPOSE; h += 2; /* Add LPart, a 3-byte UTF-8 sequence. */ h += utf8encode3((char *)h, li + LB); /* Add VPart, a 3-byte UTF-8 sequence. */ h += utf8encode3((char *)h, vi + VB); /* Add TPart if required, also a 3-byte UTF-8 sequence. 
*/ if (ti) h += utf8encode3((char *)h, ti + TB); /* Terminate string. */ h[0] = '\0'; return hangul; } /* * Use trie to scan s, touching at most len bytes. * Returns the leaf if one exists, NULL otherwise. * * A non-NULL return guarantees that the UTF-8 sequence starting at s * is well-formed and corresponds to a known unicode code point. The * shorthand for this will be "is valid UTF-8 unicode". */ static utf8leaf_t *utf8nlookup(const struct unicode_map *um, enum utf8_normalization n, unsigned char *hangul, const char *s, size_t len) { utf8trie_t *trie = um->tables->utf8data + um->ntab[n]->offset; int offlen; int offset; int mask; int node; if (len == 0) return NULL; node = 1; while (node) { offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT; if (*trie & NEXTBYTE) { if (--len == 0) return NULL; s++; } mask = 1 << (*trie & BITNUM); if (*s & mask) { /* Right leg */ if (offlen) { /* Right node at offset of trie */ node = (*trie & RIGHTNODE); offset = trie[offlen]; while (--offlen) { offset <<= 8; offset |= trie[offlen]; } trie += offset; } else if (*trie & RIGHTPATH) { /* Right node after this node */ node = (*trie & TRIENODE); trie++; } else { /* No right node. */ return NULL; } } else { /* Left leg */ if (offlen) { /* Left node after this node. */ node = (*trie & LEFTNODE); trie += offlen + 1; } else if (*trie & RIGHTPATH) { /* No left node. */ return NULL; } else { /* Left node after this node */ node = (*trie & TRIENODE); trie++; } } } /* * Hangul decomposition is done algorithmically. These are the * codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is * always 3 bytes long, so s has been advanced twice, and the * start of the sequence is at s-2. */ if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL) trie = utf8hangul(s - 2, hangul); return trie; } /* * Use trie to scan s. * Returns the leaf if one exists, NULL otherwise. * * Forwards to utf8nlookup(). */ static utf8leaf_t *utf8lookup(const struct unicode_map *um, enum utf8_normalization n, unsigned char *hangul, const char *s) { return utf8nlookup(um, n, hangul, s, (size_t)-1); } /* * Length of the normalization of s, touch at most len bytes. * Return -1 if s is not valid UTF-8 unicode. */ ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, const char *s, size_t len) { utf8leaf_t *leaf; size_t ret = 0; unsigned char hangul[UTF8HANGULLEAF]; while (len && *s) { leaf = utf8nlookup(um, n, hangul, s, len); if (!leaf) return -1; if (um->tables->utf8agetab[LEAF_GEN(leaf)] > um->ntab[n]->maxage) ret += utf8clen(s); else if (LEAF_CCC(leaf) == DECOMPOSE) ret += strlen(LEAF_STR(leaf)); else ret += utf8clen(s); len -= utf8clen(s); s += utf8clen(s); } return ret; } /* * Set up an utf8cursor for use by utf8byte(). * * u8c : pointer to cursor. * data : const struct utf8data to use for normalization. * s : string. * len : length of s. * * Returns -1 on error, 0 on success. */ int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, enum utf8_normalization n, const char *s, size_t len) { if (!s) return -1; u8c->um = um; u8c->n = n; u8c->s = s; u8c->p = NULL; u8c->ss = NULL; u8c->sp = NULL; u8c->len = len; u8c->slen = 0; u8c->ccc = STOPPER; u8c->nccc = STOPPER; /* Check we didn't clobber the maximum length. */ if (u8c->len != len) return -1; /* The first byte of s may not be an utf8 continuation. */ if (len > 0 && (*s & 0xC0) == 0x80) return -1; return 0; } /* * Get one byte from the normalized form of the string described by u8c. * * Returns the byte cast to an unsigned char on succes, and -1 on failure. 
* * The cursor keeps track of the location in the string in u8c->s. * When a character is decomposed, the current location is stored in * u8c->p, and u8c->s is set to the start of the decomposition. Note * that bytes from a decomposition do not count against u8c->len. * * Characters are emitted if they match the current CCC in u8c->ccc. * Hitting end-of-string while u8c->ccc == STOPPER means we're done, * and the function returns 0 in that case. * * Sorting by CCC is done by repeatedly scanning the string. The * values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at * the start of the scan. The first pass finds the lowest CCC to be * emitted and stores it in u8c->nccc, the second pass emits the * characters with this CCC and finds the next lowest CCC. This limits * the number of passes to 1 + the number of different CCCs in the * sequence being scanned. * * Therefore: * u8c->p != NULL -> a decomposition is being scanned. * u8c->ss != NULL -> this is a repeating scan. * u8c->ccc == -1 -> this is the first scan of a repeating scan. */ int utf8byte(struct utf8cursor *u8c) { utf8leaf_t *leaf; int ccc; for (;;) { /* Check for the end of a decomposed character. */ if (u8c->p && *u8c->s == '\0') { u8c->s = u8c->p; u8c->p = NULL; } /* Check for end-of-string. */ if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) { /* There is no next byte. */ if (u8c->ccc == STOPPER) return 0; /* End-of-string during a scan counts as a stopper. */ ccc = STOPPER; goto ccc_mismatch; } else if ((*u8c->s & 0xC0) == 0x80) { /* This is a continuation of the current character. */ if (!u8c->p) u8c->len--; return (unsigned char)*u8c->s++; } /* Look up the data for the current character. */ if (u8c->p) { leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); } else { leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul, u8c->s, u8c->len); } /* No leaf found implies that the input is a binary blob. */ if (!leaf) return -1; ccc = LEAF_CCC(leaf); /* Characters that are too new have CCC 0. */ if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] > u8c->um->ntab[u8c->n]->maxage) { ccc = STOPPER; } else if (ccc == DECOMPOSE) { u8c->len -= utf8clen(u8c->s); u8c->p = u8c->s + utf8clen(u8c->s); u8c->s = LEAF_STR(leaf); /* Empty decomposition implies CCC 0. */ if (*u8c->s == '\0') { if (u8c->ccc == STOPPER) continue; ccc = STOPPER; goto ccc_mismatch; } leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s); if (!leaf) return -1; ccc = LEAF_CCC(leaf); } /* * If this is not a stopper, then see if it updates * the next canonical class to be emitted. */ if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc) u8c->nccc = ccc; /* * Return the current byte if this is the current * combining class. */ if (ccc == u8c->ccc) { if (!u8c->p) u8c->len--; return (unsigned char)*u8c->s++; } /* Current combining class mismatch. */ ccc_mismatch: if (u8c->nccc == STOPPER) { /* * Scan forward for the first canonical class * to be emitted. Save the position from * which to restart. */ u8c->ccc = MINCCC - 1; u8c->nccc = ccc; u8c->sp = u8c->p; u8c->ss = u8c->s; u8c->slen = u8c->len; if (!u8c->p) u8c->len -= utf8clen(u8c->s); u8c->s += utf8clen(u8c->s); } else if (ccc != STOPPER) { /* Not a stopper, and not the ccc we're emitting. */ if (!u8c->p) u8c->len -= utf8clen(u8c->s); u8c->s += utf8clen(u8c->s); } else if (u8c->nccc != MAXCCC + 1) { /* At a stopper, restart for next ccc. */ u8c->ccc = u8c->nccc; u8c->nccc = MAXCCC + 1; u8c->s = u8c->ss; u8c->p = u8c->sp; u8c->len = u8c->slen; } else { /* All done, proceed from here. 
*/ u8c->ccc = STOPPER; u8c->nccc = STOPPER; u8c->sp = NULL; u8c->ss = NULL; u8c->slen = 0; } } } #ifdef CONFIG_UNICODE_NORMALIZATION_SELFTEST_MODULE EXPORT_SYMBOL_GPL(utf8version_is_supported); EXPORT_SYMBOL_GPL(utf8nlen); EXPORT_SYMBOL_GPL(utf8ncursor); EXPORT_SYMBOL_GPL(utf8byte); #endif
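/*
 * Illustrative sketch, not part of utf8-norm.c: one way a caller could drive
 * the cursor API above to write the normalized form of a string into a
 * destination buffer. The unicode_map "um" and normalization type "n" are
 * assumed to be supplied by the caller (e.g. obtained via utf8_load()); the
 * demo_normalize() name and the error codes chosen here exist only for the
 * example. utf8nlen(um, n, src, srclen) could be used beforehand to size
 * the destination exactly.
 */
#include <linux/types.h>
#include <linux/errno.h>
#include "utf8n.h"

static ssize_t demo_normalize(const struct unicode_map *um,
			      enum utf8_normalization n,
			      const char *src, size_t srclen,
			      char *dst, size_t dstlen)
{
	struct utf8cursor cur;
	size_t nwritten = 0;
	int c;

	/* utf8ncursor() rejects NULL input and strings starting mid-sequence. */
	if (utf8ncursor(&cur, um, n, src, srclen))
		return -EINVAL;

	/* utf8byte() emits one normalized byte at a time, 0 at end of string. */
	while ((c = utf8byte(&cur)) > 0) {
		if (nwritten == dstlen)
			return -ENAMETOOLONG;
		dst[nwritten++] = (char)c;
	}

	/* A negative return means src was not valid UTF-8 Unicode. */
	if (c < 0)
		return -EINVAL;

	return nwritten;
}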
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM vmalloc #if !defined(_TRACE_VMALLOC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_VMALLOC_H #include <linux/tracepoint.h> /** * alloc_vmap_area - called when a new vmap allocation occurs * @addr: an allocated address * @size: a requested size * @align: a requested alignment * @vstart: a requested start range * @vend: a requested end range * @failed: whether the allocation failed * * This event is used for debugging purposes. It gives a developer extra * information about how often such allocations occur and which parameters * are passed, for further validation. */ TRACE_EVENT(alloc_vmap_area, TP_PROTO(unsigned long addr, unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend, int failed), TP_ARGS(addr, size, align, vstart, vend, failed), TP_STRUCT__entry( __field(unsigned long, addr) __field(unsigned long, size) __field(unsigned long, align) __field(unsigned long, vstart) __field(unsigned long, vend) __field(int, failed) ), TP_fast_assign( __entry->addr = addr; __entry->size = size; __entry->align = align; __entry->vstart = vstart; __entry->vend = vend; __entry->failed = failed; ), TP_printk("va_start: %lu size=%lu align=%lu vstart=0x%lx vend=0x%lx failed=%d", __entry->addr, __entry->size, __entry->align, __entry->vstart, __entry->vend, __entry->failed) ); /** * purge_vmap_area_lazy - called when vmap areas were lazily freed * @start: purging start address * @end: purging end address * @npurged: number of purged vmap areas * * This event is used for debugging purposes. It gives some indication * of the start:end range and how many objects were released. */ TRACE_EVENT(purge_vmap_area_lazy, TP_PROTO(unsigned long start, unsigned long end, unsigned int npurged), TP_ARGS(start, end, npurged), TP_STRUCT__entry( __field(unsigned long, start) __field(unsigned long, end) __field(unsigned int, npurged) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->npurged = npurged; ), TP_printk("start=0x%lx end=0x%lx num_purged=%u", __entry->start, __entry->end, __entry->npurged) ); /** * free_vmap_area_noflush - called when a vmap area is freed * @va_start: a start address of VA * @nr_lazy: number of current lazy pages * @nr_lazy_max: number of maximum lazy pages * * This event is used for debugging purposes. It gives some indication * of the VA that is released, the number of currently outstanding lazy * areas, and the maximum allowed threshold before all of them are dropped. */ TRACE_EVENT(free_vmap_area_noflush, TP_PROTO(unsigned long va_start, unsigned long nr_lazy, unsigned long nr_lazy_max), TP_ARGS(va_start, nr_lazy, nr_lazy_max), TP_STRUCT__entry( __field(unsigned long, va_start) __field(unsigned long, nr_lazy) __field(unsigned long, nr_lazy_max) ), TP_fast_assign( __entry->va_start = va_start; __entry->nr_lazy = nr_lazy; __entry->nr_lazy_max = nr_lazy_max; ), TP_printk("va_start=0x%lx nr_lazy=%lu nr_lazy_max=%lu", __entry->va_start, __entry->nr_lazy, __entry->nr_lazy_max) ); #endif /* _TRACE_VMALLOC_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
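/*
 * Illustrative sketch, not taken from mm/vmalloc.c: how the events defined
 * above are emitted. Each TRACE_EVENT(name, ...) expands into a trace_<name>()
 * call; exactly one translation unit (mm/vmalloc.c in practice) defines
 * CREATE_TRACE_POINTS before including this header so the event bodies are
 * instantiated, while every other user just includes the header. The demo_*
 * wrappers below are hypothetical and only show what the call sites look like.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/vmalloc.h>

static void demo_report_alloc(unsigned long addr, unsigned long size,
			      unsigned long align, unsigned long vstart,
			      unsigned long vend, int failed)
{
	/* A no-op unless the vmalloc:alloc_vmap_area event is enabled. */
	trace_alloc_vmap_area(addr, size, align, vstart, vend, failed);
}

static void demo_report_purge(unsigned long start, unsigned long end,
			      unsigned int npurged)
{
	trace_purge_vmap_area_lazy(start, end, npurged);
}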
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H #include <net/ip_tunnels.h> #include <net/udp.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ipv6_stubs.h> #endif struct udp_port_cfg { u8 family; /* Used only for kernel-created sockets */ union { struct in_addr local_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr local_ip6; #endif }; union { struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr peer_ip6; #endif }; __be16 local_udp_port; __be16 peer_udp_port; int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; }; int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #if IS_ENABLED(CONFIG_IPV6) int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #else static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { return 0; } #endif static inline int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { if (cfg->family == AF_INET) return udp_sock_create4(net, cfg, sockp); if (cfg->family == AF_INET6) return udp_sock_create6(net, cfg, sockp); return -EPFNOSUPPORT; } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, struct sk_buff *skb); typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv;
udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_err_rcv_t encap_err_rcv; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* -- List of parsable UDP tunnel types -- * * Adding to this list will result in serious debate. The main issue is * that this list is essentially a list of workarounds for either poorly * designed tunnels, or poorly designed device offloads. * * The parsing supported via these types should really be used for Rx * traffic only as the network stack will have already inserted offsets for * the location of the headers in the skb. In addition any ports that are * pushed should be kept within the namespace without leaking to other * devices such as VFs or other ports on the same device. * * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the * need to use this for Rx checksum offload. It should not be necessary to * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; u8 hw_priv; }; /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); static inline void udp_tunnel_get_rx_info(struct net_device *dev) { ASSERT_RTNL(); if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } static inline void udp_tunnel_drop_rx_info(struct net_device *dev) { ASSERT_RTNL(); if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); } /* Transmit the skb using UDP encapsulation. 
*/ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, int oif, __be32 *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, struct dst_cache *dst_cache); struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, struct socket *sock, int oif, struct in6_addr *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 dsfield, struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, const unsigned long *flags, __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } #endif static inline void udp_tunnel_encap_enable(struct sock *sk) { if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; #if IS_ENABLED(CONFIG_IPV6) if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { /* Device callbacks may sleep */ UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), /* Device supports only IPv4 tunnels */ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; struct udp_tunnel_nic; #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) struct udp_tunnel_nic_shared { struct udp_tunnel_nic *udp_tunnel_nic_info; struct list_head devices; }; struct udp_tunnel_nic_shared_node { struct net_device *dev; struct list_head list; }; /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table * @tables.tunnel_types: types of tunnels this table accepts * * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * * Devices which (misguidedly) share the UDP tunnel port table across multiple * netdevs should allocate an instance of struct udp_tunnel_nic_shared and * point @shared at it. * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices * sharing a table. 
* * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, * while not being a tunneled skb, and tunnel skbs from other ports - * drivers should only use these ports for non-critical RX-side offloads, * e.g. the checksum offload; * - none of the devices care about the socket family at present, so we don't * track it. Please extend this code if you care. */ struct udp_tunnel_nic_info { /* one-by-one */ int (*set_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); int (*unset_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); struct udp_tunnel_nic_shared *shared; unsigned int flags; struct udp_tunnel_nic_table_info { unsigned int n_entries; unsigned int tunnel_types; } tables[UDP_TUNNEL_NIC_MAX_TABLES]; }; /* UDP tunnel module dependencies * * Tunnel drivers are expected to have a hard dependency on the udp_tunnel * module. NIC drivers are not, they just attach their * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. * Loading a tunnel driver will cause the udp_tunnel module to be loaded * and only then will all the required state structures be allocated. * Since we want a weak dependency from the drivers and the core to udp_tunnel * we call things through the following stubs. */ struct udp_tunnel_nic_ops { void (*get_port)(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); void (*set_port_priv)(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv); void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*reset_ntf)(struct net_device *dev); size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); }; #ifdef CONFIG_INET extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; #else #define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) #endif static inline void udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { /* This helper is used from .sync_table, we indicate empty entries * by zero'ed @ti. Drivers which need to know the details of a port * when it gets deleted should use the .set_port / .unset_port * callbacks. * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. 
*/ memset(ti, 0, sizeof(*ti)); if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->get_port(dev, table, idx, ti); } static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); } static inline void udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->add_port(dev, ti); } static inline void udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->del_port(dev, ti); } /** * udp_tunnel_nic_reset_ntf() - device-originating reset notification * @dev: network interface device structure * * Called by the driver to inform the core that the entire UDP tunnel port * state has been lost, usually due to device reset. Core will assume device * forgot all the ports and issue .set_port and .sync_table callbacks as * necessary. * * This function must be called with rtnl lock held, and will issue all * the callbacks before returning. */ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->reset_ntf(dev); } static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_size(dev, table); } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_write(dev, table, skb); } #endif |
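/*
 * Illustrative sketch, not part of udp_tunnel.h: the usual pattern a tunnel
 * driver follows with the API declared above -- fill in a struct udp_port_cfg,
 * create the kernel UDP socket with udp_sock_create(), then register an
 * encap_rcv callback via setup_udp_tunnel_sock() (compare the VXLAN and
 * GENEVE drivers). All demo_* names are hypothetical and error handling is
 * kept minimal.
 */
#include <linux/errno.h>
#include <linux/in.h>
#include <net/udp_tunnel.h>

static int demo_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * Returning 0 tells the UDP stack the skb was consumed here; a
	 * positive return hands it back for normal UDP processing. A real
	 * driver would decapsulate the packet instead of dropping it.
	 */
	kfree_skb(skb);
	return 0;
}

static int demo_open_tunnel_sock(struct net *net, __be16 port,
				 struct socket **sockp)
{
	struct udp_port_cfg udp_conf = { };
	struct udp_tunnel_sock_cfg tunnel_cfg = { };
	int err;

	udp_conf.family = AF_INET;
	udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
	udp_conf.local_udp_port = port;
	udp_conf.use_udp_checksums = true;

	err = udp_sock_create(net, &udp_conf, sockp);
	if (err < 0)
		return err;

	/* encap_type 1 selects the generic encap_rcv path. */
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.encap_rcv = demo_encap_rcv;

	setup_udp_tunnel_sock(net, *sockp, &tunnel_cfg);
	return 0;
}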
// SPDX-License-Identifier: GPL-2.0-only /* binder_alloc.c * * Android IPC Subsystem * * Copyright (C) 2007-2017 Google, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/list.h> #include <linux/sched/mm.h> #include <linux/module.h> #include <linux/rtmutex.h> #include <linux/rbtree.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/list_lru.h> #include <linux/ratelimit.h> #include <asm/cacheflush.h> #include <linux/uaccess.h> #include <linux/highmem.h> #include <linux/sizes.h> #include "binder_alloc.h" #include "binder_trace.h" struct list_lru binder_freelist; static DEFINE_MUTEX(binder_alloc_mmap_lock); enum { BINDER_DEBUG_USER_ERROR = 1U << 0, BINDER_DEBUG_OPEN_CLOSE = 1U << 1, BINDER_DEBUG_BUFFER_ALLOC = 1U << 2, BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, }; static uint32_t binder_alloc_debug_mask = BINDER_DEBUG_USER_ERROR; module_param_named(debug_mask, binder_alloc_debug_mask, uint, 0644); #define binder_alloc_debug(mask, x...)
\ do { \ if (binder_alloc_debug_mask & mask) \ pr_info_ratelimited(x); \ } while (0) static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer) { return list_entry(buffer->entry.next, struct binder_buffer, entry); } static struct binder_buffer *binder_buffer_prev(struct binder_buffer *buffer) { return list_entry(buffer->entry.prev, struct binder_buffer, entry); } static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, struct binder_buffer *buffer) { if (list_is_last(&buffer->entry, &alloc->buffers)) return alloc->buffer + alloc->buffer_size - buffer->user_data; return binder_buffer_next(buffer)->user_data - buffer->user_data; } static void binder_insert_free_buffer(struct binder_alloc *alloc, struct binder_buffer *new_buffer) { struct rb_node **p = &alloc->free_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; size_t buffer_size; size_t new_buffer_size; BUG_ON(!new_buffer->free); new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: add free buffer, size %zd, at %pK\n", alloc->pid, new_buffer_size, new_buffer); while (*p) { parent = *p; buffer = rb_entry(parent, struct binder_buffer, rb_node); BUG_ON(!buffer->free); buffer_size = binder_alloc_buffer_size(alloc, buffer); if (new_buffer_size < buffer_size) p = &parent->rb_left; else p = &parent->rb_right; } rb_link_node(&new_buffer->rb_node, parent, p); rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); } static void binder_insert_allocated_buffer_locked( struct binder_alloc *alloc, struct binder_buffer *new_buffer) { struct rb_node **p = &alloc->allocated_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; BUG_ON(new_buffer->free); while (*p) { parent = *p; buffer = rb_entry(parent, struct binder_buffer, rb_node); BUG_ON(buffer->free); if (new_buffer->user_data < buffer->user_data) p = &parent->rb_left; else if (new_buffer->user_data > buffer->user_data) p = &parent->rb_right; else BUG(); } rb_link_node(&new_buffer->rb_node, parent, p); rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); } static struct binder_buffer *binder_alloc_prepare_to_free_locked( struct binder_alloc *alloc, unsigned long user_ptr) { struct rb_node *n = alloc->allocated_buffers.rb_node; struct binder_buffer *buffer; while (n) { buffer = rb_entry(n, struct binder_buffer, rb_node); BUG_ON(buffer->free); if (user_ptr < buffer->user_data) { n = n->rb_left; } else if (user_ptr > buffer->user_data) { n = n->rb_right; } else { /* * Guard against user threads attempting to * free the buffer when in use by kernel or * after it's already been freed. */ if (!buffer->allow_user_free) return ERR_PTR(-EPERM); buffer->allow_user_free = 0; return buffer; } } return NULL; } /** * binder_alloc_prepare_to_free() - get buffer given user ptr * @alloc: binder_alloc for this proc * @user_ptr: User pointer to buffer data * * Validate userspace pointer to buffer data and return buffer corresponding to * that user pointer. Search the rb tree for buffer that matches user data * pointer. 
* * Return: Pointer to buffer or NULL */ struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, unsigned long user_ptr) { struct binder_buffer *buffer; spin_lock(&alloc->lock); buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr); spin_unlock(&alloc->lock); return buffer; } static inline void binder_set_installed_page(struct binder_lru_page *lru_page, struct page *page) { /* Pairs with acquire in binder_get_installed_page() */ smp_store_release(&lru_page->page_ptr, page); } static inline struct page * binder_get_installed_page(struct binder_lru_page *lru_page) { /* Pairs with release in binder_set_installed_page() */ return smp_load_acquire(&lru_page->page_ptr); } static void binder_lru_freelist_add(struct binder_alloc *alloc, unsigned long start, unsigned long end) { struct binder_lru_page *page; unsigned long page_addr; trace_binder_update_page_range(alloc, false, start, end); for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { size_t index; int ret; index = (page_addr - alloc->buffer) / PAGE_SIZE; page = &alloc->pages[index]; if (!binder_get_installed_page(page)) continue; trace_binder_free_lru_start(alloc, index); ret = list_lru_add_obj(&binder_freelist, &page->lru); WARN_ON(!ret); trace_binder_free_lru_end(alloc, index); } } static int binder_install_single_page(struct binder_alloc *alloc, struct binder_lru_page *lru_page, unsigned long addr) { struct page *page; int ret = 0; if (!mmget_not_zero(alloc->mm)) return -ESRCH; /* * Protected with mmap_sem in write mode as multiple tasks * might race to install the same page. */ mmap_write_lock(alloc->mm); if (binder_get_installed_page(lru_page)) goto out; if (!alloc->vma) { pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); ret = -ESRCH; goto out; } page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (!page) { pr_err("%d: failed to allocate page\n", alloc->pid); ret = -ENOMEM; goto out; } ret = vm_insert_page(alloc->vma, addr, page); if (ret) { pr_err("%d: %s failed to insert page at offset %lx with %d\n", alloc->pid, __func__, addr - alloc->buffer, ret); __free_page(page); ret = -ENOMEM; goto out; } /* Mark page installation complete and safe to use */ binder_set_installed_page(lru_page, page); out: mmap_write_unlock(alloc->mm); mmput_async(alloc->mm); return ret; } static int binder_install_buffer_pages(struct binder_alloc *alloc, struct binder_buffer *buffer, size_t size) { struct binder_lru_page *page; unsigned long start, final; unsigned long page_addr; start = buffer->user_data & PAGE_MASK; final = PAGE_ALIGN(buffer->user_data + size); for (page_addr = start; page_addr < final; page_addr += PAGE_SIZE) { unsigned long index; int ret; index = (page_addr - alloc->buffer) / PAGE_SIZE; page = &alloc->pages[index]; if (binder_get_installed_page(page)) continue; trace_binder_alloc_page_start(alloc, index); ret = binder_install_single_page(alloc, page, page_addr); if (ret) return ret; trace_binder_alloc_page_end(alloc, index); } return 0; } /* The range of pages should exclude those shared with other buffers */ static void binder_lru_freelist_del(struct binder_alloc *alloc, unsigned long start, unsigned long end) { struct binder_lru_page *page; unsigned long page_addr; trace_binder_update_page_range(alloc, true, start, end); for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { unsigned long index; bool on_lru; index = (page_addr - alloc->buffer) / PAGE_SIZE; page = &alloc->pages[index]; if (page->page_ptr) { trace_binder_alloc_lru_start(alloc, index); on_lru = 
list_lru_del_obj(&binder_freelist, &page->lru); WARN_ON(!on_lru); trace_binder_alloc_lru_end(alloc, index); continue; } if (index + 1 > alloc->pages_high) alloc->pages_high = index + 1; } } static inline void binder_alloc_set_vma(struct binder_alloc *alloc, struct vm_area_struct *vma) { /* pairs with smp_load_acquire in binder_alloc_get_vma() */ smp_store_release(&alloc->vma, vma); } static inline struct vm_area_struct *binder_alloc_get_vma( struct binder_alloc *alloc) { /* pairs with smp_store_release in binder_alloc_set_vma() */ return smp_load_acquire(&alloc->vma); } static void debug_no_space_locked(struct binder_alloc *alloc) { size_t largest_alloc_size = 0; struct binder_buffer *buffer; size_t allocated_buffers = 0; size_t largest_free_size = 0; size_t total_alloc_size = 0; size_t total_free_size = 0; size_t free_buffers = 0; size_t buffer_size; struct rb_node *n; for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) { buffer = rb_entry(n, struct binder_buffer, rb_node); buffer_size = binder_alloc_buffer_size(alloc, buffer); allocated_buffers++; total_alloc_size += buffer_size; if (buffer_size > largest_alloc_size) largest_alloc_size = buffer_size; } for (n = rb_first(&alloc->free_buffers); n; n = rb_next(n)) { buffer = rb_entry(n, struct binder_buffer, rb_node); buffer_size = binder_alloc_buffer_size(alloc, buffer); free_buffers++; total_free_size += buffer_size; if (buffer_size > largest_free_size) largest_free_size = buffer_size; } binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n", total_alloc_size, allocated_buffers, largest_alloc_size, total_free_size, free_buffers, largest_free_size); } static bool debug_low_async_space_locked(struct binder_alloc *alloc) { /* * Find the amount and size of buffers allocated by the current caller; * The idea is that once we cross the threshold, whoever is responsible * for the low async space is likely to try to send another async txn, * and at some point we'll catch them in the act. This is more efficient * than keeping a map per pid. */ struct binder_buffer *buffer; size_t total_alloc_size = 0; int pid = current->tgid; size_t num_buffers = 0; struct rb_node *n; /* * Only start detecting spammers once we have less than 20% of async * space left (which is less than 10% of total buffer size). */ if (alloc->free_async_space >= alloc->buffer_size / 10) { alloc->oneway_spam_detected = false; return false; } for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) { buffer = rb_entry(n, struct binder_buffer, rb_node); if (buffer->pid != pid) continue; if (!buffer->async_transaction) continue; total_alloc_size += binder_alloc_buffer_size(alloc, buffer); num_buffers++; } /* * Warn if this pid has more than 50 transactions, or more than 50% of * async space (which is 25% of total buffer size). Oneway spam is only * detected when the threshold is exceeded. */ if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) { binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: pid %d spamming oneway? 
%zd buffers allocated for a total size of %zd\n", alloc->pid, pid, num_buffers, total_alloc_size); if (!alloc->oneway_spam_detected) { alloc->oneway_spam_detected = true; return true; } } return false; } /* Callers preallocate @new_buffer, it is freed by this function if unused */ static struct binder_buffer *binder_alloc_new_buf_locked( struct binder_alloc *alloc, struct binder_buffer *new_buffer, size_t size, int is_async) { struct rb_node *n = alloc->free_buffers.rb_node; struct rb_node *best_fit = NULL; struct binder_buffer *buffer; unsigned long next_used_page; unsigned long curr_last_page; size_t buffer_size; if (is_async && alloc->free_async_space < size) { binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd failed, no async space left\n", alloc->pid, size); buffer = ERR_PTR(-ENOSPC); goto out; } while (n) { buffer = rb_entry(n, struct binder_buffer, rb_node); BUG_ON(!buffer->free); buffer_size = binder_alloc_buffer_size(alloc, buffer); if (size < buffer_size) { best_fit = n; n = n->rb_left; } else if (size > buffer_size) { n = n->rb_right; } else { best_fit = n; break; } } if (unlikely(!best_fit)) { binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf size %zd failed, no address space\n", alloc->pid, size); debug_no_space_locked(alloc); buffer = ERR_PTR(-ENOSPC); goto out; } if (buffer_size != size) { /* Found an oversized buffer and needs to be split */ buffer = rb_entry(best_fit, struct binder_buffer, rb_node); buffer_size = binder_alloc_buffer_size(alloc, buffer); WARN_ON(n || buffer_size == size); new_buffer->user_data = buffer->user_data + size; list_add(&new_buffer->entry, &buffer->entry); new_buffer->free = 1; binder_insert_free_buffer(alloc, new_buffer); new_buffer = NULL; } binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", alloc->pid, size, buffer, buffer_size); /* * Now we remove the pages from the freelist. A clever calculation * with buffer_size determines if the last page is shared with an * adjacent in-use buffer. In such case, the page has been already * removed from the freelist so we trim our range short. 
*/ next_used_page = (buffer->user_data + buffer_size) & PAGE_MASK; curr_last_page = PAGE_ALIGN(buffer->user_data + size); binder_lru_freelist_del(alloc, PAGE_ALIGN(buffer->user_data), min(next_used_page, curr_last_page)); rb_erase(&buffer->rb_node, &alloc->free_buffers); buffer->free = 0; buffer->allow_user_free = 0; binder_insert_allocated_buffer_locked(alloc, buffer); buffer->async_transaction = is_async; buffer->oneway_spam_suspect = false; if (is_async) { alloc->free_async_space -= size; binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_alloc_buf size %zd async free %zd\n", alloc->pid, size, alloc->free_async_space); if (debug_low_async_space_locked(alloc)) buffer->oneway_spam_suspect = true; } out: /* Discard possibly unused new_buffer */ kfree(new_buffer); return buffer; } /* Calculate the sanitized total size, returns 0 for invalid request */ static inline size_t sanitized_size(size_t data_size, size_t offsets_size, size_t extra_buffers_size) { size_t total, tmp; /* Align to pointer size and check for overflows */ tmp = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); if (tmp < data_size || tmp < offsets_size) return 0; total = tmp + ALIGN(extra_buffers_size, sizeof(void *)); if (total < tmp || total < extra_buffers_size) return 0; /* Pad 0-sized buffers so they get a unique address */ total = max(total, sizeof(void *)); return total; } /** * binder_alloc_new_buf() - Allocate a new binder buffer * @alloc: binder_alloc for this proc * @data_size: size of user data buffer * @offsets_size: user specified buffer offset * @extra_buffers_size: size of extra space for meta-data (eg, security context) * @is_async: buffer for async transaction * * Allocate a new buffer given the requested sizes. Returns * the kernel version of the buffer pointer. 
The size allocated * is the sum of the three given sizes (each rounded up to * pointer-sized boundary) * * Return: The allocated buffer or %ERR_PTR(-errno) if error */ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, size_t data_size, size_t offsets_size, size_t extra_buffers_size, int is_async) { struct binder_buffer *buffer, *next; size_t size; int ret; /* Check binder_alloc is fully initialized */ if (!binder_alloc_get_vma(alloc)) { binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); } size = sanitized_size(data_size, offsets_size, extra_buffers_size); if (unlikely(!size)) { binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: got transaction with invalid size %zd-%zd-%zd\n", alloc->pid, data_size, offsets_size, extra_buffers_size); return ERR_PTR(-EINVAL); } /* Preallocate the next buffer */ next = kzalloc(sizeof(*next), GFP_KERNEL); if (!next) return ERR_PTR(-ENOMEM); spin_lock(&alloc->lock); buffer = binder_alloc_new_buf_locked(alloc, next, size, is_async); if (IS_ERR(buffer)) { spin_unlock(&alloc->lock); goto out; } buffer->data_size = data_size; buffer->offsets_size = offsets_size; buffer->extra_buffers_size = extra_buffers_size; buffer->pid = current->tgid; spin_unlock(&alloc->lock); ret = binder_install_buffer_pages(alloc, buffer, size); if (ret) { binder_alloc_free_buf(alloc, buffer); buffer = ERR_PTR(ret); } out: return buffer; } static unsigned long buffer_start_page(struct binder_buffer *buffer) { return buffer->user_data & PAGE_MASK; } static unsigned long prev_buffer_end_page(struct binder_buffer *buffer) { return (buffer->user_data - 1) & PAGE_MASK; } static void binder_delete_free_buffer(struct binder_alloc *alloc, struct binder_buffer *buffer) { struct binder_buffer *prev, *next; if (PAGE_ALIGNED(buffer->user_data)) goto skip_freelist; BUG_ON(alloc->buffers.next == &buffer->entry); prev = binder_buffer_prev(buffer); BUG_ON(!prev->free); if (prev_buffer_end_page(prev) == buffer_start_page(buffer)) goto skip_freelist; if (!list_is_last(&buffer->entry, &alloc->buffers)) { next = binder_buffer_next(buffer); if (buffer_start_page(next) == buffer_start_page(buffer)) goto skip_freelist; } binder_lru_freelist_add(alloc, buffer_start_page(buffer), buffer_start_page(buffer) + PAGE_SIZE); skip_freelist: list_del(&buffer->entry); kfree(buffer); } static void binder_free_buf_locked(struct binder_alloc *alloc, struct binder_buffer *buffer) { size_t size, buffer_size; buffer_size = binder_alloc_buffer_size(alloc, buffer); size = ALIGN(buffer->data_size, sizeof(void *)) + ALIGN(buffer->offsets_size, sizeof(void *)) + ALIGN(buffer->extra_buffers_size, sizeof(void *)); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_free_buf %pK size %zd buffer_size %zd\n", alloc->pid, buffer, size, buffer_size); BUG_ON(buffer->free); BUG_ON(size > buffer_size); BUG_ON(buffer->transaction != NULL); BUG_ON(buffer->user_data < alloc->buffer); BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { alloc->free_async_space += buffer_size; binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", alloc->pid, size, alloc->free_async_space); } binder_lru_freelist_add(alloc, PAGE_ALIGN(buffer->user_data), (buffer->user_data + buffer_size) & PAGE_MASK); rb_erase(&buffer->rb_node, &alloc->allocated_buffers); buffer->free = 1; if (!list_is_last(&buffer->entry, &alloc->buffers)) { struct binder_buffer *next = binder_buffer_next(buffer); if 
(next->free) { rb_erase(&next->rb_node, &alloc->free_buffers); binder_delete_free_buffer(alloc, next); } } if (alloc->buffers.next != &buffer->entry) { struct binder_buffer *prev = binder_buffer_prev(buffer); if (prev->free) { binder_delete_free_buffer(alloc, buffer); rb_erase(&prev->rb_node, &alloc->free_buffers); buffer = prev; } } binder_insert_free_buffer(alloc, buffer); } /** * binder_alloc_get_page() - get kernel pointer for given buffer offset * @alloc: binder_alloc for this proc * @buffer: binder buffer to be accessed * @buffer_offset: offset into @buffer data * @pgoffp: address to copy final page offset to * * Lookup the struct page corresponding to the address * at @buffer_offset into @buffer->user_data. If @pgoffp is not * NULL, the byte-offset into the page is written there. * * The caller is responsible to ensure that the offset points * to a valid address within the @buffer and that @buffer is * not freeable by the user. Since it can't be freed, we are * guaranteed that the corresponding elements of @alloc->pages[] * cannot change. * * Return: struct page */ static struct page *binder_alloc_get_page(struct binder_alloc *alloc, struct binder_buffer *buffer, binder_size_t buffer_offset, pgoff_t *pgoffp) { binder_size_t buffer_space_offset = buffer_offset + (buffer->user_data - alloc->buffer); pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK; size_t index = buffer_space_offset >> PAGE_SHIFT; struct binder_lru_page *lru_page; lru_page = &alloc->pages[index]; *pgoffp = pgoff; return lru_page->page_ptr; } /** * binder_alloc_clear_buf() - zero out buffer * @alloc: binder_alloc for this proc * @buffer: binder buffer to be cleared * * memset the given buffer to 0 */ static void binder_alloc_clear_buf(struct binder_alloc *alloc, struct binder_buffer *buffer) { size_t bytes = binder_alloc_buffer_size(alloc, buffer); binder_size_t buffer_offset = 0; while (bytes) { unsigned long size; struct page *page; pgoff_t pgoff; page = binder_alloc_get_page(alloc, buffer, buffer_offset, &pgoff); size = min_t(size_t, bytes, PAGE_SIZE - pgoff); memset_page(page, pgoff, 0, size); bytes -= size; buffer_offset += size; } } /** * binder_alloc_free_buf() - free a binder buffer * @alloc: binder_alloc for this proc * @buffer: kernel pointer to buffer * * Free the buffer allocated via binder_alloc_new_buf() */ void binder_alloc_free_buf(struct binder_alloc *alloc, struct binder_buffer *buffer) { /* * We could eliminate the call to binder_alloc_clear_buf() * from binder_alloc_deferred_release() by moving this to * binder_free_buf_locked(). However, that could * increase contention for the alloc->lock if clear_on_free * is used frequently for large buffers. This lock is not * needed for correctness here. 
*/ if (buffer->clear_on_free) { binder_alloc_clear_buf(alloc, buffer); buffer->clear_on_free = false; } spin_lock(&alloc->lock); binder_free_buf_locked(alloc, buffer); spin_unlock(&alloc->lock); } /** * binder_alloc_mmap_handler() - map virtual address space for proc * @alloc: alloc structure for this proc * @vma: vma passed to mmap() * * Called by binder_mmap() to initialize the space specified in * vma for allocating binder buffers * * Return: * 0 = success * -EBUSY = address space already mapped * -ENOMEM = failed to map memory to given address space */ int binder_alloc_mmap_handler(struct binder_alloc *alloc, struct vm_area_struct *vma) { struct binder_buffer *buffer; const char *failure_string; int ret, i; if (unlikely(vma->vm_mm != alloc->mm)) { ret = -EINVAL; failure_string = "invalid vma->vm_mm"; goto err_invalid_mm; } mutex_lock(&binder_alloc_mmap_lock); if (alloc->buffer_size) { ret = -EBUSY; failure_string = "already mapped"; goto err_already_mapped; } alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start, SZ_4M); mutex_unlock(&binder_alloc_mmap_lock); alloc->buffer = vma->vm_start; alloc->pages = kvcalloc(alloc->buffer_size / PAGE_SIZE, sizeof(alloc->pages[0]), GFP_KERNEL); if (alloc->pages == NULL) { ret = -ENOMEM; failure_string = "alloc page array"; goto err_alloc_pages_failed; } for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { alloc->pages[i].alloc = alloc; INIT_LIST_HEAD(&alloc->pages[i].lru); } buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) { ret = -ENOMEM; failure_string = "alloc buffer struct"; goto err_alloc_buf_struct_failed; } buffer->user_data = alloc->buffer; list_add(&buffer->entry, &alloc->buffers); buffer->free = 1; binder_insert_free_buffer(alloc, buffer); alloc->free_async_space = alloc->buffer_size / 2; /* Signal binder_alloc is fully initialized */ binder_alloc_set_vma(alloc, vma); return 0; err_alloc_buf_struct_failed: kvfree(alloc->pages); alloc->pages = NULL; err_alloc_pages_failed: alloc->buffer = 0; mutex_lock(&binder_alloc_mmap_lock); alloc->buffer_size = 0; err_already_mapped: mutex_unlock(&binder_alloc_mmap_lock); err_invalid_mm: binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%s: %d %lx-%lx %s failed %d\n", __func__, alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret); return ret; } void binder_alloc_deferred_release(struct binder_alloc *alloc) { struct rb_node *n; int buffers, page_count; struct binder_buffer *buffer; buffers = 0; spin_lock(&alloc->lock); BUG_ON(alloc->vma); while ((n = rb_first(&alloc->allocated_buffers))) { buffer = rb_entry(n, struct binder_buffer, rb_node); /* Transaction should already have been freed */ BUG_ON(buffer->transaction); if (buffer->clear_on_free) { binder_alloc_clear_buf(alloc, buffer); buffer->clear_on_free = false; } binder_free_buf_locked(alloc, buffer); buffers++; } while (!list_empty(&alloc->buffers)) { buffer = list_first_entry(&alloc->buffers, struct binder_buffer, entry); WARN_ON(!buffer->free); list_del(&buffer->entry); WARN_ON_ONCE(!list_empty(&alloc->buffers)); kfree(buffer); } page_count = 0; if (alloc->pages) { int i; for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { bool on_lru; if (!alloc->pages[i].page_ptr) continue; on_lru = list_lru_del_obj(&binder_freelist, &alloc->pages[i].lru); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%s: %d: page %d %s\n", __func__, alloc->pid, i, on_lru ? 
"on lru" : "active"); __free_page(alloc->pages[i].page_ptr); page_count++; } } spin_unlock(&alloc->lock); kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d buffers %d, pages %d\n", __func__, alloc->pid, buffers, page_count); } /** * binder_alloc_print_allocated() - print buffer info * @m: seq_file for output via seq_printf() * @alloc: binder_alloc for this proc * * Prints information about every buffer associated with * the binder_alloc state to the given seq_file */ void binder_alloc_print_allocated(struct seq_file *m, struct binder_alloc *alloc) { struct binder_buffer *buffer; struct rb_node *n; spin_lock(&alloc->lock); for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) { buffer = rb_entry(n, struct binder_buffer, rb_node); seq_printf(m, " buffer %d: %lx size %zd:%zd:%zd %s\n", buffer->debug_id, buffer->user_data - alloc->buffer, buffer->data_size, buffer->offsets_size, buffer->extra_buffers_size, buffer->transaction ? "active" : "delivered"); } spin_unlock(&alloc->lock); } /** * binder_alloc_print_pages() - print page usage * @m: seq_file for output via seq_printf() * @alloc: binder_alloc for this proc */ void binder_alloc_print_pages(struct seq_file *m, struct binder_alloc *alloc) { struct binder_lru_page *page; int i; int active = 0; int lru = 0; int free = 0; spin_lock(&alloc->lock); /* * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ if (binder_alloc_get_vma(alloc) != NULL) { for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { page = &alloc->pages[i]; if (!page->page_ptr) free++; else if (list_empty(&page->lru)) active++; else lru++; } } spin_unlock(&alloc->lock); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); } /** * binder_alloc_get_allocated_count() - return count of buffers * @alloc: binder_alloc for this proc * * Return: count of allocated buffers */ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) { struct rb_node *n; int count = 0; spin_lock(&alloc->lock); for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) count++; spin_unlock(&alloc->lock); return count; } /** * binder_alloc_vma_close() - invalidate address space * @alloc: binder_alloc for this proc * * Called from binder_vma_close() when releasing address space. * Clears alloc->vma to prevent new incoming transactions from * allocating more buffers. */ void binder_alloc_vma_close(struct binder_alloc *alloc) { binder_alloc_set_vma(alloc, NULL); } /** * binder_alloc_free_page() - shrinker callback to free pages * @item: item to free * @lock: lock protecting the item * @cb_arg: callback argument * * Called from list_lru_walk() in binder_shrink_scan() to free * up pages when the system is under memory pressure. 
*/ enum lru_status binder_alloc_free_page(struct list_head *item, struct list_lru_one *lru, spinlock_t *lock, void *cb_arg) __must_hold(lock) { struct binder_lru_page *page = container_of(item, typeof(*page), lru); struct binder_alloc *alloc = page->alloc; struct mm_struct *mm = alloc->mm; struct vm_area_struct *vma; struct page *page_to_free; unsigned long page_addr; size_t index; if (!mmget_not_zero(mm)) goto err_mmget; if (!mmap_read_trylock(mm)) goto err_mmap_read_lock_failed; if (!spin_trylock(&alloc->lock)) goto err_get_alloc_lock_failed; if (!page->page_ptr) goto err_page_already_freed; index = page - alloc->pages; page_addr = alloc->buffer + index * PAGE_SIZE; vma = vma_lookup(mm, page_addr); if (vma && vma != binder_alloc_get_vma(alloc)) goto err_invalid_vma; trace_binder_unmap_kernel_start(alloc, index); page_to_free = page->page_ptr; page->page_ptr = NULL; trace_binder_unmap_kernel_end(alloc, index); list_lru_isolate(lru, item); spin_unlock(&alloc->lock); spin_unlock(lock); if (vma) { trace_binder_unmap_user_start(alloc, index); zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL); trace_binder_unmap_user_end(alloc, index); } mmap_read_unlock(mm); mmput_async(mm); __free_page(page_to_free); spin_lock(lock); return LRU_REMOVED_RETRY; err_invalid_vma: err_page_already_freed: spin_unlock(&alloc->lock); err_get_alloc_lock_failed: mmap_read_unlock(mm); err_mmap_read_lock_failed: mmput_async(mm); err_mmget: return LRU_SKIP; } static unsigned long binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return list_lru_count(&binder_freelist); } static unsigned long binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { return list_lru_walk(&binder_freelist, binder_alloc_free_page, NULL, sc->nr_to_scan); } static struct shrinker *binder_shrinker; /** * binder_alloc_init() - called by binder_open() for per-proc initialization * @alloc: binder_alloc for this proc * * Called from binder_open() to initialize binder_alloc fields for * new binder proc */ void binder_alloc_init(struct binder_alloc *alloc) { alloc->pid = current->group_leader->pid; alloc->mm = current->mm; mmgrab(alloc->mm); spin_lock_init(&alloc->lock); INIT_LIST_HEAD(&alloc->buffers); } int binder_alloc_shrinker_init(void) { int ret; ret = list_lru_init(&binder_freelist); if (ret) return ret; binder_shrinker = shrinker_alloc(0, "android-binder"); if (!binder_shrinker) { list_lru_destroy(&binder_freelist); return -ENOMEM; } binder_shrinker->count_objects = binder_shrink_count; binder_shrinker->scan_objects = binder_shrink_scan; shrinker_register(binder_shrinker); return 0; } void binder_alloc_shrinker_exit(void) { shrinker_free(binder_shrinker); list_lru_destroy(&binder_freelist); } /** * check_buffer() - verify that buffer/offset is safe to access * @alloc: binder_alloc for this proc * @buffer: binder buffer to be accessed * @offset: offset into @buffer data * @bytes: bytes to access from offset * * Check that the @offset/@bytes are within the size of the given * @buffer and that the buffer is currently active and not freeable. * Offsets must also be multiples of sizeof(u32). The kernel is * allowed to touch the buffer in two cases: * * 1) when the buffer is being created: * (buffer->free == 0 && buffer->allow_user_free == 0) * 2) when the buffer is being torn down: * (buffer->free == 0 && buffer->transaction == NULL). 
* * Return: true if the buffer is safe to access */ static inline bool check_buffer(struct binder_alloc *alloc, struct binder_buffer *buffer, binder_size_t offset, size_t bytes) { size_t buffer_size = binder_alloc_buffer_size(alloc, buffer); return buffer_size >= bytes && offset <= buffer_size - bytes && IS_ALIGNED(offset, sizeof(u32)) && !buffer->free && (!buffer->allow_user_free || !buffer->transaction); } /** * binder_alloc_copy_user_to_buffer() - copy src user to tgt user * @alloc: binder_alloc for this proc * @buffer: binder buffer to be accessed * @buffer_offset: offset into @buffer data * @from: userspace pointer to source buffer * @bytes: bytes to copy * * Copy bytes from source userspace to target buffer. * * Return: bytes remaining to be copied */ unsigned long binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc, struct binder_buffer *buffer, binder_size_t buffer_offset, const void __user *from, size_t bytes) { if (!check_buffer(alloc, buffer, buffer_offset, bytes)) return bytes; while (bytes) { unsigned long size; unsigned long ret; struct page *page; pgoff_t pgoff; void *kptr; page = binder_alloc_get_page(alloc, buffer, buffer_offset, &pgoff); size = min_t(size_t, bytes, PAGE_SIZE - pgoff); kptr = kmap_local_page(page) + pgoff; ret = copy_from_user(kptr, from, size); kunmap_local(kptr); if (ret) return bytes - size + ret; bytes -= size; from += size; buffer_offset += size; } return 0; } static int binder_alloc_do_buffer_copy(struct binder_alloc *alloc, bool to_buffer, struct binder_buffer *buffer, binder_size_t buffer_offset, void *ptr, size_t bytes) { /* All copies must be 32-bit aligned and 32-bit size */ if (!check_buffer(alloc, buffer, buffer_offset, bytes)) return -EINVAL; while (bytes) { unsigned long size; struct page *page; pgoff_t pgoff; page = binder_alloc_get_page(alloc, buffer, buffer_offset, &pgoff); size = min_t(size_t, bytes, PAGE_SIZE - pgoff); if (to_buffer) memcpy_to_page(page, pgoff, ptr, size); else memcpy_from_page(ptr, page, pgoff, size); bytes -= size; pgoff = 0; ptr = ptr + size; buffer_offset += size; } return 0; } int binder_alloc_copy_to_buffer(struct binder_alloc *alloc, struct binder_buffer *buffer, binder_size_t buffer_offset, void *src, size_t bytes) { return binder_alloc_do_buffer_copy(alloc, true, buffer, buffer_offset, src, bytes); } int binder_alloc_copy_from_buffer(struct binder_alloc *alloc, void *dest, struct binder_buffer *buffer, binder_size_t buffer_offset, size_t bytes) { return binder_alloc_do_buffer_copy(alloc, false, buffer, buffer_offset, dest, bytes); } |
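/*
 * The bounds test in check_buffer() above is written as
 * "buffer_size >= bytes && offset <= buffer_size - bytes" rather than the
 * naive "offset + bytes <= buffer_size" because the naive sum can wrap
 * around SIZE_MAX and wrongly accept a hostile request. A minimal,
 * self-contained user-space sketch of the same idea (illustrative only,
 * not part of the driver; the helper names are made up for this sketch):
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Overflow-prone form: offset + bytes can wrap around SIZE_MAX. */
static bool in_bounds_naive(size_t buffer_size, size_t offset, size_t bytes)
{
	return offset + bytes <= buffer_size;
}

/* Form used by check_buffer(): subtract only after proving bytes fits. */
static bool in_bounds_safe(size_t buffer_size, size_t offset, size_t bytes)
{
	return buffer_size >= bytes && offset <= buffer_size - bytes;
}

int main(void)
{
	size_t buffer_size = 4096, offset = 8, bytes = SIZE_MAX;

	/* naive: 1 (8 + SIZE_MAX wraps to 7), safe: 0 (correctly rejected) */
	printf("naive=%d safe=%d\n",
	       in_bounds_naive(buffer_size, offset, bytes),
	       in_bounds_safe(buffer_size, offset, bytes));
	return 0;
}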
// SPDX-License-Identifier: GPL-2.0 /* * Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation * Function"), aka RFC 5869. See also the original paper (Krawczyk 2010): * "Cryptographic Extraction and Key Derivation: The HKDF Scheme". * * This is used to derive keys from the fscrypt master keys. * * Copyright 2019 Google LLC */ #include <crypto/hash.h> #include <crypto/sha2.h> #include "fscrypt_private.h" /* * HKDF supports any unkeyed cryptographic hash algorithm, but fscrypt uses * SHA-512 because it is well-established, secure, and reasonably efficient. * * HKDF-SHA256 was also considered, as its 256-bit security strength would be * sufficient here. A 512-bit security strength is "nice to have", though. * Also, on 64-bit CPUs, SHA-512 is usually just as fast as SHA-256. In the * common case of deriving an AES-256-XTS key (512 bits), that can result in * HKDF-SHA512 being much faster than HKDF-SHA256, as the longer digest size of * SHA-512 causes HKDF-Expand to only need to do one iteration rather than two. */ #define HKDF_HMAC_ALG "hmac(sha512)" #define HKDF_HASHLEN SHA512_DIGEST_SIZE /* * HKDF consists of two steps: * * 1. HKDF-Extract: extract a pseudorandom key of length HKDF_HASHLEN bytes from * the input keying material and optional salt. * 2. HKDF-Expand: expand the pseudorandom key into output keying material of * any length, parameterized by an application-specific info string. * * HKDF-Extract can be skipped if the input is already a pseudorandom key of * length HKDF_HASHLEN bytes. However, cipher modes other than AES-256-XTS take * shorter keys, and we don't want to force users of those modes to provide * unnecessarily long master keys. Thus fscrypt still does HKDF-Extract. No * salt is used, since fscrypt master keys should already be pseudorandom and * there's no way to persist a random salt per master key from kernel mode. */ /* HKDF-Extract (RFC 5869 section 2.2), unsalted */ static int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm, unsigned int ikmlen, u8 prk[HKDF_HASHLEN]) { static const u8 default_salt[HKDF_HASHLEN]; int err; err = crypto_shash_setkey(hmac_tfm, default_salt, HKDF_HASHLEN); if (err) return err; return crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk); } /* * Compute HKDF-Extract using the given master key as the input keying material, * and prepare an HMAC transform object keyed by the resulting pseudorandom key. * * Afterwards, the keyed HMAC transform object can be used for HKDF-Expand many * times without having to recompute HKDF-Extract each time.
*/ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, unsigned int master_key_size) { struct crypto_shash *hmac_tfm; u8 prk[HKDF_HASHLEN]; int err; hmac_tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, 0); if (IS_ERR(hmac_tfm)) { fscrypt_err(NULL, "Error allocating " HKDF_HMAC_ALG ": %ld", PTR_ERR(hmac_tfm)); return PTR_ERR(hmac_tfm); } if (WARN_ON_ONCE(crypto_shash_digestsize(hmac_tfm) != sizeof(prk))) { err = -EINVAL; goto err_free_tfm; } err = hkdf_extract(hmac_tfm, master_key, master_key_size, prk); if (err) goto err_free_tfm; err = crypto_shash_setkey(hmac_tfm, prk, sizeof(prk)); if (err) goto err_free_tfm; hkdf->hmac_tfm = hmac_tfm; goto out; err_free_tfm: crypto_free_shash(hmac_tfm); out: memzero_explicit(prk, sizeof(prk)); return err; } /* * HKDF-Expand (RFC 5869 section 2.3). This expands the pseudorandom key, which * was already keyed into 'hkdf->hmac_tfm' by fscrypt_init_hkdf(), into 'okmlen' * bytes of output keying material parameterized by the application-specific * 'info' of length 'infolen' bytes, prefixed by "fscrypt\0" and the 'context' * byte. This is thread-safe and may be called by multiple threads in parallel. * * ('context' isn't part of the HKDF specification; it's just a prefix fscrypt * adds to its application-specific info strings to guarantee that it doesn't * accidentally repeat an info string when using HKDF for different purposes.) */ int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen) { SHASH_DESC_ON_STACK(desc, hkdf->hmac_tfm); u8 prefix[9]; unsigned int i; int err; const u8 *prev = NULL; u8 counter = 1; u8 tmp[HKDF_HASHLEN]; if (WARN_ON_ONCE(okmlen > 255 * HKDF_HASHLEN)) return -EINVAL; desc->tfm = hkdf->hmac_tfm; memcpy(prefix, "fscrypt\0", 8); prefix[8] = context; for (i = 0; i < okmlen; i += HKDF_HASHLEN) { err = crypto_shash_init(desc); if (err) goto out; if (prev) { err = crypto_shash_update(desc, prev, HKDF_HASHLEN); if (err) goto out; } err = crypto_shash_update(desc, prefix, sizeof(prefix)); if (err) goto out; err = crypto_shash_update(desc, info, infolen); if (err) goto out; BUILD_BUG_ON(sizeof(counter) != 1); if (okmlen - i < HKDF_HASHLEN) { err = crypto_shash_finup(desc, &counter, 1, tmp); if (err) goto out; memcpy(&okm[i], tmp, okmlen - i); memzero_explicit(tmp, sizeof(tmp)); } else { err = crypto_shash_finup(desc, &counter, 1, &okm[i]); if (err) goto out; } counter++; prev = &okm[i]; } err = 0; out: if (unlikely(err)) memzero_explicit(okm, okmlen); /* so caller doesn't need to */ shash_desc_zero(desc); return err; } void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf) { crypto_free_shash(hkdf->hmac_tfm); } |
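/*
 * Illustrative user-space sketch of the HKDF-Expand loop performed by
 * fscrypt_hkdf_expand() above: block i of the output is
 * T(i) = HMAC-SHA512(PRK, T(i-1) || "fscrypt\0" || context || info || i),
 * with the one-byte counter i starting at 1 (RFC 5869 section 2.3). This
 * sketch uses OpenSSL's one-shot HMAC() purely for demonstration (the kernel
 * code uses the crypto shash API instead), and the helper name and fixed-size
 * message buffer below are illustrative assumptions, not fscrypt interfaces.
 */
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <stdint.h>
#include <string.h>

#define HASHLEN 64	/* SHA-512 digest size, i.e. HKDF_HASHLEN */

static int hkdf_sha512_expand(const uint8_t prk[HASHLEN], uint8_t context,
			      const uint8_t *info, size_t infolen,
			      uint8_t *okm, size_t okmlen)
{
	uint8_t msg[HASHLEN + 9 + 255 + 1];	/* T(i-1) || prefix || info || counter */
	uint8_t tmp[HASHLEN];
	const uint8_t *prev = NULL;
	uint8_t counter = 1;
	size_t i;

	if (okmlen > 255 * HASHLEN || infolen > 255)
		return -1;

	for (i = 0; i < okmlen; i += HASHLEN, counter++) {
		size_t msglen = 0;

		if (prev) {			/* T(i-1), absent for the first block */
			memcpy(msg, prev, HASHLEN);
			msglen = HASHLEN;
		}
		memcpy(msg + msglen, "fscrypt\0", 8);	/* application prefix */
		msglen += 8;
		msg[msglen++] = context;		/* fscrypt-specific context byte */
		memcpy(msg + msglen, info, infolen);
		msglen += infolen;
		msg[msglen++] = counter;		/* RFC 5869 block counter */

		if (!HMAC(EVP_sha512(), prk, HASHLEN, msg, msglen, tmp, NULL))
			return -1;
		memcpy(okm + i, tmp, okmlen - i < HASHLEN ? okmlen - i : HASHLEN);
		prev = okm + i;
	}
	return 0;
}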
// SPDX-License-Identifier: ISC /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/etherdevice.h> #include "htt.h" #include "mac.h" #include "hif.h" #include "txrx.h" #include "debug.h" static u8 ath10k_htt_tx_txq_calc_size(size_t count) { int exp; int factor; exp = 0; factor = count >> 7; while (factor >= 64 && exp < 4) { factor >>= 3; exp++; } if (exp == 4) return 0xff; if (count > 0) factor = max(1, factor); return SM(exp, HTT_TX_Q_STATE_ENTRY_EXP) | SM(factor, HTT_TX_Q_STATE_ENTRY_FACTOR); } static void __ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_sta *arsta; struct ath10k_vif *arvif = (void *)txq->vif->drv_priv; unsigned long byte_cnt; int idx; u32 bit; u16 peer_id; u8 tid; u8 count; lockdep_assert_held(&ar->htt.tx_lock); if (!ar->htt.tx_q_state.enabled) return; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL) return; if (txq->sta) { arsta = (void *)txq->sta->drv_priv; peer_id = arsta->peer_id; } else { peer_id = arvif->peer_id; } tid = txq->tid; bit = BIT(peer_id % 32); idx = peer_id / 32; ieee80211_txq_get_depth(txq, NULL, &byte_cnt); count = ath10k_htt_tx_txq_calc_size(byte_cnt); if (unlikely(peer_id >= ar->htt.tx_q_state.num_peers) || unlikely(tid >= ar->htt.tx_q_state.num_tids)) { ath10k_warn(ar, "refusing to update txq for peer_id %u tid %u due to out of bounds\n", peer_id, tid); return; } ar->htt.tx_q_state.vaddr->count[tid][peer_id] = count; ar->htt.tx_q_state.vaddr->map[tid][idx] &= ~bit; ar->htt.tx_q_state.vaddr->map[tid][idx] |= count ?
bit : 0; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx txq state update peer_id %u tid %u count %u\n", peer_id, tid, count); } static void __ath10k_htt_tx_txq_sync(struct ath10k *ar) { u32 seq; size_t size; lockdep_assert_held(&ar->htt.tx_lock); if (!ar->htt.tx_q_state.enabled) return; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL) return; seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq); seq++; ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx txq state update commit seq %u\n", seq); size = sizeof(*ar->htt.tx_q_state.vaddr); dma_sync_single_for_device(ar->dev, ar->htt.tx_q_state.paddr, size, DMA_TO_DEVICE); } void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_recalc(hw, txq); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_txq_sync(struct ath10k *ar) { spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_sync(ar); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_recalc(hw, txq); __ath10k_htt_tx_txq_sync(ar); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); htt->num_pending_tx--; if (htt->num_pending_tx == htt->max_num_pending_tx - 1) ath10k_mac_tx_unlock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); if (htt->num_pending_tx == 0) wake_up(&htt->empty_tx_wq); } int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); if (htt->num_pending_tx >= htt->max_num_pending_tx) return -EBUSY; htt->num_pending_tx++; if (htt->num_pending_tx == htt->max_num_pending_tx) ath10k_mac_tx_lock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); return 0; } int ath10k_htt_tx_mgmt_inc_pending(struct ath10k_htt *htt, bool is_mgmt, bool is_presp) { struct ath10k *ar = htt->ar; lockdep_assert_held(&htt->tx_lock); if (!is_mgmt || !ar->hw_params.max_probe_resp_desc_thres) return 0; if (is_presp && ar->hw_params.max_probe_resp_desc_thres < htt->num_pending_mgmt_tx) return -EBUSY; htt->num_pending_mgmt_tx++; return 0; } void ath10k_htt_tx_mgmt_dec_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); if (!htt->ar->hw_params.max_probe_resp_desc_thres) return; htt->num_pending_mgmt_tx--; } int ath10k_htt_tx_alloc_msdu_id(struct ath10k_htt *htt, struct sk_buff *skb) { struct ath10k *ar = htt->ar; int ret; spin_lock_bh(&htt->tx_lock); ret = idr_alloc(&htt->pending_tx, skb, 0, htt->max_num_pending_tx, GFP_ATOMIC); spin_unlock_bh(&htt->tx_lock); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx alloc msdu_id %d\n", ret); return ret; } void ath10k_htt_tx_free_msdu_id(struct ath10k_htt *htt, u16 msdu_id) { struct ath10k *ar = htt->ar; lockdep_assert_held(&htt->tx_lock); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx free msdu_id %u\n", msdu_id); idr_remove(&htt->pending_tx, msdu_id); } static void ath10k_htt_tx_free_cont_txbuf_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!htt->txbuf.vaddr_txbuff_32) return; size = htt->txbuf.size; dma_free_coherent(ar->dev, size, htt->txbuf.vaddr_txbuff_32, htt->txbuf.paddr); htt->txbuf.vaddr_txbuff_32 = NULL; } static int ath10k_htt_tx_alloc_cont_txbuf_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; size = htt->max_num_pending_tx * sizeof(struct ath10k_htt_txbuf_32); htt->txbuf.vaddr_txbuff_32 = dma_alloc_coherent(ar->dev, size, 
&htt->txbuf.paddr, GFP_KERNEL); if (!htt->txbuf.vaddr_txbuff_32) return -ENOMEM; htt->txbuf.size = size; return 0; } static void ath10k_htt_tx_free_cont_txbuf_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!htt->txbuf.vaddr_txbuff_64) return; size = htt->txbuf.size; dma_free_coherent(ar->dev, size, htt->txbuf.vaddr_txbuff_64, htt->txbuf.paddr); htt->txbuf.vaddr_txbuff_64 = NULL; } static int ath10k_htt_tx_alloc_cont_txbuf_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; size = htt->max_num_pending_tx * sizeof(struct ath10k_htt_txbuf_64); htt->txbuf.vaddr_txbuff_64 = dma_alloc_coherent(ar->dev, size, &htt->txbuf.paddr, GFP_KERNEL); if (!htt->txbuf.vaddr_txbuff_64) return -ENOMEM; htt->txbuf.size = size; return 0; } static void ath10k_htt_tx_free_cont_frag_desc_32(struct ath10k_htt *htt) { size_t size; if (!htt->frag_desc.vaddr_desc_32) return; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc); dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr_desc_32, htt->frag_desc.paddr); htt->frag_desc.vaddr_desc_32 = NULL; } static int ath10k_htt_tx_alloc_cont_frag_desc_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!ar->hw_params.continuous_frag_desc) return 0; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc); htt->frag_desc.vaddr_desc_32 = dma_alloc_coherent(ar->dev, size, &htt->frag_desc.paddr, GFP_KERNEL); if (!htt->frag_desc.vaddr_desc_32) { ath10k_err(ar, "failed to alloc fragment desc memory\n"); return -ENOMEM; } htt->frag_desc.size = size; return 0; } static void ath10k_htt_tx_free_cont_frag_desc_64(struct ath10k_htt *htt) { size_t size; if (!htt->frag_desc.vaddr_desc_64) return; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc_64); dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr_desc_64, htt->frag_desc.paddr); htt->frag_desc.vaddr_desc_64 = NULL; } static int ath10k_htt_tx_alloc_cont_frag_desc_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!ar->hw_params.continuous_frag_desc) return 0; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc_64); htt->frag_desc.vaddr_desc_64 = dma_alloc_coherent(ar->dev, size, &htt->frag_desc.paddr, GFP_KERNEL); if (!htt->frag_desc.vaddr_desc_64) { ath10k_err(ar, "failed to alloc fragment desc memory\n"); return -ENOMEM; } htt->frag_desc.size = size; return 0; } static void ath10k_htt_tx_free_txq(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) return; size = sizeof(*htt->tx_q_state.vaddr); dma_unmap_single(ar->dev, htt->tx_q_state.paddr, size, DMA_TO_DEVICE); kfree(htt->tx_q_state.vaddr); } static int ath10k_htt_tx_alloc_txq(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; int ret; if (!test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) return 0; htt->tx_q_state.num_peers = HTT_TX_Q_STATE_NUM_PEERS; htt->tx_q_state.num_tids = HTT_TX_Q_STATE_NUM_TIDS; htt->tx_q_state.type = HTT_Q_DEPTH_TYPE_BYTES; size = sizeof(*htt->tx_q_state.vaddr); htt->tx_q_state.vaddr = kzalloc(size, GFP_KERNEL); if (!htt->tx_q_state.vaddr) return -ENOMEM; htt->tx_q_state.paddr = dma_map_single(ar->dev, htt->tx_q_state.vaddr, size, DMA_TO_DEVICE); ret = dma_mapping_error(ar->dev, htt->tx_q_state.paddr); if (ret) { ath10k_warn(ar, "failed to dma map tx_q_state: %d\n", ret); kfree(htt->tx_q_state.vaddr); return -EIO; } return 0; } static void 
ath10k_htt_tx_free_txdone_fifo(struct ath10k_htt *htt) { WARN_ON(!kfifo_is_empty(&htt->txdone_fifo)); kfifo_free(&htt->txdone_fifo); } static int ath10k_htt_tx_alloc_txdone_fifo(struct ath10k_htt *htt) { int ret; size_t size; size = roundup_pow_of_two(htt->max_num_pending_tx); ret = kfifo_alloc(&htt->txdone_fifo, size, GFP_KERNEL); return ret; } static int ath10k_htt_tx_alloc_buf(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; int ret; ret = ath10k_htt_alloc_txbuff(htt); if (ret) { ath10k_err(ar, "failed to alloc cont tx buffer: %d\n", ret); return ret; } ret = ath10k_htt_alloc_frag_desc(htt); if (ret) { ath10k_err(ar, "failed to alloc cont frag desc: %d\n", ret); goto free_txbuf; } ret = ath10k_htt_tx_alloc_txq(htt); if (ret) { ath10k_err(ar, "failed to alloc txq: %d\n", ret); goto free_frag_desc; } ret = ath10k_htt_tx_alloc_txdone_fifo(htt); if (ret) { ath10k_err(ar, "failed to alloc txdone fifo: %d\n", ret); goto free_txq; } return 0; free_txq: ath10k_htt_tx_free_txq(htt); free_frag_desc: ath10k_htt_free_frag_desc(htt); free_txbuf: ath10k_htt_free_txbuff(htt); return ret; } int ath10k_htt_tx_start(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; int ret; ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx %d\n", htt->max_num_pending_tx); spin_lock_init(&htt->tx_lock); idr_init(&htt->pending_tx); if (htt->tx_mem_allocated) return 0; if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) return 0; ret = ath10k_htt_tx_alloc_buf(htt); if (ret) goto free_idr_pending_tx; htt->tx_mem_allocated = true; return 0; free_idr_pending_tx: idr_destroy(&htt->pending_tx); return ret; } static int ath10k_htt_tx_clean_up_pending(int msdu_id, void *skb, void *ctx) { struct ath10k *ar = ctx; struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {0}; ath10k_dbg(ar, ATH10K_DBG_HTT, "force cleanup msdu_id %u\n", msdu_id); tx_done.msdu_id = msdu_id; tx_done.status = HTT_TX_COMPL_STATE_DISCARD; ath10k_txrx_tx_unref(htt, &tx_done); return 0; } void ath10k_htt_tx_destroy(struct ath10k_htt *htt) { if (!htt->tx_mem_allocated) return; ath10k_htt_free_txbuff(htt); ath10k_htt_tx_free_txq(htt); ath10k_htt_free_frag_desc(htt); ath10k_htt_tx_free_txdone_fifo(htt); htt->tx_mem_allocated = false; } static void ath10k_htt_flush_tx_queue(struct ath10k_htt *htt) { ath10k_htc_stop_hl(htt->ar); idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); } void ath10k_htt_tx_stop(struct ath10k_htt *htt) { ath10k_htt_flush_tx_queue(htt); idr_destroy(&htt->pending_tx); } void ath10k_htt_tx_free(struct ath10k_htt *htt) { ath10k_htt_tx_stop(htt); ath10k_htt_tx_destroy(htt); } void ath10k_htt_op_ep_tx_credits(struct ath10k *ar) { queue_work(ar->workqueue, &ar->bundle_tx_work); } void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {0}; struct htt_cmd_hdr *htt_hdr; struct htt_data_tx_desc *desc_hdr = NULL; u16 flags1 = 0; u8 msg_type = 0; if (htt->disable_tx_comp) { htt_hdr = (struct htt_cmd_hdr *)skb->data; msg_type = htt_hdr->msg_type; if (msg_type == HTT_H2T_MSG_TYPE_TX_FRM) { desc_hdr = (struct htt_data_tx_desc *) (skb->data + sizeof(*htt_hdr)); flags1 = __le16_to_cpu(desc_hdr->flags1); skb_pull(skb, sizeof(struct htt_cmd_hdr)); skb_pull(skb, sizeof(struct htt_data_tx_desc)); } } dev_kfree_skb_any(skb); if ((!htt->disable_tx_comp) || (msg_type != HTT_H2T_MSG_TYPE_TX_FRM)) return; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx complete msdu id:%u ,flags1:%x\n", __le16_to_cpu(desc_hdr->id), flags1); if (flags1 & 
HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE) return; tx_done.status = HTT_TX_COMPL_STATE_ACK; tx_done.msdu_id = __le16_to_cpu(desc_hdr->id); ath10k_txrx_tx_unref(&ar->htt, &tx_done); } void ath10k_htt_hif_tx_complete(struct ath10k *ar, struct sk_buff *skb) { dev_kfree_skb_any(skb); } EXPORT_SYMBOL(ath10k_htt_hif_tx_complete); int ath10k_htt_h2t_ver_req_msg(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; int len = 0; int ret; len += sizeof(cmd->hdr); len += sizeof(cmd->ver_req); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_VERSION_REQ; ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } int ath10k_htt_h2t_stats_req(struct ath10k_htt *htt, u32 mask, u32 reset_mask, u64 cookie) { struct ath10k *ar = htt->ar; struct htt_stats_req *req; struct sk_buff *skb; struct htt_cmd *cmd; int len = 0, ret; len += sizeof(cmd->hdr); len += sizeof(cmd->stats_req); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_STATS_REQ; req = &cmd->stats_req; memset(req, 0, sizeof(*req)); /* currently we support only max 24 bit masks so no need to worry * about endian support */ memcpy(req->upload_types, &mask, 3); memcpy(req->reset_types, &reset_mask, 3); req->stat_type = HTT_STATS_REQ_CFG_STAT_TYPE_INVALID; req->cookie_lsb = cpu_to_le32(cookie & 0xffffffff); req->cookie_msb = cpu_to_le32((cookie & 0xffffffff00000000ULL) >> 32); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send htt type stats request: %d", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_frag_desc_bank_cfg_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_frag_desc_bank_cfg32 *cfg; int ret, size; u8 info; if (!ar->hw_params.continuous_frag_desc) return 0; if (!htt->frag_desc.paddr) { ath10k_warn(ar, "invalid frag desc memory\n"); return -EINVAL; } size = sizeof(cmd->hdr) + sizeof(cmd->frag_desc_bank_cfg32); skb = ath10k_htc_alloc_skb(ar, size); if (!skb) return -ENOMEM; skb_put(skb, size); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_FRAG_DESC_BANK_CFG; info = 0; info |= SM(htt->tx_q_state.type, HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_DEPTH_TYPE); if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) info |= HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_VALID; cfg = &cmd->frag_desc_bank_cfg32; cfg->info = info; cfg->num_banks = 1; cfg->desc_size = sizeof(struct htt_msdu_ext_desc); cfg->bank_base_addrs[0] = __cpu_to_le32(htt->frag_desc.paddr); cfg->bank_id[0].bank_min_id = 0; cfg->bank_id[0].bank_max_id = __cpu_to_le16(htt->max_num_pending_tx - 1); cfg->q_state.paddr = cpu_to_le32(htt->tx_q_state.paddr); cfg->q_state.num_peers = cpu_to_le16(htt->tx_q_state.num_peers); cfg->q_state.num_tids = cpu_to_le16(htt->tx_q_state.num_tids); cfg->q_state.record_size = HTT_TX_Q_STATE_ENTRY_SIZE; cfg->q_state.record_multiplier = HTT_TX_Q_STATE_ENTRY_MULTIPLIER; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt frag desc bank cmd\n"); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send frag desc bank cfg request: %d\n", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_frag_desc_bank_cfg_64(struct ath10k_htt 
*htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_frag_desc_bank_cfg64 *cfg; int ret, size; u8 info; if (!ar->hw_params.continuous_frag_desc) return 0; if (!htt->frag_desc.paddr) { ath10k_warn(ar, "invalid frag desc memory\n"); return -EINVAL; } size = sizeof(cmd->hdr) + sizeof(cmd->frag_desc_bank_cfg64); skb = ath10k_htc_alloc_skb(ar, size); if (!skb) return -ENOMEM; skb_put(skb, size); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_FRAG_DESC_BANK_CFG; info = 0; info |= SM(htt->tx_q_state.type, HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_DEPTH_TYPE); if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) info |= HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_VALID; cfg = &cmd->frag_desc_bank_cfg64; cfg->info = info; cfg->num_banks = 1; cfg->desc_size = sizeof(struct htt_msdu_ext_desc_64); cfg->bank_base_addrs[0] = __cpu_to_le64(htt->frag_desc.paddr); cfg->bank_id[0].bank_min_id = 0; cfg->bank_id[0].bank_max_id = __cpu_to_le16(htt->max_num_pending_tx - 1); cfg->q_state.paddr = cpu_to_le32(htt->tx_q_state.paddr); cfg->q_state.num_peers = cpu_to_le16(htt->tx_q_state.num_peers); cfg->q_state.num_tids = cpu_to_le16(htt->tx_q_state.num_tids); cfg->q_state.record_size = HTT_TX_Q_STATE_ENTRY_SIZE; cfg->q_state.record_multiplier = HTT_TX_Q_STATE_ENTRY_MULTIPLIER; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt frag desc bank cmd\n"); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send frag desc bank cfg request: %d\n", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static void ath10k_htt_fill_rx_desc_offset_32(struct ath10k_hw_params *hw, struct htt_rx_ring_setup_ring32 *rx_ring) { ath10k_htt_rx_desc_get_offsets(hw, &rx_ring->offsets); } static void ath10k_htt_fill_rx_desc_offset_64(struct ath10k_hw_params *hw, struct htt_rx_ring_setup_ring64 *rx_ring) { ath10k_htt_rx_desc_get_offsets(hw, &rx_ring->offsets); } static int ath10k_htt_send_rx_ring_cfg_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct ath10k_hw_params *hw = &ar->hw_params; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring32 *ring; const int num_rx_ring = 1; u16 flags; u32 fw_idx; int len; int ret; /* * the HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_32.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_32.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_32.hdr.num_rings = 1; /* FIXME: do we need all of this? 
*/ flags = 0; flags |= HTT_RX_RING_FLAGS_MAC80211_HDR; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_PPDU_START; flags |= HTT_RX_RING_FLAGS_PPDU_END; flags |= HTT_RX_RING_FLAGS_MPDU_START; flags |= HTT_RX_RING_FLAGS_MPDU_END; flags |= HTT_RX_RING_FLAGS_MSDU_START; flags |= HTT_RX_RING_FLAGS_MSDU_END; flags |= HTT_RX_RING_FLAGS_RX_ATTENTION; flags |= HTT_RX_RING_FLAGS_FRAG_INFO; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; flags |= HTT_RX_RING_FLAGS_CTRL_RX; flags |= HTT_RX_RING_FLAGS_MGMT_RX; flags |= HTT_RX_RING_FLAGS_NULL_RX; flags |= HTT_RX_RING_FLAGS_PHY_DATA_RX; fw_idx = __le32_to_cpu(*htt->rx_ring.alloc_idx.vaddr); ring->fw_idx_shadow_reg_paddr = __cpu_to_le32(htt->rx_ring.alloc_idx.paddr); ring->rx_ring_base_paddr = __cpu_to_le32(htt->rx_ring.base_paddr); ring->rx_ring_len = __cpu_to_le16(htt->rx_ring.size); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ring->fw_idx_init_val = __cpu_to_le16(fw_idx); ath10k_htt_fill_rx_desc_offset_32(hw, ring); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_rx_ring_cfg_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct ath10k_hw_params *hw = &ar->hw_params; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring64 *ring; const int num_rx_ring = 1; u16 flags; u32 fw_idx; int len; int ret; /* HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_64.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_64.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_64.hdr.num_rings = 1; flags = 0; flags |= HTT_RX_RING_FLAGS_MAC80211_HDR; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_PPDU_START; flags |= HTT_RX_RING_FLAGS_PPDU_END; flags |= HTT_RX_RING_FLAGS_MPDU_START; flags |= HTT_RX_RING_FLAGS_MPDU_END; flags |= HTT_RX_RING_FLAGS_MSDU_START; flags |= HTT_RX_RING_FLAGS_MSDU_END; flags |= HTT_RX_RING_FLAGS_RX_ATTENTION; flags |= HTT_RX_RING_FLAGS_FRAG_INFO; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; flags |= HTT_RX_RING_FLAGS_CTRL_RX; flags |= HTT_RX_RING_FLAGS_MGMT_RX; flags |= HTT_RX_RING_FLAGS_NULL_RX; flags |= HTT_RX_RING_FLAGS_PHY_DATA_RX; fw_idx = __le32_to_cpu(*htt->rx_ring.alloc_idx.vaddr); ring->fw_idx_shadow_reg_paddr = __cpu_to_le64(htt->rx_ring.alloc_idx.paddr); ring->rx_ring_base_paddr = __cpu_to_le64(htt->rx_ring.base_paddr); ring->rx_ring_len = __cpu_to_le16(htt->rx_ring.size); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ring->fw_idx_init_val = __cpu_to_le16(fw_idx); ath10k_htt_fill_rx_desc_offset_64(hw, ring); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_rx_ring_cfg_hl(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring32 *ring; const int num_rx_ring = 1; u16 flags; int len; int ret; /* * the HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); 
BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_32.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_32.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_32.hdr.num_rings = 1; flags = 0; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; memset(ring, 0, sizeof(*ring)); ring->rx_ring_len = __cpu_to_le16(HTT_RX_RING_SIZE_MIN); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_h2t_aggr_cfg_msg_32(struct ath10k_htt *htt, u8 max_subfrms_ampdu, u8 max_subfrms_amsdu) { struct ath10k *ar = htt->ar; struct htt_aggr_conf *aggr_conf; struct sk_buff *skb; struct htt_cmd *cmd; int len; int ret; /* Firmware defaults are: amsdu = 3 and ampdu = 64 */ if (max_subfrms_ampdu == 0 || max_subfrms_ampdu > 64) return -EINVAL; if (max_subfrms_amsdu == 0 || max_subfrms_amsdu > 31) return -EINVAL; len = sizeof(cmd->hdr); len += sizeof(cmd->aggr_conf); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_AGGR_CFG; aggr_conf = &cmd->aggr_conf; aggr_conf->max_num_ampdu_subframes = max_subfrms_ampdu; aggr_conf->max_num_amsdu_subframes = max_subfrms_amsdu; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt h2t aggr cfg msg amsdu %d ampdu %d", aggr_conf->max_num_amsdu_subframes, aggr_conf->max_num_ampdu_subframes); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_h2t_aggr_cfg_msg_v2(struct ath10k_htt *htt, u8 max_subfrms_ampdu, u8 max_subfrms_amsdu) { struct ath10k *ar = htt->ar; struct htt_aggr_conf_v2 *aggr_conf; struct sk_buff *skb; struct htt_cmd *cmd; int len; int ret; /* Firmware defaults are: amsdu = 3 and ampdu = 64 */ if (max_subfrms_ampdu == 0 || max_subfrms_ampdu > 64) return -EINVAL; if (max_subfrms_amsdu == 0 || max_subfrms_amsdu > 31) return -EINVAL; len = sizeof(cmd->hdr); len += sizeof(cmd->aggr_conf_v2); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_AGGR_CFG; aggr_conf = &cmd->aggr_conf_v2; aggr_conf->max_num_ampdu_subframes = max_subfrms_ampdu; aggr_conf->max_num_amsdu_subframes = max_subfrms_amsdu; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt h2t aggr cfg msg amsdu %d ampdu %d", aggr_conf->max_num_amsdu_subframes, aggr_conf->max_num_ampdu_subframes); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } int ath10k_htt_tx_fetch_resp(struct ath10k *ar, __le32 token, __le16 fetch_seq_num, struct htt_tx_fetch_record *records, size_t num_records) { struct sk_buff *skb; struct htt_cmd *cmd; const u16 resp_id = 0; int len = 0; int ret; /* Response IDs are echo-ed back only for host driver convenience * purposes. They aren't used for anything in the driver yet so use 0. 
*/ len += sizeof(cmd->hdr); len += sizeof(cmd->tx_fetch_resp); len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records; skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP; cmd->tx_fetch_resp.resp_id = cpu_to_le16(resp_id); cmd->tx_fetch_resp.fetch_seq_num = fetch_seq_num; cmd->tx_fetch_resp.num_records = cpu_to_le16(num_records); cmd->tx_fetch_resp.token = token; memcpy(cmd->tx_fetch_resp.records, records, sizeof(records[0]) * num_records); ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb); if (ret) { ath10k_warn(ar, "failed to submit htc command: %d\n", ret); goto err_free_skb; } return 0; err_free_skb: dev_kfree_skb_any(skb); return ret; } static u8 ath10k_htt_tx_get_vdev_id(struct ath10k *ar, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); struct ath10k_vif *arvif; if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { return ar->scan.vdev_id; } else if (cb->vif) { arvif = (void *)cb->vif->drv_priv; return arvif->vdev_id; } else if (ar->monitor_started) { return ar->monitor_vdev_id; } else { return 0; } } static u8 ath10k_htt_tx_get_tid(struct sk_buff *skb, bool is_eth) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); if (!is_eth && ieee80211_is_mgmt(hdr->frame_control)) return HTT_DATA_TX_EXT_TID_MGMT; else if (cb->flags & ATH10K_SKB_F_QOS) return skb->priority & IEEE80211_QOS_CTL_TID_MASK; else return HTT_DATA_TX_EXT_TID_NON_QOS_MCAST_BCAST; } int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev = ar->dev; struct sk_buff *txdesc = NULL; struct htt_cmd *cmd; struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); int len = 0; int msdu_id = -1; int res; const u8 *peer_addr; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; len += sizeof(cmd->hdr); len += sizeof(cmd->mgmt_tx); res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { peer_addr = hdr->addr1; if (is_multicast_ether_addr(peer_addr)) { skb_put(msdu, sizeof(struct ieee80211_mmie_16)); } else { if (skb_cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP || skb_cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP_256) skb_put(msdu, IEEE80211_GCMP_MIC_LEN); else skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } txdesc = ath10k_htc_alloc_skb(ar, len); if (!txdesc) { res = -ENOMEM; goto err_free_msdu_id; } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_txdesc; } skb_put(txdesc, len); cmd = (struct htt_cmd *)txdesc->data; memset(cmd, 0, len); cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_MGMT_TX; cmd->mgmt_tx.msdu_paddr = __cpu_to_le32(ATH10K_SKB_CB(msdu)->paddr); cmd->mgmt_tx.len = __cpu_to_le32(msdu->len); cmd->mgmt_tx.desc_id = __cpu_to_le32(msdu_id); cmd->mgmt_tx.vdev_id = __cpu_to_le32(vdev_id); memcpy(cmd->mgmt_tx.hdr, msdu->data, min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN)); res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: if (ar->bus_param.dev_type != ATH10K_DEV_TYPE_HL) dma_unmap_single(dev, skb_cb->paddr, 
msdu->len, DMA_TO_DEVICE); err_free_txdesc: dev_kfree_skb_any(txdesc); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } #define HTT_TX_HL_NEEDED_HEADROOM \ (unsigned int)(sizeof(struct htt_cmd_hdr) + \ sizeof(struct htt_data_tx_desc) + \ sizeof(struct ath10k_htc_hdr)) static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; int res, data_len; struct htt_cmd_hdr *cmd_hdr; struct htt_data_tx_desc *tx_desc; struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct sk_buff *tmp_skb; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); u8 flags0 = 0; u16 flags1 = 0; u16 msdu_id = 0; if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } data_len = msdu->len; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; if (htt->disable_tx_comp) flags1 |= HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE; break; } if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; } /* Prepend the HTT header and TX desc struct to the data message * and realloc the skb if it does not have enough headroom. */ if (skb_headroom(msdu) < HTT_TX_HL_NEEDED_HEADROOM) { tmp_skb = msdu; ath10k_dbg(htt->ar, ATH10K_DBG_HTT, "Not enough headroom in skb. Current headroom: %u, needed: %u. Reallocating...\n", skb_headroom(msdu), HTT_TX_HL_NEEDED_HEADROOM); msdu = skb_realloc_headroom(msdu, HTT_TX_HL_NEEDED_HEADROOM); kfree_skb(tmp_skb); if (!msdu) { ath10k_warn(htt->ar, "htt hl tx: Unable to realloc skb!\n"); res = -ENOMEM; goto out; } } if (ar->bus_param.hl_msdu_ids) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) { ath10k_err(ar, "msdu_id allocation failed %d\n", res); goto out; } msdu_id = res; } /* As msdu is freed by mac80211 (in ieee80211_tx_status()) and by * ath10k (in ath10k_htt_htc_tx_complete()) we have to increase * reference by one to avoid a use-after-free case and a double * free. 
*/ skb_get(msdu); skb_push(msdu, sizeof(*cmd_hdr)); skb_push(msdu, sizeof(*tx_desc)); cmd_hdr = (struct htt_cmd_hdr *)msdu->data; tx_desc = (struct htt_data_tx_desc *)(msdu->data + sizeof(*cmd_hdr)); cmd_hdr->msg_type = HTT_H2T_MSG_TYPE_TX_FRM; tx_desc->flags0 = flags0; tx_desc->flags1 = __cpu_to_le16(flags1); tx_desc->len = __cpu_to_le16(data_len); tx_desc->id = __cpu_to_le16(msdu_id); tx_desc->frags_paddr = 0; /* always zero */ /* Initialize peer_id to INVALID_PEER because this is NOT * Reinjection path */ tx_desc->peerid = __cpu_to_le32(HTT_INVALID_PEERID); res = ath10k_htc_send_hl(&htt->ar->htc, htt->eid, msdu); out: return res; } static int ath10k_htt_tx_32(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev = ar->dev; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu); struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct ath10k_hif_sg_item sg_items[2]; struct ath10k_htt_txbuf_32 *txbuf; struct htt_data_tx_desc_frag *frags; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); int prefetch_len; int res; u8 flags0 = 0; u16 msdu_id, flags1 = 0; u16 freq = 0; u32 frags_paddr = 0; u32 txbuf_paddr; struct htt_msdu_ext_desc *ext_desc = NULL; struct htt_msdu_ext_desc *ext_desc_t = NULL; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; prefetch_len = min(htt->prefetch_len, msdu->len); prefetch_len = roundup(prefetch_len, 4); txbuf = htt->txbuf.vaddr_txbuff_32 + msdu_id; txbuf_paddr = htt->txbuf.paddr + (sizeof(struct ath10k_htt_txbuf_32) * msdu_id); if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } else if (!(skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) && txmode == ATH10K_HW_TXRX_RAW && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_msdu_id; } if (unlikely(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)) freq = ar->scan.roc_freq; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: if (ar->hw_params.continuous_frag_desc) { ext_desc_t = htt->frag_desc.vaddr_desc_32; memset(&ext_desc_t[msdu_id], 0, sizeof(struct htt_msdu_ext_desc)); frags = (struct htt_data_tx_desc_frag *) &ext_desc_t[msdu_id].frags; ext_desc = &ext_desc_t[msdu_id]; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = 0; frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags_paddr = htt->frag_desc.paddr + (sizeof(struct htt_msdu_ext_desc) * msdu_id); } else { frags = txbuf->frags; frags[0].dword_addr.paddr = __cpu_to_le32(skb_cb->paddr); frags[0].dword_addr.len = __cpu_to_le32(msdu->len); frags[1].dword_addr.paddr = 0; frags[1].dword_addr.len = 0; frags_paddr = txbuf_paddr; } flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; frags_paddr = skb_cb->paddr; break; } 
/* Normally all commands go through HTC which manages tx credits for * each endpoint and notifies when tx is completed. * * HTT endpoint is creditless so there's no need to care about HTC * flags. In that case it is trivial to fill the HTC header here. * * MSDU transmission is considered completed upon HTT event. This * implies no relevant resources can be freed until after the event is * received. That's why HTC tx completion handler itself is ignored by * setting NULL to transfer_context for all sg items. * * There is simply no point in pushing HTT TX_FRM through HTC tx path * as it's a waste of resources. By bypassing HTC it is possible to * avoid extra memory allocations, compress data structures and thus * improve performance. */ txbuf->htc_hdr.eid = htt->eid; txbuf->htc_hdr.len = __cpu_to_le16(sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx) + prefetch_len); txbuf->htc_hdr.flags = 0; if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; if (ar->hw_params.continuous_frag_desc) ext_desc->flags |= HTT_MSDU_CHECKSUM_ENABLE; } /* Prevent firmware from sending up tx inspection requests. There's * nothing ath10k can do with frames requested for inspection so force * it to simply rely a regular tx completion with discard status. */ flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM; txbuf->cmd_tx.flags0 = flags0; txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1); txbuf->cmd_tx.len = __cpu_to_le16(msdu->len); txbuf->cmd_tx.id = __cpu_to_le16(msdu_id); txbuf->cmd_tx.frags_paddr = __cpu_to_le32(frags_paddr); if (ath10k_mac_tx_frm_has_freq(ar)) { txbuf->cmd_tx.offchan_tx.peerid = __cpu_to_le16(HTT_INVALID_PEERID); txbuf->cmd_tx.offchan_tx.freq = __cpu_to_le16(freq); } else { txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID); } trace_ath10k_htt_tx(ar, msdu_id, msdu->len, vdev_id, tid); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx flags0 %u flags1 %u len %d id %u frags_paddr %pad, msdu_paddr %pad vdev %u tid %u freq %u\n", flags0, flags1, msdu->len, msdu_id, &frags_paddr, &skb_cb->paddr, vdev_id, tid, freq); ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ", msdu->data, msdu->len); trace_ath10k_tx_hdr(ar, msdu->data, msdu->len); trace_ath10k_tx_payload(ar, msdu->data, msdu->len); sg_items[0].transfer_id = 0; sg_items[0].transfer_context = NULL; sg_items[0].vaddr = &txbuf->htc_hdr; sg_items[0].paddr = txbuf_paddr + sizeof(txbuf->frags); sg_items[0].len = sizeof(txbuf->htc_hdr) + sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx); sg_items[1].transfer_id = 0; sg_items[1].transfer_context = NULL; sg_items[1].vaddr = msdu->data; sg_items[1].paddr = skb_cb->paddr; sg_items[1].len = prefetch_len; res = ath10k_hif_tx_sg(htt->ar, htt->ar->htc.endpoint[htt->eid].ul_pipe_id, sg_items, ARRAY_SIZE(sg_items)); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } static int ath10k_htt_tx_64(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev 
= ar->dev; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu); struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct ath10k_hif_sg_item sg_items[2]; struct ath10k_htt_txbuf_64 *txbuf; struct htt_data_tx_desc_frag *frags; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); int prefetch_len; int res; u8 flags0 = 0; u16 msdu_id, flags1 = 0; u16 freq = 0; dma_addr_t frags_paddr = 0; dma_addr_t txbuf_paddr; struct htt_msdu_ext_desc_64 *ext_desc = NULL; struct htt_msdu_ext_desc_64 *ext_desc_t = NULL; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; prefetch_len = min(htt->prefetch_len, msdu->len); prefetch_len = roundup(prefetch_len, 4); txbuf = htt->txbuf.vaddr_txbuff_64 + msdu_id; txbuf_paddr = htt->txbuf.paddr + (sizeof(struct ath10k_htt_txbuf_64) * msdu_id); if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } else if (!(skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) && txmode == ATH10K_HW_TXRX_RAW && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_msdu_id; } if (unlikely(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)) freq = ar->scan.roc_freq; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: if (ar->hw_params.continuous_frag_desc) { ext_desc_t = htt->frag_desc.vaddr_desc_64; memset(&ext_desc_t[msdu_id], 0, sizeof(struct htt_msdu_ext_desc_64)); frags = (struct htt_data_tx_desc_frag *) &ext_desc_t[msdu_id].frags; ext_desc = &ext_desc_t[msdu_id]; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = __cpu_to_le16(upper_32_bits(skb_cb->paddr)); frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags_paddr = htt->frag_desc.paddr + (sizeof(struct htt_msdu_ext_desc_64) * msdu_id); } else { frags = txbuf->frags; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = __cpu_to_le16(upper_32_bits(skb_cb->paddr)); frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags[1].tword_addr.paddr_lo = 0; frags[1].tword_addr.paddr_hi = 0; frags[1].tword_addr.len_16 = 0; } flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; frags_paddr = skb_cb->paddr; break; } /* Normally all commands go through HTC which manages tx credits for * each endpoint and notifies when tx is completed. * * HTT endpoint is creditless so there's no need to care about HTC * flags. In that case it is trivial to fill the HTC header here. * * MSDU transmission is considered completed upon HTT event. This * implies no relevant resources can be freed until after the event is * received. That's why HTC tx completion handler itself is ignored by * setting NULL to transfer_context for all sg items. * * There is simply no point in pushing HTT TX_FRM through HTC tx path * as it's a waste of resources. 
By bypassing HTC it is possible to * avoid extra memory allocations, compress data structures and thus * improve performance. */ txbuf->htc_hdr.eid = htt->eid; txbuf->htc_hdr.len = __cpu_to_le16(sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx) + prefetch_len); txbuf->htc_hdr.flags = 0; if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; if (ar->hw_params.continuous_frag_desc) { memset(ext_desc->tso_flag, 0, sizeof(ext_desc->tso_flag)); ext_desc->tso_flag[3] |= __cpu_to_le32(HTT_MSDU_CHECKSUM_ENABLE_64); } } /* Prevent firmware from sending up tx inspection requests. There's * nothing ath10k can do with frames requested for inspection so force * it to simply rely a regular tx completion with discard status. */ flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM; txbuf->cmd_tx.flags0 = flags0; txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1); txbuf->cmd_tx.len = __cpu_to_le16(msdu->len); txbuf->cmd_tx.id = __cpu_to_le16(msdu_id); /* fill fragment descriptor */ txbuf->cmd_tx.frags_paddr = __cpu_to_le64(frags_paddr); if (ath10k_mac_tx_frm_has_freq(ar)) { txbuf->cmd_tx.offchan_tx.peerid = __cpu_to_le16(HTT_INVALID_PEERID); txbuf->cmd_tx.offchan_tx.freq = __cpu_to_le16(freq); } else { txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID); } trace_ath10k_htt_tx(ar, msdu_id, msdu->len, vdev_id, tid); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx flags0 %u flags1 %u len %d id %u frags_paddr %pad, msdu_paddr %pad vdev %u tid %u freq %u\n", flags0, flags1, msdu->len, msdu_id, &frags_paddr, &skb_cb->paddr, vdev_id, tid, freq); ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ", msdu->data, msdu->len); trace_ath10k_tx_hdr(ar, msdu->data, msdu->len); trace_ath10k_tx_payload(ar, msdu->data, msdu->len); sg_items[0].transfer_id = 0; sg_items[0].transfer_context = NULL; sg_items[0].vaddr = &txbuf->htc_hdr; sg_items[0].paddr = txbuf_paddr + sizeof(txbuf->frags); sg_items[0].len = sizeof(txbuf->htc_hdr) + sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx); sg_items[1].transfer_id = 0; sg_items[1].transfer_context = NULL; sg_items[1].vaddr = msdu->data; sg_items[1].paddr = skb_cb->paddr; sg_items[1].len = prefetch_len; res = ath10k_hif_tx_sg(htt->ar, htt->ar->htc.endpoint[htt->eid].ul_pipe_id, sg_items, ARRAY_SIZE(sg_items)); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } static const struct ath10k_htt_tx_ops htt_tx_ops_32 = { .htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_32, .htt_send_frag_desc_bank_cfg = ath10k_htt_send_frag_desc_bank_cfg_32, .htt_alloc_frag_desc = ath10k_htt_tx_alloc_cont_frag_desc_32, .htt_free_frag_desc = ath10k_htt_tx_free_cont_frag_desc_32, .htt_tx = ath10k_htt_tx_32, .htt_alloc_txbuff = ath10k_htt_tx_alloc_cont_txbuf_32, .htt_free_txbuff = ath10k_htt_tx_free_cont_txbuf_32, .htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_32, }; static const struct ath10k_htt_tx_ops htt_tx_ops_64 = { .htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_64, .htt_send_frag_desc_bank_cfg = 
		ath10k_htt_send_frag_desc_bank_cfg_64,
	.htt_alloc_frag_desc = ath10k_htt_tx_alloc_cont_frag_desc_64,
	.htt_free_frag_desc = ath10k_htt_tx_free_cont_frag_desc_64,
	.htt_tx = ath10k_htt_tx_64,
	.htt_alloc_txbuff = ath10k_htt_tx_alloc_cont_txbuf_64,
	.htt_free_txbuff = ath10k_htt_tx_free_cont_txbuf_64,
	.htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_v2,
};

static const struct ath10k_htt_tx_ops htt_tx_ops_hl = {
	.htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_hl,
	.htt_send_frag_desc_bank_cfg = ath10k_htt_send_frag_desc_bank_cfg_32,
	.htt_tx = ath10k_htt_tx_hl,
	.htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_32,
	.htt_flush_tx = ath10k_htt_flush_tx_queue,
};

void ath10k_htt_set_tx_ops(struct ath10k_htt *htt)
{
	struct ath10k *ar = htt->ar;

	if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL)
		htt->tx_ops = &htt_tx_ops_hl;
	else if (ar->hw_params.target_64bit)
		htt->tx_ops = &htt_tx_ops_64;
	else
		htt->tx_ops = &htt_tx_ops_32;
}
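/*
 * Editor's note: the sketch below is illustrative and not part of the
 * original file. It shows how the handlers above are reached in practice:
 * callers go through the ops table installed by ath10k_htt_set_tx_ops()
 * rather than calling the 32-bit, 64-bit or high-latency variants directly.
 * The driver keeps similar inline dispatch helpers in its headers; the
 * function name below is made up for illustration only.
 */
static inline int ath10k_htt_send_rx_ring_cfg_example(struct ath10k_htt *htt)
{
	/* Report "unsupported" if the selected table does not provide the op. */
	if (!htt->tx_ops->htt_send_rx_ring_cfg)
		return -EOPNOTSUPP;

	/* Dispatch through whichever table was chosen for this device. */
	return htt->tx_ops->htt_send_rx_ring_cfg(htt);
}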
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * uvc_driver.c -- USB Video Class driver
 *
 * Copyright (C) 2005-2010
 * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
 */

#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/gpio/consumer.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/usb/quirks.h>
#include <linux/usb/uvc.h>
#include <linux/videodev2.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/unaligned.h>

#include <media/v4l2-common.h>
#include <media/v4l2-ioctl.h>

#include "uvcvideo.h"

#define DRIVER_AUTHOR	"Laurent Pinchart " \
			"<laurent.pinchart@ideasonboard.com>"
#define DRIVER_DESC	"USB Video Class driver"

unsigned int uvc_clock_param = CLOCK_MONOTONIC;
unsigned int uvc_hw_timestamps_param;
unsigned int uvc_no_drop_param;
static unsigned int uvc_quirks_param = -1;
unsigned int uvc_dbg_param;
unsigned int uvc_timeout_param = UVC_CTRL_STREAMING_TIMEOUT;

/* ------------------------------------------------------------------------
 * Utility functions
 */

struct usb_host_endpoint *uvc_find_endpoint(struct usb_host_interface *alts,
					    u8 epaddr)
{
	struct usb_host_endpoint *ep;
	unsigned int i;

	for (i = 0; i < alts->desc.bNumEndpoints; ++i) {
		ep = &alts->endpoint[i];
		if (ep->desc.bEndpointAddress == epaddr)
			return ep;
	}

	return NULL;
}

static enum v4l2_colorspace uvc_colorspace(const u8 primaries)
{
	static const enum v4l2_colorspace colorprimaries[] = {
		V4L2_COLORSPACE_SRGB,  /* Unspecified */
		V4L2_COLORSPACE_SRGB,
		V4L2_COLORSPACE_470_SYSTEM_M,
		V4L2_COLORSPACE_470_SYSTEM_BG,
		V4L2_COLORSPACE_SMPTE170M,
		V4L2_COLORSPACE_SMPTE240M,
	};

	if (primaries < ARRAY_SIZE(colorprimaries))
		return colorprimaries[primaries];

	return V4L2_COLORSPACE_SRGB;  /* Reserved */
}

static enum v4l2_xfer_func uvc_xfer_func(const u8 transfer_characteristics)
{
	/*
	 * V4L2 does not currently have definitions for all possible values of
	 * UVC transfer characteristics. If v4l2_xfer_func is extended with new
	 * values, the mapping below should be updated.
	 *
	 * Substitutions are taken from the mapping given for
	 * V4L2_XFER_FUNC_DEFAULT documented in videodev2.h.
	 */
	static const enum v4l2_xfer_func xfer_funcs[] = {
		V4L2_XFER_FUNC_DEFAULT,    /* Unspecified */
		V4L2_XFER_FUNC_709,
		V4L2_XFER_FUNC_709,        /* Substitution for BT.470-2 M */
		V4L2_XFER_FUNC_709,        /* Substitution for BT.470-2 B, G */
		V4L2_XFER_FUNC_709,        /* Substitution for SMPTE 170M */
		V4L2_XFER_FUNC_SMPTE240M,
		V4L2_XFER_FUNC_NONE,
		V4L2_XFER_FUNC_SRGB,
	};

	if (transfer_characteristics < ARRAY_SIZE(xfer_funcs))
		return xfer_funcs[transfer_characteristics];

	return V4L2_XFER_FUNC_DEFAULT;  /* Reserved */
}

static enum v4l2_ycbcr_encoding uvc_ycbcr_enc(const u8 matrix_coefficients)
{
	/*
	 * V4L2 does not currently have definitions for all possible values of
	 * UVC matrix coefficients.
If v4l2_ycbcr_encoding is extended with new * values, the mapping below should be updated. * * Substitutions are taken from the mapping given for * V4L2_YCBCR_ENC_DEFAULT documented in videodev2.h. * * FCC is assumed to be close enough to 601. */ static const enum v4l2_ycbcr_encoding ycbcr_encs[] = { V4L2_YCBCR_ENC_DEFAULT, /* Unspecified */ V4L2_YCBCR_ENC_709, V4L2_YCBCR_ENC_601, /* Substitution for FCC */ V4L2_YCBCR_ENC_601, /* Substitution for BT.470-2 B, G */ V4L2_YCBCR_ENC_601, V4L2_YCBCR_ENC_SMPTE240M, }; if (matrix_coefficients < ARRAY_SIZE(ycbcr_encs)) return ycbcr_encs[matrix_coefficients]; return V4L2_YCBCR_ENC_DEFAULT; /* Reserved */ } /* ------------------------------------------------------------------------ * Terminal and unit management */ struct uvc_entity *uvc_entity_by_id(struct uvc_device *dev, int id) { struct uvc_entity *entity; list_for_each_entry(entity, &dev->entities, list) { if (entity->id == id) return entity; } return NULL; } static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, int id, struct uvc_entity *entity) { unsigned int i; if (entity == NULL) entity = list_entry(&dev->entities, struct uvc_entity, list); list_for_each_entry_continue(entity, &dev->entities, list) { for (i = 0; i < entity->bNrInPins; ++i) if (entity->baSourceID[i] == id) return entity; } return NULL; } static struct uvc_streaming *uvc_stream_by_id(struct uvc_device *dev, int id) { struct uvc_streaming *stream; list_for_each_entry(stream, &dev->streams, list) { if (stream->header.bTerminalLink == id) return stream; } return NULL; } /* ------------------------------------------------------------------------ * Streaming Object Management */ static void uvc_stream_delete(struct uvc_streaming *stream) { if (stream->async_wq) destroy_workqueue(stream->async_wq); mutex_destroy(&stream->mutex); usb_put_intf(stream->intf); kfree(stream->formats); kfree(stream->header.bmaControls); kfree(stream); } static struct uvc_streaming *uvc_stream_new(struct uvc_device *dev, struct usb_interface *intf) { struct uvc_streaming *stream; stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) return NULL; mutex_init(&stream->mutex); stream->dev = dev; stream->intf = usb_get_intf(intf); stream->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; /* Allocate a stream specific work queue for asynchronous tasks. */ stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI, 0); if (!stream->async_wq) { uvc_stream_delete(stream); return NULL; } return stream; } /* ------------------------------------------------------------------------ * Descriptors parsing */ static int uvc_parse_format(struct uvc_device *dev, struct uvc_streaming *streaming, struct uvc_format *format, struct uvc_frame *frames, u32 **intervals, const unsigned char *buffer, int buflen) { struct usb_interface *intf = streaming->intf; struct usb_host_interface *alts = intf->cur_altsetting; const struct uvc_format_desc *fmtdesc; struct uvc_frame *frame; const unsigned char *start = buffer; unsigned int width_multiplier = 1; unsigned int interval; unsigned int i, n; u8 ftype; format->type = buffer[2]; format->index = buffer[3]; format->frames = frames; switch (buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_FRAME_BASED: n = buffer[2] == UVC_VS_FORMAT_UNCOMPRESSED ? 27 : 28; if (buflen < n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* Find the format descriptor from its GUID. 
*/ fmtdesc = uvc_format_by_guid(&buffer[5]); if (!fmtdesc) { /* * Unknown video formats are not fatal errors, the * caller will skip this descriptor. */ dev_info(&streaming->intf->dev, "Unknown video format %pUl\n", &buffer[5]); return 0; } format->fcc = fmtdesc->fcc; format->bpp = buffer[21]; /* * Some devices report a format that doesn't match what they * really send. */ if (dev->quirks & UVC_QUIRK_FORCE_Y8) { if (format->fcc == V4L2_PIX_FMT_YUYV) { format->fcc = V4L2_PIX_FMT_GREY; format->bpp = 8; width_multiplier = 2; } } /* Some devices report bpp that doesn't match the format. */ if (dev->quirks & UVC_QUIRK_FORCE_BPP) { const struct v4l2_format_info *info = v4l2_format_info(format->fcc); if (info) { unsigned int div = info->hdiv * info->vdiv; n = info->bpp[0] * div; for (i = 1; i < info->comp_planes; i++) n += info->bpp[i]; format->bpp = DIV_ROUND_UP(8 * n, div); } } if (buffer[2] == UVC_VS_FORMAT_UNCOMPRESSED) { ftype = UVC_VS_FRAME_UNCOMPRESSED; } else { ftype = UVC_VS_FRAME_FRAME_BASED; if (buffer[27]) format->flags = UVC_FMT_FLAG_COMPRESSED; } break; case UVC_VS_FORMAT_MJPEG: if (buflen < 11) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } format->fcc = V4L2_PIX_FMT_MJPEG; format->flags = UVC_FMT_FLAG_COMPRESSED; format->bpp = 0; ftype = UVC_VS_FRAME_MJPEG; break; case UVC_VS_FORMAT_DV: if (buflen < 9) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } if ((buffer[8] & 0x7f) > 2) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d: unknown DV format %u\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buffer[8]); return -EINVAL; } format->fcc = V4L2_PIX_FMT_DV; format->flags = UVC_FMT_FLAG_COMPRESSED | UVC_FMT_FLAG_STREAM; format->bpp = 0; ftype = 0; /* Create a dummy frame descriptor. */ frame = &frames[0]; memset(frame, 0, sizeof(*frame)); frame->bFrameIntervalType = 1; frame->dwDefaultFrameInterval = 1; frame->dwFrameInterval = *intervals; *(*intervals)++ = 1; format->nframes = 1; break; case UVC_VS_FORMAT_MPEG2TS: case UVC_VS_FORMAT_STREAM_BASED: /* Not supported yet. */ default: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d unsupported format %u\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buffer[2]); return -EINVAL; } uvc_dbg(dev, DESCR, "Found format %p4cc", &format->fcc); buflen -= buffer[0]; buffer += buffer[0]; /* * Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { unsigned int maxIntervalIndex; frame = &frames[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) n = buflen > 25 ? buffer[25] : 0; else n = buflen > 21 ? buffer[21] : 0; n = n ? 
n : 3; if (buflen < 26 + 4*n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FRAME error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } frame->bFrameIndex = buffer[3]; frame->bmCapabilities = buffer[4]; frame->wWidth = get_unaligned_le16(&buffer[5]) * width_multiplier; frame->wHeight = get_unaligned_le16(&buffer[7]); frame->dwMinBitRate = get_unaligned_le32(&buffer[9]); frame->dwMaxBitRate = get_unaligned_le32(&buffer[13]); if (ftype != UVC_VS_FRAME_FRAME_BASED) { frame->dwMaxVideoFrameBufferSize = get_unaligned_le32(&buffer[17]); frame->dwDefaultFrameInterval = get_unaligned_le32(&buffer[21]); frame->bFrameIntervalType = buffer[25]; } else { frame->dwMaxVideoFrameBufferSize = 0; frame->dwDefaultFrameInterval = get_unaligned_le32(&buffer[17]); frame->bFrameIntervalType = buffer[21]; } /* * Copy the frame intervals. * * Some bogus devices report dwMinFrameInterval equal to * dwMaxFrameInterval and have dwFrameIntervalStep set to * zero. Setting all null intervals to 1 fixes the problem and * some other divisions by zero that could happen. */ frame->dwFrameInterval = *intervals; for (i = 0; i < n; ++i) { interval = get_unaligned_le32(&buffer[26+4*i]); (*intervals)[i] = interval ? interval : 1; } /* * Apply more fixes, quirks and workarounds to handle incorrect * or broken descriptors. */ /* * Several UVC chipsets screw up dwMaxVideoFrameBufferSize * completely. Observed behaviours range from setting the * value to 1.1x the actual frame size to hardwiring the * 16 low bits to 0. This results in a higher than necessary * memory usage as well as a wrong image size information. For * uncompressed formats this can be fixed by computing the * value from the frame size. */ if (!(format->flags & UVC_FMT_FLAG_COMPRESSED)) frame->dwMaxVideoFrameBufferSize = format->bpp * frame->wWidth * frame->wHeight / 8; /* * Clamp the default frame interval to the boundaries. A zero * bFrameIntervalType value indicates a continuous frame * interval range, with dwFrameInterval[0] storing the minimum * value and dwFrameInterval[1] storing the maximum value. */ maxIntervalIndex = frame->bFrameIntervalType ? n - 1 : 1; frame->dwDefaultFrameInterval = clamp(frame->dwDefaultFrameInterval, frame->dwFrameInterval[0], frame->dwFrameInterval[maxIntervalIndex]); /* * Some devices report frame intervals that are not functional. * If the corresponding quirk is set, restrict operation to the * first interval only. 
*/ if (dev->quirks & UVC_QUIRK_RESTRICT_FRAME_RATE) { frame->bFrameIntervalType = 1; (*intervals)[0] = frame->dwDefaultFrameInterval; } uvc_dbg(dev, DESCR, "- %ux%u (%u.%u fps)\n", frame->wWidth, frame->wHeight, 10000000 / frame->dwDefaultFrameInterval, (100000000 / frame->dwDefaultFrameInterval) % 10); format->nframes++; *intervals += n; buflen -= buffer[0]; buffer += buffer[0]; } if (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == UVC_VS_STILL_IMAGE_FRAME) { buflen -= buffer[0]; buffer += buffer[0]; } if (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == UVC_VS_COLORFORMAT) { if (buflen < 6) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d COLORFORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } format->colorspace = uvc_colorspace(buffer[3]); format->xfer_func = uvc_xfer_func(buffer[4]); format->ycbcr_enc = uvc_ycbcr_enc(buffer[5]); buflen -= buffer[0]; buffer += buffer[0]; } else { format->colorspace = V4L2_COLORSPACE_SRGB; } return buffer - start; } static int uvc_parse_streaming(struct uvc_device *dev, struct usb_interface *intf) { struct uvc_streaming *streaming = NULL; struct uvc_format *format; struct uvc_frame *frame; struct usb_host_interface *alts = &intf->altsetting[0]; const unsigned char *_buffer, *buffer = alts->extra; int _buflen, buflen = alts->extralen; unsigned int nformats = 0, nframes = 0, nintervals = 0; unsigned int size, i, n, p; u32 *interval; u16 psize; int ret = -EINVAL; if (intf->cur_altsetting->desc.bInterfaceSubClass != UVC_SC_VIDEOSTREAMING) { uvc_dbg(dev, DESCR, "device %d interface %d isn't a video streaming interface\n", dev->udev->devnum, intf->altsetting[0].desc.bInterfaceNumber); return -EINVAL; } if (usb_driver_claim_interface(&uvc_driver.driver, intf, dev)) { uvc_dbg(dev, DESCR, "device %d interface %d is already claimed\n", dev->udev->devnum, intf->altsetting[0].desc.bInterfaceNumber); return -EINVAL; } streaming = uvc_stream_new(dev, intf); if (streaming == NULL) { usb_driver_release_interface(&uvc_driver.driver, intf); return -ENOMEM; } /* * The Pico iMage webcam has its class-specific interface descriptors * after the endpoint descriptors. */ if (buflen == 0) { for (i = 0; i < alts->desc.bNumEndpoints; ++i) { struct usb_host_endpoint *ep = &alts->endpoint[i]; if (ep->extralen == 0) continue; if (ep->extralen > 2 && ep->extra[1] == USB_DT_CS_INTERFACE) { uvc_dbg(dev, DESCR, "trying extra data from endpoint %u\n", i); buffer = alts->endpoint[i].extra; buflen = alts->endpoint[i].extralen; break; } } } /* Skip the standard interface descriptors. */ while (buflen > 2 && buffer[1] != USB_DT_CS_INTERFACE) { buflen -= buffer[0]; buffer += buffer[0]; } if (buflen <= 2) { uvc_dbg(dev, DESCR, "no class-specific streaming interface descriptors found\n"); goto error; } /* Parse the header descriptor. */ switch (buffer[2]) { case UVC_VS_OUTPUT_HEADER: streaming->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; size = 9; break; case UVC_VS_INPUT_HEADER: streaming->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; size = 13; break; default: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d HEADER descriptor not found\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } p = buflen >= 4 ? buffer[3] : 0; n = buflen >= size ? 
buffer[size-1] : 0; if (buflen < size + p*n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d HEADER descriptor is invalid\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } streaming->header.bNumFormats = p; streaming->header.bEndpointAddress = buffer[6]; if (buffer[2] == UVC_VS_INPUT_HEADER) { streaming->header.bmInfo = buffer[7]; streaming->header.bTerminalLink = buffer[8]; streaming->header.bStillCaptureMethod = buffer[9]; streaming->header.bTriggerSupport = buffer[10]; streaming->header.bTriggerUsage = buffer[11]; } else { streaming->header.bTerminalLink = buffer[7]; } streaming->header.bControlSize = n; streaming->header.bmaControls = kmemdup(&buffer[size], p * n, GFP_KERNEL); if (streaming->header.bmaControls == NULL) { ret = -ENOMEM; goto error; } buflen -= buffer[0]; buffer += buffer[0]; _buffer = buffer; _buflen = buflen; /* Count the format and frame descriptors. */ while (_buflen > 2 && _buffer[1] == USB_DT_CS_INTERFACE) { switch (_buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_MJPEG: case UVC_VS_FORMAT_FRAME_BASED: nformats++; break; case UVC_VS_FORMAT_DV: /* * DV format has no frame descriptor. We will create a * dummy frame descriptor with a dummy frame interval. */ nformats++; nframes++; nintervals++; break; case UVC_VS_FORMAT_MPEG2TS: case UVC_VS_FORMAT_STREAM_BASED: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT %u is not supported\n", dev->udev->devnum, alts->desc.bInterfaceNumber, _buffer[2]); break; case UVC_VS_FRAME_UNCOMPRESSED: case UVC_VS_FRAME_MJPEG: nframes++; if (_buflen > 25) nintervals += _buffer[25] ? _buffer[25] : 3; break; case UVC_VS_FRAME_FRAME_BASED: nframes++; if (_buflen > 21) nintervals += _buffer[21] ? _buffer[21] : 3; break; } _buflen -= _buffer[0]; _buffer += _buffer[0]; } if (nformats == 0) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d has no supported formats defined\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } /* * Allocate memory for the formats, the frames and the intervals, * plus any required padding to guarantee that everything has the * correct alignment. */ size = nformats * sizeof(*format); size = ALIGN(size, __alignof__(*frame)) + nframes * sizeof(*frame); size = ALIGN(size, __alignof__(*interval)) + nintervals * sizeof(*interval); format = kzalloc(size, GFP_KERNEL); if (!format) { ret = -ENOMEM; goto error; } frame = (void *)format + nformats * sizeof(*format); frame = PTR_ALIGN(frame, __alignof__(*frame)); interval = (void *)frame + nframes * sizeof(*frame); interval = PTR_ALIGN(interval, __alignof__(*interval)); streaming->formats = format; streaming->nformats = 0; /* Parse the format descriptors. */ while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE) { switch (buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_MJPEG: case UVC_VS_FORMAT_DV: case UVC_VS_FORMAT_FRAME_BASED: ret = uvc_parse_format(dev, streaming, format, frame, &interval, buffer, buflen); if (ret < 0) goto error; if (!ret) break; streaming->nformats++; frame += format->nframes; format++; buflen -= ret; buffer += ret; continue; default: break; } buflen -= buffer[0]; buffer += buffer[0]; } if (buflen) uvc_dbg(dev, DESCR, "device %d videostreaming interface %d has %u bytes of trailing descriptor garbage\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buflen); /* Parse the alternate settings to find the maximum bandwidth. 
*/ for (i = 0; i < intf->num_altsetting; ++i) { struct usb_host_endpoint *ep; alts = &intf->altsetting[i]; ep = uvc_find_endpoint(alts, streaming->header.bEndpointAddress); if (ep == NULL) continue; psize = uvc_endpoint_max_bpi(dev->udev, ep); if (psize > streaming->maxpsize) streaming->maxpsize = psize; } list_add_tail(&streaming->list, &dev->streams); return 0; error: usb_driver_release_interface(&uvc_driver.driver, intf); uvc_stream_delete(streaming); return ret; } static const u8 uvc_camera_guid[16] = UVC_GUID_UVC_CAMERA; static const u8 uvc_gpio_guid[16] = UVC_GUID_EXT_GPIO_CONTROLLER; static const u8 uvc_media_transport_input_guid[16] = UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT; static const u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING; static struct uvc_entity *uvc_alloc_entity(u16 type, u16 id, unsigned int num_pads, unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int size; unsigned int i; extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; else num_inputs = 0; size = sizeof(*entity) + extra_size + sizeof(*entity->pads) * num_pads + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) return NULL; entity->id = id; entity->type = type; /* * Set the GUID for standard entity types. For extension units, the GUID * is initialized by the caller. */ switch (type) { case UVC_EXT_GPIO_UNIT: memcpy(entity->guid, uvc_gpio_guid, 16); break; case UVC_ITT_CAMERA: memcpy(entity->guid, uvc_camera_guid, 16); break; case UVC_ITT_MEDIA_TRANSPORT_INPUT: memcpy(entity->guid, uvc_media_transport_input_guid, 16); break; case UVC_VC_PROCESSING_UNIT: memcpy(entity->guid, uvc_processing_guid, 16); break; } entity->num_links = 0; entity->num_pads = num_pads; entity->pads = ((void *)(entity + 1)) + extra_size; for (i = 0; i < num_inputs; ++i) entity->pads[i].flags = MEDIA_PAD_FL_SINK; if (!UVC_ENTITY_IS_OTERM(entity) && num_pads) entity->pads[num_pads-1].flags = MEDIA_PAD_FL_SOURCE; entity->bNrInPins = num_inputs; entity->baSourceID = (u8 *)(&entity->pads[num_pads]); return entity; } static void uvc_entity_set_name(struct uvc_device *dev, struct uvc_entity *entity, const char *type_name, u8 string_id) { int ret; /* * First attempt to read the entity name from the device. If the entity * has no associated string, or if reading the string fails (most * likely due to a buggy firmware), fall back to default names based on * the entity type. */ if (string_id) { ret = usb_string(dev->udev, string_id, entity->name, sizeof(entity->name)); if (!ret) return; } sprintf(entity->name, "%s %u", type_name, entity->id); } /* Parse vendor-specific extensions. */ static int uvc_parse_vendor_control(struct uvc_device *dev, const unsigned char *buffer, int buflen) { struct usb_device *udev = dev->udev; struct usb_host_interface *alts = dev->intf->cur_altsetting; struct uvc_entity *unit; unsigned int n, p; int handled = 0; switch (le16_to_cpu(dev->udev->descriptor.idVendor)) { case 0x046d: /* Logitech */ if (buffer[1] != 0x41 || buffer[2] != 0x01) break; /* * Logitech implements several vendor specific functions * through vendor specific extension units (LXU). 
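* Only descriptors with buffer[1] == 0x41 and buffer[2] == 0x01 reach
* this point (see the check above).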
* * The LXU descriptors are similar to XU descriptors * (see "USB Device Video Class for Video Devices", section * 3.7.2.6 "Extension Unit Descriptor") with the following * differences: * * ---------------------------------------------------------- * 0 bLength 1 Number * Size of this descriptor, in bytes: 24+p+n*2 * ---------------------------------------------------------- * 23+p+n bmControlsType N Bitmap * Individual bits in the set are defined: * 0: Absolute * 1: Relative * * This bitset is mapped exactly the same as bmControls. * ---------------------------------------------------------- * 23+p+n*2 bReserved 1 Boolean * ---------------------------------------------------------- * 24+p+n*2 iExtension 1 Index * Index of a string descriptor that describes this * extension unit. * ---------------------------------------------------------- */ p = buflen >= 22 ? buffer[21] : 0; n = buflen >= 25 + p ? buffer[22+p] : 0; if (buflen < 25 + p + 2*n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d EXTENSION_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); break; } unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3], p + 1, 2*n); if (unit == NULL) return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; memcpy(unit->baSourceID, &buffer[22], p); unit->extension.bControlSize = buffer[22+p]; unit->extension.bmControls = (u8 *)unit + sizeof(*unit); unit->extension.bmControlsType = (u8 *)unit + sizeof(*unit) + n; memcpy(unit->extension.bmControls, &buffer[23+p], 2*n); uvc_entity_set_name(dev, unit, "Extension", buffer[24+p+2*n]); list_add_tail(&unit->list, &dev->entities); handled = 1; break; } return handled; } static int uvc_parse_standard_control(struct uvc_device *dev, const unsigned char *buffer, int buflen) { struct usb_device *udev = dev->udev; struct uvc_entity *unit, *term; struct usb_interface *intf; struct usb_host_interface *alts = dev->intf->cur_altsetting; unsigned int i, n, p, len; const char *type_name; u16 type; switch (buffer[2]) { case UVC_VC_HEADER: n = buflen >= 12 ? buffer[11] : 0; if (buflen < 12 + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d HEADER error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } dev->uvc_version = get_unaligned_le16(&buffer[3]); dev->clock_frequency = get_unaligned_le32(&buffer[7]); /* Parse all USB Video Streaming interfaces. */ for (i = 0; i < n; ++i) { intf = usb_ifnum_to_if(udev, buffer[12+i]); if (intf == NULL) { uvc_dbg(dev, DESCR, "device %d interface %d doesn't exists\n", udev->devnum, i); continue; } uvc_parse_streaming(dev, intf); } break; case UVC_VC_INPUT_TERMINAL: if (buflen < 8) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* * Reject invalid terminal types that would cause issues: * * - The high byte must be non-zero, otherwise it would be * confused with a unit. * * - Bit 15 must be 0, as we use it internally as a terminal * direction flag. * * Other unknown types are accepted. */ type = get_unaligned_le16(&buffer[4]); if ((type & 0x7f00) == 0 || (type & 0x8000) != 0) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL %d has invalid type 0x%04x, skipping\n", udev->devnum, alts->desc.bInterfaceNumber, buffer[3], type); return 0; } n = 0; p = 0; len = 8; if (type == UVC_ITT_CAMERA) { n = buflen >= 15 ? buffer[14] : 0; len = 15; } else if (type == UVC_ITT_MEDIA_TRANSPORT_INPUT) { n = buflen >= 9 ? 
buffer[8] : 0; p = buflen >= 10 + n ? buffer[9+n] : 0; len = 10; } if (buflen < len + n + p) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3], 1, n + p); if (term == NULL) return -ENOMEM; if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; term->camera.bmControls = (u8 *)term + sizeof(*term); term->camera.wObjectiveFocalLengthMin = get_unaligned_le16(&buffer[8]); term->camera.wObjectiveFocalLengthMax = get_unaligned_le16(&buffer[10]); term->camera.wOcularFocalLength = get_unaligned_le16(&buffer[12]); memcpy(term->camera.bmControls, &buffer[15], n); } else if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT) { term->media.bControlSize = n; term->media.bmControls = (u8 *)term + sizeof(*term); term->media.bTransportModeSize = p; term->media.bmTransportModes = (u8 *)term + sizeof(*term) + n; memcpy(term->media.bmControls, &buffer[9], n); memcpy(term->media.bmTransportModes, &buffer[10+n], p); } if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) type_name = "Camera"; else if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT) type_name = "Media"; else type_name = "Input"; uvc_entity_set_name(dev, term, type_name, buffer[7]); list_add_tail(&term->list, &dev->entities); break; case UVC_VC_OUTPUT_TERMINAL: if (buflen < 9) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d OUTPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* * Make sure the terminal type MSB is not null, otherwise it * could be confused with a unit. */ type = get_unaligned_le16(&buffer[4]); if ((type & 0xff00) == 0) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d OUTPUT_TERMINAL %d has invalid type 0x%04x, skipping\n", udev->devnum, alts->desc.bInterfaceNumber, buffer[3], type); return 0; } term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3], 1, 0); if (term == NULL) return -ENOMEM; memcpy(term->baSourceID, &buffer[7], 1); uvc_entity_set_name(dev, term, "Output", buffer[8]); list_add_tail(&term->list, &dev->entities); break; case UVC_VC_SELECTOR_UNIT: p = buflen >= 5 ? buffer[4] : 0; if (buflen < 5 || buflen < 6 + p) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d SELECTOR_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0); if (unit == NULL) return -ENOMEM; memcpy(unit->baSourceID, &buffer[5], p); uvc_entity_set_name(dev, unit, "Selector", buffer[5+p]); list_add_tail(&unit->list, &dev->entities); break; case UVC_VC_PROCESSING_UNIT: n = buflen >= 8 ? buffer[7] : 0; p = dev->uvc_version >= 0x0110 ? 10 : 9; if (buflen < p + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d PROCESSING_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n); if (unit == NULL) return -ENOMEM; memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = get_unaligned_le16(&buffer[5]); unit->processing.bControlSize = buffer[7]; unit->processing.bmControls = (u8 *)unit + sizeof(*unit); memcpy(unit->processing.bmControls, &buffer[8], n); if (dev->uvc_version >= 0x0110) unit->processing.bmVideoStandards = buffer[9+n]; uvc_entity_set_name(dev, unit, "Processing", buffer[8+n]); list_add_tail(&unit->list, &dev->entities); break; case UVC_VC_EXTENSION_UNIT: p = buflen >= 22 ? buffer[21] : 0; n = buflen >= 24 + p ? 
buffer[22+p] : 0; if (buflen < 24 + p + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d EXTENSION_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n); if (unit == NULL) return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; memcpy(unit->baSourceID, &buffer[22], p); unit->extension.bControlSize = buffer[22+p]; unit->extension.bmControls = (u8 *)unit + sizeof(*unit); memcpy(unit->extension.bmControls, &buffer[23+p], n); uvc_entity_set_name(dev, unit, "Extension", buffer[23+p+n]); list_add_tail(&unit->list, &dev->entities); break; default: uvc_dbg(dev, DESCR, "Found an unknown CS_INTERFACE descriptor (%u)\n", buffer[2]); break; } return 0; } static int uvc_parse_control(struct uvc_device *dev) { struct usb_host_interface *alts = dev->intf->cur_altsetting; const unsigned char *buffer = alts->extra; int buflen = alts->extralen; int ret; /* * Parse the default alternate setting only, as the UVC specification * defines a single alternate setting, the default alternate setting * zero. */ while (buflen > 2) { if (uvc_parse_vendor_control(dev, buffer, buflen) || buffer[1] != USB_DT_CS_INTERFACE) goto next_descriptor; ret = uvc_parse_standard_control(dev, buffer, buflen); if (ret < 0) return ret; next_descriptor: buflen -= buffer[0]; buffer += buffer[0]; } /* * Check if the optional status endpoint is present. Built-in iSight * webcams have an interrupt endpoint but spit proprietary data that * don't conform to the UVC status endpoint messages. Don't try to * handle the interrupt endpoint for those cameras. */ if (alts->desc.bNumEndpoints == 1 && !(dev->quirks & UVC_QUIRK_BUILTIN_ISIGHT)) { struct usb_host_endpoint *ep = &alts->endpoint[0]; struct usb_endpoint_descriptor *desc = &ep->desc; if (usb_endpoint_is_int_in(desc) && le16_to_cpu(desc->wMaxPacketSize) >= 8 && desc->bInterval != 0) { uvc_dbg(dev, DESCR, "Found a Status endpoint (addr %02x)\n", desc->bEndpointAddress); dev->int_ep = ep; } } return 0; } /* ----------------------------------------------------------------------------- * Privacy GPIO */ static void uvc_gpio_event(struct uvc_device *dev) { struct uvc_entity *unit = dev->gpio_unit; struct uvc_video_chain *chain; u8 new_val; if (!unit) return; new_val = gpiod_get_value_cansleep(unit->gpio.gpio_privacy); /* GPIO entities are always on the first chain. 
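* The privacy status change is therefore reported through the controls
* of that first chain (see the uvc_ctrl_status_event() call below).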
*/ chain = list_first_entry(&dev->chains, struct uvc_video_chain, list); uvc_ctrl_status_event(chain, unit->controls, &new_val); } static int uvc_gpio_get_cur(struct uvc_device *dev, struct uvc_entity *entity, u8 cs, void *data, u16 size) { if (cs != UVC_CT_PRIVACY_CONTROL || size < 1) return -EINVAL; *(u8 *)data = gpiod_get_value_cansleep(entity->gpio.gpio_privacy); return 0; } static int uvc_gpio_get_info(struct uvc_device *dev, struct uvc_entity *entity, u8 cs, u8 *caps) { if (cs != UVC_CT_PRIVACY_CONTROL) return -EINVAL; *caps = UVC_CONTROL_CAP_GET | UVC_CONTROL_CAP_AUTOUPDATE; return 0; } static irqreturn_t uvc_gpio_irq(int irq, void *data) { struct uvc_device *dev = data; uvc_gpio_event(dev); return IRQ_HANDLED; } static int uvc_gpio_parse(struct uvc_device *dev) { struct uvc_entity *unit; struct gpio_desc *gpio_privacy; int irq; gpio_privacy = devm_gpiod_get_optional(&dev->udev->dev, "privacy", GPIOD_IN); if (IS_ERR_OR_NULL(gpio_privacy)) return PTR_ERR_OR_ZERO(gpio_privacy); irq = gpiod_to_irq(gpio_privacy); if (irq < 0) return dev_err_probe(&dev->udev->dev, irq, "No IRQ for privacy GPIO\n"); unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1); if (!unit) return -ENOMEM; unit->gpio.gpio_privacy = gpio_privacy; unit->gpio.irq = irq; unit->gpio.bControlSize = 1; unit->gpio.bmControls = (u8 *)unit + sizeof(*unit); unit->gpio.bmControls[0] = 1; unit->get_cur = uvc_gpio_get_cur; unit->get_info = uvc_gpio_get_info; strscpy(unit->name, "GPIO", sizeof(unit->name)); list_add_tail(&unit->list, &dev->entities); dev->gpio_unit = unit; return 0; } static int uvc_gpio_init_irq(struct uvc_device *dev) { struct uvc_entity *unit = dev->gpio_unit; if (!unit || unit->gpio.irq < 0) return 0; return devm_request_threaded_irq(&dev->udev->dev, unit->gpio.irq, NULL, uvc_gpio_irq, IRQF_ONESHOT | IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, "uvc_privacy_gpio", dev); } /* ------------------------------------------------------------------------ * UVC device scan */ /* * Scan the UVC descriptors to locate a chain starting at an Output Terminal * and containing the following units: * * - one or more Output Terminals (USB Streaming or Display) * - zero or one Processing Unit * - zero, one or more single-input Selector Units * - zero or one multiple-input Selector Units, provided all inputs are * connected to input terminals * - zero, one or more single-input Extension Units * - one or more Input Terminals (Camera, External or USB Streaming) * * The terminal and units must match one of the following structures: * * ITT_*(0) -> +---------+ +---------+ +---------+ -> TT_STREAMING(0) * ... | SU{0,1} | -> | PU{0,1} | -> | XU{0,n} | ... * ITT_*(n) -> +---------+ +---------+ +---------+ -> TT_STREAMING(n) * * +---------+ +---------+ -> OTT_*(0) * TT_STREAMING -> | PU{0,1} | -> | XU{0,n} | ... * +---------+ +---------+ -> OTT_*(n) * * The Processing Unit and Extension Units can be in any order. Additional * Extension Units connected to the main chain as single-unit branches are * also supported. Single-input Selector Units are ignored.
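*
* A rough, illustrative sketch of the walk implemented by
* uvc_scan_chain() below (simplified, duplicate and error checks
* omitted):
*
*	entity = output_terminal;
*	prev = NULL;
*	while (entity) {
*		uvc_scan_chain_entity(chain, entity);		(record it)
*		uvc_scan_chain_forward(chain, entity, prev);	(side branches)
*		prev = entity;
*		uvc_scan_chain_backward(chain, &entity);	(step to inputs)
*	}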
*/ static int uvc_scan_chain_entity(struct uvc_video_chain *chain, struct uvc_entity *entity) { switch (UVC_ENTITY_TYPE(entity)) { case UVC_VC_EXTENSION_UNIT: uvc_dbg_cont(PROBE, " <- XU %d", entity->id); if (entity->bNrInPins != 1) { uvc_dbg(chain->dev, DESCR, "Extension unit %d has more than 1 input pin\n", entity->id); return -1; } break; case UVC_VC_PROCESSING_UNIT: uvc_dbg_cont(PROBE, " <- PU %d", entity->id); if (chain->processing != NULL) { uvc_dbg(chain->dev, DESCR, "Found multiple Processing Units in chain\n"); return -1; } chain->processing = entity; break; case UVC_VC_SELECTOR_UNIT: uvc_dbg_cont(PROBE, " <- SU %d", entity->id); /* Single-input selector units are ignored. */ if (entity->bNrInPins == 1) break; if (chain->selector != NULL) { uvc_dbg(chain->dev, DESCR, "Found multiple Selector Units in chain\n"); return -1; } chain->selector = entity; break; case UVC_ITT_VENDOR_SPECIFIC: case UVC_ITT_CAMERA: case UVC_ITT_MEDIA_TRANSPORT_INPUT: uvc_dbg_cont(PROBE, " <- IT %d\n", entity->id); break; case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: uvc_dbg_cont(PROBE, " OT %d", entity->id); break; case UVC_TT_STREAMING: if (UVC_ENTITY_IS_ITERM(entity)) uvc_dbg_cont(PROBE, " <- IT %d\n", entity->id); else uvc_dbg_cont(PROBE, " OT %d", entity->id); break; default: uvc_dbg(chain->dev, DESCR, "Unsupported entity type 0x%04x found in chain\n", UVC_ENTITY_TYPE(entity)); return -1; } list_add_tail(&entity->chain, &chain->entities); return 0; } static int uvc_scan_chain_forward(struct uvc_video_chain *chain, struct uvc_entity *entity, struct uvc_entity *prev) { struct uvc_entity *forward; int found; /* Forward scan */ forward = NULL; found = 0; while (1) { forward = uvc_entity_by_reference(chain->dev, entity->id, forward); if (forward == NULL) break; if (forward == prev) continue; if (forward->chain.next || forward->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", forward->id); return -EINVAL; } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: if (forward->bNrInPins != 1) { uvc_dbg(chain->dev, DESCR, "Extension unit %d has more than 1 input pin\n", forward->id); return -EINVAL; } /* * Some devices reference an output terminal as the * source of extension units. This is incorrect, as * output terminals only have an input pin, and thus * can't be connected to any entity in the forward * direction. The resulting topology would cause issues * when registering the media controller graph. To * avoid this problem, connect the extension unit to * the source of the output terminal instead. 
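* In practice the extension unit is re-parented just below: its
* baSourceID[0] is rewritten to point to the entity that feeds the
* output terminal.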
*/ if (UVC_ENTITY_IS_OTERM(entity)) { struct uvc_entity *source; source = uvc_entity_by_id(chain->dev, entity->baSourceID[0]); if (!source) { uvc_dbg(chain->dev, DESCR, "Can't connect extension unit %u in chain\n", forward->id); break; } forward->baSourceID[0] = source->id; } list_add_tail(&forward->chain, &chain->entities); if (!found) uvc_dbg_cont(PROBE, " (->"); uvc_dbg_cont(PROBE, " XU %d", forward->id); found = 1; break; case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: case UVC_TT_STREAMING: if (UVC_ENTITY_IS_ITERM(forward)) { uvc_dbg(chain->dev, DESCR, "Unsupported input terminal %u\n", forward->id); return -EINVAL; } if (UVC_ENTITY_IS_OTERM(entity)) { uvc_dbg(chain->dev, DESCR, "Unsupported connection between output terminals %u and %u\n", entity->id, forward->id); break; } list_add_tail(&forward->chain, &chain->entities); if (!found) uvc_dbg_cont(PROBE, " (->"); uvc_dbg_cont(PROBE, " OT %d", forward->id); found = 1; break; } } if (found) uvc_dbg_cont(PROBE, ")"); return 0; } static int uvc_scan_chain_backward(struct uvc_video_chain *chain, struct uvc_entity **_entity) { struct uvc_entity *entity = *_entity; struct uvc_entity *term; int id = -EINVAL, i; switch (UVC_ENTITY_TYPE(entity)) { case UVC_VC_EXTENSION_UNIT: case UVC_VC_PROCESSING_UNIT: id = entity->baSourceID[0]; break; case UVC_VC_SELECTOR_UNIT: /* Single-input selector units are ignored. */ if (entity->bNrInPins == 1) { id = entity->baSourceID[0]; break; } uvc_dbg_cont(PROBE, " <- IT"); chain->selector = entity; for (i = 0; i < entity->bNrInPins; ++i) { id = entity->baSourceID[i]; term = uvc_entity_by_id(chain->dev, id); if (term == NULL || !UVC_ENTITY_IS_ITERM(term)) { uvc_dbg(chain->dev, DESCR, "Selector unit %d input %d isn't connected to an input terminal\n", entity->id, i); return -1; } if (term->chain.next || term->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", term->id); return -EINVAL; } uvc_dbg_cont(PROBE, " %d", term->id); list_add_tail(&term->chain, &chain->entities); uvc_scan_chain_forward(chain, term, entity); } uvc_dbg_cont(PROBE, "\n"); id = 0; break; case UVC_ITT_VENDOR_SPECIFIC: case UVC_ITT_CAMERA: case UVC_ITT_MEDIA_TRANSPORT_INPUT: case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: case UVC_TT_STREAMING: id = UVC_ENTITY_IS_OTERM(entity) ? 
entity->baSourceID[0] : 0; break; } if (id <= 0) { *_entity = NULL; return id; } entity = uvc_entity_by_id(chain->dev, id); if (entity == NULL) { uvc_dbg(chain->dev, DESCR, "Found reference to unknown entity %d\n", id); return -EINVAL; } *_entity = entity; return 0; } static int uvc_scan_chain(struct uvc_video_chain *chain, struct uvc_entity *term) { struct uvc_entity *entity, *prev; uvc_dbg(chain->dev, PROBE, "Scanning UVC chain:"); entity = term; prev = NULL; while (entity != NULL) { /* Entity must not be part of an existing chain */ if (entity->chain.next || entity->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", entity->id); return -EINVAL; } /* Process entity */ if (uvc_scan_chain_entity(chain, entity) < 0) return -EINVAL; /* Forward scan */ if (uvc_scan_chain_forward(chain, entity, prev) < 0) return -EINVAL; /* Backward scan */ prev = entity; if (uvc_scan_chain_backward(chain, &entity) < 0) return -EINVAL; } return 0; } static unsigned int uvc_print_terms(struct list_head *terms, u16 dir, char *buffer) { struct uvc_entity *term; unsigned int nterms = 0; char *p = buffer; list_for_each_entry(term, terms, chain) { if (!UVC_ENTITY_IS_TERM(term) || UVC_TERM_DIRECTION(term) != dir) continue; if (nterms) p += sprintf(p, ","); if (++nterms >= 4) { p += sprintf(p, "..."); break; } p += sprintf(p, "%u", term->id); } return p - buffer; } static const char *uvc_print_chain(struct uvc_video_chain *chain) { static char buffer[43]; char *p = buffer; p += uvc_print_terms(&chain->entities, UVC_TERM_INPUT, p); p += sprintf(p, " -> "); uvc_print_terms(&chain->entities, UVC_TERM_OUTPUT, p); return buffer; } static struct uvc_video_chain *uvc_alloc_chain(struct uvc_device *dev) { struct uvc_video_chain *chain; chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) return NULL; INIT_LIST_HEAD(&chain->entities); mutex_init(&chain->ctrl_mutex); chain->dev = dev; v4l2_prio_init(&chain->prio); return chain; } /* * Fallback heuristic for devices that don't connect units and terminals in a * valid chain. * * Some devices have invalid baSourceID references, causing uvc_scan_chain() * to fail, but if we just take the entities we can find and put them together * in the most sensible chain we can think of, turns out they do work anyway. * Note: This heuristic assumes there is a single chain. * * At the time of writing, devices known to have such a broken chain are * - Acer Integrated Camera (5986:055a) * - Realtek rtl157a7 (0bda:57a7) */ static int uvc_scan_fallback(struct uvc_device *dev) { struct uvc_video_chain *chain; struct uvc_entity *iterm = NULL; struct uvc_entity *oterm = NULL; struct uvc_entity *entity; struct uvc_entity *prev; /* * Start by locating the input and output terminals. We only support * devices with exactly one of each for now. */ list_for_each_entry(entity, &dev->entities, list) { if (UVC_ENTITY_IS_ITERM(entity)) { if (iterm) return -EINVAL; iterm = entity; } if (UVC_ENTITY_IS_OTERM(entity)) { if (oterm) return -EINVAL; oterm = entity; } } if (iterm == NULL || oterm == NULL) return -EINVAL; /* Allocate the chain and fill it. */ chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; if (uvc_scan_chain_entity(chain, oterm) < 0) goto error; prev = oterm; /* * Add all Processing and Extension Units with two pads. The order * doesn't matter much, use reverse list traversal to connect units in * UVC descriptor order as we build the chain from output to input. 
This * leads to units appearing in the order meant by the manufacturer for * the cameras known to require this heuristic. */ list_for_each_entry_reverse(entity, &dev->entities, list) { if (entity->type != UVC_VC_PROCESSING_UNIT && entity->type != UVC_VC_EXTENSION_UNIT) continue; if (entity->num_pads != 2) continue; if (uvc_scan_chain_entity(chain, entity) < 0) goto error; prev->baSourceID[0] = entity->id; prev = entity; } if (uvc_scan_chain_entity(chain, iterm) < 0) goto error; prev->baSourceID[0] = iterm->id; list_add_tail(&chain->list, &dev->chains); uvc_dbg(dev, PROBE, "Found a video chain by fallback heuristic (%s)\n", uvc_print_chain(chain)); return 0; error: kfree(chain); return -EINVAL; } /* * Scan the device for video chains and register video devices. * * Chains are scanned starting at their output terminals and walked backwards. */ static int uvc_scan_device(struct uvc_device *dev) { struct uvc_video_chain *chain; struct uvc_entity *term; list_for_each_entry(term, &dev->entities, list) { if (!UVC_ENTITY_IS_OTERM(term)) continue; /* * If the terminal is already included in a chain, skip it. * This can happen for chains that have multiple output * terminals, where all output terminals beside the first one * will be inserted in the chain in forward scans. */ if (term->chain.next || term->chain.prev) continue; chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; term->flags |= UVC_ENTITY_FLAG_DEFAULT; if (uvc_scan_chain(chain, term) < 0) { kfree(chain); continue; } uvc_dbg(dev, PROBE, "Found a valid video chain (%s)\n", uvc_print_chain(chain)); list_add_tail(&chain->list, &dev->chains); } if (list_empty(&dev->chains)) uvc_scan_fallback(dev); if (list_empty(&dev->chains)) { dev_info(&dev->udev->dev, "No valid video chain found.\n"); return -1; } /* Add GPIO entity to the first chain. */ if (dev->gpio_unit) { chain = list_first_entry(&dev->chains, struct uvc_video_chain, list); list_add_tail(&dev->gpio_unit->chain, &chain->entities); } return 0; } /* ------------------------------------------------------------------------ * Video device registration and unregistration */ /* * Delete the UVC device. * * Called by the kernel when the last reference to the uvc_device structure * is released. * * As this function is called after or during disconnect(), all URBs have * already been cancelled by the USB core. There is no need to kill the * interrupt URB manually. */ static void uvc_delete(struct kref *kref) { struct uvc_device *dev = container_of(kref, struct uvc_device, ref); struct list_head *p, *n; uvc_status_cleanup(dev); uvc_ctrl_cleanup_device(dev); usb_put_intf(dev->intf); usb_put_dev(dev->udev); #ifdef CONFIG_MEDIA_CONTROLLER media_device_cleanup(&dev->mdev); #endif list_for_each_safe(p, n, &dev->chains) { struct uvc_video_chain *chain; chain = list_entry(p, struct uvc_video_chain, list); kfree(chain); } list_for_each_safe(p, n, &dev->entities) { struct uvc_entity *entity; entity = list_entry(p, struct uvc_entity, list); #ifdef CONFIG_MEDIA_CONTROLLER uvc_mc_cleanup_entity(entity); #endif kfree(entity); } list_for_each_safe(p, n, &dev->streams) { struct uvc_streaming *streaming; streaming = list_entry(p, struct uvc_streaming, list); usb_driver_release_interface(&uvc_driver.driver, streaming->intf); uvc_stream_delete(streaming); } kfree(dev); } static void uvc_release(struct video_device *vdev) { struct uvc_streaming *stream = video_get_drvdata(vdev); struct uvc_device *dev = stream->dev; kref_put(&dev->ref, uvc_delete); } /* * Unregister the video devices. 
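* Every registered stream has both its video node and its metadata node
* unregistered and its debugfs entries removed, after which the status
* endpoint and, if registered, the V4L2 and media devices are torn down.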
*/ static void uvc_unregister_video(struct uvc_device *dev) { struct uvc_streaming *stream; list_for_each_entry(stream, &dev->streams, list) { if (!video_is_registered(&stream->vdev)) continue; video_unregister_device(&stream->vdev); video_unregister_device(&stream->meta.vdev); uvc_debugfs_cleanup_stream(stream); } uvc_status_unregister(dev); if (dev->vdev.dev) v4l2_device_unregister(&dev->vdev); #ifdef CONFIG_MEDIA_CONTROLLER if (media_devnode_is_registered(dev->mdev.devnode)) media_device_unregister(&dev->mdev); #endif } int uvc_register_video_device(struct uvc_device *dev, struct uvc_streaming *stream, struct video_device *vdev, struct uvc_video_queue *queue, enum v4l2_buf_type type, const struct v4l2_file_operations *fops, const struct v4l2_ioctl_ops *ioctl_ops) { int ret; /* Initialize the video buffers queue. */ ret = uvc_queue_init(queue, type, !uvc_no_drop_param); if (ret) return ret; /* Register the device with V4L. */ /* * We already hold a reference to dev->udev. The video device will be * unregistered before the reference is released, so we don't need to * get another one. */ vdev->v4l2_dev = &dev->vdev; vdev->fops = fops; vdev->ioctl_ops = ioctl_ops; vdev->release = uvc_release; vdev->prio = &stream->chain->prio; if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT) vdev->vfl_dir = VFL_DIR_TX; else vdev->vfl_dir = VFL_DIR_RX; switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: default: vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING; break; case V4L2_BUF_TYPE_VIDEO_OUTPUT: vdev->device_caps = V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_STREAMING; break; case V4L2_BUF_TYPE_META_CAPTURE: vdev->device_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_STREAMING; break; } strscpy(vdev->name, dev->name, sizeof(vdev->name)); /* * Set the driver data before calling video_register_device, otherwise * the file open() handler might race us. */ video_set_drvdata(vdev, stream); ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret < 0) { dev_err(&stream->intf->dev, "Failed to register %s device (%d).\n", v4l2_type_names[type], ret); return ret; } kref_get(&dev->ref); return 0; } static int uvc_register_video(struct uvc_device *dev, struct uvc_streaming *stream) { int ret; /* Initialize the streaming interface with default parameters. */ ret = uvc_video_init(stream); if (ret < 0) { dev_err(&stream->intf->dev, "Failed to initialize the device (%d).\n", ret); return ret; } if (stream->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) stream->chain->caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE; else stream->chain->caps |= V4L2_CAP_VIDEO_OUTPUT; uvc_debugfs_init_stream(stream); /* Register the device with V4L. */ return uvc_register_video_device(dev, stream, &stream->vdev, &stream->queue, stream->type, &uvc_fops, &uvc_ioctl_ops); } /* * Register all video devices in all chains. */ static int uvc_register_terms(struct uvc_device *dev, struct uvc_video_chain *chain) { struct uvc_streaming *stream; struct uvc_entity *term; int ret; list_for_each_entry(term, &chain->entities, chain) { if (UVC_ENTITY_TYPE(term) != UVC_TT_STREAMING) continue; stream = uvc_stream_by_id(dev, term->id); if (stream == NULL) { dev_info(&dev->udev->dev, "No streaming interface found for terminal %u.", term->id); continue; } stream->chain = chain; ret = uvc_register_video(dev, stream); if (ret < 0) return ret; /* * Register a metadata node, but ignore a possible failure, * complete registration of video nodes anyway. 
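* The metadata node only carries optional per-buffer metadata, so the
* corresponding video node remains fully usable without it.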
*/ uvc_meta_register(stream); term->vdev = &stream->vdev; } return 0; } static int uvc_register_chains(struct uvc_device *dev) { struct uvc_video_chain *chain; int ret; list_for_each_entry(chain, &dev->chains, list) { ret = uvc_register_terms(dev, chain); if (ret < 0) return ret; #ifdef CONFIG_MEDIA_CONTROLLER ret = uvc_mc_register_entities(chain); if (ret < 0) dev_info(&dev->udev->dev, "Failed to register entities (%d).\n", ret); #endif } return 0; } /* ------------------------------------------------------------------------ * USB probe, disconnect, suspend and resume */ static const struct uvc_device_info uvc_quirk_none = { 0 }; static int uvc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct uvc_device *dev; const struct uvc_device_info *info = (const struct uvc_device_info *)id->driver_info; int function; int ret; /* Allocate memory for the device and initialize it. */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; INIT_LIST_HEAD(&dev->entities); INIT_LIST_HEAD(&dev->chains); INIT_LIST_HEAD(&dev->streams); kref_init(&dev->ref); atomic_set(&dev->nmappings, 0); mutex_init(&dev->lock); dev->udev = usb_get_dev(udev); dev->intf = usb_get_intf(intf); dev->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; dev->info = info ? info : &uvc_quirk_none; dev->quirks = uvc_quirks_param == -1 ? dev->info->quirks : uvc_quirks_param; if (id->idVendor && id->idProduct) uvc_dbg(dev, PROBE, "Probing known UVC device %s (%04x:%04x)\n", udev->devpath, id->idVendor, id->idProduct); else uvc_dbg(dev, PROBE, "Probing generic UVC device %s\n", udev->devpath); if (udev->product != NULL) strscpy(dev->name, udev->product, sizeof(dev->name)); else snprintf(dev->name, sizeof(dev->name), "UVC Camera (%04x:%04x)", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); /* * Add iFunction or iInterface to names when available as additional * distinguishers between interfaces. iFunction is prioritized over * iInterface which matches Windows behavior at the point of writing. */ if (intf->intf_assoc && intf->intf_assoc->iFunction != 0) function = intf->intf_assoc->iFunction; else function = intf->cur_altsetting->desc.iInterface; if (function != 0) { size_t len; strlcat(dev->name, ": ", sizeof(dev->name)); len = strlen(dev->name); usb_string(udev, function, dev->name + len, sizeof(dev->name) - len); } /* Initialize the media device. */ #ifdef CONFIG_MEDIA_CONTROLLER dev->mdev.dev = &intf->dev; strscpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); if (udev->serial) strscpy(dev->mdev.serial, udev->serial, sizeof(dev->mdev.serial)); usb_make_path(udev, dev->mdev.bus_info, sizeof(dev->mdev.bus_info)); dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); media_device_init(&dev->mdev); dev->vdev.mdev = &dev->mdev; #endif /* Parse the Video Class control descriptor. */ if (uvc_parse_control(dev) < 0) { uvc_dbg(dev, PROBE, "Unable to parse UVC descriptors\n"); goto error; } /* Parse the associated GPIOs. */ if (uvc_gpio_parse(dev) < 0) { uvc_dbg(dev, PROBE, "Unable to parse UVC GPIOs\n"); goto error; } dev_info(&dev->udev->dev, "Found UVC %u.%02x device %s (%04x:%04x)\n", dev->uvc_version >> 8, dev->uvc_version & 0xff, udev->product ? 
udev->product : "<unnamed>", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); if (dev->quirks != dev->info->quirks) { dev_info(&dev->udev->dev, "Forcing device quirks to 0x%x by module parameter for testing purpose.\n", dev->quirks); dev_info(&dev->udev->dev, "Please report required quirks to the linux-media mailing list.\n"); } if (dev->info->uvc_version) { dev->uvc_version = dev->info->uvc_version; dev_info(&dev->udev->dev, "Forcing UVC version to %u.%02x\n", dev->uvc_version >> 8, dev->uvc_version & 0xff); } /* Register the V4L2 device. */ if (v4l2_device_register(&intf->dev, &dev->vdev) < 0) goto error; /* Scan the device for video chains. */ if (uvc_scan_device(dev) < 0) goto error; /* Initialize controls. */ if (uvc_ctrl_init_device(dev) < 0) goto error; /* Register video device nodes. */ if (uvc_register_chains(dev) < 0) goto error; #ifdef CONFIG_MEDIA_CONTROLLER /* Register the media device node */ if (media_device_register(&dev->mdev) < 0) goto error; #endif /* Save our data pointer in the interface data. */ usb_set_intfdata(intf, dev); /* Initialize the interrupt URB. */ ret = uvc_status_init(dev); if (ret < 0) { dev_info(&dev->udev->dev, "Unable to initialize the status endpoint (%d), status interrupt will not be supported.\n", ret); } ret = uvc_gpio_init_irq(dev); if (ret < 0) { dev_err(&dev->udev->dev, "Unable to request privacy GPIO IRQ (%d)\n", ret); goto error; } if (dev->quirks & UVC_QUIRK_NO_RESET_RESUME) udev->quirks &= ~USB_QUIRK_RESET_RESUME; if (!(dev->quirks & UVC_QUIRK_DISABLE_AUTOSUSPEND)) usb_enable_autosuspend(udev); uvc_dbg(dev, PROBE, "UVC device initialized\n"); return 0; error: uvc_unregister_video(dev); kref_put(&dev->ref, uvc_delete); return -ENODEV; } static void uvc_disconnect(struct usb_interface *intf) { struct uvc_device *dev = usb_get_intfdata(intf); /* * Set the USB interface data to NULL. This can be done outside the * lock, as there's no other reader. */ usb_set_intfdata(intf, NULL); if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOSTREAMING) return; uvc_unregister_video(dev); kref_put(&dev->ref, uvc_delete); } static int uvc_suspend(struct usb_interface *intf, pm_message_t message) { struct uvc_device *dev = usb_get_intfdata(intf); struct uvc_streaming *stream; uvc_dbg(dev, SUSPEND, "Suspending interface %u\n", intf->cur_altsetting->desc.bInterfaceNumber); /* Controls are cached on the fly so they don't need to be saved. 
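* Suspending the video control interface only requires stopping the
* status interrupt endpoint while the device is in use; every video
* streaming interface is suspended separately through
* uvc_video_suspend().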
*/ if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOCONTROL) { mutex_lock(&dev->lock); if (dev->users) uvc_status_stop(dev); mutex_unlock(&dev->lock); return 0; } list_for_each_entry(stream, &dev->streams, list) { if (stream->intf == intf) return uvc_video_suspend(stream); } uvc_dbg(dev, SUSPEND, "Suspend: video streaming USB interface mismatch\n"); return -EINVAL; } static int __uvc_resume(struct usb_interface *intf, int reset) { struct uvc_device *dev = usb_get_intfdata(intf); struct uvc_streaming *stream; int ret = 0; uvc_dbg(dev, SUSPEND, "Resuming interface %u\n", intf->cur_altsetting->desc.bInterfaceNumber); if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOCONTROL) { if (reset) { ret = uvc_ctrl_restore_values(dev); if (ret < 0) return ret; } mutex_lock(&dev->lock); if (dev->users) ret = uvc_status_start(dev, GFP_NOIO); mutex_unlock(&dev->lock); return ret; } list_for_each_entry(stream, &dev->streams, list) { if (stream->intf == intf) { ret = uvc_video_resume(stream, reset); if (ret < 0) uvc_queue_streamoff(&stream->queue, stream->queue.queue.type); return ret; } } uvc_dbg(dev, SUSPEND, "Resume: video streaming USB interface mismatch\n"); return -EINVAL; } static int uvc_resume(struct usb_interface *intf) { return __uvc_resume(intf, 0); } static int uvc_reset_resume(struct usb_interface *intf) { return __uvc_resume(intf, 1); } /* ------------------------------------------------------------------------ * Module parameters */ static int uvc_clock_param_get(char *buffer, const struct kernel_param *kp) { if (uvc_clock_param == CLOCK_MONOTONIC) return sprintf(buffer, "CLOCK_MONOTONIC"); else return sprintf(buffer, "CLOCK_REALTIME"); } static int uvc_clock_param_set(const char *val, const struct kernel_param *kp) { if (strncasecmp(val, "clock_", strlen("clock_")) == 0) val += strlen("clock_"); if (strcasecmp(val, "monotonic") == 0) uvc_clock_param = CLOCK_MONOTONIC; else if (strcasecmp(val, "realtime") == 0) uvc_clock_param = CLOCK_REALTIME; else return -EINVAL; return 0; } module_param_call(clock, uvc_clock_param_set, uvc_clock_param_get, &uvc_clock_param, 0644); MODULE_PARM_DESC(clock, "Video buffers timestamp clock"); module_param_named(hwtimestamps, uvc_hw_timestamps_param, uint, 0644); MODULE_PARM_DESC(hwtimestamps, "Use hardware timestamps"); module_param_named(nodrop, uvc_no_drop_param, uint, 0644); MODULE_PARM_DESC(nodrop, "Don't drop incomplete frames"); module_param_named(quirks, uvc_quirks_param, uint, 0644); MODULE_PARM_DESC(quirks, "Forced device quirks"); module_param_named(trace, uvc_dbg_param, uint, 0644); MODULE_PARM_DESC(trace, "Trace level bitmask"); module_param_named(timeout, uvc_timeout_param, uint, 0644); MODULE_PARM_DESC(timeout, "Streaming control requests timeout"); /* ------------------------------------------------------------------------ * Driver initialization and cleanup */ static const struct uvc_device_info uvc_quirk_probe_minmax = { .quirks = UVC_QUIRK_PROBE_MINMAX, }; static const struct uvc_device_info uvc_quirk_fix_bandwidth = { .quirks = UVC_QUIRK_FIX_BANDWIDTH, }; static const struct uvc_device_info uvc_quirk_probe_def = { .quirks = UVC_QUIRK_PROBE_DEF, }; static const struct uvc_device_info uvc_quirk_stream_no_fid = { .quirks = UVC_QUIRK_STREAM_NO_FID, }; static const struct uvc_device_info uvc_quirk_force_y8 = { .quirks = UVC_QUIRK_FORCE_Y8, }; #define UVC_INFO_QUIRK(q) (kernel_ulong_t)&(struct uvc_device_info){.quirks = q} #define UVC_INFO_META(m) (kernel_ulong_t)&(struct uvc_device_info) \ {.meta_format = m} /* * The 
Logitech cameras listed below have their interface class set to * VENDOR_SPEC because they don't announce themselves as UVC devices, even * though they are compliant. */ static const struct usb_device_id uvc_ids[] = { /* Quanta ACER HD User Facing */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0408, .idProduct = 0x4035, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ .uvc_version = 0x010a, } }, /* LogiLink Wireless Webcam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0416, .idProduct = 0xa91a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Genius eFace 2025 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0458, .idProduct = 0x706e, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Microsoft Lifecam NX-6000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x00f8, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Microsoft Lifecam NX-3000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x0721, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Microsoft Lifecam VX-7000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x0723, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Logitech, Webcam C910 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x0821, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)}, /* Logitech, Webcam B910 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x0823, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)}, /* Logitech Quickcam Fusion */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c1, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Orbit MP */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c2, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Pro for Notebook */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c3, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Pro 5000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c5, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, 
.bInterfaceProtocol = 0 }, /* Logitech Quickcam OEM Dell Notebook */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c6, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam OEM Cisco VT Camera II */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c7, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech HD Pro Webcam C920 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x082d, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_RESTORE_CTRLS_ON_INIT | UVC_QUIRK_INVALID_DEVICE_SOF) }, /* Logitech HD Pro Webcam C922 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x085c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_INVALID_DEVICE_SOF) }, /* Logitech Rally Bar Huddle */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x087c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Logitech Rally Bar */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x089b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Logitech Rally Bar Mini */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08d3, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Chicony CNF7129 (Asus EEE 100HE) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x04f2, .idProduct = 0xb071, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_RESTRICT_FRAME_RATE) }, /* Alcor Micro AU3820 (Future Boy PC USB Webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x058f, .idProduct = 0x3820, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Dell XPS m1530 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2640, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell SP2008WFP Monitor */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2641, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell Alienware X51 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2643, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell Studio Hybrid 140g (OmniVision 
webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x264a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell XPS M1330 (OmniVision OV7670 webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x7670, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Apple Built-In iSight */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8501, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_BUILTIN_ISIGHT) }, /* Apple FaceTime HD Camera (Built-In) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8514, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Apple Built-In iSight via iBridge */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8600, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Foxlink ("HP Webcam" on HP Mini 5103) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05c8, .idProduct = 0x0403, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* Genesys Logic USB 2.0 PC Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05e3, .idProduct = 0x0505, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Hercules Classic Silver */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x06f8, .idProduct = 0x300c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro Vega */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x332d, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro - Minoru3D */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x3410, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro Venus - Minoru3D */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x3420, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* Ophir Optronics - SPCAM 620U */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0bd3, .idProduct = 0x0555, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* MT6227 
*/ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0e8d, .idProduct = 0x0004, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_PROBE_DEF) }, /* IMC Networks (Medion Akoya) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x13d3, .idProduct = 0x5103, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* JMicron USB2.0 XGA WebCam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x152d, .idProduct = 0x0310, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Syntek (HP Spartan) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x5212, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Samsung Q310) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x5931, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Packard Bell EasyNote MX52 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a12, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Asus F9SG) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a31, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Asus U3S) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a33, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (JAOtech Smart Terminal) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a34, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Miricle 307K */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x17dc, .idProduct = 0x0202, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Lenovo Thinkpad SL400/SL500 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x17ef, .idProduct = 0x480b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Aveo Technology USB 2.0 Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1871, .idProduct = 0x0306, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_PROBE_EXTRAFIELDS) }, /* Aveo Technology USB 
2.0 Camera (Tasco USB Microscope) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1871, .idProduct = 0x0516, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Ecamm Pico iMage */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18cd, .idProduct = 0xcafe, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_EXTRAFIELDS) }, /* Manta MM-353 Plako */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3188, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* FSC WebCam V30S */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3288, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Arkmicro unbranded */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3290, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* The Imaging Source USB CCD cameras */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x199e, .idProduct = 0x8102, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Bodelin ProScopeHR */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_DEV_HI | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x19ab, .idProduct = 0x1000, .bcdDevice_hi = 0x0126, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_STATUS_INTERVAL) }, /* MSI StarCam 370i */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1b3b, .idProduct = 0x2951, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Generalplus Technology Inc. 
808 Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1b3f, .idProduct = 0x2002, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Shenzhen Aoni Electronic Co.,Ltd 2K FHD camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1bcf, .idProduct = 0x0b40, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ .uvc_version = 0x010a, } }, /* SiGma Micro USB Web Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1c4f, .idProduct = 0x3000, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_IGNORE_SELECTOR_UNIT) }, /* Oculus VR Positional Tracker DK2 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2833, .idProduct = 0x0201, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_force_y8 }, /* Oculus VR Rift Sensor */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2833, .idProduct = 0x0211, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_force_y8 }, /* GEO Semiconductor GC6500 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x29fe, .idProduct = 0x4d53, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_FORCE_BPP) }, /* Insta360 Link */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2e1a, .idProduct = 0x4c01, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_DISABLE_AUTOSUSPEND) }, /* Intel D410/ASR depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad2, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D415/ASRC depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad3, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D430/AWG depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad4, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel RealSense D4M */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b03, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D435/AWGC depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b07, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* 
Intel D435i depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b3a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D405 Depth Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b5b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D455 Depth Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b5c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Generic USB Video Class */ { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, UVC_PC_PROTOCOL_UNDEFINED) }, { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, UVC_PC_PROTOCOL_15) }, {} }; MODULE_DEVICE_TABLE(usb, uvc_ids); struct uvc_driver uvc_driver = { .driver = { .name = "uvcvideo", .probe = uvc_probe, .disconnect = uvc_disconnect, .suspend = uvc_suspend, .resume = uvc_resume, .reset_resume = uvc_reset_resume, .id_table = uvc_ids, .supports_autosuspend = 1, }, }; static int __init uvc_init(void) { int ret; uvc_debugfs_init(); ret = usb_register(&uvc_driver.driver); if (ret < 0) { uvc_debugfs_cleanup(); return ret; } return 0; } static void __exit uvc_cleanup(void) { usb_deregister(&uvc_driver.driver); uvc_debugfs_cleanup(); } module_init(uvc_init); module_exit(uvc_cleanup); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); |
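/*
 * Illustrative sketch only, not part of the driver: a new quirk entry for a
 * hypothetical camera would follow the same pattern as the device table
 * above. The vendor and product IDs below are placeholders, not a real
 * device.
 */
#if 0	/* example entry, not compiled */
/* Example Vendor FooCam (hypothetical) */
{ .match_flags		= USB_DEVICE_ID_MATCH_DEVICE
			| USB_DEVICE_ID_MATCH_INT_INFO,
  .idVendor		= 0x1234,	/* placeholder vendor ID */
  .idProduct		= 0xabcd,	/* placeholder product ID */
  .bInterfaceClass	= USB_CLASS_VIDEO,
  .bInterfaceSubClass	= 1,
  .bInterfaceProtocol	= 0,
  .driver_info		= UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX) },
#endif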
737 26 932 6 6 1114 60 1085 399 356 5 41 663 288 521 507 29 29 518 351 3 194 194 281 97 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/l3mdev/l3mdev.c - L3 master device implementation * Copyright (c) 2015 Cumulus Networks * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> */ #include <linux/netdevice.h> #include <net/fib_rules.h> #include <net/l3mdev.h> static DEFINE_SPINLOCK(l3mdev_lock); struct l3mdev_handler { lookup_by_table_id_t dev_lookup; }; static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1]; static int l3mdev_check_type(enum l3mdev_type l3type) { if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX) return -EINVAL; return 0; } int l3mdev_table_lookup_register(enum l3mdev_type l3type, lookup_by_table_id_t fn) { struct l3mdev_handler *hdlr; int res; res = l3mdev_check_type(l3type); if (res) return res; hdlr = &l3mdev_handlers[l3type]; spin_lock(&l3mdev_lock); if (hdlr->dev_lookup) { res = -EBUSY; goto unlock; } hdlr->dev_lookup = fn; res = 0; unlock: spin_unlock(&l3mdev_lock); return res; } EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register); void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, lookup_by_table_id_t fn) { struct l3mdev_handler *hdlr; if (l3mdev_check_type(l3type)) return; hdlr = &l3mdev_handlers[l3type]; spin_lock(&l3mdev_lock); if (hdlr->dev_lookup == fn) hdlr->dev_lookup = NULL; spin_unlock(&l3mdev_lock); } EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister); int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, u32 table_id) { lookup_by_table_id_t lookup; struct l3mdev_handler *hdlr; int ifindex = -EINVAL; int res; res = l3mdev_check_type(l3type); if (res) return res; hdlr = &l3mdev_handlers[l3type]; spin_lock(&l3mdev_lock); lookup = hdlr->dev_lookup; if (!lookup) goto unlock; ifindex = lookup(net, table_id); unlock: spin_unlock(&l3mdev_lock); return ifindex; } EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id); /** * l3mdev_master_ifindex_rcu - get index of L3 master device * @dev: targeted interface */ int l3mdev_master_ifindex_rcu(const struct net_device *dev) { int ifindex = 0; if (!dev) return 0; if (netif_is_l3_master(dev)) { ifindex = dev->ifindex; } else if (netif_is_l3_slave(dev)) { struct net_device *master; struct net_device *_dev = (struct net_device *)dev; /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. * list_first_or_null_rcu does not handle a const arg. 
We aren't * making changes, just want the master device from that list so * typecast to remove the const */ master = netdev_master_upper_dev_get_rcu(_dev); if (master) ifindex = master->ifindex; } return ifindex; } EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); /** * l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master * device * @net: network namespace for device index lookup * @ifindex: targeted interface */ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) { struct net_device *dev; dev = dev_get_by_index_rcu(net, ifindex); while (dev && !netif_is_l3_master(dev)) dev = netdev_master_upper_dev_get_rcu(dev); return dev ? dev->ifindex : 0; } EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu); /** * l3mdev_fib_table_rcu - get FIB table id associated with an L3 * master interface * @dev: targeted interface */ u32 l3mdev_fib_table_rcu(const struct net_device *dev) { u32 tb_id = 0; if (!dev) return 0; if (netif_is_l3_master(dev)) { if (dev->l3mdev_ops->l3mdev_fib_table) tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); } else if (netif_is_l3_slave(dev)) { /* Users of netdev_master_upper_dev_get_rcu need non-const, * but current inet_*type functions take a const */ struct net_device *_dev = (struct net_device *) dev; const struct net_device *master; master = netdev_master_upper_dev_get_rcu(_dev); if (master && master->l3mdev_ops->l3mdev_fib_table) tb_id = master->l3mdev_ops->l3mdev_fib_table(master); } return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) { struct net_device *dev; u32 tb_id = 0; if (!ifindex) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) tb_id = l3mdev_fib_table_rcu(dev); rcu_read_unlock(); return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup * This function does not hold refcnt on the returned dst. * Caller must hold rcu_read_lock(). 
*/ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; WARN_ON_ONCE(!rcu_read_lock_held()); if (fl6->flowi6_oif) { dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (dev && netif_is_l3_slave(dev)) dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_link_scope_lookup) dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); } return dst; } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device * @net: network namespace for device index lookup * @fl: flow struct * @arg: store the table the rule matched with here */ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { struct net_device *dev; int rc = 0; /* update flow ensures flowi_l3mdev is set when relevant */ if (!fl->flowi_l3mdev) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; } rcu_read_unlock(); return rc; } void l3mdev_update_flow(struct net *net, struct flowi *fl) { struct net_device *dev; rcu_read_lock(); if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { if (!fl->flowi_l3mdev) fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); /* oif set to L3mdev directs lookup to its table; * reset to avoid oif match in fib_lookup */ if (netif_is_l3_master(dev)) fl->flowi_oif = 0; goto out; } } if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { dev = dev_get_by_index_rcu(net, fl->flowi_iif); if (dev) fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); } out: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(l3mdev_update_flow); |
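/*
 * Usage sketch (illustrative, not part of this file): an L3 master device
 * driver such as VRF registers a table-id to ifindex lookup handler so that
 * l3mdev_ifindex_lookup_by_table_id() above can resolve FIB table ids. The
 * example_* names below are hypothetical.
 */
#if 0	/* example, not compiled */
static int example_ifindex_lookup_by_table(struct net *net, u32 table_id)
{
	/* driver-private mapping from FIB table id to the master ifindex */
	return example_find_master_ifindex(net, table_id);	/* hypothetical helper */
}

static int __init example_l3mdev_init(void)
{
	return l3mdev_table_lookup_register(L3MDEV_TYPE_VRF,
					    example_ifindex_lookup_by_table);
}

static void __exit example_l3mdev_exit(void)
{
	l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF,
				       example_ifindex_lookup_by_table);
}
#endif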
707 2 93 93 2 95 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the UDP protocol. * * Version: @(#)udp.h 1.0.2 04/28/93 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_UDP_H #define _LINUX_UDP_H #include <net/inet_sock.h> #include <linux/skbuff.h> #include <net/netns/hash.h> #include <uapi/linux/udp.h> static inline struct udphdr *udp_hdr(const struct sk_buff *skb) { return (struct udphdr *)skb_transport_header(skb); } #define UDP_HTABLE_SIZE_MIN_PERNET 128 #define UDP_HTABLE_SIZE_MIN (IS_ENABLED(CONFIG_BASE_SMALL) ? 128 : 256) #define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { return (num + net_hash_mix(net)) & mask; } enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node unsigned long udp_flags; int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ __u16 gso_size; /* * Fields specific to UDP-Lite. */ __u16 pcslen; __u16 pcrlen; /* * For encapsulation sockets. 
*/ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); /* GRO functions for UDP socket */ struct sk_buff * (*gro_receive)(struct sock *sk, struct list_head *head, struct sk_buff *skb); int (*gro_complete)(struct sock *sk, struct sk_buff *skb, int nhoff); /* udp_recvmsg try to use this before splicing sk_receive_queue */ struct sk_buff_head reader_queue ____cacheline_aligned_in_smp; /* This field is dirtied by udp_recvmsg() */ int forward_deficit; /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; }; #define udp_test_bit(nr, sk) \ test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_set_bit(nr, sk) \ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_test_and_set_bit(nr, sk) \ test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_clear_bit(nr, sk) \ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_assign_bit(nr, sk, val) \ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) #define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline int udp_set_peek_off(struct sock *sk, int val) { sk_set_peek_off(sk, val); WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); return 0; } static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { udp_assign_bit(NO_CHECK6_RX, sk, val); } static inline bool udp_get_no_check6_tx(const struct sock *sk) { return udp_test_bit(NO_CHECK6_TX, sk); } static inline bool udp_get_no_check6_rx(const struct sock *sk) { return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int gso_size; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { gso_size = skb_shinfo(skb)->gso_size; put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size); } } DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); #endif static inline bool udp_encap_needed(void) { if (static_branch_unlikely(&udp_encap_needed_key)) return true; #if IS_ENABLED(CONFIG_IPV6) if (static_branch_unlikely(&udpv6_encap_needed_key)) return true; #endif return false; } static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) return false; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_test_bit(ACCEPT_L4, sk)) return true; if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still * land in a tunnel as the socket check in udp_gro_receive cannot be * foolproof. 
*/ if (udp_encap_needed() && READ_ONCE(udp_sk(sk)->encap_rcv) && !(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) return true; return false; } static inline void udp_allow_gso(struct sock *sk) { udp_set_bit(ACCEPT_L4, sk); udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */ |
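/*
 * Usage sketch (illustrative only): the udp_*_bit() helpers above wrap
 * atomic bit operations on udp_sk(sk)->udp_flags. A caller might toggle the
 * zero-checksum behaviour like this; the function name is hypothetical.
 */
#if 0	/* example, not compiled */
static void example_allow_zero_udp6_csum(struct sock *sk)
{
	/* permit zero UDPv6 checksums on both transmit and receive */
	udp_set_no_check6_tx(sk, true);
	udp_set_no_check6_rx(sk, true);

	WARN_ON(!udp_get_no_check6_rx(sk));
}
#endif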
1 1 965 963 963 963 963 960 962 2 965 965 1 962 715 596 966 2 964 966 679 679 1 677 679 680 1 1 678 678 677 680 965 678 676 680 965 963 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 | // SPDX-License-Identifier: GPL-2.0 /* * FPU signal frame handling routines. */ #include <linux/compat.h> #include <linux/cpu.h> #include <linux/pagemap.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> #include <asm/sigframe.h> #include <asm/trapnr.h> #include <asm/trace/fpu.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); void __user *fpstate = fxbuf; unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) return false; /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || fx_sw->xstate_size > current->thread.fpu.fpstate->user_size || fx_sw->xstate_size > fx_sw->extended_size) goto setfx; /* * Check for the presence of second magic word at the end of memory * layout. 
This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) return true; setfx: trace_x86_fpu_xstate_check_failed(¤t->thread.fpu); /* Set the parameters for fx only state */ fx_sw->magic1 = 0; fx_sw->xstate_size = sizeof(struct fxregs_state); fx_sw->xfeatures = XFEATURE_MASK_FPSSE; return true; } /* * Update the value of PKRU register that was already pushed onto the signal frame. */ static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) { if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) return 0; return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); } /* * Signal frame handlers. */ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) fxsave(&tsk->thread.fpu.fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env)) || __put_user(xsave->i387.swd, &fp->status) || __put_user(X86_FXSR_MAGIC, &fp->magic)) return false; } else { struct fregs_state __user *fp = buf; u32 swd; if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return false; } return true; } /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout * pointed to by the fpstate pointer in the sigcontext. * This is saved when ever the FP and extended state context is * saved on the user stack during the signal handler delivery to the user. */ static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame, struct fpstate *fpstate) { sw_bytes->magic1 = FP_XSTATE_MAGIC1; sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE; sw_bytes->xfeatures = fpstate->user_xfeatures; sw_bytes->xstate_size = fpstate->user_size; if (ia32_frame) sw_bytes->extended_size += sizeof(struct fregs_state); } static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; struct _fpx_sw_bytes sw_bytes = {}; u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ save_sw_bytes(&sw_bytes, ia32_frame, fpstate); err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes)); if (!use_xsave()) return !err; err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + fpstate->user_size)); /* * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to * the FP/SSE bits by the legacy applications which don't touch * xfeatures in the xsave header. * * xsave aware apps can change the xfeatures in the xsave * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. 
*/ xfeatures |= XFEATURE_MASK_FPSSE; err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); return !err; } static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) { int err = 0; if (use_xsave()) { err = xsave_to_user_sigframe(buf); if (!err) err = update_pkru_in_sigframe(buf, pkru); return err; } if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else return fnsave_to_user_sigframe((struct fregs_state __user *) buf); } /* * Save the fpu, extended register state to the user signal frame. * * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save * state is copied. * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. * * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * * Save it directly to the user frame with disabled page fault handler. If * that faults, try to clear the frame which handles the page fault. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. * * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru) { struct task_struct *tsk = current; struct fpstate *fpstate = tsk->thread.fpu.fpstate; bool ia32_fxstate = (buf != buf_fx); int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); if (!static_cpu_has(X86_FEATURE_FPU)) { struct user_i387_ia32_struct fp; fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, .left = sizeof(fp)}); return !copy_to_user(buf, &fp, sizeof(fp)); } if (!access_ok(buf, size)) return false; if (use_xsave()) { struct xregs_state __user *xbuf = buf_fx; /* * Clear the xsave header first, so that reserved fields are * initialized to zero. */ if (__clear_user(&xbuf->header, sizeof(xbuf->header))) return false; } retry: /* * Load the FPU registers if they are not valid for the current task. * With a valid FPU state we can attempt to save the state directly to * userland's stack frame which will likely succeed. If it does not, * resolve the fault in the user memory and try again. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx, pkru); pagefault_enable(); fpregs_unlock(); if (ret) { if (!__clear_user(buf_fx, fpstate->user_size)) goto retry; return false; } /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate)) return false; return true; } static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, u64 xrestore, bool fx_only) { if (use_xsave()) { u64 init_bv = ufeatures & ~xrestore; int ret; if (likely(!fx_only)) ret = xrstor_from_user_sigframe(buf, xrestore); else ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) os_xrstor(&init_fpstate, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); } else { return frstor_from_user_sigframe(buf); } } /* * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; /* Restore enabled features only. 
*/ xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ xfd_update_state(fpu->fpstate); pagefault_disable(); ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, xrestore, fx_only); pagefault_enable(); if (unlikely(ret)) { /* * The above did an FPU restore operation, restricted to * the user portion of the registers, and failed, but the * microcode might have modified the FPU registers * nevertheless. * * If the FPU registers do not belong to current, then * invalidate the FPU register state otherwise the task * might preempt current and return to user space with * corrupted FPU registers. */ if (test_thread_flag(TIF_NEED_FPU_LOAD)) __cpu_invalidate_fpregs_state(); fpregs_unlock(); /* Try to handle #PF, but anything else is fatal. */ if (ret != X86_TRAP_PF) return false; if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } /* * Restore supervisor states: previous context switch etc has done * XSAVES and saved the supervisor states in the kernel buffer from * which they can be restored now. * * It would be optimal to handle this with a single XRSTORS, but * this does not work because the rest of the FPU registers have * been restored from a user buffer directly. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) os_xrstor_supervisor(fpu->fpstate); fpregs_mark_activate(); fpregs_unlock(); return true; } static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; u64 user_xfeatures = 0; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user)) return false; fx_only = !fx_sw_user.magic1; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* * Copy the legacy state because the FP portion of the FX frame has * to be ignored for histerical raisins. The legacy state is folded * in once the larger state has been copied. */ if (__copy_from_user(&env, buf, sizeof(env))) return false; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is * not modified on context switch and that the xstate is considered * to be loaded again on return to userland (overriding last_cpu avoids * the optimisation). */ fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { /* * If supervisor states are available then save the * hardware state in current's fpstate so that the * supervisor state is preserved. Save the full state for * simplicity. There is no point in optimizing this by only * saving the supervisor states and then shuffle them to * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) os_xsave(fpu->fpstate); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); fpregs_unlock(); fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { if (copy_sigframe_from_user_to_xstate(tsk, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, sizeof(fpregs->fxsave))) return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. 
*/ if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask) return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). */ fpregs->fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; } /* Fold the legacy FP storage */ convert_to_fxsr(&fpregs->fxsave, &env); fpregs_lock(); if (use_xsave()) { /* * Remove all UABI feature bits not set in user_xfeatures * from the memory xstate header which makes the full * restore below bring them into init state. This works for * fx_only mode as well because that has only FP and SSE * set in user_xfeatures. * * Preserve supervisor states! */ u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpregs->xsave.header.xfeatures &= mask; success = !os_xrstor_safe(fpu->fpstate, fpu_kernel_cfg.max_features); } else { success = !fxrstor_safe(&fpregs->fxsave); } if (likely(success)) fpregs_mark_activate(); fpregs_unlock(); return success; } static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) { unsigned int size = fpstate->user_size; return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size; } /* * Restore FPU state from a sigframe: */ bool fpu__restore_sig(void __user *buf, int ia32_frame) { struct fpu *fpu = ¤t->thread.fpu; void __user *buf_fx = buf; bool ia32_fxstate = false; bool success = false; unsigned int size; if (unlikely(!buf)) { fpu__clear_user_states(fpu); return true; } size = xstate_sigframe_size(fpu->fpstate); ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); /* * Only FXSR enabled systems need the FX state quirk. * FRSTOR does not need it and can use the fast path. */ if (ia32_frame && use_fxsr()) { buf_fx = buf + sizeof(struct fregs_state); size += sizeof(struct fregs_state); ia32_fxstate = true; } if (!access_ok(buf, size)) goto out; if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { success = !fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf); } else { success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: if (unlikely(!success)) fpu__clear_user_states(fpu); return success; } unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { frame_size += sizeof(struct fregs_state); sp -= sizeof(struct fregs_state); } *size = frame_size; return sp; } unsigned long __init fpu__get_fpstate_size(void) { unsigned long ret = fpu_user_cfg.max_size; if (use_xsave()) ret += FP_XSTATE_MAGIC2_SIZE; /* * This space is needed on (most) 32-bit kernels, or when a 32-bit * app is running on a 64-bit kernel. To keep things simple, just * assume the worst case and always include space for 'freg_state', * even for 64-bit apps on 64-bit kernels. This wastes a bit of * space, but keeps the code simple. */ if ((IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) && use_fxsr()) ret += sizeof(struct fregs_state); return ret; } |
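/*
 * Caller-side sketch (illustrative, simplified from the x86 signal setup
 * path, with hypothetical names): space for the xstate image is carved out
 * of the user stack with fpu__alloc_mathframe() and then filled by
 * copy_fpstate_to_sigframe().
 */
#if 0	/* example, not compiled */
static void __user *example_get_sigframe(unsigned long sp, int ia32_frame,
					 u32 pkru)
{
	unsigned long buf_fx = 0;
	unsigned long math_size = 0;

	/* reserve a 64-byte aligned area below the current stack pointer */
	sp = fpu__alloc_mathframe(sp, ia32_frame, &buf_fx, &math_size);

	/* dump the live FPU/extended register state into that area */
	if (!copy_fpstate_to_sigframe((void __user *)sp,
				      (void __user *)buf_fx, math_size, pkru))
		return NULL;

	return (void __user *)sp;
}
#endif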
1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2009 Red Hat, Inc. * Author: Michael S. Tsirkin <mst@redhat.com> * * virtio-net server in host kernel. */ #include <linux/compat.h> #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/virtio_net.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> #include <linux/vmalloc.h> #include <linux/net.h> #include <linux/if_packet.h> #include <linux/if_arp.h> #include <linux/if_tun.h> #include <linux/if_macvlan.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> #include <linux/skb_array.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/xdp.h> #include "vhost.h" static int experimental_zcopytx = 0; module_param(experimental_zcopytx, int, 0444); MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" " 1 -Enable; 0 - Disable"); /* Max number of bytes transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others with small * pkts. */ #define VHOST_NET_PKT_WEIGHT 256 /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 #define VHOST_GOODCOPY_LEN 256 /* * For transmit, used buffer len is unused; we override it to track buffer * status internally; used for zerocopy tx only. 
*/ /* Lower device DMA failed */ #define VHOST_DMA_FAILED_LEN ((__force __virtio32)3) /* Lower device DMA done */ #define VHOST_DMA_DONE_LEN ((__force __virtio32)2) /* Lower device DMA in progress */ #define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1) /* Buffer unused */ #define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0) #define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_F_RING_RESET) }; enum { VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; enum { VHOST_NET_VQ_RX = 0, VHOST_NET_VQ_TX = 1, VHOST_NET_VQ_MAX = 2, }; struct vhost_net_ubuf_ref { /* refcount follows semantics similar to kref: * 0: object is released * 1: no outstanding ubufs * >1: outstanding ubufs */ atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; }; #define VHOST_NET_BATCH 64 struct vhost_net_buf { void **queue; int tail; int head; }; struct vhost_net_virtqueue { struct vhost_virtqueue vq; size_t vhost_hlen; size_t sock_hlen; /* vhost zerocopy support fields below: */ /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; /* For TX, first used idx for DMA done zerocopy buffers * For RX, number of batched heads */ int done_idx; /* Number of XDP frames batched */ int batched_xdp; /* an array of userspace buffers info */ struct ubuf_info_msgzc *ubuf_info; /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. */ struct vhost_net_ubuf_ref *ubufs; struct ptr_ring *rx_ring; struct vhost_net_buf rxq; /* Batched XDP buffs */ struct xdp_buff *xdp; }; struct vhost_net { struct vhost_dev dev; struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; /* Number of times zerocopy TX recently failed. * Protected by tx vq lock. */ unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. 
*/ bool tx_flush; /* Private page frag cache */ struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; static void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq) { if (rxq->tail != rxq->head) return rxq->queue[rxq->head]; else return NULL; } static int vhost_net_buf_get_size(struct vhost_net_buf *rxq) { return rxq->tail - rxq->head; } static int vhost_net_buf_is_empty(struct vhost_net_buf *rxq) { return rxq->tail == rxq->head; } static void *vhost_net_buf_consume(struct vhost_net_buf *rxq) { void *ret = vhost_net_buf_get_ptr(rxq); ++rxq->head; return ret; } static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; rxq->head = 0; rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_NET_BATCH); return rxq->tail; } static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, vhost_net_buf_get_size(rxq), tun_ptr_free); rxq->head = rxq->tail = 0; } } static int vhost_net_buf_peek_len(void *ptr) { if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); return xdpf->len; } return __skb_array_len_with_tag(ptr); } static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; if (!vhost_net_buf_is_empty(rxq)) goto out; if (!vhost_net_buf_produce(nvq)) return 0; out: return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); } static void vhost_net_buf_init(struct vhost_net_buf *rxq) { rxq->head = rxq->tail = 0; } static void vhost_net_enable_zcopy(int vq) { vhost_net_zcopy_mask |= 0x1 << vq; } static struct vhost_net_ubuf_ref * vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) { struct vhost_net_ubuf_ref *ubufs; /* No zero copy backend? Nothing to count. 
*/ if (!zcopy) return NULL; ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); if (!ubufs) return ERR_PTR(-ENOMEM); atomic_set(&ubufs->refcount, 1); init_waitqueue_head(&ubufs->wait); ubufs->vq = vq; return ubufs; } static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { int r = atomic_sub_return(1, &ubufs->refcount); if (unlikely(!r)) wake_up(&ubufs->wait); return r; } static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put(ubufs); wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); } static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put_and_wait(ubufs); kfree(ubufs); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) { int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { kfree(n->vqs[i].ubuf_info); n->vqs[i].ubuf_info = NULL; } } static int vhost_net_set_ubuf_info(struct vhost_net *n) { bool zcopy; int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { zcopy = vhost_net_zcopy_mask & (0x1 << i); if (!zcopy) continue; n->vqs[i].ubuf_info = kmalloc_array(UIO_MAXIOV, sizeof(*n->vqs[i].ubuf_info), GFP_KERNEL); if (!n->vqs[i].ubuf_info) goto err; } return 0; err: vhost_net_clear_ubuf_info(n); return -ENOMEM; } static void vhost_net_vq_reset(struct vhost_net *n) { int i; vhost_net_clear_ubuf_info(n); for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].done_idx = 0; n->vqs[i].upend_idx = 0; n->vqs[i].ubufs = NULL; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; vhost_net_buf_init(&n->vqs[i].rxq); } } static void vhost_net_tx_packet(struct vhost_net *net) { ++net->tx_packets; if (net->tx_packets < 1024) return; net->tx_packets = 0; net->tx_zcopy_err = 0; } static void vhost_net_tx_err(struct vhost_net *net) { ++net->tx_zcopy_err; } static bool vhost_net_tx_select_zcopy(struct vhost_net *net) { /* TX flush waits for outstanding DMAs to be done. * Don't start new DMAs. */ return !net->tx_flush && net->tx_packets / 64 >= net->tx_zcopy_err; } static bool vhost_sock_zcopy(struct socket *sock) { return unlikely(experimental_zcopytx) && sock_flag(sock->sk, SOCK_ZEROCOPY); } static bool vhost_sock_xdp(struct socket *sock) { return sock_flag(sock->sk, SOCK_XDP); } /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM * guest used idx. */ static void vhost_zerocopy_signal_used(struct vhost_net *net, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); int i, add; int j = 0; for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { vq->heads[i].len = VHOST_DMA_CLEAR_LEN; ++j; } else break; } while (j) { add = min(UIO_MAXIOV - nvq->done_idx, j); vhost_add_used_and_signal_n(vq->dev, vq, &vq->heads[nvq->done_idx], add); nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; j -= add; } } static void vhost_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *ubuf_base, bool success) { struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; struct vhost_virtqueue *vq = ubufs->vq; int cnt; rcu_read_lock_bh(); /* set len to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = success ? 
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; cnt = vhost_net_ubuf_put(ubufs); /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1. * We also trigger polling periodically after each 16 packets * (the value 16 here is more or less arbitrary, it's tuned to trigger * less than 10% of times). */ if (cnt <= 1 || !(cnt % 16)) vhost_poll_queue(&vq->poll); rcu_read_unlock_bh(); } static const struct ubuf_info_ops vhost_ubuf_ops = { .complete = vhost_zerocopy_complete, }; static inline unsigned long busy_clock(void) { return local_clock() >> 10; } static bool vhost_can_busy_poll(unsigned long endtime) { return likely(!need_resched() && !time_after(busy_clock(), endtime) && !signal_pending(current)); } static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); if (!vhost_vq_get_backend(vq)) return; vhost_poll_stop(poll); } static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; sock = vhost_vq_get_backend(vq); if (!sock) return 0; return vhost_poll_start(poll, sock->file); } static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) { struct vhost_virtqueue *vq = &nvq->vq; struct vhost_dev *dev = vq->dev; if (!nvq->done_idx) return; vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); nvq->done_idx = 0; } static void vhost_tx_batch(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct socket *sock, struct msghdr *msghdr) { struct tun_msg_ctl ctl = { .type = TUN_MSG_PTR, .num = nvq->batched_xdp, .ptr = nvq->xdp, }; int i, err; if (nvq->batched_xdp == 0) goto signal_used; msghdr->msg_control = &ctl; msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); /* free pages owned by XDP; since this is an unlikely error path, * keep it simple and avoid more complex bulk update for the * used pages */ for (i = 0; i < nvq->batched_xdp; ++i) put_page(virt_to_head_page(nvq->xdp[i].data)); nvq->batched_xdp = 0; nvq->done_idx = 0; return; } signal_used: vhost_net_signal_used(nvq); nvq->batched_xdp = 0; } static int sock_has_rx_data(struct socket *sock) { if (unlikely(!sock)) return 0; if (sock->ops->peek_len) return sock->ops->peek_len(sock); return skb_queue_empty(&sock->sk->sk_receive_queue); } static void vhost_net_busy_poll_try_queue(struct vhost_net *net, struct vhost_virtqueue *vq) { if (!vhost_vq_avail_empty(&net->dev, vq)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); vhost_poll_queue(&vq->poll); } } static void vhost_net_busy_poll(struct vhost_net *net, struct vhost_virtqueue *rvq, struct vhost_virtqueue *tvq, bool *busyloop_intr, bool poll_rx) { unsigned long busyloop_timeout; unsigned long endtime; struct socket *sock; struct vhost_virtqueue *vq = poll_rx ? tvq : rvq; /* Try to hold the vq mutex of the paired virtqueue. We can't * use mutex_lock() here since we could not guarantee a * consistenet lock ordering. */ if (!mutex_trylock(&vq->mutex)) return; vhost_disable_notify(&net->dev, vq); sock = vhost_vq_get_backend(rvq); busyloop_timeout = poll_rx ? 
rvq->busyloop_timeout: tvq->busyloop_timeout; preempt_disable(); endtime = busy_clock() + busyloop_timeout; while (vhost_can_busy_poll(endtime)) { if (vhost_vq_has_work(vq)) { *busyloop_intr = true; break; } if ((sock_has_rx_data(sock) && !vhost_vq_avail_empty(&net->dev, rvq)) || !vhost_vq_avail_empty(&net->dev, tvq)) break; cpu_relax(); } preempt_enable(); if (poll_rx || sock_has_rx_data(sock)) vhost_net_busy_poll_try_queue(net, vq); else if (!poll_rx) /* On tx here, sock has no rx data. */ vhost_enable_notify(&net->dev, rvq); mutex_unlock(&vq->mutex); } static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_net_virtqueue *tnvq, unsigned int *out_num, unsigned int *in_num, struct msghdr *msghdr, bool *busyloop_intr) { struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *rvq = &rnvq->vq; struct vhost_virtqueue *tvq = &tnvq->vq; int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); if (r == tvq->num && tvq->busyloop_timeout) { /* Flush batched packets first */ if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq))) vhost_tx_batch(net, tnvq, vhost_vq_get_backend(tvq), msghdr); vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); } return r; } static bool vhost_exceeds_maxpend(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; return (nvq->upend_idx + UIO_MAXIOV - nvq->done_idx) % UIO_MAXIOV > min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2); } static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, size_t hdr_size, int out) { /* Skip header. TODO: support TSO. */ size_t len = iov_length(vq->iov, out); iov_iter_init(iter, ITER_SOURCE, vq->iov, out, len); iov_iter_advance(iter, hdr_size); return iov_iter_count(iter); } static int get_tx_bufs(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct msghdr *msg, unsigned int *out, unsigned int *in, size_t *len, bool *busyloop_intr) { struct vhost_virtqueue *vq = &nvq->vq; int ret; ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr); if (ret < 0 || ret == vq->num) return ret; if (*in) { vq_err(vq, "Unexpected descriptor format for TX: out %d, int %d\n", *out, *in); return -EFAULT; } /* Sanity check */ *len = init_iov_iter(vq, &msg->msg_iter, nvq->vhost_hlen, *out); if (*len == 0) { vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n", *len, nvq->vhost_hlen); return -EFAULT; } return ret; } static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) { return total_len < VHOST_NET_WEIGHT && !vhost_vq_avail_empty(vq->dev, vq); } #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct iov_iter *from) { struct vhost_virtqueue *vq = &nvq->vq; struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; size_t len = iov_iter_count(from); int headroom = vhost_sock_xdp(sock) ? 
XDP_PACKET_HEADROOM : 0; int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + headroom + nvq->sock_hlen); int sock_hlen = nvq->sock_hlen; void *buf; int copied; int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; if (SKB_DATA_ALIGN(len + pad) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, SMP_CACHE_BYTES); if (unlikely(!buf)) return -ENOMEM; copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), sock_hlen, from); if (copied != sock_hlen) { ret = -EFAULT; goto err; } hdr = buf; gso = &hdr->gso; if (!sock_hlen) memset(buf, 0, pad); if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2 > vhost16_to_cpu(vq, gso->hdr_len)) { gso->hdr_len = cpu_to_vhost16(vq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); if (vhost16_to_cpu(vq, gso->hdr_len) > len) { ret = -EINVAL; goto err; } } len -= sock_hlen; copied = copy_from_iter(buf + pad, len, from); if (copied != len) { ret = -EFAULT; goto err; } xdp_init_buff(xdp, buflen, NULL); xdp_prepare_buff(xdp, buf, pad, len, true); hdr->buflen = buflen; ++nvq->batched_xdp; return 0; err: page_frag_free(buf); return ret; } static void handle_tx_copy(struct vhost_net *net, struct socket *sock) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in; int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; size_t len, total_len = 0; int err; int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); do { bool busyloop_intr = false; if (nvq->done_idx == VHOST_NET_BATCH) vhost_tx_batch(net, nvq, sock, &msg); head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, &busyloop_intr); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } total_len += len; /* For simplicity, TX batching is only enabled if * sndbuf is unlimited. */ if (sock_can_batch) { err = vhost_net_build_xdp(nvq, &msg.msg_iter); if (!err) { goto done; } else if (unlikely(err != -ENOSPC)) { vhost_tx_batch(net, nvq, sock, &msg); vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } /* We can't build XDP buff, go for single * packet path but let's flush batched * packets. 
*/ vhost_tx_batch(net, nvq, sock, &msg); msg.msg_control = NULL; } else { if (tx_can_batch(vq, total_len)) msg.msg_flags |= MSG_MORE; else msg.msg_flags &= ~MSG_MORE; } err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } pr_debug("Fail to send packet: err %d", err); } else if (unlikely(err != len)) pr_debug("Truncated TX packet: len %d != %zd\n", err, len); done: vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->done_idx].len = 0; ++nvq->done_idx; } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); vhost_tx_batch(net, nvq, sock, &msg); } static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in; int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; struct tun_msg_ctl ctl; size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *ubufs; struct ubuf_info_msgzc *ubuf; bool zcopy_used; int sent_pkts = 0; do { bool busyloop_intr; /* Release DMAs done buffers first */ vhost_zerocopy_signal_used(net, vq); busyloop_intr = false; head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, &busyloop_intr); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } zcopy_used = len >= VHOST_GOODCOPY_LEN && !vhost_exceeds_maxpend(net) && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { ubuf = nvq->ubuf_info + nvq->upend_idx; vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->ctx = nvq->ubufs; ubuf->desc = nvq->upend_idx; ubuf->ubuf.ops = &vhost_ubuf_ops; ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG; refcount_set(&ubuf->ubuf.refcnt, 1); msg.msg_control = &ctl; ctl.type = TUN_MSG_UBUF; ctl.ptr = &ubuf->ubuf; msg.msg_controllen = sizeof(ctl); ubufs = nvq->ubufs; atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } else { msg.msg_control = NULL; ubufs = NULL; } total_len += len; if (tx_can_batch(vq, total_len) && likely(!vhost_exceeds_maxpend(net))) { msg.msg_flags |= MSG_MORE; } else { msg.msg_flags &= ~MSG_MORE; } err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; if (zcopy_used) { if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) vhost_net_ubuf_put(ubufs); if (retry) nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; else vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; } if (retry) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } pr_debug("Fail to send packet: err %d", err); } else if (unlikely(err != len)) pr_debug("Truncated TX packet: " " len %d != %zd\n", err, len); if (!zcopy_used) vhost_add_used_and_signal(&net->dev, vq, head, 0); else vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); } /* Expects to be always run from workqueue - which acts as * read-size critical 
section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; struct socket *sock; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); sock = vhost_vq_get_backend(vq); if (!sock) goto out; if (!vq_meta_prefetch(vq)) goto out; vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); if (vhost_sock_zcopy(sock)) handle_tx_zerocopy(net, sock); else handle_tx_copy(net, sock); out: mutex_unlock(&vq->mutex); } static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) { struct sk_buff *head; int len = 0; unsigned long flags; if (rvq->rx_ring) return vhost_net_buf_peek(rvq); spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { len = head->len; if (skb_vlan_tag_present(head)) len += VLAN_HLEN; } spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); return len; } static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, bool *busyloop_intr) { struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *rvq = &rnvq->vq; struct vhost_virtqueue *tvq = &tnvq->vq; int len = peek_head_len(rnvq, sk); if (!len && rvq->busyloop_timeout) { /* Flush batched heads first */ vhost_net_signal_used(rnvq); /* Both tx vq and rx socket were polled here */ vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true); len = peek_head_len(rnvq, sk); } return len; } /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue * @datalen - data length we'll be reading * @iovcount - returned count of io vectors we fill * @log - vhost log * @log_num - log offset * @quota - headcount quota, 1 for big buffer * returns number of buffer heads allocated, negative on error */ static int get_rx_bufs(struct vhost_virtqueue *vq, struct vring_used_elem *heads, int datalen, unsigned *iovcount, struct vhost_log *log, unsigned *log_num, unsigned int quota) { unsigned int out, in; int seg = 0; int headcount = 0; unsigned d; int r, nlogs = 0; /* len is always initialized before use since we are always called with * datalen > 0. */ u32 len; while (datalen > 0 && headcount < quota) { if (unlikely(seg >= UIO_MAXIOV)) { r = -ENOBUFS; goto err; } r = vhost_get_vq_desc(vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); if (unlikely(r < 0)) goto err; d = r; if (d == vq->num) { r = 0; goto err; } if (unlikely(out || in <= 0)) { vq_err(vq, "unexpected descriptor format for RX: " "out %d, in %d\n", out, in); r = -EINVAL; goto err; } if (unlikely(log)) { nlogs += *log_num; log += *log_num; } heads[headcount].id = cpu_to_vhost32(vq, d); len = iov_length(vq->iov + seg, in); heads[headcount].len = cpu_to_vhost32(vq, len); datalen -= len; ++headcount; seg += in; } heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); *iovcount = seg; if (unlikely(log)) *log_num = nlogs; /* Detect overrun */ if (unlikely(datalen > 0)) { r = UIO_MAXIOV + 1; goto err; } return headcount; err: vhost_discard_vq_desc(vq, headcount); return r; } /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. 
*/ static void handle_rx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned in, log; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, /* FIXME: get and handle RX aux data. */ .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; struct virtio_net_hdr hdr = { .flags = 0, .gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; int err, mergeable; s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; bool busyloop_intr = false; struct socket *sock; struct iov_iter fixup; __virtio16 num_buffers; int recv_pkts = 0; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); sock = vhost_vq_get_backend(vq); if (!sock) goto out; if (!vq_meta_prefetch(vq)) goto out; vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ? vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); do { sock_len = vhost_net_rx_peek_head_len(net, sock->sk, &busyloop_intr); if (!sock_len) break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) goto out; /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us * they refilled. */ goto out; } busyloop_intr = false; if (nvq->rx_ring) msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, 1, 1); err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, in, vhost_len); fixup = msg.msg_iter; if (unlikely((vhost_hlen))) { /* We will supply the header ourselves * TODO: support TSO. */ iov_iter_advance(&msg.msg_iter, vhost_hlen); } err = sock->ops->recvmsg(sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: * it's not supposed to do this usually, but might be hard * to prevent. Discard data we got (if any) and keep going. */ if (unlikely(err != sock_len)) { pr_debug("Discarded rx packet: " " len %d, expected %zd\n", err, sock_len); vhost_discard_vq_desc(vq, headcount); continue; } /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ if (unlikely(vhost_hlen)) { if (copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr " "at addr %p\n", vq->iov->iov_base); goto out; } } else { /* Header came from socket; we'll need to patch * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF */ iov_iter_advance(&fixup, sizeof(hdr)); } /* TODO: Should check and handle checksum. 
*/ num_buffers = cpu_to_vhost16(vq, headcount); if (likely(mergeable) && copy_to_iter(&num_buffers, sizeof num_buffers, &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); goto out; } nvq->done_idx += headcount; if (nvq->done_idx > VHOST_NET_BATCH) vhost_net_signal_used(nvq); if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); if (unlikely(busyloop_intr)) vhost_poll_queue(&vq->poll); else if (!sock_len) vhost_net_enable_vq(net, vq); out: vhost_net_signal_used(nvq); mutex_unlock(&vq->mutex); } static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_tx(net); } static void handle_rx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_rx(net); } static void handle_tx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); handle_tx(net); } static void handle_rx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); handle_rx(net); } static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; void **queue; struct xdp_buff *xdp; int i; n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!n) return -ENOMEM; vqs = kmalloc_array(VHOST_NET_VQ_MAX, sizeof(*vqs), GFP_KERNEL); if (!vqs) { kvfree(n); return -ENOMEM; } queue = kmalloc_array(VHOST_NET_BATCH, sizeof(void *), GFP_KERNEL); if (!queue) { kfree(vqs); kvfree(n); return -ENOMEM; } n->vqs[VHOST_NET_VQ_RX].rxq.queue = queue; xdp = kmalloc_array(VHOST_NET_BATCH, sizeof(*xdp), GFP_KERNEL); if (!xdp) { kfree(vqs); kvfree(n); kfree(queue); return -ENOMEM; } n->vqs[VHOST_NET_VQ_TX].xdp = xdp; dev = &n->dev; vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].ubufs = NULL; n->vqs[i].ubuf_info = NULL; n->vqs[i].upend_idx = 0; n->vqs[i].done_idx = 0; n->vqs[i].batched_xdp = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; n->vqs[i].rx_ring = NULL; vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, vqs[VHOST_NET_VQ_TX]); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, vqs[VHOST_NET_VQ_RX]); f->private_data = n; n->pf_cache.va = NULL; return 0; } static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct socket *sock; struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); mutex_lock(&vq->mutex); sock = vhost_vq_get_backend(vq); vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, NULL); vhost_net_buf_unproduce(nvq); nvq->rx_ring = NULL; mutex_unlock(&vq->mutex); return sock; } static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, struct socket **rx_sock) { 
*tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } static void vhost_net_flush(struct vhost_net *n) { vhost_dev_flush(&n->dev); if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. */ vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1); mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } static int vhost_net_release(struct inode *inode, struct file *f) { struct vhost_net *n = f->private_data; struct socket *tx_sock; struct socket *rx_sock; vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); vhost_net_vq_reset(n); if (tx_sock) sockfd_put(tx_sock); if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); page_frag_cache_drain(&n->pf_cache); kvfree(n); return 0; } static struct socket *get_raw_socket(int fd) { int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) return ERR_PTR(-ENOTSOCK); /* Parameter checking */ if (sock->sk->sk_type != SOCK_RAW) { r = -ESOCKTNOSUPPORT; goto err; } if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } return sock; err: sockfd_put(sock); return ERR_PTR(r); } static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; ring = tap_get_ptr_ring(file); if (!IS_ERR(ring)) goto out; ring = NULL; out: return ring; } static struct socket *get_tap_socket(int fd) { struct file *file = fget(fd); struct socket *sock; if (!file) return ERR_PTR(-EBADF); sock = tun_get_socket(file); if (!IS_ERR(sock)) return sock; sock = tap_get_socket(file); if (IS_ERR(sock)) fput(file); return sock; } static struct socket *get_socket(int fd) { struct socket *sock; /* special case to disable backend */ if (fd == -1) return NULL; sock = get_raw_socket(fd); if (!IS_ERR(sock)) return sock; sock = get_tap_socket(fd); if (!IS_ERR(sock)) return sock; return ERR_PTR(-ENOTSOCK); } static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) { struct socket *sock, *oldsock; struct vhost_virtqueue *vq; struct vhost_net_virtqueue *nvq; struct vhost_net_ubuf_ref *ubufs, *oldubufs = NULL; int r; mutex_lock(&n->dev.mutex); r = vhost_dev_check_owner(&n->dev); if (r) goto err; if (index >= VHOST_NET_VQ_MAX) { r = -ENOBUFS; goto err; } vq = &n->vqs[index].vq; nvq = &n->vqs[index]; mutex_lock(&vq->mutex); if (fd == -1) vhost_clear_msg(&n->dev); /* Verify that ring has been setup correctly. 
*/ if (!vhost_vq_access_ok(vq)) { r = -EFAULT; goto err_vq; } sock = get_socket(fd); if (IS_ERR(sock)) { r = PTR_ERR(sock); goto err_vq; } /* start polling new socket */ oldsock = vhost_vq_get_backend(vq); if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); if (IS_ERR(ubufs)) { r = PTR_ERR(ubufs); goto err_ubufs; } vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, sock); vhost_net_buf_unproduce(nvq); r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); if (r) goto err_used; if (index == VHOST_NET_VQ_RX) { if (sock) nvq->rx_ring = get_tap_ptr_ring(sock->file); else nvq->rx_ring = NULL; } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; n->tx_packets = 0; n->tx_zcopy_err = 0; n->tx_flush = false; } mutex_unlock(&vq->mutex); if (oldubufs) { vhost_net_ubuf_put_wait_and_free(oldubufs); mutex_lock(&vq->mutex); vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); } if (oldsock) { vhost_dev_flush(&n->dev); sockfd_put(oldsock); } mutex_unlock(&n->dev.mutex); return 0; err_used: vhost_vq_set_backend(vq, oldsock); vhost_net_enable_vq(n, vq); if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: if (sock) sockfd_put(sock); err_vq: mutex_unlock(&vq->mutex); err: mutex_unlock(&n->dev.mutex); return r; } static long vhost_net_reset_owner(struct vhost_net *n) { struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; struct vhost_iotlb *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; umem = vhost_dev_reset_owner_prepare(); if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) sockfd_put(tx_sock); if (rx_sock) sockfd_put(rx_sock); return err; } static int vhost_net_set_features(struct vhost_net *n, u64 features) { size_t vhost_hlen, sock_hlen, hdr_len; int i; hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) ? 
sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr); if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { /* vhost provides vnet_hdr */ vhost_hlen = hdr_len; sock_hlen = 0; } else { /* socket provides vnet_hdr */ vhost_hlen = 0; sock_hlen = hdr_len; } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&n->dev)) goto out_unlock; if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { if (vhost_init_device_iotlb(&n->dev)) goto out_unlock; } for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; n->vqs[i].vhost_hlen = vhost_hlen; n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); } mutex_unlock(&n->dev.mutex); return 0; out_unlock: mutex_unlock(&n->dev.mutex); return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) { int r; mutex_lock(&n->dev.mutex); if (vhost_dev_has_owner(&n->dev)) { r = -EBUSY; goto out; } r = vhost_net_set_ubuf_info(n); if (r) goto out; r = vhost_dev_set_owner(&n->dev); if (r) vhost_net_clear_ubuf_info(n); vhost_net_flush(n); out: mutex_unlock(&n->dev.mutex); return r; } static long vhost_net_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { struct vhost_net *n = f->private_data; void __user *argp = (void __user *)arg; u64 __user *featurep = argp; struct vhost_vring_file backend; u64 features; int r; switch (ioctl) { case VHOST_NET_SET_BACKEND: if (copy_from_user(&backend, argp, sizeof backend)) return -EFAULT; return vhost_net_set_backend(n, backend.index, backend.fd); case VHOST_GET_FEATURES: features = VHOST_NET_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; if (features & ~VHOST_NET_FEATURES) return -EOPNOTSUPP; return vhost_net_set_features(n, features); case VHOST_GET_BACKEND_FEATURES: features = VHOST_NET_BACKEND_FEATURES; if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; return 0; case VHOST_SET_BACKEND_FEATURES: if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; if (features & ~VHOST_NET_BACKEND_FEATURES) return -EOPNOTSUPP; vhost_set_backend_features(&n->dev, features); return 0; case VHOST_RESET_OWNER: return vhost_net_reset_owner(n); case VHOST_SET_OWNER: return vhost_net_set_owner(n); default: mutex_lock(&n->dev.mutex); r = vhost_dev_ioctl(&n->dev, ioctl, argp); if (r == -ENOIOCTLCMD) r = vhost_vring_ioctl(&n->dev, ioctl, argp); else vhost_net_flush(n); mutex_unlock(&n->dev.mutex); return r; } } static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; int noblock = file->f_flags & O_NONBLOCK; return vhost_chr_read_iter(dev, to, noblock); } static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; return vhost_chr_write_iter(dev, from); } static __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait) { struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; return vhost_chr_poll(file, dev, wait); } static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, .read_iter = vhost_net_chr_read_iter, .write_iter = vhost_net_chr_write_iter, .poll = vhost_net_chr_poll, 
.unlocked_ioctl = vhost_net_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = vhost_net_open, .llseek = noop_llseek, }; static struct miscdevice vhost_net_misc = { .minor = VHOST_NET_MINOR, .name = "vhost-net", .fops = &vhost_net_fops, }; static int __init vhost_net_init(void) { if (experimental_zcopytx) vhost_net_enable_zcopy(VHOST_NET_VQ_TX); return misc_register(&vhost_net_misc); } module_init(vhost_net_init); static void __exit vhost_net_exit(void) { misc_deregister(&vhost_net_misc); } module_exit(vhost_net_exit); MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. Tsirkin"); MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR); MODULE_ALIAS("devname:vhost-net"); |
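/*
 * Illustrative sketch (not part of this driver): the ioctl surface above
 * (VHOST_SET_OWNER, VHOST_GET/SET_FEATURES, VHOST_NET_SET_BACKEND) is
 * normally driven from a userspace VMM. The hedged userspace fragment below
 * shows only the backend-attach step against a tap device; the interface
 * name "tap0" and the minimal error handling are assumptions, and a real VMM
 * must also configure the vrings (VHOST_SET_VRING_NUM/ADDR/BASE plus the
 * kick/call eventfds) first, otherwise VHOST_NET_SET_BACKEND fails the
 * vhost_vq_access_ok() check seen in vhost_net_set_backend() above.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/if_tun.h>
#include <linux/vhost.h>

static int open_tap(const char *name)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	struct vhost_vring_file backend;
	uint64_t features;
	int vhost_fd, tap_fd;

	vhost_fd = open("/dev/vhost-net", O_RDWR);
	tap_fd = open_tap("tap0");		/* assumed interface name */
	if (vhost_fd < 0 || tap_fd < 0)
		return 1;

	/* Take ownership and negotiate features (handled by vhost_net_ioctl()). */
	if (ioctl(vhost_fd, VHOST_SET_OWNER) ||
	    ioctl(vhost_fd, VHOST_GET_FEATURES, &features) ||
	    ioctl(vhost_fd, VHOST_SET_FEATURES, &features))
		return 1;

	/* Vring setup (VHOST_SET_VRING_*) is omitted here for brevity. */

	backend.index = 0;	/* RX queue; TX would use index 1 */
	backend.fd = tap_fd;
	if (ioctl(vhost_fd, VHOST_NET_SET_BACKEND, &backend))
		perror("VHOST_NET_SET_BACKEND");

	close(vhost_fd);
	close(tap_fd);
	return 0;
}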
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
 */

#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_SYNPROXY.h>
#include <net/netfilter/nf_synproxy.h>

static unsigned int
synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_synproxy_info *info = par->targinfo;
	struct net *net = xt_net(par);
	struct synproxy_net *snet = synproxy_pernet(net);
	struct synproxy_options opts = {};
	struct tcphdr *th, _th;

	if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
		return NF_DROP;

	th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
	if (th == NULL)
		return NF_DROP;

	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
		return NF_DROP;

	if (th->syn && !(th->ack || th->fin || th->rst)) {
		/* Initial SYN from client */
		this_cpu_inc(snet->stats->syn_received);

		if (th->ece && th->cwr)
			opts.options |= XT_SYNPROXY_OPT_ECN;

		opts.options &= info->options;
		opts.mss_encode = opts.mss_option;
		opts.mss_option = info->mss;
		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
			synproxy_init_timestamp_cookie(info, &opts);
		else
			opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
					  XT_SYNPROXY_OPT_SACK_PERM |
					  XT_SYNPROXY_OPT_ECN);

		synproxy_send_client_synack_ipv6(net, skb, th, &opts);
		consume_skb(skb);
		return NF_STOLEN;
	} else if (th->ack && !(th->fin || th->rst || th->syn)) {
		/* ACK from client */
		if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
						  ntohl(th->seq))) {
			consume_skb(skb);
			return NF_STOLEN;
		} else {
			return NF_DROP;
		}
	}

	return XT_CONTINUE;
}

static int synproxy_tg6_check(const struct xt_tgchk_param *par)
{
	struct synproxy_net *snet = synproxy_pernet(par->net);
	const struct ip6t_entry *e = par->entryinfo;
	int err;

	if (!(e->ipv6.flags & IP6T_F_PROTO) ||
	    e->ipv6.proto != IPPROTO_TCP ||
	    e->ipv6.invflags & XT_INV_PROTO)
		return -EINVAL;

	err = nf_ct_netns_get(par->net, par->family);
	if (err)
		return err;

	err = nf_synproxy_ipv6_init(snet, par->net);
	if (err) {
		nf_ct_netns_put(par->net, par->family);
		return err;
	}

	return err;
}

static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
{
	struct synproxy_net *snet = synproxy_pernet(par->net);

	nf_synproxy_ipv6_fini(snet, par->net);
	nf_ct_netns_put(par->net, par->family);
}

static struct xt_target synproxy_tg6_reg __read_mostly = {
	.name		= "SYNPROXY",
	.family		= NFPROTO_IPV6,
	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
	.target		= synproxy_tg6,
	.targetsize	= sizeof(struct xt_synproxy_info),
	.checkentry	= synproxy_tg6_check,
	.destroy	= synproxy_tg6_destroy,
	.me		= THIS_MODULE,
};

static int __init synproxy_tg6_init(void)
{
	return xt_register_target(&synproxy_tg6_reg);
}

static void __exit synproxy_tg6_exit(void)
{
	xt_unregister_target(&synproxy_tg6_reg);
}

module_init(synproxy_tg6_init);
module_exit(synproxy_tg6_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies");
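/*
 * Usage note (illustrative, not part of this module): the SYNPROXY target is
 * only effective together with conntrack tuning and a NOTRACK rule for the
 * initial SYN. A commonly documented ip6tables deployment looks roughly like
 * the following; the MSS and window-scale values are assumptions to adapt to
 * the actual setup:
 *
 *   sysctl -w net.netfilter.nf_conntrack_tcp_loose=0
 *
 *   ip6tables -t raw -A PREROUTING -p tcp -m tcp --syn -j CT --notrack
 *   ip6tables -A INPUT -p tcp -m tcp -m conntrack \
 *             --ctstate INVALID,UNTRACKED \
 *             -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460
 *   ip6tables -A INPUT -m conntrack --ctstate INVALID -j DROP
 *
 * The raw-table rule keeps client SYNs out of conntrack, the SYNPROXY rule
 * answers them with syncookie SYN/ACKs via synproxy_send_client_synack_ipv6(),
 * and the final rule drops packets that never completed the proxied handshake.
 */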
86 22 1903 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/xattr.h Extended attributes handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #ifndef _LINUX_XATTR_H #define _LINUX_XATTR_H #include <linux/slab.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> struct inode; struct dentry; static inline bool is_posix_acl_xattr(const char *name) { return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); } /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and * with a non-empty suffix. */ struct xattr_handler { const char *name; const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; /** * xattr_handler_can_list - check whether xattr can be listed * @handler: handler for this type of xattr * @dentry: dentry whose inode xattr to list * * Determine whether the xattr associated with @dentry can be listed given * @handler. * * Return: true if xattr can be listed, false if not. 
*/ static inline bool xattr_handler_can_list(const struct xattr_handler *handler, struct dentry *dentry) { return handler && (!handler->list || handler->list(dentry)); } const char *xattr_full_name(const struct xattr_handler *, const char *); struct xattr { const char *name; void *value; size_t value_len; }; ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct mnt_idmap *, struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, const char *, struct inode **); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); int xattr_supports_user_prefix(struct inode *inode); static inline const char *xattr_prefix(const struct xattr_handler *handler) { return handler->prefix ?: handler->name; } struct simple_xattrs { struct rb_root rb_root; rwlock_t lock; }; struct simple_xattr { struct rb_node rb_node; char *name; size_t size; char value[]; }; void simple_xattrs_init(struct simple_xattrs *xattrs); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); #endif /* _LINUX_XATTR_H */ |
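/*
 * Illustrative sketch (not part of this header): a small filesystem can back
 * the xattr_handler operations declared above with the simple_xattr helpers.
 * The sketch assumes a hypothetical "examplefs" in-memory inode embedding a
 * struct simple_xattrs; it mirrors the usual prefix-handler pattern, where
 * ->get/->set receive only the name suffix and xattr_full_name() restores the
 * full attribute name.
 */
#include <linux/fs.h>
#include <linux/xattr.h>

/* Hypothetical per-inode container, for illustration only. */
struct examplefs_inode {
	struct simple_xattrs	xattrs;
	struct inode		vfs_inode;
};

static inline struct examplefs_inode *EXAMPLEFS_I(struct inode *inode)
{
	return container_of(inode, struct examplefs_inode, vfs_inode);
}

static int examplefs_xattr_get(const struct xattr_handler *handler,
			       struct dentry *unused, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	struct examplefs_inode *info = EXAMPLEFS_I(inode);

	/* @name is the suffix after @handler->prefix; rebuild the full name. */
	name = xattr_full_name(handler, name);
	return simple_xattr_get(&info->xattrs, name, buffer, size);
}

static int examplefs_xattr_set(const struct xattr_handler *handler,
			       struct mnt_idmap *idmap,
			       struct dentry *unused, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	struct examplefs_inode *info = EXAMPLEFS_I(inode);
	struct simple_xattr *old_xattr;

	name = xattr_full_name(handler, name);
	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
	if (IS_ERR(old_xattr))
		return PTR_ERR(old_xattr);
	/* simple_xattr_set() hands back any replaced entry; release it. */
	simple_xattr_free(old_xattr);
	return 0;
}

static const struct xattr_handler examplefs_user_xattr_handler = {
	.prefix	= XATTR_USER_PREFIX,	/* "user." */
	.get	= examplefs_xattr_get,
	.set	= examplefs_xattr_set,
};

/*
 * Wiring this up additionally requires listing the handler in sb->s_xattr and
 * calling simple_xattrs_init()/simple_xattrs_free() when inodes are created
 * and evicted; that part is omitted here.
 */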
39 2 39 2 8 8 4 22 15 15 11 4 11 20 4 33 33 31 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/u64_stats_sync.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_offload.h> struct nft_counter { u64_stats_t bytes; u64_stats_t packets; }; struct nft_counter_tot { s64 bytes; s64 packets; }; struct nft_counter_percpu_priv { struct nft_counter __percpu *counter; }; static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync); static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); nft_sync = this_cpu_ptr(&nft_counter_sync); u64_stats_update_begin(nft_sync); u64_stats_add(&this_cpu->bytes, pkt->skb->len); u64_stats_inc(&this_cpu->packets); u64_stats_update_end(nft_sync); local_bh_enable(); } static inline void nft_counter_obj_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); nft_counter_do_eval(priv, regs, pkt); } static int nft_counter_do_init(const struct nlattr * const tb[], struct nft_counter_percpu_priv *priv) { struct nft_counter __percpu *cpu_stats; struct nft_counter *this_cpu; cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT); if (cpu_stats == NULL) return -ENOMEM; this_cpu = raw_cpu_ptr(cpu_stats); if (tb[NFTA_COUNTER_PACKETS]) { u64_stats_set(&this_cpu->packets, be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]))); } if (tb[NFTA_COUNTER_BYTES]) { u64_stats_set(&this_cpu->bytes, be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]))); } priv->counter = cpu_stats; return 0; } static int nft_counter_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); return nft_counter_do_init(tb, priv); } static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv) { 
free_percpu(priv->counter); } static void nft_counter_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); nft_counter_do_destroy(priv); } static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter_tot *total) { struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); nft_sync = this_cpu_ptr(&nft_counter_sync); u64_stats_update_begin(nft_sync); u64_stats_add(&this_cpu->packets, -total->packets); u64_stats_add(&this_cpu->bytes, -total->bytes); u64_stats_update_end(nft_sync); local_bh_enable(); } static void nft_counter_fetch(struct nft_counter_percpu_priv *priv, struct nft_counter_tot *total) { struct nft_counter *this_cpu; u64 bytes, packets; unsigned int seq; int cpu; memset(total, 0, sizeof(*total)); for_each_possible_cpu(cpu) { struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu); this_cpu = per_cpu_ptr(priv->counter, cpu); do { seq = u64_stats_fetch_begin(nft_sync); bytes = u64_stats_read(&this_cpu->bytes); packets = u64_stats_read(&this_cpu->packets); } while (u64_stats_fetch_retry(nft_sync, seq)); total->bytes += bytes; total->packets += packets; } } static int nft_counter_do_dump(struct sk_buff *skb, struct nft_counter_percpu_priv *priv, bool reset) { struct nft_counter_tot total; nft_counter_fetch(priv, &total); if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD) || nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), NFTA_COUNTER_PAD)) goto nla_put_failure; if (reset) nft_counter_reset(priv, &total); return 0; nla_put_failure: return -1; } static int nft_counter_obj_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); return nft_counter_do_dump(skb, priv, reset); } static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; struct nft_object_type nft_counter_obj_type; static const struct nft_object_ops nft_counter_obj_ops = { .type = &nft_counter_obj_type, .size = sizeof(struct nft_counter_percpu_priv), .eval = nft_counter_obj_eval, .init = nft_counter_obj_init, .destroy = nft_counter_obj_destroy, .dump = nft_counter_obj_dump, }; struct nft_object_type nft_counter_obj_type __read_mostly = { .type = NFT_OBJECT_COUNTER, .ops = &nft_counter_obj_ops, .maxattr = NFTA_COUNTER_MAX, .policy = nft_counter_policy, .owner = THIS_MODULE, }; void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); nft_counter_do_eval(priv, regs, pkt); } static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); return nft_counter_do_dump(skb, priv, reset); } static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); return nft_counter_do_init(tb, priv); } static void nft_counter_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); nft_counter_do_destroy(priv); } static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_counter_percpu_priv *priv = nft_expr_priv(src); struct 
nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); struct nft_counter __percpu *cpu_stats; struct nft_counter *this_cpu; struct nft_counter_tot total; nft_counter_fetch(priv, &total); cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp); if (cpu_stats == NULL) return -ENOMEM; this_cpu = raw_cpu_ptr(cpu_stats); u64_stats_set(&this_cpu->packets, total.packets); u64_stats_set(&this_cpu->bytes, total.bytes); priv_clone->counter = cpu_stats; return 0; } static int nft_counter_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { /* No specific offload action is needed, but report success. */ return 0; } static void nft_counter_offload_stats(struct nft_expr *expr, const struct flow_stats *stats) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); nft_sync = this_cpu_ptr(&nft_counter_sync); u64_stats_update_begin(nft_sync); u64_stats_add(&this_cpu->packets, stats->pkts); u64_stats_add(&this_cpu->bytes, stats->bytes); u64_stats_update_end(nft_sync); local_bh_enable(); } void nft_counter_init_seqcount(void) { int cpu; for_each_possible_cpu(cpu) u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu)); } struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), .eval = nft_counter_eval, .init = nft_counter_init, .destroy = nft_counter_destroy, .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, .reduce = NFT_REDUCE_READONLY, .offload = nft_counter_offload, .offload_stats = nft_counter_offload_stats, }; struct nft_expr_type nft_counter_type __read_mostly = { .name = "counter", .ops = &nft_counter_ops, .policy = nft_counter_policy, .maxattr = NFTA_COUNTER_MAX, .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; |
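/*
 * Illustrative sketch (not part of this expression): the counter above relies
 * on the per-CPU u64_stats pattern, with one writer per CPU and a
 * seqcount-style retry loop for readers, so 64-bit values cannot be torn on
 * 32-bit hosts. The stand-alone sketch below shows the same protocol; unlike
 * nft_counter, which shares one per-CPU u64_stats_sync across all counters,
 * it embeds the sync in the (hypothetical) "demo_stats" object itself, which
 * is the more common layout.
 */
#include <linux/bottom_half.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct demo_stats {
	u64_stats_t		packets;
	u64_stats_t		bytes;
	struct u64_stats_sync	syncp;
};

static void demo_stats_update(struct demo_stats __percpu *stats,
			      unsigned int len)
{
	struct demo_stats *s;

	/* Writers only touch their own CPU's copy, under BH protection. */
	local_bh_disable();
	s = this_cpu_ptr(stats);
	u64_stats_update_begin(&s->syncp);
	u64_stats_inc(&s->packets);
	u64_stats_add(&s->bytes, len);
	u64_stats_update_end(&s->syncp);
	local_bh_enable();
}

static void demo_stats_read(struct demo_stats __percpu *stats,
			    u64 *packets, u64 *bytes)
{
	int cpu;

	*packets = 0;
	*bytes = 0;
	for_each_possible_cpu(cpu) {
		struct demo_stats *s = per_cpu_ptr(stats, cpu);
		unsigned int seq;
		u64 p, b;

		/* Retry if a writer was active while this CPU's pair was read;
		 * on 64-bit the begin/retry helpers compile down to nothing. */
		do {
			seq = u64_stats_fetch_begin(&s->syncp);
			p = u64_stats_read(&s->packets);
			b = u64_stats_read(&s->bytes);
		} while (u64_stats_fetch_retry(&s->syncp, seq));

		*packets += p;
		*bytes += b;
	}
}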
92 1444 5 39 1396 1384 1396 1398 232 42 19 16 168 47 40 14 989 540 513 63 768 232 762 226 675 12 9 275 280 1 839 122 902 246 358 44 795 795 792 757 131 568 329 391 506 612 273 603 486 951 1 32 3 989 988 990 805 257 16 953 119 956 117 890 14 89 972 12 973 2 983 982 978 979 978 953 29 206 797 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/attr.c * * Copyright (C) 1991, 1992 Linus Torvalds * changes by Thomas Schoebel-Theuer */ #include <linux/export.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/sched/signal.h> #include <linux/capability.h> #include <linux/fsnotify.h> #include <linux/fcntl.h> #include <linux/filelock.h> #include <linux/security.h> /** * setattr_should_drop_sgid - determine whether the setgid bit needs to be * removed * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the setgid bit needs to be removed. * We retain backwards compatibility and require setgid bit to be removed * unconditionally if S_IXGRP is set. Otherwise we have the exact same * requirements as setattr_prepare() and setattr_copy(). * * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise. 
*/ int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode) { umode_t mode = inode->i_mode; if (!(mode & S_ISGID)) return 0; if (mode & S_IXGRP) return ATTR_KILL_SGID; if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) return ATTR_KILL_SGID; return 0; } EXPORT_SYMBOL(setattr_should_drop_sgid); /** * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to * be dropped * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the set{g,u}id bits need to be removed. * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both * set{g,u}id bits need to be removed the corresponding mask of both flags is * returned. * * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits * to remove, 0 otherwise. */ int setattr_should_drop_suidgid(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; int kill = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) kill = ATTR_KILL_SUID; kill |= setattr_should_drop_sgid(idmap, inode); if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) return kill; return 0; } EXPORT_SYMBOL(setattr_should_drop_suidgid); /** * chown_ok - verify permissions to chown inode * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsuid: uid to chown @inode to * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ static bool chown_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsuid_t ia_vfsuid) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid()) && vfsuid_eq(ia_vfsuid, vfsuid)) return true; if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsuid_valid(vfsuid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) return true; return false; } /** * chgrp_ok - verify permissions to chgrp inode * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsgid: gid to chown @inode to * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. 
*/ static bool chgrp_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t ia_vfsgid) { vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) { if (vfsgid_eq(ia_vfsgid, vfsgid)) return true; if (vfsgid_in_group_p(ia_vfsgid)) return true; } if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsgid_valid(vfsgid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) return true; return false; } /** * setattr_prepare - check if attribute changes to a dentry are allowed * @idmap: idmap of the mount the inode was found from * @dentry: dentry to check * @attr: attributes to change * * Check if we are allowed to change the attributes contained in @attr * in the given dentry. This includes the normal unix access permission * checks, as well as checks for rlimits and others. The function also clears * SGID bit from mode if user is not allowed to set it. Also file capabilities * and IMA extended attributes are cleared if ATTR_KILL_PRIV is set. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Should be called as the first thing in ->setattr implementations, * possibly after taking additional locks. */ int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); unsigned int ia_valid = attr->ia_valid; /* * First check size constraints. These can't be overriden using * ATTR_FORCE. */ if (ia_valid & ATTR_SIZE) { int error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; } /* If force is set do it anyway. */ if (ia_valid & ATTR_FORCE) goto kill_priv; /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && !chown_ok(idmap, inode, attr->ia_vfsuid)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && !chgrp_ok(idmap, inode, attr->ia_vfsgid)) return -EPERM; /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { vfsgid_t vfsgid; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ia_valid & ATTR_GID) vfsgid = attr->ia_vfsgid; else vfsgid = i_gid_into_vfsgid(idmap, inode); /* Also check the setgid bit! */ if (!in_group_or_capable(idmap, inode, vfsgid)) attr->ia_mode &= ~S_ISGID; } /* Check for setting the inode time. */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { if (!inode_owner_or_capable(idmap, inode)) return -EPERM; } kill_priv: /* User has permission for the change */ if (ia_valid & ATTR_KILL_PRIV) { int error; error = security_inode_killpriv(idmap, dentry); if (error) return error; } return 0; } EXPORT_SYMBOL(setattr_prepare); /** * inode_newsize_ok - may this inode be truncated to a given size * @inode: the inode to be truncated * @offset: the new size to assign to the inode * * inode_newsize_ok must be called with i_mutex held. * * inode_newsize_ok will check filesystem limits and ulimits to check that the * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ * when necessary. Caller must not proceed with inode size change if failure is * returned. @inode must be a file (not directory), with appropriate * permissions to allow truncate (inode_newsize_ok does NOT check these * conditions). 
* * Return: 0 on success, -ve errno on failure */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { if (offset < 0) return -EINVAL; if (inode->i_size < offset) { unsigned long limit; limit = rlimit(RLIMIT_FSIZE); if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) goto out_big; } else { /* * truncation of in-use swapfiles is disallowed - it would * cause subsequent swapout to scribble on the now-freed * blocks. */ if (IS_SWAPFILE(inode)) return -ETXTBSY; } return 0; out_sig: send_sig(SIGXFSZ, current, 0); out_big: return -EFBIG; } EXPORT_SYMBOL(inode_newsize_ok); /** * setattr_copy - copy simple metadata updates into the generic inode * @idmap: idmap of the mount the inode was found from * @inode: the inode to be updated * @attr: the new attributes * * setattr_copy must be called with i_mutex held. * * setattr_copy updates the inode's metadata with that specified * in attr on idmapped mounts. Necessary permission checks to determine * whether or not the S_ISGID property needs to be removed are performed with * the correct idmapped mount permission helpers. * Noticeably missing is inode size update, which is more complex * as it requires pagecache updates. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * The inode is not marked as dirty after this operation. The rationale is * that for "simple" filesystems, the struct inode is the inode storage. * The caller is free to mark the inode dirty afterwards if needed. */ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) inode_set_mtime_to_ts(inode, attr->ia_mtime); if (ia_valid & ATTR_CTIME) inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; inode->i_mode = mode; } } EXPORT_SYMBOL(setattr_copy); int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid) { int error; if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; } /* * If utimes(2) and friends are called with times == NULL (or both * times are UTIME_NOW), then we need to check for write permission */ if (ia_valid & ATTR_TOUCH) { if (IS_IMMUTABLE(inode)) return -EPERM; if (!inode_owner_or_capable(idmap, inode)) { error = inode_permission(idmap, inode, MAY_WRITE); if (error) return error; } } return 0; } EXPORT_SYMBOL(may_setattr); /** * notify_change - modify attributes of a filesystem object * @idmap: idmap of the mount the inode was found from * @dentry: object affected * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated * * The caller must hold the i_mutex on the affected object. * * If notify_change discovers a delegation in need of breaking, * it will return -EWOULDBLOCK and return a reference to the inode in * delegated_inode. The caller should then break the delegation and * retry. 
Because breaking a delegation may take a long time, the * caller should drop the i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. Also, passing NULL is fine for callers holding * the file open for write, as there can be no conflicting delegation in * that case. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; int error; struct timespec64 now; unsigned int ia_valid = attr->ia_valid; WARN_ON_ONCE(!inode_is_locked(inode)); error = may_setattr(idmap, inode, ia_valid); if (error) return error; if ((ia_valid & ATTR_MODE)) { /* * Don't allow changing the mode of symlinks: * * (1) The vfs doesn't take the mode of symlinks into account * during permission checking. * (2) This has never worked correctly. Most major filesystems * did return EOPNOTSUPP due to interactions with POSIX ACLs * but did still updated the mode of the symlink. * This inconsistency led system call wrapper providers such * as libc to block changing the mode of symlinks with * EOPNOTSUPP already. * (3) To even do this in the first place one would have to use * specific file descriptors and quite some effort. */ if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; /* Flag setting protected by i_mutex */ if (is_sxid(attr->ia_mode)) inode->i_flags &= ~S_NOSEC; } now = current_time(inode); attr->ia_ctime = now; if (!(ia_valid & ATTR_ATIME_SET)) attr->ia_atime = now; else attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); if (!(ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = now; else attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); if (ia_valid & ATTR_KILL_PRIV) { error = security_inode_need_killpriv(dentry); if (error < 0) return error; if (error == 0) ia_valid = attr->ia_valid &= ~ATTR_KILL_PRIV; } /* * We now pass ATTR_KILL_S*ID to the lower level setattr function so * that the function has the ability to reinterpret a mode change * that's due to these bits. This adds an implicit restriction that * no function will ever call notify_change with both ATTR_MODE and * ATTR_KILL_S*ID set. */ if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && (ia_valid & ATTR_MODE)) BUG(); if (ia_valid & ATTR_KILL_SUID) { if (mode & S_ISUID) { ia_valid = attr->ia_valid |= ATTR_MODE; attr->ia_mode = (inode->i_mode & ~S_ISUID); } } if (ia_valid & ATTR_KILL_SGID) { if (mode & S_ISGID) { if (!(ia_valid & ATTR_MODE)) { ia_valid = attr->ia_valid |= ATTR_MODE; attr->ia_mode = inode->i_mode; } attr->ia_mode &= ~S_ISGID; } } if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) return 0; /* * Verify that uid/gid changes are valid in the target * namespace of the superblock. */ if (ia_valid & ATTR_UID && !vfsuid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsuid)) return -EOVERFLOW; if (ia_valid & ATTR_GID && !vfsgid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsgid)) return -EOVERFLOW; /* Don't allow modifications of files with invalid uids or * gids unless those uids & gids are being made valid. 
*/ if (!(ia_valid & ATTR_UID) && !vfsuid_valid(i_uid_into_vfsuid(idmap, inode))) return -EOVERFLOW; if (!(ia_valid & ATTR_GID) && !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; error = security_inode_setattr(idmap, dentry, attr); if (error) return error; /* * If ATTR_DELEG is set, then these attributes are being set on * behalf of the holder of a write delegation. We want to avoid * breaking the delegation in this case. */ if (!(ia_valid & ATTR_DELEG)) { error = try_break_deleg(inode, delegated_inode); if (error) return error; } if (inode->i_op->setattr) error = inode->i_op->setattr(idmap, dentry, attr); else error = simple_setattr(idmap, dentry, attr); if (!error) { fsnotify_change(dentry, ia_valid); security_inode_post_setattr(idmap, dentry, ia_valid); } return error; } EXPORT_SYMBOL(notify_change);
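/*
 * A minimal usage sketch of the helpers above, for a hypothetical filesystem:
 * it shows the intended calling order of setattr_prepare(), the size change,
 * and setattr_copy(), with the caller marking the inode dirty afterwards.
 * The name example_setattr is illustrative only; the body mirrors what
 * simple_setattr() does for filesystems that provide no ->setattr of their own.
 */
static int example_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			   struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	/* Permission, limit and SGID checks; must run before any change. */
	error = setattr_prepare(idmap, dentry, attr);
	if (error)
		return error;

	/* Size changes are applied separately, as they touch the pagecache. */
	if (attr->ia_valid & ATTR_SIZE)
		truncate_setsize(inode, attr->ia_size);

	/* Copy the remaining simple attributes into the generic inode. */
	setattr_copy(idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}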
// SPDX-License-Identifier: GPL-2.0 #include <linux/sizes.h> #include <linux/list_sort.h> #include "misc.h" #include "ctree.h" #include "block-group.h" #include "space-info.h" #include "disk-io.h" #include "free-space-cache.h" #include "free-space-tree.h" #include "volumes.h" #include "transaction.h" #include "ref-verify.h" #include "sysfs.h" #include "tree-log.h" #include "delalloc-space.h" #include "discard.h" #include "raid56.h" #include "zoned.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #ifdef CONFIG_BTRFS_DEBUG int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) && block_group->flags & BTRFS_BLOCK_GROUP_METADATA) || (btrfs_test_opt(fs_info, FRAGMENT_DATA) && block_group->flags & BTRFS_BLOCK_GROUP_DATA); } #endif /* * Return target flags in extended format or 0 if restripe for this chunk_type * is not in progress * * Should be called with balance_lock held */ static u64 get_restripe_target(const struct btrfs_fs_info *fs_info, u64 flags) { const struct btrfs_balance_control *bctl = fs_info->balance_ctl; u64 target = 0; if (!bctl) return 0; if (flags & BTRFS_BLOCK_GROUP_DATA && bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; } else if (flags & BTRFS_BLOCK_GROUP_METADATA && bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; } return target; } /* * @flags: available profiles in extended format (see ctree.h) * * Return reduced profile in chunk format. If profile changing is in progress * (either running or paused) picks the target profile (if it's already * available), otherwise falls back to plain reducing.
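 *
 * For example, following the selection order in the function body below:
 * with two writable devices and @flags containing BTRFS_BLOCK_GROUP_DATA,
 * RAID1 and RAID0, both RAID1 and RAID0 remain allowed, the more redundant
 * RAID1 is picked, and the reduced result is DATA|RAID1 converted to chunk
 * format.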
*/ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags) { u64 num_devices = fs_info->fs_devices->rw_devices; u64 target; u64 raid_type; u64 allowed = 0; /* * See if restripe for this chunk_type is in progress, if so try to * reduce to the target profile */ spin_lock(&fs_info->balance_lock); target = get_restripe_target(fs_info, flags); if (target) { spin_unlock(&fs_info->balance_lock); return extended_to_chunk(target); } spin_unlock(&fs_info->balance_lock); /* First, mask out the RAID levels which aren't possible */ for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) { if (num_devices >= btrfs_raid_array[raid_type].devs_min) allowed |= btrfs_raid_array[raid_type].bg_flag; } allowed &= flags; /* Select the highest-redundancy RAID level. */ if (allowed & BTRFS_BLOCK_GROUP_RAID1C4) allowed = BTRFS_BLOCK_GROUP_RAID1C4; else if (allowed & BTRFS_BLOCK_GROUP_RAID6) allowed = BTRFS_BLOCK_GROUP_RAID6; else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3) allowed = BTRFS_BLOCK_GROUP_RAID1C3; else if (allowed & BTRFS_BLOCK_GROUP_RAID5) allowed = BTRFS_BLOCK_GROUP_RAID5; else if (allowed & BTRFS_BLOCK_GROUP_RAID10) allowed = BTRFS_BLOCK_GROUP_RAID10; else if (allowed & BTRFS_BLOCK_GROUP_RAID1) allowed = BTRFS_BLOCK_GROUP_RAID1; else if (allowed & BTRFS_BLOCK_GROUP_DUP) allowed = BTRFS_BLOCK_GROUP_DUP; else if (allowed & BTRFS_BLOCK_GROUP_RAID0) allowed = BTRFS_BLOCK_GROUP_RAID0; flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK; return extended_to_chunk(flags | allowed); } u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags) { unsigned seq; u64 flags; do { flags = orig_flags; seq = read_seqbegin(&fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) flags |= fs_info->avail_data_alloc_bits; else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) flags |= fs_info->avail_system_alloc_bits; else if (flags & BTRFS_BLOCK_GROUP_METADATA) flags |= fs_info->avail_metadata_alloc_bits; } while (read_seqretry(&fs_info->profiles_lock, seq)); return btrfs_reduce_alloc_profile(fs_info, flags); } void btrfs_get_block_group(struct btrfs_block_group *cache) { refcount_inc(&cache->refs); } void btrfs_put_block_group(struct btrfs_block_group *cache) { if (refcount_dec_and_test(&cache->refs)) { WARN_ON(cache->pinned > 0); /* * If there was a failure to cleanup a log tree, very likely due * to an IO failure on a writeback attempt of one or more of its * extent buffers, we could not do proper (and cheap) unaccounting * of their reserved space, so don't warn on reserved > 0 in that * case. */ if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) || !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info)) WARN_ON(cache->reserved > 0); /* * A block_group shouldn't be on the discard_list anymore. * Remove the block_group from the discard_list to prevent us * from causing a panic due to NULL pointer dereference. 
*/ if (WARN_ON(!list_empty(&cache->discard_list))) btrfs_discard_cancel_work(&cache->fs_info->discard_ctl, cache); kfree(cache->free_space_ctl); btrfs_free_chunk_map(cache->physical_map); kfree(cache); } } /* * This adds the block group to the fs_info rb tree for the block group cache */ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, struct btrfs_block_group *block_group) { struct rb_node **p; struct rb_node *parent = NULL; struct btrfs_block_group *cache; bool leftmost = true; ASSERT(block_group->length != 0); write_lock(&info->block_group_cache_lock); p = &info->block_group_cache_tree.rb_root.rb_node; while (*p) { parent = *p; cache = rb_entry(parent, struct btrfs_block_group, cache_node); if (block_group->start < cache->start) { p = &(*p)->rb_left; } else if (block_group->start > cache->start) { p = &(*p)->rb_right; leftmost = false; } else { write_unlock(&info->block_group_cache_lock); return -EEXIST; } } rb_link_node(&block_group->cache_node, parent, p); rb_insert_color_cached(&block_group->cache_node, &info->block_group_cache_tree, leftmost); write_unlock(&info->block_group_cache_lock); return 0; } /* * This will return the block group at or after bytenr if contains is 0, else * it will return the block group that contains the bytenr */ static struct btrfs_block_group *block_group_cache_tree_search( struct btrfs_fs_info *info, u64 bytenr, int contains) { struct btrfs_block_group *cache, *ret = NULL; struct rb_node *n; u64 end, start; read_lock(&info->block_group_cache_lock); n = info->block_group_cache_tree.rb_root.rb_node; while (n) { cache = rb_entry(n, struct btrfs_block_group, cache_node); end = cache->start + cache->length - 1; start = cache->start; if (bytenr < start) { if (!contains && (!ret || start < ret->start)) ret = cache; n = n->rb_left; } else if (bytenr > start) { if (contains && bytenr <= end) { ret = cache; break; } n = n->rb_right; } else { ret = cache; break; } } if (ret) btrfs_get_block_group(ret); read_unlock(&info->block_group_cache_lock); return ret; } /* * Return the block group that starts at or after bytenr */ struct btrfs_block_group *btrfs_lookup_first_block_group( struct btrfs_fs_info *info, u64 bytenr) { return block_group_cache_tree_search(info, bytenr, 0); } /* * Return the block group that contains the given bytenr */ struct btrfs_block_group *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr) { return block_group_cache_tree_search(info, bytenr, 1); } struct btrfs_block_group *btrfs_next_block_group( struct btrfs_block_group *cache) { struct btrfs_fs_info *fs_info = cache->fs_info; struct rb_node *node; read_lock(&fs_info->block_group_cache_lock); /* If our block group was removed, we need a full search. */ if (RB_EMPTY_NODE(&cache->cache_node)) { const u64 next_bytenr = cache->start + cache->length; read_unlock(&fs_info->block_group_cache_lock); btrfs_put_block_group(cache); return btrfs_lookup_first_block_group(fs_info, next_bytenr); } node = rb_next(&cache->cache_node); btrfs_put_block_group(cache); if (node) { cache = rb_entry(node, struct btrfs_block_group, cache_node); btrfs_get_block_group(cache); } else cache = NULL; read_unlock(&fs_info->block_group_cache_lock); return cache; } /* * Check if we can do a NOCOW write for a given extent. * * @fs_info: The filesystem information object. * @bytenr: Logical start address of the extent. 
* * Check if we can do a NOCOW write for the given extent, and increment the * number of NOCOW writers in the block group that contains the extent, as long * as the block group exists and it's currently not in read-only mode. * * Returns: A non-NULL block group pointer if we can do a NOCOW write; the caller * is responsible for calling btrfs_dec_nocow_writers() later. * * Or NULL if we can not do a NOCOW write */ struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_block_group *bg; bool can_nocow = true; bg = btrfs_lookup_block_group(fs_info, bytenr); if (!bg) return NULL; spin_lock(&bg->lock); if (bg->ro) can_nocow = false; else atomic_inc(&bg->nocow_writers); spin_unlock(&bg->lock); if (!can_nocow) { btrfs_put_block_group(bg); return NULL; } /* No put on block group, done by btrfs_dec_nocow_writers(). */ return bg; } /* * Decrement the number of NOCOW writers in a block group. * * This is meant to be called after a previous call to btrfs_inc_nocow_writers(), * and on the block group returned by that call. Typically this is called after * creating an ordered extent for a NOCOW write, to prevent races with scrub and * relocation. * * After this call, the caller should not use the block group anymore. If it wants * to use it, then it should get a reference on it before calling this function. */ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg) { if (atomic_dec_and_test(&bg->nocow_writers)) wake_up_var(&bg->nocow_writers); /* For the lookup done by a previous call to btrfs_inc_nocow_writers(). */ btrfs_put_block_group(bg); } void btrfs_wait_nocow_writers(struct btrfs_block_group *bg) { wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); } void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, const u64 start) { struct btrfs_block_group *bg; bg = btrfs_lookup_block_group(fs_info, start); ASSERT(bg); if (atomic_dec_and_test(&bg->reservations)) wake_up_var(&bg->reservations); btrfs_put_block_group(bg); } void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg) { struct btrfs_space_info *space_info = bg->space_info; ASSERT(bg->ro); if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) return; /* * Our block group is read only but before we set it to read only, * some task might have already allocated an extent from it, but it * has not yet created a respective ordered extent (and added it to a * root's list of ordered extents). * Therefore wait for any task currently allocating extents, since the * block group's reservations counter is incremented while a read lock * on the groups' semaphore is held and decremented after releasing * the read access on that semaphore and creating the ordered extent. */ down_write(&space_info->groups_sem); up_write(&space_info->groups_sem); wait_var_event(&bg->reservations, !atomic_read(&bg->reservations)); } struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache) { struct btrfs_caching_control *ctl; spin_lock(&cache->lock); if (!cache->caching_ctl) { spin_unlock(&cache->lock); return NULL; } ctl = cache->caching_ctl; refcount_inc(&ctl->count); spin_unlock(&cache->lock); return ctl; } static void btrfs_put_caching_control(struct btrfs_caching_control *ctl) { if (refcount_dec_and_test(&ctl->count)) kfree(ctl); } /* * When we wait for progress in the block group caching, it's because our * allocation attempt failed at least once. So, we must sleep and let some * progress happen before we try again.
* * This function will sleep at least once waiting for new free space to show * up, and then it will check the block group free space numbers for our min * num_bytes. Another option is to have it go ahead and look in the rbtree for * a free extent of a given size, but this is a good start. * * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using * any of the information in this block group. */ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes) { struct btrfs_caching_control *caching_ctl; int progress; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return; /* * We've already failed to allocate from this block group, so even if * there's enough space in the block group it isn't contiguous enough to * allow for an allocation, so wait for at least the next wakeup tick, * or for the thing to be done. */ progress = atomic_read(&caching_ctl->progress); wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || (progress != atomic_read(&caching_ctl->progress) && (cache->free_space_ctl->free_space >= num_bytes))); btrfs_put_caching_control(caching_ctl); } static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, struct btrfs_caching_control *caching_ctl) { wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; } static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) { struct btrfs_caching_control *caching_ctl; int ret; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); btrfs_put_caching_control(caching_ctl); return ret; } #ifdef CONFIG_BTRFS_DEBUG static void fragment_free_space(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; u64 start = block_group->start; u64 len = block_group->length; u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ? fs_info->nodesize : fs_info->sectorsize; u64 step = chunk << 1; while (len > chunk) { btrfs_remove_free_space(block_group, start, chunk); start += step; if (len < step) len = 0; else len -= step; } } #endif /* * Add a free space range to the in memory free space cache of a block group. * This checks if the range contains super block locations and any such * locations are not added to the free space cache. * * @block_group: The target block group. * @start: Start offset of the range. * @end: End offset of the range (exclusive). * @total_added_ret: Optional pointer to return the total amount of space * added to the block group's free space cache. * * Returns 0 on success or < 0 on error. 
*/ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, u64 *total_added_ret) { struct btrfs_fs_info *info = block_group->fs_info; u64 extent_start, extent_end, size; int ret; if (total_added_ret) *total_added_ret = 0; while (start < end) { if (!find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, EXTENT_DIRTY | EXTENT_UPTODATE, NULL)) break; if (extent_start <= start) { start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); if (ret) return ret; if (total_added_ret) *total_added_ret += size; start = extent_end + 1; } else { break; } } if (start < end) { size = end - start; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); if (ret) return ret; if (total_added_ret) *total_added_ret += size; } return 0; } /* * Get an arbitrary extent item index / max_index through the block group * * @block_group the block group to sample from * @index: the integral step through the block group to grab from * @max_index: the granularity of the sampling * @key: return value parameter for the item we find * * Pre-conditions on indices: * 0 <= index <= max_index * 0 < max_index * * Returns: 0 on success, 1 if the search didn't yield a useful item, negative * error code on error. */ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group, int index, int max_index, struct btrfs_key *found_key) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *extent_root; u64 search_offset; u64 search_end = block_group->start + block_group->length; struct btrfs_path *path; struct btrfs_key search_key; int ret = 0; ASSERT(index >= 0); ASSERT(index <= max_index); ASSERT(max_index > 0); lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); path = btrfs_alloc_path(); if (!path) return -ENOMEM; extent_root = btrfs_extent_root(fs_info, max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET)); path->skip_locking = 1; path->search_commit_root = 1; path->reada = READA_FORWARD; search_offset = index * div_u64(block_group->length, max_index); search_key.objectid = block_group->start + search_offset; search_key.type = BTRFS_EXTENT_ITEM_KEY; search_key.offset = 0; btrfs_for_each_slot(extent_root, &search_key, found_key, path, ret) { /* Success; sampled an extent item in the block group */ if (found_key->type == BTRFS_EXTENT_ITEM_KEY && found_key->objectid >= block_group->start && found_key->objectid + found_key->offset <= search_end) break; /* We can't possibly find a valid extent item anymore */ if (found_key->objectid >= search_end) { ret = 1; break; } } lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); btrfs_free_path(path); return ret; } /* * Best effort attempt to compute a block group's size class while caching it. * * @block_group: the block group we are caching * * We cannot infer the size class while adding free space extents, because that * logic doesn't care about contiguous file extents (it doesn't differentiate * between a 100M extent and 100 contiguous 1M extents). So we need to read the * file extent items. Reading all of them is quite wasteful, because usually * only a handful are enough to give a good answer. Therefore, we just grab 5 of * them at even steps through the block group and pick the smallest size class * we see. 
Since size class is best effort, and not guaranteed in general, * inaccuracy is acceptable. * * To be more explicit about why this algorithm makes sense: * * If we are caching in a block group from disk, then there are three major cases * to consider: * 1. the block group is well behaved and all extents in it are the same size * class. * 2. the block group is mostly one size class with rare exceptions for last * ditch allocations * 3. the block group was populated before size classes and can have a totally * arbitrary mix of size classes. * * In case 1, looking at any extent in the block group will yield the correct * result. For the mixed cases, taking the minimum size class seems like a good * approximation, since gaps from frees will be usable to the size class. For * 2., a small handful of file extents is likely to yield the right answer. For * 3, we can either read every file extent, or admit that this is best effort * anyway and try to stay fast. * * Returns: 0 on success, negative error code on error. */ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_key key; int i; u64 min_size = block_group->length; enum btrfs_block_group_size_class size_class = BTRFS_BG_SZ_NONE; int ret; if (!btrfs_block_group_should_use_size_class(block_group)) return 0; lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); for (i = 0; i < 5; ++i) { ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key); if (ret < 0) goto out; if (ret > 0) continue; min_size = min_t(u64, min_size, key.offset); size_class = btrfs_calc_block_group_size_class(min_size); } if (size_class != BTRFS_BG_SZ_NONE) { spin_lock(&block_group->lock); block_group->size_class = size_class; spin_unlock(&block_group->lock); } out: return ret; } static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) { struct btrfs_block_group *block_group = caching_ctl->block_group; struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *extent_root; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_key key; u64 total_found = 0; u64 last = 0; u32 nritems; int ret; bool wakeup = true; path = btrfs_alloc_path(); if (!path) return -ENOMEM; last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET); extent_root = btrfs_extent_root(fs_info, last); #ifdef CONFIG_BTRFS_DEBUG /* * If we're fragmenting we don't want to make anybody think we can * allocate from this block group until we've had a chance to fragment * the free space. */ if (btrfs_should_fragment_free_space(block_group)) wakeup = false; #endif /* * We don't want to deadlock with somebody trying to allocate a new * extent for the extent root while also trying to search the extent * root to add free space. 
So we skip locking and search the commit * root, since its read-only */ path->skip_locking = 1; path->search_commit_root = 1; path->reada = READA_FORWARD; key.objectid = last; key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); while (1) { if (btrfs_fs_closing(fs_info) > 1) { last = (u64)-1; break; } if (path->slots[0] < nritems) { btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); } else { ret = btrfs_find_next_key(extent_root, path, &key, 0, 0); if (ret) break; if (need_resched() || rwsem_is_contended(&fs_info->commit_root_sem)) { btrfs_release_path(path); up_read(&fs_info->commit_root_sem); mutex_unlock(&caching_ctl->mutex); cond_resched(); mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); goto next; } ret = btrfs_next_leaf(extent_root, path); if (ret < 0) goto out; if (ret) break; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); continue; } if (key.objectid < last) { key.objectid = last; key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; btrfs_release_path(path); goto next; } if (key.objectid < block_group->start) { path->slots[0]++; continue; } if (key.objectid >= block_group->start + block_group->length) break; if (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY) { u64 space_added; ret = btrfs_add_new_free_space(block_group, last, key.objectid, &space_added); if (ret) goto out; total_found += space_added; if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + fs_info->nodesize; else last = key.objectid + key.offset; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; if (wakeup) { atomic_inc(&caching_ctl->progress); wake_up(&caching_ctl->wait); } } } path->slots[0]++; } ret = btrfs_add_new_free_space(block_group, last, block_group->start + block_group->length, NULL); out: btrfs_free_path(path); return ret; } static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg) { clear_extent_bits(&bg->fs_info->excluded_extents, bg->start, bg->start + bg->length - 1, EXTENT_UPTODATE); } static noinline void caching_thread(struct btrfs_work *work) { struct btrfs_block_group *block_group; struct btrfs_fs_info *fs_info; struct btrfs_caching_control *caching_ctl; int ret; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; fs_info = block_group->fs_info; mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); load_block_group_size_class(caching_ctl, block_group); if (btrfs_test_opt(fs_info, SPACE_CACHE)) { ret = load_free_space_cache(block_group); if (ret == 1) { ret = 0; goto done; } /* * We failed to load the space cache, set ourselves to * CACHE_STARTED and carry on. */ spin_lock(&block_group->lock); block_group->cached = BTRFS_CACHE_STARTED; spin_unlock(&block_group->lock); wake_up(&caching_ctl->wait); } /* * If we are in the transaction that populated the free space tree we * can't actually cache from the free space tree as our commit root and * real root are the same, so we could change the contents of the blocks * while caching. Instead do the slow caching in this case, and after * the transaction has committed we will be safe. 
*/ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags))) ret = load_free_space_tree(caching_ctl); else ret = load_extent_tree_free(caching_ctl); done: spin_lock(&block_group->lock); block_group->caching_ctl = NULL; block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED; spin_unlock(&block_group->lock); #ifdef CONFIG_BTRFS_DEBUG if (btrfs_should_fragment_free_space(block_group)) { u64 bytes_used; spin_lock(&block_group->space_info->lock); spin_lock(&block_group->lock); bytes_used = block_group->length - block_group->used; block_group->space_info->bytes_used += bytes_used >> 1; spin_unlock(&block_group->lock); spin_unlock(&block_group->space_info->lock); fragment_free_space(block_group); } #endif up_read(&fs_info->commit_root_sem); btrfs_free_excluded_extents(block_group); mutex_unlock(&caching_ctl->mutex); wake_up(&caching_ctl->wait); btrfs_put_caching_control(caching_ctl); btrfs_put_block_group(block_group); } int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl = NULL; int ret = 0; /* Allocator for zoned filesystems does not use the cache at all */ if (btrfs_is_zoned(fs_info)) return 0; caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); if (!caching_ctl) return -ENOMEM; INIT_LIST_HEAD(&caching_ctl->list); mutex_init(&caching_ctl->mutex); init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; refcount_set(&caching_ctl->count, 2); atomic_set(&caching_ctl->progress, 0); btrfs_init_work(&caching_ctl->work, caching_thread, NULL); spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { kfree(caching_ctl); caching_ctl = cache->caching_ctl; if (caching_ctl) refcount_inc(&caching_ctl->count); spin_unlock(&cache->lock); goto out; } WARN_ON(cache->caching_ctl); cache->caching_ctl = caching_ctl; cache->cached = BTRFS_CACHE_STARTED; spin_unlock(&cache->lock); write_lock(&fs_info->block_group_cache_lock); refcount_inc(&caching_ctl->count); list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); write_unlock(&fs_info->block_group_cache_lock); btrfs_get_block_group(cache); btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: if (wait && caching_ctl) ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); if (caching_ctl) btrfs_put_caching_control(caching_ctl); return ret; } static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = chunk_to_extended(flags) & BTRFS_EXTENDED_PROFILE_MASK; write_seqlock(&fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits &= ~extra_flags; if (flags & BTRFS_BLOCK_GROUP_METADATA) fs_info->avail_metadata_alloc_bits &= ~extra_flags; if (flags & BTRFS_BLOCK_GROUP_SYSTEM) fs_info->avail_system_alloc_bits &= ~extra_flags; write_sequnlock(&fs_info->profiles_lock); } /* * Clear incompat bits for the following feature(s): * * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group * in the whole filesystem * * - RAID1C34 - same as above for RAID1C3 and RAID1C4 block groups */ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) { bool found_raid56 = false; bool found_raid1c34 = false; if ((flags & BTRFS_BLOCK_GROUP_RAID56_MASK) || (flags & BTRFS_BLOCK_GROUP_RAID1C3) || (flags & BTRFS_BLOCK_GROUP_RAID1C4)) { struct list_head *head = &fs_info->space_info; struct btrfs_space_info *sinfo; list_for_each_entry_rcu(sinfo, head, list) 
{ down_read(&sinfo->groups_sem); if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5])) found_raid56 = true; if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6])) found_raid56 = true; if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3])) found_raid1c34 = true; if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4])) found_raid1c34 = true; up_read(&sinfo->groups_sem); } if (!found_raid56) btrfs_clear_fs_incompat(fs_info, RAID56); if (!found_raid1c34) btrfs_clear_fs_incompat(fs_info, RAID1C34); } } static struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) { if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) return fs_info->block_group_root; return btrfs_extent_root(fs_info, 0); } static int remove_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root; struct btrfs_key key; int ret; root = btrfs_block_group_root(fs_info); key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) ret = -ENOENT; if (ret < 0) return ret; ret = btrfs_del_item(trans, root, path); return ret; } int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_path *path; struct btrfs_block_group *block_group; struct btrfs_free_cluster *cluster; struct inode *inode; struct kobject *kobj = NULL; int ret; int index; int factor; struct btrfs_caching_control *caching_ctl = NULL; bool remove_map; bool remove_rsv = false; block_group = btrfs_lookup_block_group(fs_info, map->start); if (!block_group) return -ENOENT; BUG_ON(!block_group->ro); trace_btrfs_remove_block_group(block_group); /* * Free the reserved super bytes from this block group before * remove it. 
*/ btrfs_free_excluded_extents(block_group); btrfs_free_ref_tree_range(fs_info, block_group->start, block_group->length); index = btrfs_bg_flags_to_raid_index(block_group->flags); factor = btrfs_bg_type_to_factor(block_group->flags); /* make sure this block group isn't part of an allocation cluster */ cluster = &fs_info->data_alloc_cluster; spin_lock(&cluster->refill_lock); btrfs_return_cluster_to_free_space(block_group, cluster); spin_unlock(&cluster->refill_lock); /* * make sure this block group isn't part of a metadata * allocation cluster */ cluster = &fs_info->meta_alloc_cluster; spin_lock(&cluster->refill_lock); btrfs_return_cluster_to_free_space(block_group, cluster); spin_unlock(&cluster->refill_lock); btrfs_clear_treelog_bg(block_group); btrfs_clear_data_reloc_bg(block_group); path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } /* * get the inode first so any iput calls done for the io_list * aren't the final iput (no unlinks allowed now) */ inode = lookup_free_space_inode(block_group, path); mutex_lock(&trans->transaction->cache_write_mutex); /* * Make sure our free space cache IO is done before removing the * free space inode */ spin_lock(&trans->transaction->dirty_bgs_lock); if (!list_empty(&block_group->io_list)) { list_del_init(&block_group->io_list); WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode); spin_unlock(&trans->transaction->dirty_bgs_lock); btrfs_wait_cache_io(trans, block_group, path); btrfs_put_block_group(block_group); spin_lock(&trans->transaction->dirty_bgs_lock); } if (!list_empty(&block_group->dirty_list)) { list_del_init(&block_group->dirty_list); remove_rsv = true; btrfs_put_block_group(block_group); } spin_unlock(&trans->transaction->dirty_bgs_lock); mutex_unlock(&trans->transaction->cache_write_mutex); ret = btrfs_remove_free_space_inode(trans, inode, block_group); if (ret) goto out; write_lock(&fs_info->block_group_cache_lock); rb_erase_cached(&block_group->cache_node, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); /* Once for the block groups rbtree */ btrfs_put_block_group(block_group); write_unlock(&fs_info->block_group_cache_lock); down_write(&block_group->space_info->groups_sem); /* * we must use list_del_init so people can check to see if they * are still on the list after taking the semaphore */ list_del_init(&block_group->list); if (list_empty(&block_group->space_info->block_groups[index])) { kobj = block_group->space_info->block_group_kobjs[index]; block_group->space_info->block_group_kobjs[index] = NULL; clear_avail_alloc_bits(fs_info, block_group->flags); } up_write(&block_group->space_info->groups_sem); clear_incompat_bg_bits(fs_info, block_group->flags); if (kobj) { kobject_del(kobj); kobject_put(kobj); } if (block_group->cached == BTRFS_CACHE_STARTED) btrfs_wait_block_group_cache_done(block_group); write_lock(&fs_info->block_group_cache_lock); caching_ctl = btrfs_get_caching_control(block_group); if (!caching_ctl) { struct btrfs_caching_control *ctl; list_for_each_entry(ctl, &fs_info->caching_block_groups, list) { if (ctl->block_group == block_group) { caching_ctl = ctl; refcount_inc(&caching_ctl->count); break; } } } if (caching_ctl) list_del_init(&caching_ctl->list); write_unlock(&fs_info->block_group_cache_lock); if (caching_ctl) { /* Once for the caching bgs list and once for us. 
*/ btrfs_put_caching_control(caching_ctl); btrfs_put_caching_control(caching_ctl); } spin_lock(&trans->transaction->dirty_bgs_lock); WARN_ON(!list_empty(&block_group->dirty_list)); WARN_ON(!list_empty(&block_group->io_list)); spin_unlock(&trans->transaction->dirty_bgs_lock); btrfs_remove_free_space_cache(block_group); spin_lock(&block_group->space_info->lock); list_del_init(&block_group->ro_list); if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { WARN_ON(block_group->space_info->total_bytes < block_group->length); WARN_ON(block_group->space_info->bytes_readonly < block_group->length - block_group->zone_unusable); WARN_ON(block_group->space_info->bytes_zone_unusable < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); } block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); /* * Remove the free space for the block group from the free space tree * and the block group's item from the extent tree before marking the * block group as removed. This is to prevent races with tasks that * freeze and unfreeze a block group, this task and another task * allocating a new block group - the unfreeze task ends up removing * the block group's extent map before the task calling this function * deletes the block group item from the extent tree, allowing for * another task to attempt to create another block group with the same * item key (and failing with -EEXIST and a transaction abort). */ ret = remove_block_group_free_space(trans, block_group); if (ret) goto out; ret = remove_block_group_item(trans, path, block_group); if (ret < 0) goto out; spin_lock(&block_group->lock); set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags); /* * At this point trimming or scrub can't start on this block group, * because we removed the block group from the rbtree * fs_info->block_group_cache_tree so no one can't find it anymore and * even if someone already got this block group before we removed it * from the rbtree, they have already incremented block_group->frozen - * if they didn't, for the trimming case they won't find any free space * entries because we already removed them all when we called * btrfs_remove_free_space_cache(). * * And we must not remove the chunk map from the fs_info->mapping_tree * to prevent the same logical address range and physical device space * ranges from being reused for a new block group. This is needed to * avoid races with trimming and scrub. * * An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is * completely transactionless, so while it is trimming a range the * currently running transaction might finish and a new one start, * allowing for new block groups to be created that can reuse the same * physical device locations unless we take this special care. * * There may also be an implicit trim operation if the file system * is mounted with -odiscard. The same protections must remain * in place until the extents have been discarded completely when * the transaction commit has completed. 
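* To summarize the ordering enforced here: the free space tree entries and the block group item are deleted first, while the in-memory chunk map is only dropped below once block_group->frozen has reached zero, so a frozen user (trim or scrub) keeps a stable view of this logical range until it unfreezes.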
*/ remove_map = (atomic_read(&block_group->frozen) == 0); spin_unlock(&block_group->lock); if (remove_map) btrfs_remove_chunk_map(fs_info, map); out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); if (remove_rsv) btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); btrfs_free_path(path); return ret; } struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset) { struct btrfs_root *root = btrfs_block_group_root(fs_info); struct btrfs_chunk_map *map; unsigned int num_items; map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); ASSERT(map != NULL); ASSERT(map->start == chunk_offset); /* * We need to reserve 3 + N units from the metadata space info in order * to remove a block group (done at btrfs_remove_chunk() and at * btrfs_remove_block_group()), which are used for: * * 1 unit for adding the free space inode's orphan (located in the tree * of tree roots). * 1 unit for deleting the block group item (located in the extent * tree). * 1 unit for deleting the free space item (located in tree of tree * roots). * N units for deleting N device extent items corresponding to each * stripe (located in the device tree). * * In order to remove a block group we also need to reserve units in the * system space info in order to update the chunk tree (update one or * more device items and remove one chunk item), but this is done at * btrfs_remove_chunk() through a call to check_system_chunk(). */ num_items = 3 + map->num_stripes; btrfs_free_chunk_map(map); return btrfs_start_transaction_fallback_global_rsv(root, num_items); } /* * Mark block group @cache read-only, so later writes won't happen to block * group @cache. * * If @force is not set, this function will only mark the block group readonly * if we have enough free space (1M) in other metadata/system block groups. * If @force is set, this function will mark the block group readonly * without checking free space. * * NOTE: This function doesn't care if other block groups can contain all the * data in this block group. That check should be done by the relocation * routine, not this function. */ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) { struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; int ret = -ENOSPC; spin_lock(&sinfo->lock); spin_lock(&cache->lock); if (cache->swap_extents) { ret = -ETXTBSY; goto out; } if (cache->ro) { cache->ro++; ret = 0; goto out; } num_bytes = cache->length - cache->reserved - cache->pinned - cache->bytes_super - cache->zone_unusable - cache->used; /* * Data never overcommits, even in mixed mode, so do just the straight * check of left over space in how much we have allocated. */ if (force) { ret = 0; } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) { u64 sinfo_used = btrfs_space_info_used(sinfo, true); /* * Here we make sure if we mark this bg RO, we still have enough * free space as buffer. */ if (sinfo_used + num_bytes <= sinfo->total_bytes) ret = 0; } else { /* * We overcommit metadata, so we need to do the * btrfs_can_overcommit check here, and we need to pass in * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of * leeway to allow us to mark this block group as read only.
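* As a rough example: with 10GiB of total space and 9GiB reported used, a data block group still holding 2GiB of unallocated space (num_bytes) fails the strict data check above, since 9GiB + 2GiB > 10GiB, while a metadata block group of the same size may still be flipped read-only here as long as btrfs_can_overcommit() accepts those 2GiB.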
*/ if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes, BTRFS_RESERVE_NO_FLUSH)) ret = 0; } if (!ret) { sinfo->bytes_readonly += num_bytes; if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; list_add_tail(&cache->ro_list, &sinfo->ro_bgs); } out: spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) { btrfs_info(cache->fs_info, "unable to make block group %llu ro", cache->start); btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); } return ret; } static bool clean_pinned_extents(struct btrfs_trans_handle *trans, const struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_transaction *prev_trans = NULL; const u64 start = bg->start; const u64 end = start + bg->length - 1; int ret; spin_lock(&fs_info->trans_lock); if (trans->transaction->list.prev != &fs_info->trans_list) { prev_trans = list_last_entry(&trans->transaction->list, struct btrfs_transaction, list); refcount_inc(&prev_trans->use_count); } spin_unlock(&fs_info->trans_lock); /* * Hold the unused_bg_unpin_mutex lock to avoid racing with * btrfs_finish_extent_commit(). If we are at transaction N, another * task might be running finish_extent_commit() for the previous * transaction N - 1, and have seen a range belonging to the block * group in pinned_extents before we were able to clear the whole block * group range from pinned_extents. This means that task can lookup for * the block group after we unpinned it from pinned_extents and removed * it, leading to an error at unpin_extent_range(). */ mutex_lock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) { ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, EXTENT_DIRTY); if (ret) goto out; } ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, EXTENT_DIRTY); out: mutex_unlock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) btrfs_put_transaction(prev_trans); return ret == 0; } /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC); int ret = 0; if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; if (btrfs_fs_closing(fs_info)) return; /* * Long running balances can keep us blocked here for eternity, so * simply skip deletion if we're unable to get the mutex. */ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) return; spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group, bg_list); list_del_init(&block_group->bg_list); space_info = block_group->space_info; if (ret || btrfs_mixed_space_info(space_info)) { btrfs_put_block_group(block_group); continue; } spin_unlock(&fs_info->unused_bgs_lock); btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group); /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); /* * Async discard moves the final block group discard to be prior * to the unused_bgs code path. 
Therefore, if it's not fully * trimmed, punt it back to the async discard lists. */ if (btrfs_test_opt(fs_info, DISCARD_ASYNC) && !btrfs_is_free_space_trimmed(block_group)) { trace_btrfs_skip_unused_block_group(block_group); up_write(&space_info->groups_sem); /* Requeue if we failed because of async discard */ btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); goto next; } spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do * the ro check in case balance is currently acting on * this block group. * * Also bail out if this is the only block group for its * type, because otherwise we would lose profile * information from fs_info->avail_*_alloc_bits and the * next block group of this type would be created with a * "single" profile (even if we're in a raid fs) because * fs_info->avail_*_alloc_bits would be 0. */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } /* * The block group may be unused but there may be space reserved * accounting with the existence of that block group, that is, * space_info->bytes_may_use was incremented by a task but no * space was yet allocated from the block group by the task. * That space may or may not be allocated, as we are generally * pessimistic about space reservation for metadata as well as * for data when using compression (as we reserve space based on * the worst case, when data can't be compressed, and before * actually attempting compression, before starting writeback). * * So check if the total space of the space_info minus the size * of this block group is less than the used space of the * space_info - if that's the case, then it means we have tasks * that might be relying on the block group in order to allocate * extents, and add back the block group to the unused list when * we finish, so that we retry later in case no tasks ended up * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); if (space_info->total_bytes - block_group->length < used && block_group->zone_unusable < block_group->length) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the * fs_info->unused_bgs list. */ btrfs_get_block_group(block_group); list_add_tail(&block_group->bg_list, &retry_list); trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); up_write(&space_info->groups_sem); if (ret < 0) { ret = 0; goto next; } ret = btrfs_zone_finish(block_group); if (ret < 0) { btrfs_dec_block_group_ro(block_group); if (ret == -EAGAIN) ret = 0; goto next; } /* * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ trans = btrfs_start_trans_remove_block_group(fs_info, block_group->start); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(block_group); ret = PTR_ERR(trans); goto next; } /* * We could have pending pinned extents for this block group, * just delete them, we don't care about them anymore. 
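* (clean_pinned_extents(), defined above, clears the EXTENT_DIRTY bits for this block group's range in the current transaction's pinned_extents tree and, if one is still referenced, in the previous transaction's as well, holding unused_bg_unpin_mutex to avoid racing with btrfs_finish_extent_commit().)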
*/ if (!clean_pinned_extents(trans, block_group)) { btrfs_dec_block_group_ro(block_group); goto end_trans; } /* * At this point, the block_group is read only and should fail * new allocations. However, btrfs_finish_extent_commit() can * cause this block_group to be placed back on the discard * lists because now the block_group isn't fully discarded. * Bail here and try again later after discarding everything. */ spin_lock(&fs_info->discard_ctl.lock); if (!list_empty(&block_group->discard_list)) { spin_unlock(&fs_info->discard_ctl.lock); btrfs_dec_block_group_ro(block_group); btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); goto end_trans; } spin_unlock(&fs_info->discard_ctl.lock); /* Reset pinned so btrfs_put_block_group doesn't complain */ spin_lock(&space_info->lock); spin_lock(&block_group->lock); btrfs_space_info_update_bytes_pinned(fs_info, space_info, -block_group->pinned); space_info->bytes_readonly += block_group->pinned; block_group->pinned = 0; spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); /* * The normal path here is an unused block group is passed here, * then trimming is handled in the transaction commit path. * Async discard interposes before this to do the trimming * before coming down the unused block group path as trimming * will no longer be done later in the transaction commit path. */ if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC)) goto flip_async; /* * DISCARD can flip during remount. On zoned filesystems, we * need to reset sequential-required zones. */ trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) || btrfs_is_zoned(fs_info); /* Implicit trim during transaction commit. */ if (trimming) btrfs_freeze_block_group(block_group); /* * Btrfs_remove_chunk will abort the transaction if things go * horribly wrong. */ ret = btrfs_remove_chunk(trans, block_group->start); if (ret) { if (trimming) btrfs_unfreeze_block_group(block_group); goto end_trans; } /* * If we're not mounted with -odiscard, we can just forget * about this block group. Otherwise we'll need to wait * until transaction commit to do the actual discard. */ if (trimming) { spin_lock(&fs_info->unused_bgs_lock); /* * A concurrent scrub might have added us to the list * fs_info->unused_bgs, so use a list_move operation * to add the block group to the deleted_bgs list. */ list_move(&block_group->bg_list, &trans->transaction->deleted_bgs); spin_unlock(&fs_info->unused_bgs_lock); btrfs_get_block_group(block_group); } end_trans: btrfs_end_transaction(trans); next: btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); spin_lock(&fs_info->unused_bgs_lock); list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); } void btrfs_mark_bg_unused(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; spin_lock(&fs_info->unused_bgs_lock); if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); trace_btrfs_add_unused_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->unused_bgs); } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) { /* Pull out the block group from the reclaim_bgs list. 
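* (The list_move_tail() below re-links bg->bg_list onto the unused_bgs list, reusing whatever reference the earlier list already holds, so no extra block group reference is taken on this path.)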
*/ trace_btrfs_add_unused_block_group(bg); list_move_tail(&bg->bg_list, &fs_info->unused_bgs); } spin_unlock(&fs_info->unused_bgs_lock); } /* * We want block groups with a low number of used bytes to be in the beginning * of the list, so they will get reclaimed first. */ static int reclaim_bgs_cmp(void *unused, const struct list_head *a, const struct list_head *b) { const struct btrfs_block_group *bg1, *bg2; bg1 = list_entry(a, struct btrfs_block_group, bg_list); bg2 = list_entry(b, struct btrfs_block_group, bg_list); return bg1->used > bg2->used; } static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info) { if (btrfs_is_zoned(fs_info)) return btrfs_zoned_should_reclaim(fs_info); return true; } static bool should_reclaim_block_group(const struct btrfs_block_group *bg, u64 bytes_freed) { const int thresh_pct = btrfs_calc_reclaim_threshold(bg->space_info); u64 thresh_bytes = mult_perc(bg->length, thresh_pct); const u64 new_val = bg->used; const u64 old_val = new_val + bytes_freed; if (thresh_bytes == 0) return false; /* * If we were below the threshold before don't reclaim, we are likely a * brand new block group and we don't want to relocate new block groups. */ if (old_val < thresh_bytes) return false; if (new_val >= thresh_bytes) return false; return true; } void btrfs_reclaim_bgs_work(struct work_struct *work) { struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, reclaim_bgs_work); struct btrfs_block_group *bg; struct btrfs_space_info *space_info; LIST_HEAD(retry_list); if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; if (btrfs_fs_closing(fs_info)) return; if (!btrfs_should_reclaim(fs_info)) return; sb_start_write(fs_info->sb); if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { sb_end_write(fs_info->sb); return; } /* * Long running balances can keep us blocked here for eternity, so * simply skip reclaim if we're unable to get the mutex. */ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) { btrfs_exclop_finish(fs_info); sb_end_write(fs_info->sb); return; } spin_lock(&fs_info->unused_bgs_lock); /* * Sort happens under lock because we can't simply splice it and sort. * The block groups might still be in use and reachable via bg_list, * and their presence in the reclaim_bgs list must be preserved. */ list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); while (!list_empty(&fs_info->reclaim_bgs)) { u64 zone_unusable; u64 reclaimed; int ret = 0; bg = list_first_entry(&fs_info->reclaim_bgs, struct btrfs_block_group, bg_list); list_del_init(&bg->bg_list); space_info = bg->space_info; spin_unlock(&fs_info->unused_bgs_lock); /* Don't race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&space_info->lock); spin_lock(&bg->lock); if (bg->reserved || bg->pinned || bg->ro) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do * the ro check in case balance is currently acting on * this block group. */ spin_unlock(&bg->lock); spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } if (bg->used == 0) { /* * It is possible that we trigger relocation on a block * group as its extents are deleted and it first goes * below the threshold, then shortly after goes empty. 
* * In this case, relocating it does delete it, but has * some overhead in relocation-specific metadata, looking * for the non-existent extents and running some extra * transactions, which we can avoid by using one of the * other mechanisms for dealing with empty block groups. */ if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_mark_bg_unused(bg); spin_unlock(&bg->lock); spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } /* * The block group might no longer meet the reclaim condition by * the time we get around to reclaiming it, so to avoid * reclaiming overly full block_groups, skip reclaiming them. * * Since the decision making process also depends on the amount * being freed, pass in a fake giant value to skip that extra * check, which is more meaningful when adding to the list in * the first place. */ if (!should_reclaim_block_group(bg, bg->length)) { spin_unlock(&bg->lock); spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } spin_unlock(&bg->lock); spin_unlock(&space_info->lock); /* * Get out fast, in case we're read-only or unmounting the * filesystem. It is OK to drop block groups from the list even * for the read-only case. As we did sb_start_write(), * "mount -o remount,ro" won't happen and read-only filesystem * means it is forced read-only due to a fatal error. So, it * never gets back to read-write to let us reclaim again. */ if (btrfs_need_cleaner_sleep(fs_info)) { up_write(&space_info->groups_sem); goto next; } /* * Cache the zone_unusable value before turning the block group * to read only. As soon as the block group is read only its * zone_unusable value gets moved to the block group's read-only * bytes and isn't available for calculations anymore. */ zone_unusable = bg->zone_unusable; ret = inc_block_group_ro(bg, 0); up_write(&space_info->groups_sem); if (ret < 0) goto next; btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used %llu%% unusable", bg->start, div64_u64(bg->used * 100, bg->length), div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); reclaimed = bg->used; ret = btrfs_relocate_chunk(fs_info, bg->start); if (ret) { btrfs_dec_block_group_ro(bg); btrfs_err(fs_info, "error relocating chunk %llu", bg->start); reclaimed = 0; spin_lock(&space_info->lock); space_info->reclaim_errors++; if (READ_ONCE(space_info->periodic_reclaim)) space_info->periodic_reclaim_ready = false; spin_unlock(&space_info->lock); } spin_lock(&space_info->lock); space_info->reclaim_count++; space_info->reclaim_bytes += reclaimed; spin_unlock(&space_info->lock); next: if (ret && !READ_ONCE(space_info->periodic_reclaim)) { /* Refcount held by the reclaim_bgs list after splice. */ spin_lock(&fs_info->unused_bgs_lock); /* * This block group might be added to the unused list * during the above process. Move it back to the * reclaim list otherwise. */ if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); list_add_tail(&bg->bg_list, &retry_list); } spin_unlock(&fs_info->unused_bgs_lock); } btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); /* * Reclaiming all the block groups in the list can take really * long. Prioritize cleaning up unused block groups. */ btrfs_delete_unused_bgs(fs_info); /* * If we are interrupted by a balance, we can just bail out. The * cleaner thread will restart it again if necessary.
*/ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) goto end; spin_lock(&fs_info->unused_bgs_lock); } spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); end: spin_lock(&fs_info->unused_bgs_lock); list_splice_tail(&retry_list, &fs_info->reclaim_bgs); spin_unlock(&fs_info->unused_bgs_lock); btrfs_exclop_finish(fs_info); sb_end_write(fs_info->sb); } void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) { btrfs_reclaim_sweep(fs_info); spin_lock(&fs_info->unused_bgs_lock); if (!list_empty(&fs_info->reclaim_bgs)) queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work); spin_unlock(&fs_info->unused_bgs_lock); } void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; spin_lock(&fs_info->unused_bgs_lock); if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); trace_btrfs_add_reclaim_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs); } spin_unlock(&fs_info->unused_bgs_lock); } static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key, const struct btrfs_path *path) { struct btrfs_chunk_map *map; struct btrfs_block_group_item bg; struct extent_buffer *leaf; int slot; u64 flags; int ret = 0; slot = path->slots[0]; leaf = path->nodes[0]; map = btrfs_find_chunk_map(fs_info, key->objectid, key->offset); if (!map) { btrfs_err(fs_info, "logical %llu len %llu found bg but no related chunk", key->objectid, key->offset); return -ENOENT; } if (map->start != key->objectid || map->chunk_len != key->offset) { btrfs_err(fs_info, "block group %llu len %llu mismatch with chunk %llu len %llu", key->objectid, key->offset, map->start, map->chunk_len); ret = -EUCLEAN; goto out_free_map; } read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot), sizeof(bg)); flags = btrfs_stack_block_group_flags(&bg) & BTRFS_BLOCK_GROUP_TYPE_MASK; if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { btrfs_err(fs_info, "block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", key->objectid, key->offset, flags, (BTRFS_BLOCK_GROUP_TYPE_MASK & map->type)); ret = -EUCLEAN; } out_free_map: btrfs_free_chunk_map(map); return ret; } static int find_first_block_group(struct btrfs_fs_info *fs_info, struct btrfs_path *path, const struct btrfs_key *key) { struct btrfs_root *root = btrfs_block_group_root(fs_info); int ret; struct btrfs_key found_key; btrfs_for_each_slot(root, key, &found_key, path, ret) { if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { return read_bg_from_eb(fs_info, &found_key, path); } } return ret; } static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = chunk_to_extended(flags) & BTRFS_EXTENDED_PROFILE_MASK; write_seqlock(&fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits |= extra_flags; if (flags & BTRFS_BLOCK_GROUP_METADATA) fs_info->avail_metadata_alloc_bits |= extra_flags; if (flags & BTRFS_BLOCK_GROUP_SYSTEM) fs_info->avail_system_alloc_bits |= extra_flags; write_sequnlock(&fs_info->profiles_lock); } /* * Map a physical disk address to a list of logical addresses. * * @fs_info: the filesystem * @chunk_start: logical address of block group * @physical: physical address to map to logical addresses * @logical: return array of logical addresses which map to @physical * @naddrs: length of @logical * @stripe_len: size of IO stripe for the given block group * * Maps a particular @physical disk address to a list of @logical addresses. 
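* For example, on a RAID1 chunk both stripes mirror the same bytes, so a @physical address inside either stripe resolves to the single logical address chunk_start + stripe_nr * BTRFS_STRIPE_LEN + offset and the duplicate is dropped; for RAID0/RAID10 the per-device stripe number is first rescaled across all stripes of the chunk.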
* Used primarily to exclude those portions of a block group that contain super * block copies. */ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len) { struct btrfs_chunk_map *map; u64 *buf; u64 bytenr; u64 data_stripe_length; u64 io_stripe_size; int i, nr = 0; int ret = 0; map = btrfs_get_chunk_map(fs_info, chunk_start, 1); if (IS_ERR(map)) return -EIO; data_stripe_length = map->stripe_size; io_stripe_size = BTRFS_STRIPE_LEN; chunk_start = map->start; /* For RAID5/6 adjust to a full IO stripe length */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) io_stripe_size = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); if (!buf) { ret = -ENOMEM; goto out; } for (i = 0; i < map->num_stripes; i++) { bool already_inserted = false; u32 stripe_nr; u32 offset; int j; if (!in_range(physical, map->stripes[i].physical, data_stripe_length)) continue; stripe_nr = (physical - map->stripes[i].physical) >> BTRFS_STRIPE_LEN_SHIFT; offset = (physical - map->stripes[i].physical) & BTRFS_STRIPE_LEN_MASK; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) stripe_nr = div_u64(stripe_nr * map->num_stripes + i, map->sub_stripes); /* * The remaining case would be for RAID56, multiply by * nr_data_stripes(). Alternatively, just use rmap_len below * instead of map->stripe_len */ bytenr = chunk_start + stripe_nr * io_stripe_size + offset; /* Ensure we don't add duplicate addresses */ for (j = 0; j < nr; j++) { if (buf[j] == bytenr) { already_inserted = true; break; } } if (!already_inserted) buf[nr++] = bytenr; } *logical = buf; *naddrs = nr; *stripe_len = io_stripe_size; out: btrfs_free_chunk_map(map); return ret; } static int exclude_super_stripes(struct btrfs_block_group *cache) { struct btrfs_fs_info *fs_info = cache->fs_info; const bool zoned = btrfs_is_zoned(fs_info); u64 bytenr; u64 *logical; int stripe_len; int i, nr, ret; if (cache->start < BTRFS_SUPER_INFO_OFFSET) { stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start; cache->bytes_super += stripe_len; ret = set_extent_bit(&fs_info->excluded_extents, cache->start, cache->start + stripe_len - 1, EXTENT_UPTODATE, NULL); if (ret) return ret; } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); ret = btrfs_rmap_block(fs_info, cache->start, bytenr, &logical, &nr, &stripe_len); if (ret) return ret; /* Shouldn't have super stripes in sequential zones */ if (zoned && nr) { kfree(logical); btrfs_err(fs_info, "zoned: block group %llu must not contain super block", cache->start); return -EUCLEAN; } while (nr--) { u64 len = min_t(u64, stripe_len, cache->start + cache->length - logical[nr]); cache->bytes_super += len; ret = set_extent_bit(&fs_info->excluded_extents, logical[nr], logical[nr] + len - 1, EXTENT_UPTODATE, NULL); if (ret) { kfree(logical); return ret; } } kfree(logical); } return 0; } static struct btrfs_block_group *btrfs_create_block_group_cache( struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) return NULL; cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), GFP_NOFS); if (!cache->free_space_ctl) { kfree(cache); return NULL; } cache->start = start; cache->fs_info = fs_info; cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; refcount_set(&cache->refs, 1); spin_lock_init(&cache->lock); init_rwsem(&cache->data_rwsem); 
INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); INIT_LIST_HEAD(&cache->bg_list); INIT_LIST_HEAD(&cache->ro_list); INIT_LIST_HEAD(&cache->discard_list); INIT_LIST_HEAD(&cache->dirty_list); INIT_LIST_HEAD(&cache->io_list); INIT_LIST_HEAD(&cache->active_bg_list); btrfs_init_free_space_ctl(cache, cache->free_space_ctl); atomic_set(&cache->frozen, 0); mutex_init(&cache->free_space_lock); return cache; } /* * Iterate all chunks and verify that each of them has the corresponding block * group */ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) { u64 start = 0; int ret = 0; while (1) { struct btrfs_chunk_map *map; struct btrfs_block_group *bg; /* * btrfs_find_chunk_map() will return the first chunk map * intersecting the range, so setting @length to 1 is enough to * get the first chunk. */ map = btrfs_find_chunk_map(fs_info, start, 1); if (!map) break; bg = btrfs_lookup_block_group(fs_info, map->start); if (!bg) { btrfs_err(fs_info, "chunk start=%llu len=%llu doesn't have corresponding block group", map->start, map->chunk_len); ret = -EUCLEAN; btrfs_free_chunk_map(map); break; } if (bg->start != map->start || bg->length != map->chunk_len || (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { btrfs_err(fs_info, "chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", map->start, map->chunk_len, map->type & BTRFS_BLOCK_GROUP_TYPE_MASK, bg->start, bg->length, bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); ret = -EUCLEAN; btrfs_free_chunk_map(map); btrfs_put_block_group(bg); break; } start = map->start + map->chunk_len; btrfs_free_chunk_map(map); btrfs_put_block_group(bg); } return ret; } static int read_one_block_group(struct btrfs_fs_info *info, struct btrfs_block_group_item *bgi, const struct btrfs_key *key, int need_clear) { struct btrfs_block_group *cache; const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS); int ret; ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY); cache = btrfs_create_block_group_cache(info, key->objectid); if (!cache) return -ENOMEM; cache->length = key->offset; cache->used = btrfs_stack_block_group_used(bgi); cache->commit_used = cache->used; cache->flags = btrfs_stack_block_group_flags(bgi); cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi); set_free_space_tree_thresholds(cache); if (need_clear) { /* * When we mount with old space cache, we need to * set BTRFS_DC_CLEAR and set dirty flag. * * a) Setting 'BTRFS_DC_CLEAR' makes sure that we * truncate the old free space cache inode and * setup a new one. * b) Setting 'dirty flag' makes sure that we flush * the new space cache info onto disk. */ if (btrfs_test_opt(info, SPACE_CACHE)) cache->disk_cache_state = BTRFS_DC_CLEAR; } if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->start); ret = -EINVAL; goto error; } ret = btrfs_load_block_group_zone_info(cache, false); if (ret) { btrfs_err(info, "zoned: failed to load zone info of bg %llu", cache->start); goto error; } /* * We need to exclude the super stripes now so that the space info has * super bytes accounted for, otherwise we'll think we have more space * than we actually do. */ ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case. 
*/ btrfs_free_excluded_extents(cache); goto error; } /* * For zoned filesystem, space after the allocation offset is the only * free space for a block group. So, we don't need any caching work. * btrfs_calc_zone_unusable() will set the amount of free space and * zone_unusable space. * * For regular filesystem, check for two cases, either we are full, and * therefore don't need to bother with the caching work since we won't * find any space, or we are empty, and we can just add all the space * in and be done with it. This saves us _a_lot_ of time, particularly * in the full case. */ if (btrfs_is_zoned(info)) { btrfs_calc_zone_unusable(cache); /* Should not have any excluded extents. Just in case, though. */ btrfs_free_excluded_extents(cache); } else if (cache->length == cache->used) { cache->cached = BTRFS_CACHE_FINISHED; btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->cached = BTRFS_CACHE_FINISHED; ret = btrfs_add_new_free_space(cache, cache->start, cache->start + cache->length, NULL); btrfs_free_excluded_extents(cache); if (ret) goto error; } ret = btrfs_add_block_group_cache(info, cache); if (ret) { btrfs_remove_free_space_cache(cache); goto error; } trace_btrfs_add_block_group(info, cache, 0); btrfs_add_bg_to_space_info(info, cache); set_avail_alloc_bits(info, cache->flags); if (btrfs_chunk_writeable(info, cache->start)) { if (cache->used == 0) { ASSERT(list_empty(&cache->bg_list)); if (btrfs_test_opt(info, DISCARD_ASYNC)) btrfs_discard_queue_work(&info->discard_ctl, cache); else btrfs_mark_bg_unused(cache); } } else { inc_block_group_ro(cache, 1); } return 0; error: btrfs_put_block_group(cache); return ret; } static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) { struct rb_node *node; int ret = 0; for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) { struct btrfs_chunk_map *map; struct btrfs_block_group *bg; map = rb_entry(node, struct btrfs_chunk_map, rb_node); bg = btrfs_create_block_group_cache(fs_info, map->start); if (!bg) { ret = -ENOMEM; break; } /* Fill dummy cache as FULL */ bg->length = map->chunk_len; bg->flags = map->type; bg->cached = BTRFS_CACHE_FINISHED; bg->used = map->chunk_len; bg->flags = map->type; ret = btrfs_add_block_group_cache(fs_info, bg); /* * We may have some valid block group cache added already, in * that case we skip to the next one. */ if (ret == -EEXIST) { ret = 0; btrfs_put_block_group(bg); continue; } if (ret) { btrfs_remove_free_space_cache(bg); btrfs_put_block_group(bg); break; } btrfs_add_bg_to_space_info(fs_info, bg); set_avail_alloc_bits(fs_info, bg->flags); } if (!ret) btrfs_init_global_block_rsv(fs_info); return ret; } int btrfs_read_block_groups(struct btrfs_fs_info *info) { struct btrfs_root *root = btrfs_block_group_root(info); struct btrfs_path *path; int ret; struct btrfs_block_group *cache; struct btrfs_space_info *space_info; struct btrfs_key key; int need_clear = 0; u64 cache_gen; /* * Either no extent root (with ibadroots rescue option) or we have * unsupported RO options. The fs can never be mounted read-write, so no * need to waste time searching block group items. * * This also allows new extent tree related changes to be RO compat, * no need for a full incompat flag. 
*/ if (!root || (btrfs_super_compat_ro_flags(info->super_copy) & ~BTRFS_FEATURE_COMPAT_RO_SUPP)) return fill_dummy_bgs(info); key.objectid = 0; key.offset = 0; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; cache_gen = btrfs_super_cache_generation(info->super_copy); if (btrfs_test_opt(info, SPACE_CACHE) && btrfs_super_generation(info->super_copy) != cache_gen) need_clear = 1; if (btrfs_test_opt(info, CLEAR_CACHE)) need_clear = 1; while (1) { struct btrfs_block_group_item bgi; struct extent_buffer *leaf; int slot; ret = find_first_block_group(info, path, &key); if (ret > 0) break; if (ret != 0) goto error; leaf = path->nodes[0]; slot = path->slots[0]; read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), sizeof(bgi)); btrfs_item_key_to_cpu(leaf, &key, slot); btrfs_release_path(path); ret = read_one_block_group(info, &bgi, &key, need_clear); if (ret < 0) goto error; key.objectid += key.offset; key.offset = 0; } btrfs_release_path(path); list_for_each_entry(space_info, &info->space_info, list) { int i; for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { if (list_empty(&space_info->block_groups[i])) continue; cache = list_first_entry(&space_info->block_groups[i], struct btrfs_block_group, list); btrfs_sysfs_add_block_group_type(cache); } if (!(btrfs_get_alloc_profile(info, space_info->flags) & (BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID56_MASK | BTRFS_BLOCK_GROUP_DUP))) continue; /* * Avoid allocating from un-mirrored block group if there are * mirrored block groups. */ list_for_each_entry(cache, &space_info->block_groups[BTRFS_RAID_RAID0], list) inc_block_group_ro(cache, 1); list_for_each_entry(cache, &space_info->block_groups[BTRFS_RAID_SINGLE], list) inc_block_group_ro(cache, 1); } btrfs_init_global_block_rsv(info); ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); /* * We've hit some error while reading the extent tree, and have * rescue=ibadroots mount option. * Try to fill the tree using dummy block groups so that the user can * continue to mount and grab their data. */ if (ret && btrfs_test_opt(info, IGNOREBADROOTS)) ret = fill_dummy_bgs(info); return ret; } /* * This function, insert_block_group_item(), belongs to the phase 2 of chunk * allocation. * * See the comment at btrfs_chunk_alloc() for details about the chunk allocation * phases. 
*/ static int insert_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_item bgi; struct btrfs_root *root = btrfs_block_group_root(fs_info); struct btrfs_key key; u64 old_commit_used; int ret; spin_lock(&block_group->lock); btrfs_set_stack_block_group_used(&bgi, block_group->used); btrfs_set_stack_block_group_chunk_objectid(&bgi, block_group->global_root_id); btrfs_set_stack_block_group_flags(&bgi, block_group->flags); old_commit_used = block_group->commit_used; block_group->commit_used = block_group->used; key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; spin_unlock(&block_group->lock); ret = btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); if (ret < 0) { spin_lock(&block_group->lock); block_group->commit_used = old_commit_used; spin_unlock(&block_group->lock); } return ret; } static int insert_dev_extent(struct btrfs_trans_handle *trans, const struct btrfs_device *device, u64 chunk_offset, u64 start, u64 num_bytes) { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_root *root = fs_info->dev_root; struct btrfs_path *path; struct btrfs_dev_extent *extent; struct extent_buffer *leaf; struct btrfs_key key; int ret; WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)); WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); path = btrfs_alloc_path(); if (!path) return -ENOMEM; key.objectid = device->devid; key.type = BTRFS_DEV_EXTENT_KEY; key.offset = start; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); if (ret) goto out; leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID); btrfs_set_dev_extent_chunk_objectid(leaf, extent, BTRFS_FIRST_CHUNK_TREE_OBJECTID); btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return ret; } /* * This function belongs to phase 2. * * See the comment at btrfs_chunk_alloc() for details about the chunk allocation * phases. */ static int insert_dev_extents(struct btrfs_trans_handle *trans, u64 chunk_offset, u64 chunk_size) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_device *device; struct btrfs_chunk_map *map; u64 dev_offset; int i; int ret = 0; map = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size); if (IS_ERR(map)) return PTR_ERR(map); /* * Take the device list mutex to prevent races with the final phase of * a device replace operation that replaces the device object associated * with the map's stripes, because the device object's id can change * at any time during that final phase of the device replace operation * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID, * resulting in persisting a device extent item with such ID. 
*/ mutex_lock(&fs_info->fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { device = map->stripes[i].dev; dev_offset = map->stripes[i].physical; ret = insert_dev_extent(trans, device, chunk_offset, dev_offset, map->stripe_size); if (ret) break; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_free_chunk_map(map); return ret; } /* * This function, btrfs_create_pending_block_groups(), belongs to phase 2 of * chunk allocation. * * See the comment at btrfs_chunk_alloc() for details about the chunk allocation * phases. */ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *block_group; int ret = 0; while (!list_empty(&trans->new_bgs)) { int index; block_group = list_first_entry(&trans->new_bgs, struct btrfs_block_group, bg_list); if (ret) goto next; index = btrfs_bg_flags_to_raid_index(block_group->flags); ret = insert_block_group_item(trans, block_group); if (ret) btrfs_abort_transaction(trans, ret); if (!test_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED, &block_group->runtime_flags)) { mutex_lock(&fs_info->chunk_mutex); ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group); mutex_unlock(&fs_info->chunk_mutex); if (ret) btrfs_abort_transaction(trans, ret); } ret = insert_dev_extents(trans, block_group->start, block_group->length); if (ret) btrfs_abort_transaction(trans, ret); add_block_group_free_space(trans, block_group); /* * If we restriped during balance, we may have added a new raid * type, so now add the sysfs entries when it is safe to do so. * We don't have to worry about locking here as it's handled in * btrfs_sysfs_add_block_group_type. */ if (block_group->space_info->block_group_kobjs[index] == NULL) btrfs_sysfs_add_block_group_type(block_group); /* Already aborted the transaction if it failed. */ next: btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info); list_del_init(&block_group->bg_list); clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); /* * If the block group is still unused, add it to the list of * unused block groups. The block group may have been created in * order to satisfy a space reservation, in which case the * extent allocation only happens later. But often we don't * actually need to allocate space that we previously reserved, * so the block group may become unused for a long time. For * example for metadata we generally reserve space for a worst * possible scenario, but then don't end up allocating all that * space or none at all (due to no need to COW, extent buffers * were already COWed in the current transaction and still * unwritten, tree heights lower than the maximum possible * height, etc). For data we generally reserve the exact amount * of space we are going to allocate later, the exception is * when using compression, as we must reserve space based on the * uncompressed data size, because the compression is only done * when writeback is triggered and we don't know how much space we * are actually going to need, so we reserve the uncompressed * size because the data may be incompressible in the worst case. */ if (ret == 0) { bool used; spin_lock(&block_group->lock); used = btrfs_is_block_group_used(block_group); spin_unlock(&block_group->lock); if (!used) btrfs_mark_bg_unused(block_group); } } btrfs_trans_release_chunk_metadata(trans); } /* * For extent tree v2 we use the block_group_item->chunk_offset to point at our * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
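* For example, with fs_info->nr_global_roots == 4 on a filesystem larger than 10GiB, a block group at offset 5GiB maps to (5GiB / 1GiB) % 4 == 1; smaller filesystems divide by 128MiB instead to still spread block groups across the global roots.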
*/ static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 offset) { u64 div = SZ_1G; u64 index; if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) return BTRFS_FIRST_CHUNK_TREE_OBJECTID; /* If we have a smaller fs index based on 128MiB. */ if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL)) div = SZ_128M; offset = div64_u64(offset, div); div64_u64_rem(offset, fs_info->nr_global_roots, &index); return index; } struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 type, u64 chunk_offset, u64 size) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache; int ret; btrfs_set_log_full_commit(trans); cache = btrfs_create_block_group_cache(fs_info, chunk_offset); if (!cache) return ERR_PTR(-ENOMEM); /* * Mark it as new before adding it to the rbtree of block groups or any * list, so that no other task finds it and calls btrfs_mark_bg_unused() * before the new flag is set. */ set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags); cache->length = size; set_free_space_tree_thresholds(cache); cache->flags = type; cache->cached = BTRFS_CACHE_FINISHED; cache->global_root_id = calculate_global_root_id(fs_info, cache->start); if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags); ret = btrfs_load_block_group_zone_info(cache, true); if (ret) { btrfs_put_block_group(cache); return ERR_PTR(ret); } ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case */ btrfs_free_excluded_extents(cache); btrfs_put_block_group(cache); return ERR_PTR(ret); } ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); btrfs_free_excluded_extents(cache); if (ret) { btrfs_put_block_group(cache); return ERR_PTR(ret); } /* * Ensure the corresponding space_info object is created and * assigned to our block group. We want our bg to be added to the rbtree * with its ->space_info set. */ cache->space_info = btrfs_find_space_info(fs_info, cache->flags); ASSERT(cache->space_info); ret = btrfs_add_block_group_cache(fs_info, cache); if (ret) { btrfs_remove_free_space_cache(cache); btrfs_put_block_group(cache); return ERR_PTR(ret); } /* * Now that our block group has its ->space_info set and is inserted in * the rbtree, update the space info's counters. */ trace_btrfs_add_block_group(fs_info, cache, 1); btrfs_add_bg_to_space_info(fs_info, cache); btrfs_update_global_block_rsv(fs_info); #ifdef CONFIG_BTRFS_DEBUG if (btrfs_should_fragment_free_space(cache)) { cache->space_info->bytes_used += size >> 1; fragment_free_space(cache); } #endif list_add_tail(&cache->bg_list, &trans->new_bgs); btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info); set_avail_alloc_bits(fs_info, type); return cache; } /* * Mark one block group RO, can be called several times for the same block * group. * * @cache: the destination block group * @do_chunk_alloc: whether need to do chunk pre-allocation, this is to * ensure we still have some free space after marking this * block group RO. */ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, bool do_chunk_alloc) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_block_group_root(fs_info); u64 alloc_flags; int ret; bool dirty_bg_running; /* * This can only happen when we are doing read-only scrub on read-only * mount. * In that case we should not start a new transaction on read-only fs. 
* Thus here we skip all chunk allocations. */ if (sb_rdonly(fs_info->sb)) { mutex_lock(&fs_info->ro_block_group_mutex); ret = inc_block_group_ro(cache, 0); mutex_unlock(&fs_info->ro_block_group_mutex); return ret; } do { trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); dirty_bg_running = false; /* * We're not allowed to set block groups readonly after the dirty * block group cache has started writing. If it already started, * back off and let this transaction commit. */ mutex_lock(&fs_info->ro_block_group_mutex); if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { u64 transid = trans->transid; mutex_unlock(&fs_info->ro_block_group_mutex); btrfs_end_transaction(trans); ret = btrfs_wait_for_commit(fs_info, transid); if (ret) return ret; dirty_bg_running = true; } } while (dirty_bg_running); if (do_chunk_alloc) { /* * If we are changing raid levels, try to allocate a * corresponding block group with the new raid level. */ alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); if (alloc_flags != cache->flags) { ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); /* * ENOSPC is allowed here, we may have enough space * already allocated at the new raid level to carry on */ if (ret == -ENOSPC) ret = 0; if (ret < 0) goto out; } } ret = inc_block_group_ro(cache, 0); if (!ret) goto out; if (ret == -ETXTBSY) goto unlock_out; /* * Skip chunk allocation if the bg is SYSTEM, this is to avoid system * chunk allocation storm to exhaust the system chunk array. Otherwise * we still want to try our best to mark the block group read-only. */ if (!do_chunk_alloc && ret == -ENOSPC && (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) goto unlock_out; alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags); ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; /* * We have allocated a new chunk. We also need to activate that chunk to * grant metadata tickets for zoned filesystem. 
*/ ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true); if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); if (ret == -ETXTBSY) goto unlock_out; out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); mutex_lock(&fs_info->chunk_mutex); check_system_chunk(trans, alloc_flags); mutex_unlock(&fs_info->chunk_mutex); } unlock_out: mutex_unlock(&fs_info->ro_block_group_mutex); btrfs_end_transaction(trans); return ret; } void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) { struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; BUG_ON(!cache->ro); spin_lock(&sinfo->lock); spin_lock(&cache->lock); if (!--cache->ro) { if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = (cache->alloc_offset - cache->used - cache->pinned - cache->reserved) + (cache->length - cache->zone_capacity); btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - cache->pinned - cache->bytes_super - cache->zone_unusable - cache->used; sinfo->bytes_readonly -= num_bytes; list_del_init(&cache->ro_list); } spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); } static int update_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_block_group *cache) { struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_root *root = btrfs_block_group_root(fs_info); unsigned long bi; struct extent_buffer *leaf; struct btrfs_block_group_item bgi; struct btrfs_key key; u64 old_commit_used; u64 used; /* * Block group items update can be triggered out of commit transaction * critical section, thus we need a consistent view of used bytes. * We cannot use cache->used directly outside of the spin lock, as it * may be changed. */ spin_lock(&cache->lock); old_commit_used = cache->commit_used; used = cache->used; /* No change in used bytes, can safely skip it. */ if (cache->commit_used == used) { spin_unlock(&cache->lock); return 0; } cache->commit_used = used; spin_unlock(&cache->lock); key.objectid = cache->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = cache->length; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret) { if (ret > 0) ret = -ENOENT; goto fail; } leaf = path->nodes[0]; bi = btrfs_item_ptr_offset(leaf, path->slots[0]); btrfs_set_stack_block_group_used(&bgi, used); btrfs_set_stack_block_group_chunk_objectid(&bgi, cache->global_root_id); btrfs_set_stack_block_group_flags(&bgi, cache->flags); write_extent_buffer(leaf, &bgi, bi, sizeof(bgi)); btrfs_mark_buffer_dirty(trans, leaf); fail: btrfs_release_path(path); /* * We didn't update the block group item, need to revert commit_used * unless the block group item didn't exist yet - this is to prevent a * race with a concurrent insertion of the block group item, with * insert_block_group_item(), that happened just after we attempted to * update. In that case we would reset commit_used to 0 just after the * insertion set it to a value greater than 0 - if the block group later * becomes with 0 used bytes, we would incorrectly skip its update. 
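* Concretely: if our search failed with -ENOENT while a concurrent insert_block_group_item() has just set commit_used to a non-zero value, restoring old_commit_used (still 0 here) would overwrite that value, which is why the -ENOENT case below is deliberately excluded from the revert.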
*/ if (ret < 0 && ret != -ENOENT) { spin_lock(&cache->lock); cache->commit_used = old_commit_used; spin_unlock(&cache->lock); } return ret; } static int cache_save_setup(struct btrfs_block_group *block_group, struct btrfs_trans_handle *trans, struct btrfs_path *path) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct inode *inode = NULL; struct extent_changeset *data_reserved = NULL; u64 alloc_hint = 0; int dcs = BTRFS_DC_ERROR; u64 cache_size = 0; int retries = 0; int ret = 0; if (!btrfs_test_opt(fs_info, SPACE_CACHE)) return 0; /* * If this block group is smaller than 100 megs don't bother caching the * block group. */ if (block_group->length < (100 * SZ_1M)) { spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); return 0; } if (TRANS_ABORTED(trans)) return 0; again: inode = lookup_free_space_inode(block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); btrfs_release_path(path); goto out; } if (IS_ERR(inode)) { BUG_ON(retries); retries++; if (block_group->ro) goto out_free; ret = create_free_space_inode(trans, block_group, path); if (ret) goto out_free; goto again; } /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next * time. */ BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, BTRFS_I(inode)); if (ret) { /* * So theoretically we could recover from this, simply set the * super cache generation to 0 so we know to invalidate the * cache, but then we'd have to keep track of the block groups * that fail this way so we know we _have_ to reset this cache * before the next commit or risk reading stale cache. So to * limit our exposure to horrible edge cases lets just abort the * transaction, this only happens in really bad situations * anyway. */ btrfs_abort_transaction(trans, ret); goto out_put; } WARN_ON(ret); /* We've already setup this transaction, go ahead and exit */ if (block_group->cache_generation == trans->transid && i_size_read(inode)) { dcs = BTRFS_DC_SETUP; goto out_put; } if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(fs_info, &fs_info->global_block_rsv); if (ret) goto out_put; ret = btrfs_truncate_free_space_cache(trans, NULL, inode); if (ret) goto out_put; } spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED || !btrfs_test_opt(fs_info, SPACE_CACHE)) { /* * don't bother trying to write stuff out _if_ * a) we're not cached, * b) we're with nospace_cache mount option, * c) we're with v2 space_cache (FREE_SPACE_TREE). */ dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); goto out_put; } spin_unlock(&block_group->lock); /* * We hit an ENOSPC when setting up the cache in this transaction, just * skip doing the setup, we've already cleared the cache so we're safe. */ if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) { ret = -ENOSPC; goto out_put; } /* * Try to preallocate enough space based on how big the block group is. * Keep in mind this has to include any pinned space which could end up * taking up quite a bit since it's not folded into the other space * cache. 
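* As a ballpark: a 1GiB block group on a 4KiB-sector filesystem ends up with div_u64(1GiB, 256MiB) * 16 * 4096 bytes == 256KiB preallocated for its free space cache file.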
*/ cache_size = div_u64(block_group->length, SZ_256M); if (!cache_size) cache_size = 1; cache_size *= 16; cache_size *= fs_info->sectorsize; ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0, cache_size, false); if (ret) goto out_put; ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, cache_size, cache_size, cache_size, &alloc_hint); /* * Our cache requires contiguous chunks so that we don't modify a bunch * of metadata or split extents when writing the cache out, which means * we can enospc if we are heavily fragmented in addition to just normal * out of space conditions. So if we hit this just skip setting up any * other block groups for this transaction, maybe we'll unpin enough * space the next time around. */ if (!ret) dcs = BTRFS_DC_SETUP; else if (ret == -ENOSPC) set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); out_put: iput(inode); out_free: btrfs_release_path(path); out: spin_lock(&block_group->lock); if (!ret && dcs == BTRFS_DC_SETUP) block_group->cache_generation = trans->transid; block_group->disk_cache_state = dcs; spin_unlock(&block_group->lock); extent_changeset_free(data_reserved); return ret; } int btrfs_setup_space_cache(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache, *tmp; struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_path *path; if (list_empty(&cur_trans->dirty_bgs) || !btrfs_test_opt(fs_info, SPACE_CACHE)) return 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; /* Could add new block groups, use _safe just in case */ list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, dirty_list) { if (cache->disk_cache_state == BTRFS_DC_CLEAR) cache_save_setup(cache, trans, path); } btrfs_free_path(path); return 0; } /* * Transaction commit does final block group cache writeback during a critical * section where nothing is allowed to change the FS. This is required in * order for the cache to actually match the block group, but can introduce a * lot of latency into the commit. * * So, btrfs_start_dirty_block_groups is here to kick off block group cache IO. * There's a chance we'll have to redo some of it if the block group changes * again during the commit, but it greatly reduces the commit latency by * getting rid of the easy block groups while we're still allowing others to * join the commit. 
*/ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache; struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; int should_put; struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); if (list_empty(&cur_trans->dirty_bgs)) { spin_unlock(&cur_trans->dirty_bgs_lock); return 0; } list_splice_init(&cur_trans->dirty_bgs, &dirty); spin_unlock(&cur_trans->dirty_bgs_lock); again: /* Make sure all the block groups on our dirty list actually exist */ btrfs_create_pending_block_groups(trans); if (!path) { path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } } /* * cache_write_mutex is here only to save us from balance or automatic * removal of empty block groups deleting this block group while we are * writing out the cache */ mutex_lock(&trans->transaction->cache_write_mutex); while (!list_empty(&dirty)) { bool drop_reserve = true; cache = list_first_entry(&dirty, struct btrfs_block_group, dirty_list); /* * This can happen if something re-dirties a block group that * is already under IO. Just wait for it to finish and then do * it all again */ if (!list_empty(&cache->io_list)) { list_del_init(&cache->io_list); btrfs_wait_cache_io(trans, cache, path); btrfs_put_block_group(cache); } /* * btrfs_wait_cache_io uses the cache->dirty_list to decide if * it should update the cache_state. Don't delete until after * we wait. * * Since we're not running in the commit critical section * we need the dirty_bgs_lock to protect from update_block_group */ spin_lock(&cur_trans->dirty_bgs_lock); list_del_init(&cache->dirty_list); spin_unlock(&cur_trans->dirty_bgs_lock); should_put = 1; cache_save_setup(cache, trans, path); if (cache->disk_cache_state == BTRFS_DC_SETUP) { cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { should_put = 0; /* * The cache_write_mutex is protecting the * io_list, also refer to the definition of * btrfs_transaction::io_bgs for more details */ list_add_tail(&cache->io_list, io); } else { /* * If we failed to write the cache, the * generation will be bad and life goes on */ ret = 0; } } if (!ret) { ret = update_block_group_item(trans, path, cache); /* * Our block group might still be attached to the list * of new block groups in the transaction handle of some * other task (struct btrfs_trans_handle->new_bgs). This * means its block group item isn't yet in the extent * tree. If this happens ignore the error, as we will * try again later in the critical section of the * transaction commit. */ if (ret == -ENOENT) { ret = 0; spin_lock(&cur_trans->dirty_bgs_lock); if (list_empty(&cache->dirty_list)) { list_add_tail(&cache->dirty_list, &cur_trans->dirty_bgs); btrfs_get_block_group(cache); drop_reserve = false; } spin_unlock(&cur_trans->dirty_bgs_lock); } else if (ret) { btrfs_abort_transaction(trans, ret); } } /* If it's not on the io list, we need to put the block group */ if (should_put) btrfs_put_block_group(cache); if (drop_reserve) btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); /* * Avoid blocking other tasks for too long. It might even save * us from writing caches for block groups that are going to be * removed. 
*/ mutex_unlock(&trans->transaction->cache_write_mutex); if (ret) goto out; mutex_lock(&trans->transaction->cache_write_mutex); } mutex_unlock(&trans->transaction->cache_write_mutex); /* * Go through delayed refs for all the stuff we've just kicked off * and then loop back (just once) */ if (!ret) ret = btrfs_run_delayed_refs(trans, 0); if (!ret && loops == 0) { loops++; spin_lock(&cur_trans->dirty_bgs_lock); list_splice_init(&cur_trans->dirty_bgs, &dirty); /* * dirty_bgs_lock protects us from concurrent block group * deletes too (not just cache_write_mutex). */ if (!list_empty(&dirty)) { spin_unlock(&cur_trans->dirty_bgs_lock); goto again; } spin_unlock(&cur_trans->dirty_bgs_lock); } out: if (ret < 0) { spin_lock(&cur_trans->dirty_bgs_lock); list_splice_init(&dirty, &cur_trans->dirty_bgs); spin_unlock(&cur_trans->dirty_bgs_lock); btrfs_cleanup_dirty_bgs(cur_trans, fs_info); } btrfs_free_path(path); return ret; } int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache; struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; path = btrfs_alloc_path(); if (!path) return -ENOMEM; /* * Even though we are in the critical section of the transaction commit, * we can still have concurrent tasks adding elements to this * transaction's list of dirty block groups. These tasks correspond to * endio free space workers started when writeback finishes for a * space cache, which run inode.c:btrfs_finish_ordered_io(), and can * allocate new block groups as a result of COWing nodes of the root * tree when updating the free space inode. The writeback for the space * caches is triggered by an earlier call to * btrfs_start_dirty_block_groups() and iterations of the following * loop. * Also we want to do the cache_save_setup first and then run the * delayed refs to make sure we have the best chance at doing this all * in one shot. */ spin_lock(&cur_trans->dirty_bgs_lock); while (!list_empty(&cur_trans->dirty_bgs)) { cache = list_first_entry(&cur_trans->dirty_bgs, struct btrfs_block_group, dirty_list); /* * This can happen if cache_save_setup re-dirties a block group * that is already under IO. 
Just wait for it to finish and * then do it all again */ if (!list_empty(&cache->io_list)) { spin_unlock(&cur_trans->dirty_bgs_lock); list_del_init(&cache->io_list); btrfs_wait_cache_io(trans, cache, path); btrfs_put_block_group(cache); spin_lock(&cur_trans->dirty_bgs_lock); } /* * Don't remove from the dirty list until after we've waited on * any pending IO */ list_del_init(&cache->dirty_list); spin_unlock(&cur_trans->dirty_bgs_lock); should_put = 1; cache_save_setup(cache, trans, path); if (!ret) ret = btrfs_run_delayed_refs(trans, U64_MAX); if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { should_put = 0; list_add_tail(&cache->io_list, io); } else { /* * If we failed to write the cache, the * generation will be bad and life goes on */ ret = 0; } } if (!ret) { ret = update_block_group_item(trans, path, cache); /* * One of the free space endio workers might have * created a new block group while updating a free space * cache's inode (at inode.c:btrfs_finish_ordered_io()) * and hasn't released its transaction handle yet, in * which case the new block group is still attached to * its transaction handle and its creation has not * finished yet (no block group item in the extent tree * yet, etc). If this is the case, wait for all free * space endio workers to finish and retry. This is a * very rare case so no need for a more efficient and * complex approach. */ if (ret == -ENOENT) { wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); ret = update_block_group_item(trans, path, cache); } if (ret) btrfs_abort_transaction(trans, ret); } /* If its not on the io list, we need to put the block group */ if (should_put) btrfs_put_block_group(cache); btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); spin_lock(&cur_trans->dirty_bgs_lock); } spin_unlock(&cur_trans->dirty_bgs_lock); /* * Refer to the definition of io_bgs member for details why it's safe * to use it without any locking */ while (!list_empty(io)) { cache = list_first_entry(io, struct btrfs_block_group, io_list); list_del_init(&cache->io_list); btrfs_wait_cache_io(trans, cache, path); btrfs_put_block_group(cache); } btrfs_free_path(path); return ret; } int btrfs_update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, bool alloc) { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_space_info *space_info; struct btrfs_block_group *cache; u64 old_val; bool reclaim = false; bool bg_already_dirty = true; int factor; /* Block accounting for super block */ spin_lock(&info->delalloc_root_lock); old_val = btrfs_super_bytes_used(info->super_copy); if (alloc) old_val += num_bytes; else old_val -= num_bytes; btrfs_set_super_bytes_used(info->super_copy, old_val); spin_unlock(&info->delalloc_root_lock); cache = btrfs_lookup_block_group(info, bytenr); if (!cache) return -ENOENT; /* An extent can not span multiple block groups. */ ASSERT(bytenr + num_bytes <= cache->start + cache->length); space_info = cache->space_info; factor = btrfs_bg_type_to_factor(cache->flags); /* * If this block group has free space cache written out, we need to make * sure to load it if we are removing space. This is because we need * the unpinning stage to actually add the space back to the block group, * otherwise we will leak space. 
*/ if (!alloc && !btrfs_block_group_done(cache)) btrfs_cache_block_group(cache, true); spin_lock(&space_info->lock); spin_lock(&cache->lock); if (btrfs_test_opt(info, SPACE_CACHE) && cache->disk_cache_state < BTRFS_DC_CLEAR) cache->disk_cache_state = BTRFS_DC_CLEAR; old_val = cache->used; if (alloc) { old_val += num_bytes; cache->used = old_val; cache->reserved -= num_bytes; cache->reclaim_mark = 0; space_info->bytes_reserved -= num_bytes; space_info->bytes_used += num_bytes; space_info->disk_used += num_bytes * factor; if (READ_ONCE(space_info->periodic_reclaim)) btrfs_space_info_update_reclaimable(space_info, -num_bytes); spin_unlock(&cache->lock); spin_unlock(&space_info->lock); } else { old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes); space_info->bytes_used -= num_bytes; space_info->disk_used -= num_bytes * factor; if (READ_ONCE(space_info->periodic_reclaim)) btrfs_space_info_update_reclaimable(space_info, num_bytes); else reclaim = should_reclaim_block_group(cache, num_bytes); spin_unlock(&cache->lock); spin_unlock(&space_info->lock); set_extent_bit(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, EXTENT_DIRTY, NULL); } spin_lock(&trans->transaction->dirty_bgs_lock); if (list_empty(&cache->dirty_list)) { list_add_tail(&cache->dirty_list, &trans->transaction->dirty_bgs); bg_already_dirty = false; btrfs_get_block_group(cache); } spin_unlock(&trans->transaction->dirty_bgs_lock); /* * No longer have used bytes in this block group, queue it for deletion. * We do this after adding the block group to the dirty list to avoid * races between cleaner kthread and space cache writeout. */ if (!alloc && old_val == 0) { if (!btrfs_test_opt(info, DISCARD_ASYNC)) btrfs_mark_bg_unused(cache); } else if (!alloc && reclaim) { btrfs_mark_bg_to_reclaim(cache); } btrfs_put_block_group(cache); /* Modified block groups are accounted for in the delayed_refs_rsv. */ if (!bg_already_dirty) btrfs_inc_delayed_refs_rsv_bg_updates(info); return 0; } /* * Update the block_group and space info counters. * * @cache: The cache we are manipulating * @ram_bytes: The number of bytes of file content, and will be same to * @num_bytes except for the compress path. * @num_bytes: The number of bytes in question * @delalloc: The blocks are allocated for the delalloc write * * This is called by the allocator when it reserves space. If this is a * reservation and the block group has become read only we cannot make the * reservation and return -EAGAIN, otherwise this function always succeeds. 
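 *
 * A hypothetical caller-side sketch (not taken from this file; the
 * "extent_insert_failed" condition is illustrative only): a reservation that
 * ends up unused is expected to be returned via btrfs_free_reserved_bytes():
 *
 *	ret = btrfs_add_reserved_bytes(cache, ram_bytes, num_bytes, delalloc, false);
 *	if (!ret && extent_insert_failed)
 *		btrfs_free_reserved_bytes(cache, num_bytes, delalloc);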
*/ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache, u64 ram_bytes, u64 num_bytes, int delalloc, bool force_wrong_size_class) { struct btrfs_space_info *space_info = cache->space_info; enum btrfs_block_group_size_class size_class; int ret = 0; spin_lock(&space_info->lock); spin_lock(&cache->lock); if (cache->ro) { ret = -EAGAIN; goto out; } if (btrfs_block_group_should_use_size_class(cache)) { size_class = btrfs_calc_block_group_size_class(num_bytes); ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class); if (ret) goto out; } cache->reserved += num_bytes; space_info->bytes_reserved += num_bytes; trace_btrfs_space_reservation(cache->fs_info, "space_info", space_info->flags, num_bytes, 1); btrfs_space_info_update_bytes_may_use(cache->fs_info, space_info, -ram_bytes); if (delalloc) cache->delalloc_bytes += num_bytes; /* * Compression can use less space than we reserved, so wake tickets if * that happens. */ if (num_bytes < ram_bytes) btrfs_try_granting_tickets(cache->fs_info, space_info); out: spin_unlock(&cache->lock); spin_unlock(&space_info->lock); return ret; } /* * Update the block_group and space info counters. * * @cache: The cache we are manipulating * @num_bytes: The number of bytes in question * @delalloc: The blocks are allocated for the delalloc write * * This is called by somebody who is freeing space that was never actually used * on disk. For example if you reserve some space for a new leaf in transaction * A and before transaction A commits you free that leaf, you call this with * reserve set to 0 in order to clear the reservation. */ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; spin_lock(&space_info->lock); spin_lock(&cache->lock); if (cache->ro) space_info->bytes_readonly += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; space_info->max_extent_size = 0; if (delalloc) cache->delalloc_bytes -= num_bytes; spin_unlock(&cache->lock); btrfs_try_granting_tickets(cache->fs_info, space_info); spin_unlock(&space_info->lock); } static void force_metadata_allocation(struct btrfs_fs_info *info) { struct list_head *head = &info->space_info; struct btrfs_space_info *found; list_for_each_entry(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_METADATA) found->force_alloc = CHUNK_ALLOC_FORCE; } } static int should_alloc_chunk(const struct btrfs_fs_info *fs_info, const struct btrfs_space_info *sinfo, int force) { u64 bytes_used = btrfs_space_info_used(sinfo, false); u64 thresh; if (force == CHUNK_ALLOC_FORCE) return 1; /* * in limited mode, we want to have some free space up to * about 1% of the FS size. */ if (force == CHUNK_ALLOC_LIMITED) { thresh = btrfs_super_total_bytes(fs_info->super_copy); thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1)); if (sinfo->total_bytes - bytes_used < thresh) return 1; } if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80)) return 0; return 1; } int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) { u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type); return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) { struct btrfs_block_group *bg; int ret; /* * Check if we have enough space in the system space info because we * will need to update device items in the chunk btree and insert a new * chunk item in the chunk btree as well. 
This will allocate a new * system block group if needed. */ check_system_chunk(trans, flags); bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); goto out; } ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); /* * Normally we are not expected to fail with -ENOSPC here, since we have * previously reserved space in the system space_info and allocated one * new system chunk if necessary. However there are three exceptions: * * 1) We may have enough free space in the system space_info but all the * existing system block groups have a profile which can not be used * for extent allocation. * * This happens when mounting in degraded mode. For example we have a * RAID1 filesystem with 2 devices, lose one device and mount the fs * using the other device in degraded mode. If we then allocate a chunk, * we may have enough free space in the existing system space_info, but * none of the block groups can be used for extent allocation since they * have a RAID1 profile, and because we are in degraded mode with a * single device, we are forced to allocate a new system chunk with a * SINGLE profile. Making check_system_chunk() iterate over all system * block groups and check if they have a usable profile and enough space * can be slow on very large filesystems, so we tolerate the -ENOSPC and * try again after forcing allocation of a new system chunk. Like this * we avoid paying the cost of that search in normal circumstances, when * we were not mounted in degraded mode; * * 2) We had enough free space info the system space_info, and one suitable * block group to allocate from when we called check_system_chunk() * above. However right after we called it, the only system block group * with enough free space got turned into RO mode by a running scrub, * and in this case we have to allocate a new one and retry. We only * need do this allocate and retry once, since we have a transaction * handle and scrub uses the commit root to search for block groups; * * 3) We had one system block group with enough free space when we called * check_system_chunk(), but after that, right before we tried to * allocate the last extent buffer we needed, a discard operation came * in and it temporarily removed the last free space entry from the * block group (discard removes a free space entry, discards it, and * then adds back the entry to the block group cache). */ if (ret == -ENOSPC) { const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); struct btrfs_block_group *sys_bg; sys_bg = btrfs_create_chunk(trans, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } } else if (ret) { btrfs_abort_transaction(trans, ret); goto out; } out: btrfs_trans_release_chunk_metadata(trans); if (ret) return ERR_PTR(ret); btrfs_get_block_group(bg); return bg; } /* * Chunk allocation is done in 2 phases: * * 1) Phase 1 - through btrfs_chunk_alloc() we allocate device extents for * the chunk, the chunk mapping, create its block group and add the items * that belong in the chunk btree to it - more specifically, we need to * update device items in the chunk btree and add a new chunk item to it. 
* * 2) Phase 2 - through btrfs_create_pending_block_groups(), we add the block * group item to the extent btree and the device extent items to the devices * btree. * * This is done to prevent deadlocks. For example when COWing a node from the * extent btree we are holding a write lock on the node's parent and if we * trigger chunk allocation and attempted to insert the new block group item * in the extent btree right way, we could deadlock because the path for the * insertion can include that parent node. At first glance it seems impossible * to trigger chunk allocation after starting a transaction since tasks should * reserve enough transaction units (metadata space), however while that is true * most of the time, chunk allocation may still be triggered for several reasons: * * 1) When reserving metadata, we check if there is enough free space in the * metadata space_info and therefore don't trigger allocation of a new chunk. * However later when the task actually tries to COW an extent buffer from * the extent btree or from the device btree for example, it is forced to * allocate a new block group (chunk) because the only one that had enough * free space was just turned to RO mode by a running scrub for example (or * device replace, block group reclaim thread, etc), so we can not use it * for allocating an extent and end up being forced to allocate a new one; * * 2) Because we only check that the metadata space_info has enough free bytes, * we end up not allocating a new metadata chunk in that case. However if * the filesystem was mounted in degraded mode, none of the existing block * groups might be suitable for extent allocation due to their incompatible * profile (for e.g. mounting a 2 devices filesystem, where all block groups * use a RAID1 profile, in degraded mode using a single device). In this case * when the task attempts to COW some extent buffer of the extent btree for * example, it will trigger allocation of a new metadata block group with a * suitable profile (SINGLE profile in the example of the degraded mount of * the RAID1 filesystem); * * 3) The task has reserved enough transaction units / metadata space, but when * it attempts to COW an extent buffer from the extent or device btree for * example, it does not find any free extent in any metadata block group, * therefore forced to try to allocate a new metadata block group. * This is because some other task allocated all available extents in the * meanwhile - this typically happens with tasks that don't reserve space * properly, either intentionally or as a bug. One example where this is * done intentionally is fsync, as it does not reserve any transaction units * and ends up allocating a variable number of metadata extents for log * tree extent buffers; * * 4) The task has reserved enough transaction units / metadata space, but right * before it tries to allocate the last extent buffer it needs, a discard * operation comes in and, temporarily, removes the last free space entry from * the only metadata block group that had free space (discard starts by * removing a free space entry from a block group, then does the discard * operation and, once it's done, it adds back the free space entry to the * block group). * * We also need this 2 phases setup when adding a device to a filesystem with * a seed device - we must create new metadata and system chunks without adding * any of the block group items to the chunk, extent and device btrees. 
If we * did not do it this way, we would get ENOSPC when attempting to update those * btrees, since all the chunks from the seed device are read-only. * * Phase 1 does the updates and insertions to the chunk btree because if we had * it done in phase 2 and have a thundering herd of tasks allocating chunks in * parallel, we risk having too many system chunks allocated by many tasks if * many tasks reach phase 1 without the previous ones completing phase 2. In the * extreme case this leads to exhaustion of the system chunk array in the * superblock. This is easier to trigger if using a btree node/leaf size of 64K * and with RAID filesystems (so we have more device items in the chunk btree). * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of * the system chunk array due to concurrent allocations") provides more details. * * Allocation of system chunks does not happen through this function. A task that * needs to update the chunk btree (the only btree that uses system chunks), must * preallocate chunk space by calling either check_system_chunk() or * btrfs_reserve_chunk_metadata() - the former is used when allocating a data or * metadata chunk or when removing a chunk, while the later is used before doing * a modification to the chunk btree - use cases for the later are adding, * removing and resizing a device as well as relocation of a system chunk. * See the comment below for more details. * * The reservation of system space, done through check_system_chunk(), as well * as all the updates and insertions into the chunk btree must be done while * holding fs_info->chunk_mutex. This is important to guarantee that while COWing * an extent buffer from the chunks btree we never trigger allocation of a new * system chunk, which would result in a deadlock (trying to lock twice an * extent buffer of the chunk btree, first time before triggering the chunk * allocation and the second time during chunk allocation while attempting to * update the chunks btree). The system chunk array is also updated while holding * that mutex. The same logic applies to removing chunks - we must reserve system * space, update the chunk btree and the system chunk array in the superblock * while holding fs_info->chunk_mutex. * * This function, btrfs_chunk_alloc(), belongs to phase 1. * * If @force is CHUNK_ALLOC_FORCE: * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. * If @force is NOT CHUNK_ALLOC_FORCE: * - return 0 if it doesn't need to allocate a new chunk, * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. */ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, enum btrfs_chunk_alloc_enum force) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; bool from_extent_allocation = false; int ret = 0; if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) { from_extent_allocation = true; force = CHUNK_ALLOC_FORCE; } /* Don't re-enter if we're already allocating a chunk */ if (trans->allocating_chunk) return -ENOSPC; /* * Allocation of system chunks can not happen through this path, as we * could end up in a deadlock if we are allocating a data or metadata * chunk and there is another task modifying the chunk btree. 
* * This is because while we are holding the chunk mutex, we will attempt * to add the new chunk item to the chunk btree or update an existing * device item in the chunk btree, while the other task that is modifying * the chunk btree is attempting to COW an extent buffer while holding a * lock on it and on its parent - if the COW operation triggers a system * chunk allocation, then we can deadlock because we are holding the * chunk mutex and we may need to access that extent buffer or its parent * in order to add the chunk item or update a device item. * * Tasks that want to modify the chunk tree should reserve system space * before updating the chunk btree, by calling either * btrfs_reserve_chunk_metadata() or check_system_chunk(). * It's possible that after a task reserves the space, it still ends up * here - this happens in the cases described above at do_chunk_alloc(). * The task will have to either retry or fail. */ if (flags & BTRFS_BLOCK_GROUP_SYSTEM) return -ENOSPC; space_info = btrfs_find_space_info(fs_info, flags); ASSERT(space_info); do { spin_lock(&space_info->lock); if (force < space_info->force_alloc) force = space_info->force_alloc; should_alloc = should_alloc_chunk(fs_info, space_info, force); if (space_info->full) { /* No more free physical space */ if (should_alloc) ret = -ENOSPC; else ret = 0; spin_unlock(&space_info->lock); return ret; } else if (!should_alloc) { spin_unlock(&space_info->lock); return 0; } else if (space_info->chunk_alloc) { /* * Someone is already allocating, so we need to block * until this someone is finished and then loop to * recheck if we should continue with our allocation * attempt. */ wait_for_alloc = true; force = CHUNK_ALLOC_NO_FORCE; spin_unlock(&space_info->lock); mutex_lock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->chunk_mutex); } else { /* Proceed with allocation */ space_info->chunk_alloc = 1; wait_for_alloc = false; spin_unlock(&space_info->lock); } cond_resched(); } while (wait_for_alloc); mutex_lock(&fs_info->chunk_mutex); trans->allocating_chunk = true; /* * If we have mixed data/metadata chunks we want to make sure we keep * allocating mixed chunks instead of individual chunks. */ if (btrfs_mixed_space_info(space_info)) flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); /* * if we're doing a data chunk, go ahead and make sure that * we keep a reasonable number of metadata chunks allocated in the * FS as well. */ if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { fs_info->data_chunk_allocations++; if (!(fs_info->data_chunk_allocations % fs_info->metadata_ratio)) force_metadata_allocation(fs_info); } ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; if (IS_ERR(ret_bg)) { ret = PTR_ERR(ret_bg); } else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) { /* * New block group is likely to be used soon. Try to activate * it now. Failure is OK for now. 
*/ btrfs_zone_activate(ret_bg); } if (!ret) btrfs_put_block_group(ret_bg); spin_lock(&space_info->lock); if (ret < 0) { if (ret == -ENOSPC) space_info->full = 1; else goto out; } else { ret = 1; space_info->max_extent_size = 0; } space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; out: space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); mutex_unlock(&fs_info->chunk_mutex); return ret; } static u64 get_profile_num_devs(const struct btrfs_fs_info *fs_info, u64 type) { u64 num_dev; num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max; if (!num_dev) num_dev = fs_info->fs_devices->rw_devices; return num_dev; } static void reserve_chunk_space(struct btrfs_trans_handle *trans, u64 bytes, u64 type) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; int ret = 0; /* * Needed because we can end up allocating a system chunk and for an * atomic and race free space reservation in the chunk block reserve. */ lockdep_assert_held(&fs_info->chunk_mutex); info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); spin_lock(&info->lock); left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", left, bytes, type); btrfs_dump_space_info(fs_info, info, 0, 0); } if (left < bytes) { u64 flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *bg; /* * Ignore failure to create system chunk. We might end up not * needing it, as we might not need to COW all nodes/leafs from * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). */ bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); } else { /* * We have a new chunk. We also need to activate it for * zoned filesystem. */ ret = btrfs_zoned_activate_one_bg(fs_info, info, true); if (ret < 0) return; /* * If we fail to add the chunk item here, we end up * trying again at phase 2 of chunk allocation, at * btrfs_create_pending_block_groups(). So ignore * any error here. An ENOSPC here could happen, due to * the cases described at do_chunk_alloc() - the system * block group we just created was just turned into RO * mode by a scrub for example, or a running discard * temporarily removed its free space entries, etc. */ btrfs_chunk_alloc_add_chunk_item(trans, bg); } } if (!ret) { ret = btrfs_block_rsv_add(fs_info, &fs_info->chunk_block_rsv, bytes, BTRFS_RESERVE_NO_FLUSH); if (!ret) trans->chunk_bytes_reserved += bytes; } } /* * Reserve space in the system space for allocating or removing a chunk. * The caller must be holding fs_info->chunk_mutex. */ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) { struct btrfs_fs_info *fs_info = trans->fs_info; const u64 num_devs = get_profile_num_devs(fs_info, type); u64 bytes; /* num_devs device items to update and 1 chunk item to add or remove. */ bytes = btrfs_calc_metadata_size(fs_info, num_devs) + btrfs_calc_insert_metadata_size(fs_info, 1); reserve_chunk_space(trans, bytes, type); } /* * Reserve space in the system space, if needed, for doing a modification to the * chunk btree. * * @trans: A transaction handle. * @is_item_insertion: Indicate if the modification is for inserting a new item * in the chunk btree or if it's for the deletion or update * of an existing item. 
* * This is used in a context where we need to update the chunk btree outside * block group allocation and removal, to avoid a deadlock with a concurrent * task that is allocating a metadata or data block group and therefore needs to * update the chunk btree while holding the chunk mutex. After the update to the * chunk btree is done, btrfs_trans_release_chunk_metadata() should be called. * */ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, bool is_item_insertion) { struct btrfs_fs_info *fs_info = trans->fs_info; u64 bytes; if (is_item_insertion) bytes = btrfs_calc_insert_metadata_size(fs_info, 1); else bytes = btrfs_calc_metadata_size(fs_info, 1); mutex_lock(&fs_info->chunk_mutex); reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM); mutex_unlock(&fs_info->chunk_mutex); } void btrfs_put_block_group_cache(struct btrfs_fs_info *info) { struct btrfs_block_group *block_group; block_group = btrfs_lookup_first_block_group(info, 0); while (block_group) { btrfs_wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) { struct btrfs_inode *inode = block_group->inode; block_group->inode = NULL; spin_unlock(&block_group->lock); ASSERT(block_group->io_ctl.inode == NULL); iput(&inode->vfs_inode); } else { spin_unlock(&block_group->lock); } block_group = btrfs_next_block_group(block_group); } } /* * Must be called only after stopping all workers, since we could have block * group caching kthreads running, and therefore they could race with us if we * freed the block groups before stopping them. */ int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_caching_control *caching_ctl; struct rb_node *n; if (btrfs_is_zoned(info)) { if (info->active_meta_bg) { btrfs_put_block_group(info->active_meta_bg); info->active_meta_bg = NULL; } if (info->active_system_bg) { btrfs_put_block_group(info->active_system_bg); info->active_system_bg = NULL; } } write_lock(&info->block_group_cache_lock); while (!list_empty(&info->caching_block_groups)) { caching_ctl = list_entry(info->caching_block_groups.next, struct btrfs_caching_control, list); list_del(&caching_ctl->list); btrfs_put_caching_control(caching_ctl); } write_unlock(&info->block_group_cache_lock); spin_lock(&info->unused_bgs_lock); while (!list_empty(&info->unused_bgs)) { block_group = list_first_entry(&info->unused_bgs, struct btrfs_block_group, bg_list); list_del_init(&block_group->bg_list); btrfs_put_block_group(block_group); } while (!list_empty(&info->reclaim_bgs)) { block_group = list_first_entry(&info->reclaim_bgs, struct btrfs_block_group, bg_list); list_del_init(&block_group->bg_list); btrfs_put_block_group(block_group); } spin_unlock(&info->unused_bgs_lock); spin_lock(&info->zone_active_bgs_lock); while (!list_empty(&info->zone_active_bgs)) { block_group = list_first_entry(&info->zone_active_bgs, struct btrfs_block_group, active_bg_list); list_del_init(&block_group->active_bg_list); btrfs_put_block_group(block_group); } spin_unlock(&info->zone_active_bgs_lock); write_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree.rb_root)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group, cache_node); rb_erase_cached(&block_group->cache_node, &info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); write_unlock(&info->block_group_cache_lock); down_write(&block_group->space_info->groups_sem); 
list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); /* * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ if (block_group->cached == BTRFS_CACHE_NO || block_group->cached == BTRFS_CACHE_ERROR) btrfs_free_excluded_extents(block_group); btrfs_remove_free_space_cache(block_group); ASSERT(block_group->cached != BTRFS_CACHE_STARTED); ASSERT(list_empty(&block_group->dirty_list)); ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); ASSERT(refcount_read(&block_group->refs) == 1); ASSERT(block_group->swap_extents == 0); btrfs_put_block_group(block_group); write_lock(&info->block_group_cache_lock); } write_unlock(&info->block_group_cache_lock); btrfs_release_global_block_rsv(info); while (!list_empty(&info->space_info)) { space_info = list_entry(info->space_info.next, struct btrfs_space_info, list); /* * Do not hide this behind enospc_debug, this is actually * important and indicates a real bug if this happens. */ if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); /* * If there was a failure to cleanup a log tree, very likely due * to an IO failure on a writeback attempt of one or more of its * extent buffers, we could not do proper (and cheap) unaccounting * of their reserved space, so don't warn on bytes_reserved > 0 in * that case. */ if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { if (WARN_ON(space_info->bytes_reserved > 0)) btrfs_dump_space_info(info, space_info, 0, 0); } WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } return 0; } void btrfs_freeze_block_group(struct btrfs_block_group *cache) { atomic_inc(&cache->frozen); } void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; bool cleanup; spin_lock(&block_group->lock); cleanup = (atomic_dec_and_test(&block_group->frozen) && test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)); spin_unlock(&block_group->lock); if (cleanup) { struct btrfs_chunk_map *map; map = btrfs_find_chunk_map(fs_info, block_group->start, 1); /* Logic error, can't happen. */ ASSERT(map); btrfs_remove_chunk_map(fs_info, map); /* Once for our lookup reference. */ btrfs_free_chunk_map(map); /* * We may have left one free space entry and other possible * tasks trimming this block group have left 1 entry each one. * Free them if any. */ btrfs_remove_free_space_cache(block_group); } } bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) { bool ret = true; spin_lock(&bg->lock); if (bg->ro) ret = false; else bg->swap_extents++; spin_unlock(&bg->lock); return ret; } void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount) { spin_lock(&bg->lock); ASSERT(!bg->ro); ASSERT(bg->swap_extents >= amount); bg->swap_extents -= amount; spin_unlock(&bg->lock); } enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size) { if (size <= SZ_128K) return BTRFS_BG_SZ_SMALL; if (size <= SZ_8M) return BTRFS_BG_SZ_MEDIUM; return BTRFS_BG_SZ_LARGE; } /* * Handle a block group allocating an extent in a size class * * @bg: The block group we allocated in. * @size_class: The size class of the allocation. * @force_wrong_size_class: Whether we are desperate enough to allow * mismatched size classes. 
* * Returns: 0 if the size class was valid for this block_group, -EAGAIN in the * case of a race that leads to the wrong size class without * force_wrong_size_class set. * * find_free_extent will skip block groups with a mismatched size class until * it really needs to avoid ENOSPC. In that case it will set * force_wrong_size_class. However, if a block group is newly allocated and * doesn't yet have a size class, then it is possible for two allocations of * different sizes to race and both try to use it. The loser is caught here and * has to retry. */ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg, enum btrfs_block_group_size_class size_class, bool force_wrong_size_class) { ASSERT(size_class != BTRFS_BG_SZ_NONE); /* The new allocation is in the right size class, do nothing */ if (bg->size_class == size_class) return 0; /* * The new allocation is in a mismatched size class. * This means one of two things: * * 1. Two tasks in find_free_extent for different size_classes raced * and hit the same empty block_group. Make the loser try again. * 2. A call to find_free_extent got desperate enough to set * 'force_wrong_slab'. Don't change the size_class, but allow the * allocation. */ if (bg->size_class != BTRFS_BG_SZ_NONE) { if (force_wrong_size_class) return 0; return -EAGAIN; } /* * The happy new block group case: the new allocation is the first * one in the block_group so we set size_class. */ bg->size_class = size_class; return 0; } bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg) { if (btrfs_is_zoned(bg->fs_info)) return false; if (!btrfs_is_block_group_data_only(bg)) return false; return true; } |
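/*
 * A hypothetical caller-side sketch (not part of the btrfs sources) showing
 * how the size class helpers above are meant to be combined, mirroring the
 * sequence used by btrfs_add_reserved_bytes(): pick a class for the
 * allocation size and let btrfs_use_block_group_size_class() reject a racing
 * mismatch with -EAGAIN unless the caller forces it. The helper name is
 * illustrative only.
 */
static int example_claim_size_class(struct btrfs_block_group *bg, u64 num_bytes,
				    bool force_wrong_size_class)
{
	enum btrfs_block_group_size_class size_class;

	/* Zoned and non-data block groups do not track size classes. */
	if (!btrfs_block_group_should_use_size_class(bg))
		return 0;

	size_class = btrfs_calc_block_group_size_class(num_bytes);
	return btrfs_use_block_group_size_class(bg, size_class,
						force_wrong_size_class);
}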
// SPDX-License-Identifier: GPL-2.0-only /* * Backlight Lowlevel Control Abstraction * * Copyright (C) 2003,2004 Hewlett-Packard Company * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/backlight.h> #include <linux/notifier.h> #include <linux/ctype.h> #include <linux/err.h> #include <linux/fb.h> #include <linux/slab.h> #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/backlight.h> #endif /** * DOC: overview * *
The backlight core supports implementing backlight drivers. * * A backlight driver registers a driver using * devm_backlight_device_register(). The properties of the backlight * driver such as type and max_brightness must be specified. * When the core detect changes in for example brightness or power state * the update_status() operation is called. The backlight driver shall * implement this operation and use it to adjust backlight. * * Several sysfs attributes are provided by the backlight core:: * * - brightness R/W, set the requested brightness level * - actual_brightness RO, the brightness level used by the HW * - max_brightness RO, the maximum brightness level supported * * See Documentation/ABI/stable/sysfs-class-backlight for the full list. * * The backlight can be adjusted using the sysfs interface, and * the backlight driver may also support adjusting backlight using * a hot-key or some other platform or firmware specific way. * * The driver must implement the get_brightness() operation if * the HW do not support all the levels that can be specified in * brightness, thus providing user-space access to the actual level * via the actual_brightness attribute. * * When the backlight changes this is reported to user-space using * an uevent connected to the actual_brightness attribute. * When brightness is set by platform specific means, for example * a hot-key to adjust backlight, the driver must notify the backlight * core that brightness has changed using backlight_force_update(). * * The backlight driver core receives notifications from fbdev and * if the event is FB_EVENT_BLANK and if the value of blank, from the * FBIOBLANK ioctrl, results in a change in the backlight state the * update_status() operation is called. */ static struct list_head backlight_dev_list; static struct mutex backlight_dev_list_mutex; static struct blocking_notifier_head backlight_notifier; static const char *const backlight_types[] = { [BACKLIGHT_RAW] = "raw", [BACKLIGHT_PLATFORM] = "platform", [BACKLIGHT_FIRMWARE] = "firmware", }; static const char *const backlight_scale_types[] = { [BACKLIGHT_SCALE_UNKNOWN] = "unknown", [BACKLIGHT_SCALE_LINEAR] = "linear", [BACKLIGHT_SCALE_NON_LINEAR] = "non-linear", }; #if defined(CONFIG_FB_CORE) || (defined(CONFIG_FB_CORE_MODULE) && \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)) /* * fb_notifier_callback * * This callback gets called when something important happens inside a * framebuffer driver. The backlight core only cares about FB_BLANK_UNBLANK * which is reported to the driver using backlight_update_status() * as a state change. * * There may be several fbdev's connected to the backlight device, * in which case they are kept track of. A state change is only reported * if there is a change in backlight for the specified fbdev. */ static int fb_notifier_callback(struct notifier_block *self, unsigned long event, void *data) { struct backlight_device *bd; struct fb_event *evdata = data; struct fb_info *info = evdata->info; struct backlight_device *fb_bd = fb_bl_device(info); int node = info->node; int fb_blank = 0; /* If we aren't interested in this event, skip it immediately ... 
*/ if (event != FB_EVENT_BLANK) return 0; bd = container_of(self, struct backlight_device, fb_notif); mutex_lock(&bd->ops_lock); if (!bd->ops) goto out; if (bd->ops->controls_device && !bd->ops->controls_device(bd, info->device)) goto out; if (fb_bd && fb_bd != bd) goto out; fb_blank = *(int *)evdata->data; if (fb_blank == FB_BLANK_UNBLANK && !bd->fb_bl_on[node]) { bd->fb_bl_on[node] = true; if (!bd->use_count++) { bd->props.state &= ~BL_CORE_FBBLANK; backlight_update_status(bd); } } else if (fb_blank != FB_BLANK_UNBLANK && bd->fb_bl_on[node]) { bd->fb_bl_on[node] = false; if (!(--bd->use_count)) { bd->props.state |= BL_CORE_FBBLANK; backlight_update_status(bd); } } out: mutex_unlock(&bd->ops_lock); return 0; } static int backlight_register_fb(struct backlight_device *bd) { memset(&bd->fb_notif, 0, sizeof(bd->fb_notif)); bd->fb_notif.notifier_call = fb_notifier_callback; return fb_register_client(&bd->fb_notif); } static void backlight_unregister_fb(struct backlight_device *bd) { fb_unregister_client(&bd->fb_notif); } #else static inline int backlight_register_fb(struct backlight_device *bd) { return 0; } static inline void backlight_unregister_fb(struct backlight_device *bd) { } #endif /* CONFIG_FB_CORE */ static void backlight_generate_event(struct backlight_device *bd, enum backlight_update_reason reason) { char *envp[2]; switch (reason) { case BACKLIGHT_UPDATE_SYSFS: envp[0] = "SOURCE=sysfs"; break; case BACKLIGHT_UPDATE_HOTKEY: envp[0] = "SOURCE=hotkey"; break; default: envp[0] = "SOURCE=unknown"; break; } envp[1] = NULL; kobject_uevent_env(&bd->dev.kobj, KOBJ_CHANGE, envp); sysfs_notify(&bd->dev.kobj, NULL, "actual_brightness"); } static ssize_t bl_power_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.power); } static ssize_t bl_power_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long power, old_power; rc = kstrtoul(buf, 0, &power); if (rc) return rc; rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { pr_debug("set power to %lu\n", power); if (bd->props.power != power) { old_power = bd->props.power; bd->props.power = power; rc = backlight_update_status(bd); if (rc) bd->props.power = old_power; else rc = count; } else { rc = count; } } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RW(bl_power); static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.brightness); } int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness) { int rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { if (brightness > bd->props.max_brightness) rc = -EINVAL; else { pr_debug("set brightness to %lu\n", brightness); bd->props.brightness = brightness; rc = backlight_update_status(bd); } } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS); return rc; } EXPORT_SYMBOL(backlight_device_set_brightness); static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long brightness; rc = kstrtoul(buf, 0, &brightness); if (rc) return rc; rc = backlight_device_set_brightness(bd, brightness); return rc ? 
rc : count; } static DEVICE_ATTR_RW(brightness); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%s\n", backlight_types[bd->props.type]); } static DEVICE_ATTR_RO(type); static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.max_brightness); } static DEVICE_ATTR_RO(max_brightness); static ssize_t actual_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { rc = bd->ops->get_brightness(bd); if (rc >= 0) rc = sprintf(buf, "%d\n", rc); } else { rc = sprintf(buf, "%d\n", bd->props.brightness); } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RO(actual_brightness); static ssize_t scale_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR)) return sprintf(buf, "unknown\n"); return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]); } static DEVICE_ATTR_RO(scale); #ifdef CONFIG_PM_SLEEP static int backlight_suspend(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state |= BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } static int backlight_resume(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state &= ~BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } #endif static SIMPLE_DEV_PM_OPS(backlight_class_dev_pm_ops, backlight_suspend, backlight_resume); static void bl_device_release(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); kfree(bd); } static struct attribute *bl_device_attrs[] = { &dev_attr_bl_power.attr, &dev_attr_brightness.attr, &dev_attr_actual_brightness.attr, &dev_attr_max_brightness.attr, &dev_attr_scale.attr, &dev_attr_type.attr, NULL, }; ATTRIBUTE_GROUPS(bl_device); static const struct class backlight_class = { .name = "backlight", .dev_groups = bl_device_groups, .pm = &backlight_class_dev_pm_ops, }; /** * backlight_force_update - tell the backlight subsystem that hardware state * has changed * @bd: the backlight device to update * @reason: reason for update * * Updates the internal state of the backlight in response to a hardware event, * and generates an uevent to notify userspace. A backlight driver shall call * backlight_force_update() when the backlight is changed using, for example, * a hot-key. The updated brightness is read using get_brightness() and the * brightness value is reported using an uevent. 
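 *
 * A minimal usage sketch (hypothetical driver code, not part of this file):
 * a platform hot-key handler that changed the brightness behind the core's
 * back only needs to call
 *
 *	backlight_force_update(bd, BACKLIGHT_UPDATE_HOTKEY);
 *
 * and the core re-reads the level via get_brightness() and emits the uevent.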
*/ void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason) { int brightness; mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { brightness = bd->ops->get_brightness(bd); if (brightness >= 0) bd->props.brightness = brightness; else dev_err(&bd->dev, "Could not update brightness from device: %pe\n", ERR_PTR(brightness)); } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, reason); } EXPORT_SYMBOL(backlight_force_update); /* deprecated - use devm_backlight_device_register() */ struct backlight_device *backlight_device_register(const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device *new_bd; int rc; pr_debug("backlight_device_register: name=%s\n", name); new_bd = kzalloc(sizeof(struct backlight_device), GFP_KERNEL); if (!new_bd) return ERR_PTR(-ENOMEM); mutex_init(&new_bd->update_lock); mutex_init(&new_bd->ops_lock); new_bd->dev.class = &backlight_class; new_bd->dev.parent = parent; new_bd->dev.release = bl_device_release; dev_set_name(&new_bd->dev, "%s", name); dev_set_drvdata(&new_bd->dev, devdata); /* Set default properties */ if (props) { memcpy(&new_bd->props, props, sizeof(struct backlight_properties)); if (props->type <= 0 || props->type >= BACKLIGHT_TYPE_MAX) { WARN(1, "%s: invalid backlight type", name); new_bd->props.type = BACKLIGHT_RAW; } } else { new_bd->props.type = BACKLIGHT_RAW; } rc = device_register(&new_bd->dev); if (rc) { put_device(&new_bd->dev); return ERR_PTR(rc); } rc = backlight_register_fb(new_bd); if (rc) { device_unregister(&new_bd->dev); return ERR_PTR(rc); } new_bd->ops = ops; #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (!pmac_backlight) pmac_backlight = new_bd; mutex_unlock(&pmac_backlight_mutex); #endif mutex_lock(&backlight_dev_list_mutex); list_add(&new_bd->entry, &backlight_dev_list); mutex_unlock(&backlight_dev_list_mutex); blocking_notifier_call_chain(&backlight_notifier, BACKLIGHT_REGISTERED, new_bd); return new_bd; } EXPORT_SYMBOL(backlight_device_register); /** backlight_device_get_by_type - find first backlight device of a type * @type: the type of backlight device * * Look up the first backlight device of the specified type * * RETURNS: * * Pointer to backlight device if any was found. Otherwise NULL. */ struct backlight_device *backlight_device_get_by_type(enum backlight_type type) { bool found = false; struct backlight_device *bd; mutex_lock(&backlight_dev_list_mutex); list_for_each_entry(bd, &backlight_dev_list, entry) { if (bd->props.type == type) { found = true; break; } } mutex_unlock(&backlight_dev_list_mutex); return found ? bd : NULL; } EXPORT_SYMBOL(backlight_device_get_by_type); /** * backlight_device_get_by_name - Get backlight device by name * @name: Device name * * This function looks up a backlight device by its name. It obtains a reference * on the backlight device and it is the caller's responsibility to drop the * reference by calling put_device(). * * Returns: * A pointer to the backlight device if found, otherwise NULL. */ struct backlight_device *backlight_device_get_by_name(const char *name) { struct device *dev; dev = class_find_device_by_name(&backlight_class, name); return dev ? 
to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(backlight_device_get_by_name); /* deprecated - use devm_backlight_device_unregister() */ void backlight_device_unregister(struct backlight_device *bd) { if (!bd) return; mutex_lock(&backlight_dev_list_mutex); list_del(&bd->entry); mutex_unlock(&backlight_dev_list_mutex); #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (pmac_backlight == bd) pmac_backlight = NULL; mutex_unlock(&pmac_backlight_mutex); #endif blocking_notifier_call_chain(&backlight_notifier, BACKLIGHT_UNREGISTERED, bd); mutex_lock(&bd->ops_lock); bd->ops = NULL; mutex_unlock(&bd->ops_lock); backlight_unregister_fb(bd); device_unregister(&bd->dev); } EXPORT_SYMBOL(backlight_device_unregister); static void devm_backlight_device_release(struct device *dev, void *res) { struct backlight_device *backlight = *(struct backlight_device **)res; backlight_device_unregister(backlight); } static int devm_backlight_device_match(struct device *dev, void *res, void *data) { struct backlight_device **r = res; return *r == data; } /** * backlight_register_notifier - get notified of backlight (un)registration * @nb: notifier block with the notifier to call on backlight (un)registration * * Register a notifier to get notified when backlight devices get registered * or unregistered. * * RETURNS: * * 0 on success, otherwise a negative error code */ int backlight_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&backlight_notifier, nb); } EXPORT_SYMBOL(backlight_register_notifier); /** * backlight_unregister_notifier - unregister a backlight notifier * @nb: notifier block to unregister * * Register a notifier to get notified when backlight devices get registered * or unregistered. * * RETURNS: * * 0 on success, otherwise a negative error code */ int backlight_unregister_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&backlight_notifier, nb); } EXPORT_SYMBOL(backlight_unregister_notifier); /** * devm_backlight_device_register - register a new backlight device * @dev: the device to register * @name: the name of the device * @parent: a pointer to the parent device (often the same as @dev) * @devdata: an optional pointer to be stored for private driver use * @ops: the backlight operations structure * @props: the backlight properties * * Creates and registers new backlight device. When a backlight device * is registered the configuration must be specified in the @props * parameter. See description of &backlight_properties. * * RETURNS: * * struct backlight on success, or an ERR_PTR on error */ struct backlight_device *devm_backlight_device_register(struct device *dev, const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device **ptr, *backlight; ptr = devres_alloc(devm_backlight_device_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); backlight = backlight_device_register(name, parent, devdata, ops, props); if (!IS_ERR(backlight)) { *ptr = backlight; devres_add(dev, ptr); } else { devres_free(ptr); } return backlight; } EXPORT_SYMBOL(devm_backlight_device_register); /** * devm_backlight_device_unregister - unregister backlight device * @dev: the device to unregister * @bd: the backlight device to unregister * * Deallocates a backlight allocated with devm_backlight_device_register(). 
* Normally this function will not need to be called and the resource management * code will ensure that the resources are freed. */ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd) { int rc; rc = devres_release(dev, devm_backlight_device_release, devm_backlight_device_match, bd); WARN_ON(rc); } EXPORT_SYMBOL(devm_backlight_device_unregister); #ifdef CONFIG_OF static int of_parent_match(struct device *dev, const void *data) { return dev->parent && dev->parent->of_node == data; } /** * of_find_backlight_by_node() - find backlight device by device-tree node * @node: device-tree node of the backlight device * * Returns a pointer to the backlight device corresponding to the given DT * node or NULL if no such backlight device exists or if the device hasn't * been probed yet. * * This function obtains a reference on the backlight device and it is the * caller's responsibility to drop the reference by calling put_device() on * the backlight device's .dev field. */ struct backlight_device *of_find_backlight_by_node(struct device_node *node) { struct device *dev; dev = class_find_device(&backlight_class, NULL, node, of_parent_match); return dev ? to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(of_find_backlight_by_node); #endif static struct backlight_device *of_find_backlight(struct device *dev) { struct backlight_device *bd = NULL; struct device_node *np; if (!dev) return NULL; if (IS_ENABLED(CONFIG_OF) && dev->of_node) { np = of_parse_phandle(dev->of_node, "backlight", 0); if (np) { bd = of_find_backlight_by_node(np); of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); } } return bd; } static void devm_backlight_release(void *data) { struct backlight_device *bd = data; put_device(&bd->dev); } /** * devm_of_find_backlight - find backlight for a device * @dev: the device * * This function looks for a property named 'backlight' on the DT node * connected to @dev and looks up the backlight device. The lookup is * device managed so the reference to the backlight device is automatically * dropped on driver detach. * * RETURNS: * * A pointer to the backlight device if found. * Error pointer -EPROBE_DEFER if the DT property is set, but no backlight * device is found. NULL if there's no backlight property. */ struct backlight_device *devm_of_find_backlight(struct device *dev) { struct backlight_device *bd; int ret; bd = of_find_backlight(dev); if (IS_ERR_OR_NULL(bd)) return bd; ret = devm_add_action_or_reset(dev, devm_backlight_release, bd); if (ret) return ERR_PTR(ret); return bd; } EXPORT_SYMBOL(devm_of_find_backlight); static void __exit backlight_class_exit(void) { class_unregister(&backlight_class); } static int __init backlight_class_init(void) { int ret; ret = class_register(&backlight_class); if (ret) { pr_warn("Unable to create backlight class; errno = %d\n", ret); return ret; } INIT_LIST_HEAD(&backlight_dev_list); mutex_init(&backlight_dev_list_mutex); BLOCKING_INIT_NOTIFIER_HEAD(&backlight_notifier); return 0; } /* * if this is compiled into the kernel, we need to ensure that the * class is registered before users of the class try to register lcd's */ postcore_initcall(backlight_class_init); module_exit(backlight_class_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jamey Hicks <jamey.hicks@hp.com>, Andrew Zabolotny <zap@homelink.ru>"); MODULE_DESCRIPTION("Backlight Lowlevel Control Abstraction"); |
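A minimal, hypothetical usage sketch of the registration API above, assuming a platform driver: the foo_* names and the 0..255 brightness range are invented for illustration, and a real driver would program its hardware in the update_status() callback rather than returning immediately.

#include <linux/backlight.h>
#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/string.h>

static int foo_bl_update_status(struct backlight_device *bd)
{
	/* A real driver would push backlight_get_brightness(bd) to hardware. */
	return 0;
}

static const struct backlight_ops foo_bl_ops = {
	.options	= BL_CORE_SUSPENDRESUME,
	.update_status	= foo_bl_update_status,
};

static int foo_bl_probe(struct platform_device *pdev)
{
	struct backlight_properties props;
	struct backlight_device *bd;

	memset(&props, 0, sizeof(props));
	props.type = BACKLIGHT_RAW;	/* avoids the invalid-type WARN above */
	props.max_brightness = 255;	/* assumed hardware range */
	props.brightness = 255;

	bd = devm_backlight_device_register(&pdev->dev, dev_name(&pdev->dev),
					    &pdev->dev, NULL, &foo_bl_ops,
					    &props);
	if (IS_ERR(bd))
		return PTR_ERR(bd);

	backlight_update_status(bd);	/* apply the initial brightness */
	return 0;
}

Because the devm_ variant is used, the device is unregistered automatically on driver detach via devm_backlight_device_release() above; no explicit cleanup call is needed.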
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Samsung Electronics Co., Ltd. * Sylwester Nawrocki <s.nawrocki@samsung.com> */ #include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/clk/clk-conf.h> #include <linux/device.h> #include <linux/of.h> #include <linux/printk.h> #include <linux/slab.h> static int __set_clk_parents(struct device_node *node, bool clk_supplier) { struct of_phandle_args clkspec; int index, rc, num_parents; struct clk *clk, *pclk; num_parents = of_count_phandle_with_args(node, "assigned-clock-parents", "#clock-cells"); if (num_parents == -EINVAL) pr_err("clk: invalid value of clock-parents property at %pOF\n", node); for (index = 0; index < num_parents; index++) { rc = of_parse_phandle_with_args(node, "assigned-clock-parents", "#clock-cells", index, &clkspec); if (rc < 0) { /* skip empty (null) phandles */ if (rc == -ENOENT) continue; else return rc; } if (clkspec.np == node && !clk_supplier) { of_node_put(clkspec.np); return 0; } pclk = of_clk_get_from_provider(&clkspec); of_node_put(clkspec.np); if (IS_ERR(pclk)) { if (PTR_ERR(pclk) != -EPROBE_DEFER) pr_warn("clk: couldn't get parent clock %d for %pOF\n", index, node); return PTR_ERR(pclk); } rc = of_parse_phandle_with_args(node, "assigned-clocks", "#clock-cells", index, &clkspec); if (rc < 0) goto err; if (clkspec.np == node && !clk_supplier) { of_node_put(clkspec.np); rc = 0; goto err; } clk = of_clk_get_from_provider(&clkspec); of_node_put(clkspec.np); if (IS_ERR(clk)) { if (PTR_ERR(clk) != -EPROBE_DEFER) pr_warn("clk: couldn't get assigned clock %d for %pOF\n", index, node); rc = PTR_ERR(clk); goto err; } rc = clk_set_parent(clk, pclk); if (rc < 0) pr_err("clk: failed to reparent %s to %s: %d\n", __clk_get_name(clk), __clk_get_name(pclk), rc); clk_put(clk); clk_put(pclk); } return 0; err: clk_put(pclk); return rc; } static int __set_clk_rates(struct device_node *node, bool clk_supplier) { struct of_phandle_args clkspec; int rc, count, count_64, index; struct clk *clk; u64 *rates_64 __free(kfree) = NULL; u32 *rates __free(kfree) = NULL; count = of_property_count_u32_elems(node, "assigned-clock-rates"); count_64 = of_property_count_u64_elems(node, "assigned-clock-rates-u64"); if (count_64 > 0) { count = count_64; rates_64 = kcalloc(count, sizeof(*rates_64), GFP_KERNEL); if (!rates_64) return -ENOMEM; rc = of_property_read_u64_array(node, "assigned-clock-rates-u64", rates_64, count); } else if (count > 0) { rates = kcalloc(count, sizeof(*rates), GFP_KERNEL); if (!rates) return -ENOMEM; rc = of_property_read_u32_array(node, "assigned-clock-rates", rates, count); } else { return 0; } if (rc) return rc; for (index = 0; index < count; index++) { unsigned long rate; if (rates_64) rate = rates_64[index]; else rate = rates[index]; if (rate) { rc = of_parse_phandle_with_args(node, "assigned-clocks", "#clock-cells", index, &clkspec); if (rc < 0) { /* skip empty (null) phandles
*/ if (rc == -ENOENT) continue; else return rc; } if (clkspec.np == node && !clk_supplier) { of_node_put(clkspec.np); return 0; } clk = of_clk_get_from_provider(&clkspec); of_node_put(clkspec.np); if (IS_ERR(clk)) { if (PTR_ERR(clk) != -EPROBE_DEFER) pr_warn("clk: couldn't get clock %d for %pOF\n", index, node); return PTR_ERR(clk); } rc = clk_set_rate(clk, rate); if (rc < 0) pr_err("clk: couldn't set %s clk rate to %lu (%d), current rate: %lu\n", __clk_get_name(clk), rate, rc, clk_get_rate(clk)); clk_put(clk); } } return 0; } /** * of_clk_set_defaults() - parse and set assigned clocks configuration * @node: device node to apply clock settings for * @clk_supplier: true if clocks supplied by @node should also be considered * * This function parses 'assigned-{clocks/clock-parents/clock-rates}' properties * and sets any specified clock parents and rates. The @clk_supplier argument * should be set to true if @node may be also a clock supplier of any clock * listed in its 'assigned-clocks' or 'assigned-clock-parents' properties. * If @clk_supplier is false the function exits returning 0 as soon as it * determines the @node is also a supplier of any of the clocks. */ int of_clk_set_defaults(struct device_node *node, bool clk_supplier) { int rc; if (!node) return 0; rc = __set_clk_parents(node, clk_supplier); if (rc < 0) return rc; return __set_clk_rates(node, clk_supplier); } EXPORT_SYMBOL_GPL(of_clk_set_defaults); |
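A hedged sketch of how of_clk_set_defaults() is typically consumed: bus code calls it for a device node before probing the driver, so the assigned-clock properties take effect. The device-tree fragment in the comment and the foo_* helper are illustrative only; property names match those parsed above.

/*
 * A node using the properties handled by __set_clk_parents()/__set_clk_rates()
 * might look like (labels and cell values are made up):
 *
 *	uart0: serial@1000 {
 *		clocks = <&clkc 10>;
 *		assigned-clocks = <&clkc 10>;
 *		assigned-clock-parents = <&pll 2>;
 *		assigned-clock-rates = <48000000>;
 *	};
 */
#include <linux/clk/clk-conf.h>
#include <linux/device.h>
#include <linux/of.h>

static int foo_bus_apply_clk_defaults(struct device *dev)
{
	/*
	 * clk_supplier is false: the consumer device is not itself a clock
	 * provider, so the function bails out if it encounters its own node
	 * as a supplier.
	 */
	return of_clk_set_defaults(dev->of_node, false);
}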
/* Copyright (c) 2018, Mellanox Technologies All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <crypto/aead.h> #include <linux/highmem.h> #include <linux/module.h> #include <linux/netdevice.h> #include <net/dst.h> #include <net/inet_connection_sock.h> #include <net/tcp.h> #include <net/tls.h> #include <linux/skbuff_ref.h> #include "tls.h" #include "trace.h" /* device_offload_lock is used to synchronize tls_dev_add * against NETDEV_DOWN notifications. */ static DECLARE_RWSEM(device_offload_lock); static struct workqueue_struct *destruct_wq __read_mostly; static LIST_HEAD(tls_device_list); static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); static struct page *dummy_page; static void tls_device_free_ctx(struct tls_context *ctx) { if (ctx->tx_conf == TLS_HW) kfree(tls_offload_ctx_tx(ctx)); if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); tls_ctx_free(NULL, ctx); } static void tls_device_tx_del_task(struct work_struct *work) { struct tls_offload_context_tx *offload_ctx = container_of(work, struct tls_offload_context_tx, destruct_work); struct tls_context *ctx = offload_ctx->ctx; struct net_device *netdev; /* Safe, because this is the destroy flow, refcount is 0, so * tls_device_down can't store this field in parallel. */ netdev = rcu_dereference_protected(ctx->netdev, !refcount_read(&ctx->refcount)); netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); dev_put(netdev); ctx->netdev = NULL; tls_device_free_ctx(ctx); } static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { struct net_device *netdev; unsigned long flags; bool async_cleanup; spin_lock_irqsave(&tls_device_lock, flags); if (unlikely(!refcount_dec_and_test(&ctx->refcount))) { spin_unlock_irqrestore(&tls_device_lock, flags); return; } list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */ /* Safe, because this is the destroy flow, refcount is 0, so * tls_device_down can't store this field in parallel. */ netdev = rcu_dereference_protected(ctx->netdev, !refcount_read(&ctx->refcount)); async_cleanup = netdev && ctx->tx_conf == TLS_HW; if (async_cleanup) { struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx); /* queue_work inside the spinlock * to make sure tls_device_down waits for that work. 
*/ queue_work(destruct_wq, &offload_ctx->destruct_work); } spin_unlock_irqrestore(&tls_device_lock, flags); if (!async_cleanup) tls_device_free_ctx(ctx); } /* We assume that the socket is already connected */ static struct net_device *get_netdev_for_sock(struct sock *sk) { struct dst_entry *dst = sk_dst_get(sk); struct net_device *netdev = NULL; if (likely(dst)) { netdev = netdev_sk_get_lowest_dev(dst->dev, sk); dev_hold(netdev); } dst_release(dst); return netdev; } static void destroy_record(struct tls_record_info *record) { int i; for (i = 0; i < record->num_frags; i++) __skb_frag_unref(&record->frags[i], false); kfree(record); } static void delete_all_records(struct tls_offload_context_tx *offload_ctx) { struct tls_record_info *info, *temp; list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) { list_del(&info->list); destroy_record(info); } offload_ctx->retransmit_hint = NULL; } static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_record_info *info, *temp; struct tls_offload_context_tx *ctx; u64 deleted_records = 0; unsigned long flags; if (!tls_ctx) return; ctx = tls_offload_ctx_tx(tls_ctx); spin_lock_irqsave(&ctx->lock, flags); info = ctx->retransmit_hint; if (info && !before(acked_seq, info->end_seq)) ctx->retransmit_hint = NULL; list_for_each_entry_safe(info, temp, &ctx->records_list, list) { if (before(acked_seq, info->end_seq)) break; list_del(&info->list); destroy_record(info); deleted_records++; } ctx->unacked_record_sn += deleted_records; spin_unlock_irqrestore(&ctx->lock, flags); } /* At this point, there should be no references on this * socket and no in-flight SKBs associated with this * socket, so it is safe to free all the resources. */ void tls_device_sk_destruct(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); tls_ctx->sk_destruct(sk); if (tls_ctx->tx_conf == TLS_HW) { if (ctx->open_record) destroy_record(ctx->open_record); delete_all_records(ctx); crypto_free_aead(ctx->aead_send); clean_acked_data_disable(inet_csk(sk)); } tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); void tls_device_free_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); tls_free_partial_record(sk, tls_ctx); } void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); trace_tls_device_tx_resync_req(sk, got_seq, exp_seq); WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); } EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request); static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; int err = 0; u8 *rcd_sn; tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); down_read(&device_offload_lock); netdev = rcu_dereference_protected(tls_ctx->netdev, lockdep_is_held(&device_offload_lock)); if (netdev) err = netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_TX); up_read(&device_offload_lock); if (err) return; clear_bit_unlock(TLS_TX_SYNC_SCHED, &tls_ctx->flags); } static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) { skb_frag_t *frag; frag = &record->frags[record->num_frags - 1]; if (skb_frag_page(frag) == pfrag->page && skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { skb_frag_size_add(frag, size); } 
else { ++frag; skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, size); ++record->num_frags; get_page(pfrag->page); } pfrag->offset += size; record->len += size; } static int tls_push_record(struct sock *sk, struct tls_context *ctx, struct tls_offload_context_tx *offload_ctx, struct tls_record_info *record, int flags) { struct tls_prot_info *prot = &ctx->prot_info; struct tcp_sock *tp = tcp_sk(sk); skb_frag_t *frag; int i; record->end_seq = tp->write_seq + record->len; list_add_tail_rcu(&record->list, &offload_ctx->records_list); offload_ctx->open_record = NULL; if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags)) tls_device_resync_tx(sk, ctx, tp->write_seq); tls_advance_record_sn(sk, prot, &ctx->tx); for (i = 0; i < record->num_frags; i++) { frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); /* all ready, send */ return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); } static void tls_device_record_close(struct sock *sk, struct tls_context *ctx, struct tls_record_info *record, struct page_frag *pfrag, unsigned char record_type) { struct tls_prot_info *prot = &ctx->prot_info; struct page_frag dummy_tag_frag; /* append tag * device will fill in the tag, we just need to append a placeholder * use socket memory to improve coalescing (re-using a single buffer * increases frag count) * if we can't allocate memory now use the dummy page */ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) && !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) { dummy_tag_frag.page = dummy_page; dummy_tag_frag.offset = 0; pfrag = &dummy_tag_frag; } tls_append_frag(record, pfrag, prot->tag_size); /* fill prepend */ tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]), record->len - prot->overhead_size, record_type); } static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, struct page_frag *pfrag, size_t prepend_size) { struct tls_record_info *record; skb_frag_t *frag; record = kmalloc(sizeof(*record), GFP_KERNEL); if (!record) return -ENOMEM; frag = &record->frags[0]; skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, prepend_size); get_page(pfrag->page); pfrag->offset += prepend_size; record->num_frags = 1; record->len = prepend_size; offload_ctx->open_record = record; return 0; } static int tls_do_allocation(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct page_frag *pfrag, size_t prepend_size) { int ret; if (!offload_ctx->open_record) { if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, sk->sk_allocation))) { READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return -ENOMEM; } ret = tls_create_new_record(offload_ctx, pfrag, prepend_size); if (ret) return ret; if (pfrag->size > pfrag->offset) return 0; } if (!sk_page_frag_refill(sk, pfrag)) return -ENOMEM; return 0; } static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) { size_t pre_copy, nocache; pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1); if (pre_copy) { pre_copy = min(pre_copy, bytes); if (copy_from_iter(addr, pre_copy, i) != pre_copy) return -EFAULT; bytes -= pre_copy; addr += pre_copy; } nocache = round_down(bytes, SMP_CACHE_BYTES); if (copy_from_iter_nocache(addr, nocache, i) != nocache) return -EFAULT; bytes -= nocache; addr += nocache; if 
(bytes && copy_from_iter(addr, bytes, i) != bytes) return -EFAULT; return 0; } static int tls_push_data(struct sock *sk, struct iov_iter *iter, size_t size, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); struct tls_record_info *record; int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; bool more = false; bool done = false; int copy, rc = 0; long timeo; if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES | MSG_EOR)) return -EOPNOTSUPP; if ((flags & (MSG_MORE | MSG_EOR)) == (MSG_MORE | MSG_EOR)) return -EINVAL; if (unlikely(sk->sk_err)) return -sk->sk_err; flags |= MSG_SENDPAGE_DECRYPTED; tls_push_record_flags = flags | MSG_MORE; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); if (rc < 0) return rc; } pfrag = sk_page_frag(sk); /* TLS_HEADER_SIZE is not counted as part of the TLS record, and * we need to leave room for an authentication tag. */ max_open_record_len = TLS_MAX_PAYLOAD_SIZE + prot->prepend_size; do { rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); if (unlikely(rc)) { rc = sk_stream_wait_memory(sk, &timeo); if (!rc) continue; record = ctx->open_record; if (!record) break; handle_error: if (record_type != TLS_RECORD_TYPE_DATA) { /* avoid sending partial * record with type != * application_data */ size = orig_size; destroy_record(record); ctx->open_record = NULL; } else if (record->len > prot->prepend_size) { goto last_record; } break; } record = ctx->open_record; copy = min_t(size_t, size, max_open_record_len - record->len); if (copy && (flags & MSG_SPLICE_PAGES)) { struct page_frag zc_pfrag; struct page **pages = &zc_pfrag.page; size_t off; rc = iov_iter_extract_pages(iter, &pages, copy, 1, 0, &off); if (rc <= 0) { if (rc == 0) rc = -EIO; goto handle_error; } copy = rc; if (WARN_ON_ONCE(!sendpage_ok(zc_pfrag.page))) { iov_iter_revert(iter, copy); rc = -EIO; goto handle_error; } zc_pfrag.offset = off; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); } else if (copy) { copy = min_t(size_t, copy, pfrag->size - pfrag->offset); rc = tls_device_copy_data(page_address(pfrag->page) + pfrag->offset, copy, iter); if (rc) goto handle_error; tls_append_frag(record, pfrag, copy); } size -= copy; if (!size) { last_record: tls_push_record_flags = flags; if (flags & MSG_MORE) { more = true; break; } done = true; } if (done || record->len >= max_open_record_len || (record->num_frags >= MAX_SKB_FRAGS - 1)) { tls_device_record_close(sk, tls_ctx, record, pfrag, record_type); rc = tls_push_record(sk, tls_ctx, ctx, record, tls_push_record_flags); if (rc < 0) break; } } while (!done); tls_ctx->pending_open_record_frags = more; if (orig_size - size > 0) rc = orig_size - size; return rc; } int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; struct tls_context *tls_ctx = tls_get_ctx(sk); int rc; if (!tls_ctx->zerocopy_sendfile) msg->msg_flags &= ~MSG_SPLICE_PAGES; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (unlikely(msg->msg_controllen)) { rc = tls_process_cmsg(sk, msg, &record_type); if (rc) goto out; } rc = tls_push_data(sk, &msg->msg_iter, size, msg->msg_flags, record_type); out: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); return rc; } void tls_device_splice_eof(struct socket 
*sock) { struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter iter = {}; if (!tls_is_partially_sent_record(tls_ctx)) return; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (tls_is_partially_sent_record(tls_ctx)) { iov_iter_bvec(&iter, ITER_SOURCE, NULL, 0, 0); tls_push_data(sk, &iter, 0, 0, TLS_RECORD_TYPE_DATA); } release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); } struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn) { u64 record_sn = context->hint_record_sn; struct tls_record_info *info, *last; info = context->retransmit_hint; if (!info || before(seq, info->end_seq - info->len)) { /* if retransmit_hint is irrelevant start * from the beginning of the list */ info = list_first_entry_or_null(&context->records_list, struct tls_record_info, list); if (!info) return NULL; /* send the start_marker record if seq number is before the * tls offload start marker sequence number. This record is * required to handle TCP packets which are before TLS offload * started. * And if it's not start marker, look if this seq number * belongs to the list. */ if (likely(!tls_record_is_start_marker(info))) { /* we have the first record, get the last record to see * if this seq number belongs to the list. */ last = list_last_entry(&context->records_list, struct tls_record_info, list); if (!between(seq, tls_record_start_seq(info), last->end_seq)) return NULL; } record_sn = context->unacked_record_sn; } /* We just need the _rcu for the READ_ONCE() */ rcu_read_lock(); list_for_each_entry_from_rcu(info, &context->records_list, list) { if (before(seq, info->end_seq)) { if (!context->retransmit_hint || after(info->end_seq, context->retransmit_hint->end_seq)) { context->hint_record_sn = record_sn; context->retransmit_hint = info; } *p_record_sn = record_sn; goto exit_rcu_unlock; } record_sn++; } info = NULL; exit_rcu_unlock: rcu_read_unlock(); return info; } EXPORT_SYMBOL(tls_get_record); static int tls_device_push_pending_record(struct sock *sk, int flags) { struct iov_iter iter; iov_iter_kvec(&iter, ITER_SOURCE, NULL, 0, 0); return tls_push_data(sk, &iter, 0, flags, TLS_RECORD_TYPE_DATA); } void tls_device_write_space(struct sock *sk, struct tls_context *ctx) { if (tls_is_partially_sent_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; WARN_ON_ONCE(sk->sk_write_pending); sk->sk_allocation = GFP_ATOMIC; tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = sk_allocation; } } static void tls_device_resync_rx(struct tls_context *tls_ctx, struct sock *sk, u32 seq, u8 *rcd_sn) { struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); struct net_device *netdev; trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); rcu_read_lock(); netdev = rcu_dereference(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); rcu_read_unlock(); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } static bool tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, s64 resync_req, u32 *seq, u16 *rcd_delta) { u32 is_async = resync_req & RESYNC_REQ_ASYNC; u32 req_seq = resync_req >> 32; u32 req_end = req_seq + ((resync_req >> 16) & 0xffff); u16 i; *rcd_delta = 0; if (is_async) { /* shouldn't get to wraparound: * too long in async stage, something bad happened */ if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) return false; /* asynchronous stage: log all headers seq such 
that * req_seq <= seq <= end_seq, and wait for real resync request */ if (before(*seq, req_seq)) return false; if (!after(*seq, req_end) && resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX) resync_async->log[resync_async->loglen++] = *seq; resync_async->rcd_delta++; return false; } /* synchronous stage: check against the logged entries and * proceed to check the next entries if no match was found */ for (i = 0; i < resync_async->loglen; i++) if (req_seq == resync_async->log[i] && atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) { *rcd_delta = resync_async->rcd_delta - i; *seq = req_seq; resync_async->loglen = 0; resync_async->rcd_delta = 0; return true; } resync_async->loglen = 0; resync_async->rcd_delta = 0; if (req_seq == *seq && atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) return true; return false; } void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; u32 sock_data, is_req_pending; struct tls_prot_info *prot; s64 resync_req; u16 rcd_delta; u32 req_seq; if (tls_ctx->rx_conf != TLS_HW) return; if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) return; prot = &tls_ctx->prot_info; rx_ctx = tls_offload_ctx_rx(tls_ctx); memcpy(rcd_sn, tls_ctx->rx.rec_seq, prot->rec_seq_size); switch (rx_ctx->resync_type) { case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ: resync_req = atomic64_read(&rx_ctx->resync_req); req_seq = resync_req >> 32; seq += TLS_HEADER_SIZE - 1; is_req_pending = resync_req; if (likely(!is_req_pending) || req_seq != seq || !atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) return; break; case TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT: if (likely(!rx_ctx->resync_nh_do_now)) return; /* head of next rec is already in, note that the sock_inq will * include the currently parsed message when called from parser */ sock_data = tcp_inq(sk); if (sock_data > rcd_len) { trace_tls_device_rx_resync_nh_delay(sk, sock_data, rcd_len); return; } rx_ctx->resync_nh_do_now = 0; seq += rcd_len; tls_bigint_increment(rcd_sn, prot->rec_seq_size); break; case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC: resync_req = atomic64_read(&rx_ctx->resync_async->req); is_req_pending = resync_req; if (likely(!is_req_pending)) return; if (!tls_device_rx_resync_async(rx_ctx->resync_async, resync_req, &seq, &rcd_delta)) return; tls_bigint_subtract(rcd_sn, rcd_delta); break; } tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn); } static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx, struct tls_offload_context_rx *ctx, struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm; /* device will request resyncs by itself based on stream scan */ if (ctx->resync_type != TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT) return; /* already scheduled */ if (ctx->resync_nh_do_now) return; /* seen decrypted fragments since last fully-failed record */ if (ctx->resync_nh_reset) { ctx->resync_nh_reset = 0; ctx->resync_nh.decrypted_failed = 1; ctx->resync_nh.decrypted_tgt = TLS_DEVICE_RESYNC_NH_START_IVAL; return; } if (++ctx->resync_nh.decrypted_failed <= ctx->resync_nh.decrypted_tgt) return; /* doing resync, bump the next target in case it fails */ if (ctx->resync_nh.decrypted_tgt < TLS_DEVICE_RESYNC_NH_MAX_IVAL) ctx->resync_nh.decrypted_tgt *= 2; else ctx->resync_nh.decrypted_tgt += TLS_DEVICE_RESYNC_NH_MAX_IVAL; rxm = strp_msg(skb); /* head of next rec is already in, parser will sync for us */ if (tcp_inq(sk) > rxm->full_len) { 
trace_tls_device_rx_resync_nh_schedule(sk); ctx->resync_nh_do_now = 1; } else { struct tls_prot_info *prot = &tls_ctx->prot_info; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; memcpy(rcd_sn, tls_ctx->rx.rec_seq, prot->rec_seq_size); tls_bigint_increment(rcd_sn, prot->rec_seq_size); tls_device_resync_rx(tls_ctx, sk, tcp_sk(sk)->copied_seq, rcd_sn); } } static int tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) { struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx); const struct tls_cipher_desc *cipher_desc; int err, offset, copy, data_len, pos; struct sk_buff *skb, *skb_iter; struct scatterlist sg[1]; struct strp_msg *rxm; char *orig_buf, *buf; cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type); DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); rxm = strp_msg(tls_strp_msg(sw_ctx)); orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv, sk->sk_allocation); if (!orig_buf) return -ENOMEM; buf = orig_buf; err = tls_strp_msg_cow(sw_ctx); if (unlikely(err)) goto free_buf; skb = tls_strp_msg(sw_ctx); rxm = strp_msg(skb); offset = rxm->offset; sg_init_table(sg, 1); sg_set_buf(&sg[0], buf, rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv); err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_desc->iv); if (err) goto free_buf; /* We are interested only in the decrypted data not the auth */ err = decrypt_skb(sk, sg); if (err != -EBADMSG) goto free_buf; else err = 0; data_len = rxm->full_len - cipher_desc->tag; if (skb_pagelen(skb) > offset) { copy = min_t(int, skb_pagelen(skb) - offset, data_len); if (skb->decrypted) { err = skb_store_bits(skb, offset, buf, copy); if (err) goto free_buf; } offset += copy; buf += copy; } pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { int frag_pos; /* Practically all frags must belong to msg if reencrypt * is needed with current strparser and coalescing logic, * but strparser may "get optimized", so let's be safe. */ if (pos + skb_iter->len <= offset) goto done_with_frag; if (pos >= data_len + rxm->offset) break; frag_pos = offset - pos; copy = min_t(int, skb_iter->len - frag_pos, data_len + rxm->offset - offset); if (skb_iter->decrypted) { err = skb_store_bits(skb_iter, frag_pos, buf, copy); if (err) goto free_buf; } offset += copy; buf += copy; done_with_frag: pos += skb_iter->len; } free_buf: kfree(orig_buf); return err; } int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx) { struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx); struct sk_buff *skb = tls_strp_msg(sw_ctx); struct strp_msg *rxm = strp_msg(skb); int is_decrypted, is_encrypted; if (!tls_strp_msg_mixed_decrypted(sw_ctx)) { is_decrypted = skb->decrypted; is_encrypted = !is_decrypted; } else { is_decrypted = 0; is_encrypted = 0; } trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len, tls_ctx->rx.rec_seq, rxm->full_len, is_encrypted, is_decrypted); if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { if (likely(is_encrypted || is_decrypted)) return is_decrypted; /* After tls_device_down disables the offload, the next SKB will * likely have initial fragments decrypted, and final ones not * decrypted. We need to reencrypt that single SKB. */ return tls_device_reencrypt(sk, tls_ctx); } /* Return immediately if the record is either entirely plaintext or * entirely ciphertext. Otherwise handle reencrypt partially decrypted * record. 
*/ if (is_decrypted) { ctx->resync_nh_reset = 1; return is_decrypted; } if (is_encrypted) { tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb); return 0; } ctx->resync_nh_reset = 1; return tls_device_reencrypt(sk, tls_ctx); } static void tls_device_attach(struct tls_context *ctx, struct sock *sk, struct net_device *netdev) { if (sk->sk_destruct != tls_device_sk_destruct) { refcount_set(&ctx->refcount, 1); dev_hold(netdev); RCU_INIT_POINTER(ctx->netdev, netdev); spin_lock_irq(&tls_device_lock); list_add_tail(&ctx->list, &tls_device_list); spin_unlock_irq(&tls_device_lock); ctx->sk_destruct = sk->sk_destruct; smp_store_release(&sk->sk_destruct, tls_device_sk_destruct); } } static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx) { struct tls_offload_context_tx *offload_ctx; __be64 rcd_sn; offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL); if (!offload_ctx) return NULL; INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); INIT_LIST_HEAD(&offload_ctx->records_list); spin_lock_init(&offload_ctx->lock); sg_init_table(offload_ctx->sg_tx_data, ARRAY_SIZE(offload_ctx->sg_tx_data)); /* start at rec_seq - 1 to account for the start marker record */ memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; offload_ctx->ctx = ctx; return offload_ctx; } int tls_set_device_offload(struct sock *sk) { struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; struct tls_prot_info *prot; struct net_device *netdev; struct tls_context *ctx; char *iv, *rec_seq; int rc; ctx = tls_get_ctx(sk); prot = &ctx->prot_info; if (ctx->priv_ctx_tx) return -EEXIST; netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); return -EINVAL; } if (!(netdev->features & NETIF_F_HW_TLS_TX)) { rc = -EOPNOTSUPP; goto release_netdev; } crypto_info = &ctx->crypto_send.info; if (crypto_info->version != TLS_1_2_VERSION) { rc = -EOPNOTSUPP; goto release_netdev; } cipher_desc = get_cipher_desc(crypto_info->cipher_type); if (!cipher_desc || !cipher_desc->offloadable) { rc = -EINVAL; goto release_netdev; } rc = init_prot_info(prot, crypto_info, cipher_desc); if (rc) goto release_netdev; iv = crypto_info_iv(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; goto release_netdev; } offload_ctx = alloc_offload_ctx_tx(ctx); if (!offload_ctx) { rc = -ENOMEM; goto free_marker_record; } rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info); if (rc) goto free_offload_ctx; start_marker_record->end_seq = tcp_sk(sk)->write_seq; start_marker_record->len = 0; start_marker_record->num_frags = 0; list_add_tail(&start_marker_record->list, &offload_ctx->records_list); clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; /* TLS offload is greatly simplified if we don't send * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. 
*/ tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event * * device_offload_lock is taken in tls_devices's NETDEV_DOWN * handler thus protecting from the device going down before * ctx was added to tls_device_list. */ down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; goto release_lock; } ctx->priv_ctx_tx = offload_ctx; rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX, &ctx->crypto_send.info, tcp_sk(sk)->write_seq); trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX, tcp_sk(sk)->write_seq, rec_seq, rc); if (rc) goto release_lock; tls_device_attach(ctx, sk, netdev); up_read(&device_offload_lock); /* following this assignment tls_is_skb_tx_device_offloaded * will return true and the context might be accessed * by the netdev's xmit function. */ smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb); dev_put(netdev); return 0; release_lock: up_read(&device_offload_lock); clean_acked_data_disable(inet_csk(sk)); crypto_free_aead(offload_ctx->aead_send); free_offload_ctx: kfree(offload_ctx); ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); release_netdev: dev_put(netdev); return rc; } int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) { struct tls12_crypto_info_aes_gcm_128 *info; struct tls_offload_context_rx *context; struct net_device *netdev; int rc = 0; if (ctx->crypto_recv.info.version != TLS_1_2_VERSION) return -EOPNOTSUPP; netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); return -EINVAL; } if (!(netdev->features & NETIF_F_HW_TLS_RX)) { rc = -EOPNOTSUPP; goto release_netdev; } /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event * * device_offload_lock is taken in tls_devices's NETDEV_DOWN * handler thus protecting from the device going down before * ctx was added to tls_device_list. 
*/ down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; goto release_lock; } context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { rc = -ENOMEM; goto release_lock; } context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; rc = tls_set_sw_offload(sk, 0); if (rc) goto release_ctx; rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, tcp_sk(sk)->copied_seq); info = (void *)&ctx->crypto_recv.info; trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX, tcp_sk(sk)->copied_seq, info->rec_seq, rc); if (rc) goto free_sw_resources; tls_device_attach(ctx, sk, netdev); up_read(&device_offload_lock); dev_put(netdev); return 0; free_sw_resources: up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_lock: up_read(&device_offload_lock); release_netdev: dev_put(netdev); return rc; } void tls_device_offload_cleanup_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct net_device *netdev; down_read(&device_offload_lock); netdev = rcu_dereference_protected(tls_ctx->netdev, lockdep_is_held(&device_offload_lock)); if (!netdev) goto out; netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); rcu_assign_pointer(tls_ctx->netdev, NULL); } else { set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } out: up_read(&device_offload_lock); tls_sw_release_resources_rx(sk); } static int tls_device_down(struct net_device *netdev) { struct tls_context *ctx, *tmp; unsigned long flags; LIST_HEAD(list); /* Request a write lock to block new offload attempts */ down_write(&device_offload_lock); spin_lock_irqsave(&tls_device_lock, flags); list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) { struct net_device *ctx_netdev = rcu_dereference_protected(ctx->netdev, lockdep_is_held(&device_offload_lock)); if (ctx_netdev != netdev || !refcount_inc_not_zero(&ctx->refcount)) continue; list_move(&ctx->list, &list); } spin_unlock_irqrestore(&tls_device_lock, flags); list_for_each_entry_safe(ctx, tmp, &list, list) { /* Stop offloaded TX and switch to the fallback. * tls_is_skb_tx_device_offloaded will return false. */ WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); /* Stop the RX and TX resync. * tls_dev_resync must not be called after tls_dev_del. */ rcu_assign_pointer(ctx->netdev, NULL); /* Start skipping the RX resync logic completely. */ set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags); /* Sync with inflight packets. After this point: * TX: no non-encrypted packets will be passed to the driver. * RX: resync requests from the driver will be ignored. */ synchronize_net(); /* Release the offload context on the driver side. */ if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); if (ctx->rx_conf == TLS_HW && !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); dev_put(netdev); /* Move the context to a separate list for two reasons: * 1. When the context is deallocated, list_del is called. * 2. It's no longer an offloaded context, so we don't want to * run offload-specific code on this context. */ spin_lock_irqsave(&tls_device_lock, flags); list_move_tail(&ctx->list, &tls_device_down_list); spin_unlock_irqrestore(&tls_device_lock, flags); /* Device contexts for RX and TX will be freed in on sk_destruct * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. 
* Now release the ref taken above. */ if (refcount_dec_and_test(&ctx->refcount)) { /* sk_destruct ran after tls_device_down took a ref, and * it returned early. Complete the destruction here. */ list_del(&ctx->list); tls_device_free_ctx(ctx); } } up_write(&device_offload_lock); flush_workqueue(destruct_wq); return NOTIFY_DONE; } static int tls_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!dev->tlsdev_ops && !(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX))) return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: case NETDEV_FEAT_CHANGE: if (netif_is_bond_master(dev)) return NOTIFY_DONE; if ((dev->features & NETIF_F_HW_TLS_RX) && !dev->tlsdev_ops->tls_dev_resync) return NOTIFY_BAD; if (dev->tlsdev_ops && dev->tlsdev_ops->tls_dev_add && dev->tlsdev_ops->tls_dev_del) return NOTIFY_DONE; else return NOTIFY_BAD; case NETDEV_DOWN: return tls_device_down(dev); } return NOTIFY_DONE; } static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; int __init tls_device_init(void) { int err; dummy_page = alloc_page(GFP_KERNEL); if (!dummy_page) return -ENOMEM; destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0); if (!destruct_wq) { err = -ENOMEM; goto err_free_dummy; } err = register_netdevice_notifier(&tls_dev_notifier); if (err) goto err_destroy_wq; return 0; err_destroy_wq: destroy_workqueue(destruct_wq); err_free_dummy: put_page(dummy_page); return err; } void __exit tls_device_cleanup(void) { unregister_netdevice_notifier(&tls_dev_notifier); destroy_workqueue(destruct_wq); clean_acked_data_flush(); put_page(dummy_page); } |
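A driver-side sketch to complement the notifier above: tls_dev_event() only accepts devices that advertise NETIF_F_HW_TLS_TX/RX and implement tls_dev_add/tls_dev_del (plus tls_dev_resync when RX offload is advertised). The tlsdev_ops layout is assumed to match its declaration in net/tls.h, and all foo_* names are invented for illustration; the callback bodies are placeholders, not a working offload.

#include <linux/netdevice.h>
#include <net/tls.h>

static int foo_tls_dev_add(struct net_device *netdev, struct sock *sk,
			   enum tls_offload_ctx_dir direction,
			   struct tls_crypto_info *crypto_info,
			   u32 start_offload_tcp_sn)
{
	/* Program the key/IV/record state into the NIC here. */
	return 0;
}

static void foo_tls_dev_del(struct net_device *netdev,
			    struct tls_context *tls_ctx,
			    enum tls_offload_ctx_dir direction)
{
	/* Release the hardware crypto state here. */
}

static int foo_tls_dev_resync(struct net_device *netdev, struct sock *sk,
			      u32 seq, u8 *rcd_sn,
			      enum tls_offload_ctx_dir direction)
{
	/* Resynchronize the hardware record tracking to seq/rcd_sn here. */
	return 0;
}

static const struct tlsdev_ops foo_tlsdev_ops = {
	.tls_dev_add	= foo_tls_dev_add,
	.tls_dev_del	= foo_tls_dev_del,
	.tls_dev_resync	= foo_tls_dev_resync,
};

static void foo_setup_tls_offload(struct net_device *netdev)
{
	netdev->tlsdev_ops = &foo_tlsdev_ops;
	netdev->features |= NETIF_F_HW_TLS_TX;
	netdev->hw_features |= NETIF_F_HW_TLS_TX;
}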
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * RNG: Random Number Generator algorithms under the crypto API * * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com> * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_RNG_H #define _CRYPTO_RNG_H #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> struct crypto_rng; /** * struct rng_alg - random number generator definition * * @generate: The function defined by this variable obtains a * random number. The random number generator transform * must generate the random number out of the context * provided with this call, plus any additional data * if provided to the call. * @seed: Seed or reseed the random number generator. With the * invocation of this function call, the random number * generator shall become ready for generation. If the * random number generator requires a seed for setting * up a new state, the seed must be provided by the * consumer while invoking this function. The required * size of the seed is defined with @seedsize . * @set_ent: Set entropy that would otherwise be obtained from * entropy source. Internal use only. * @seedsize: The seed size required for a random number generator * initialization defined with this variable. Some * random number generators do not require a seed * as the seeding is implemented internally without * the need of support by the consumer. In this case, * the seed size is set to zero. * @base: Common crypto API algorithm data structure. */ struct rng_alg { int (*generate)(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen); int (*seed)(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); void (*set_ent)(struct crypto_rng *tfm, const u8 *data, unsigned int len); unsigned int seedsize; struct crypto_alg base; }; struct crypto_rng { struct crypto_tfm base; }; extern struct crypto_rng *crypto_default_rng; int crypto_get_default_rng(void); void crypto_put_default_rng(void); /** * DOC: Random number generator API * * The random number generator API is used with the ciphers of type * CRYPTO_ALG_TYPE_RNG (listed as type "rng" in /proc/crypto) */ /** * crypto_alloc_rng() -- allocate RNG handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * random number generator * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for a random number generator. The returned struct * crypto_rng is the cipher handle that is required for any subsequent * API invocation for that random number generator. * * For all random number generators, this call creates a new private copy of * the random number generator that does not share a state with other * instances.
The only exception is the "krng" random number generator which * is a kernel crypto API use case for the get_random_bytes() function of the * /dev/random driver. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm) { return &tfm->base; } static inline struct rng_alg *__crypto_rng_alg(struct crypto_alg *alg) { return container_of(alg, struct rng_alg, base); } /** * crypto_rng_alg - obtain name of RNG * @tfm: cipher handle * * Return the generic name (cra_name) of the initialized random number generator * * Return: generic name string */ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) { return __crypto_rng_alg(crypto_rng_tfm(tfm)->__crt_alg); } /** * crypto_free_rng() - zeroize and free RNG handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_rng(struct crypto_rng *tfm) { crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } /** * crypto_rng_generate() - get random number * @tfm: cipher handle * @src: Input buffer holding additional data, may be NULL * @slen: Length of additional data * @dst: output buffer holding the random numbers * @dlen: length of the output buffer * * This function fills the caller-allocated buffer with random * numbers using the random number generator referenced by the * cipher handle. * * Return: 0 function was successful; < 0 if an error occurred */ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); } /** * crypto_rng_get_bytes() - get random number * @tfm: cipher handle * @rdata: output buffer holding the random numbers * @dlen: length of the output buffer * * This function fills the caller-allocated buffer with random numbers using the * random number generator referenced by the cipher handle. * * Return: 0 function was successful; < 0 if an error occurred */ static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen) { return crypto_rng_generate(tfm, NULL, 0, rdata, dlen); } /** * crypto_rng_reset() - re-initialize the RNG * @tfm: cipher handle * @seed: seed input data * @slen: length of the seed input data * * The reset function completely re-initializes the random number generator * referenced by the cipher handle by clearing the current state. The new state * is initialized with the caller provided seed or automatically, depending * on the random number generator type (the ANSI X9.31 RNG requires * caller-provided seed, the SP800-90A DRBGs perform an automatic seeding). * The seed is provided as a parameter to this function call. The provided seed * should have the length of the seed size defined for the random number * generator as defined by crypto_rng_seedsize. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); /** * crypto_rng_seedsize() - obtain seed size of RNG * @tfm: cipher handle * * The function returns the seed size for the random number generator * referenced by the cipher handle. This value may be zero if the random * number generator does not implement or require a reseeding. 
For example, * the SP800-90A DRBGs implement an automated reseeding after reaching a * pre-defined threshold. * * Return: seed size for the random number generator */ static inline int crypto_rng_seedsize(struct crypto_rng *tfm) { return crypto_rng_alg(tfm)->seedsize; } #endif |
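/*
 * Illustrative sketch (not part of this header): one way a kernel caller
 * might use the API above. The "stdrng" algorithm name, the 64-byte seed
 * buffer and the hypothetical example_fill_random() helper are assumptions
 * of this example, not requirements of the API; error handling is minimal.
 */
#include <crypto/rng.h>
#include <linux/random.h>

static int example_fill_random(u8 *buf, unsigned int len)
{
	struct crypto_rng *rng;
	u8 seed[64];
	unsigned int slen;
	int ret;

	/* Private RNG instance; type/mask 0 selects the defaults. */
	rng = crypto_alloc_rng("stdrng", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	/* Seed with as much material as the algorithm asks for (a real
	 * caller would size the buffer from crypto_rng_seedsize()). */
	slen = crypto_rng_seedsize(rng);
	if (slen > sizeof(seed))
		slen = sizeof(seed);
	get_random_bytes(seed, slen);

	ret = crypto_rng_reset(rng, seed, slen);
	if (!ret)
		/* Fill the caller-provided buffer with random bytes. */
		ret = crypto_rng_get_bytes(rng, buf, len);

	crypto_free_rng(rng);
	return ret;
}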
// SPDX-License-Identifier: GPL-2.0-only
/*
 * HWSIM IEEE 802.15.4 interface
 *
 * (C) 2018 Mojatau, Alexander Aring <aring@mojatau.com>
 * Copyright 2007-2012 Siemens AG
 *
 * Based on fakelb, original Written by:
 * Sergey Lapin <slapin@ossfans.org>
 * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
 */

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/platform_device.h>
#include <linux/rtnetlink.h>
#include <linux/netdevice.h>
#include <linux/device.h>
#include <linux/spinlock.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
#include <net/cfg802154.h>
#include <net/genetlink.h>
#include "mac802154_hwsim.h"

MODULE_DESCRIPTION("Software simulator of IEEE 802.15.4 radio(s) for mac802154");
MODULE_LICENSE("GPL");

static LIST_HEAD(hwsim_phys);
static DEFINE_MUTEX(hwsim_phys_lock);

static struct platform_device *mac802154hwsim_dev;

/* MAC802154_HWSIM netlink family */
static struct genl_family hwsim_genl_family;

static int hwsim_radio_idx;

enum hwsim_multicast_groups {
	HWSIM_MCGRP_CONFIG,
};

static const struct genl_multicast_group hwsim_mcgrps[] = {
	[HWSIM_MCGRP_CONFIG] = { .name = "config", },
};

struct hwsim_pib {
	u8 page;
	u8 channel;
	struct ieee802154_hw_addr_filt filt;
	enum ieee802154_filtering_level filt_level;

	struct rcu_head rcu;
};

struct hwsim_edge_info {
	u8 lqi;

	struct rcu_head rcu;
};

struct hwsim_edge {
	struct hwsim_phy *endpoint;
	struct hwsim_edge_info __rcu *info;

	struct list_head list;
	struct rcu_head rcu;
};

struct hwsim_phy {
	struct ieee802154_hw *hw;
	u32 idx;

	struct hwsim_pib __rcu *pib;

	bool suspended;
	struct list_head edges;

	struct list_head list;
};

static int hwsim_add_one(struct genl_info *info, struct device *dev,
			 bool init);
static void hwsim_del(struct hwsim_phy *phy);

static int hwsim_hw_ed(struct ieee802154_hw *hw, u8 *level)
{
	*level = 0xbe;

	return 0;
}

static int hwsim_update_pib(struct ieee802154_hw *hw, u8 page, u8 channel,
			    struct ieee802154_hw_addr_filt *filt,
			    enum ieee802154_filtering_level filt_level)
{
	struct hwsim_phy *phy = hw->priv;
	struct hwsim_pib *pib, *pib_old;

	pib = kzalloc(sizeof(*pib), GFP_ATOMIC);
	if (!pib)
		return -ENOMEM;

	pib_old = rtnl_dereference(phy->pib);

	pib->page = page;
	pib->channel = channel;
	pib->filt.short_addr = filt->short_addr;
	pib->filt.pan_id = filt->pan_id;
	pib->filt.ieee_addr = filt->ieee_addr;
	pib->filt.pan_coord = filt->pan_coord;
	pib->filt_level = filt_level;

	rcu_assign_pointer(phy->pib, pib);
	kfree_rcu(pib_old, rcu);
	return 0;
}

static int hwsim_hw_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
{
	struct hwsim_phy *phy = hw->priv;
	struct hwsim_pib *pib;
	int ret;

	rcu_read_lock();
	pib
= rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, page, channel, &pib->filt, pib->filt_level); rcu_read_unlock(); return ret; } static int hwsim_hw_addr_filt(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, pib->page, pib->channel, filt, pib->filt_level); rcu_read_unlock(); return ret; } static void hwsim_hw_receive(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) { struct ieee802154_hdr hdr; struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; rcu_read_lock(); pib = rcu_dereference(phy->pib); if (!pskb_may_pull(skb, 3)) { dev_dbg(hw->parent, "invalid frame\n"); goto drop; } memcpy(&hdr, skb->data, 3); /* Level 4 filtering: Frame fields validity */ if (pib->filt_level == IEEE802154_FILTERING_4_FRAME_FIELDS) { /* a) Drop reserved frame types */ switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_BEACON: case IEEE802154_FC_TYPE_DATA: case IEEE802154_FC_TYPE_ACK: case IEEE802154_FC_TYPE_MAC_CMD: break; default: dev_dbg(hw->parent, "unrecognized frame type 0x%x\n", mac_cb(skb)->type); goto drop; } /* b) Drop reserved frame versions */ switch (hdr.fc.version) { case IEEE802154_2003_STD: case IEEE802154_2006_STD: case IEEE802154_STD: break; default: dev_dbg(hw->parent, "unrecognized frame version 0x%x\n", hdr.fc.version); goto drop; } /* c) PAN ID constraints */ if ((mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG || mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id && mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST)) { dev_dbg(hw->parent, "unrecognized PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } /* d1) Short address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT && mac_cb(skb)->dest.short_addr != pib->filt.short_addr && mac_cb(skb)->dest.short_addr != cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { dev_dbg(hw->parent, "unrecognized short address %04x\n", le16_to_cpu(mac_cb(skb)->dest.short_addr)); goto drop; } /* d2) Extended address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG && mac_cb(skb)->dest.extended_addr != pib->filt.ieee_addr) { dev_dbg(hw->parent, "unrecognized long address 0x%016llx\n", mac_cb(skb)->dest.extended_addr); goto drop; } /* d4) Specific PAN coordinator case (no parent) */ if ((mac_cb(skb)->type == IEEE802154_FC_TYPE_DATA || mac_cb(skb)->type == IEEE802154_FC_TYPE_MAC_CMD) && mac_cb(skb)->dest.mode == IEEE802154_ADDR_NONE) { dev_dbg(hw->parent, "relaying is not supported\n"); goto drop; } /* e) Beacon frames follow specific PAN ID rules */ if (mac_cb(skb)->type == IEEE802154_FC_TYPE_BEACON && pib->filt.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id) { dev_dbg(hw->parent, "invalid beacon PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } } rcu_read_unlock(); ieee802154_rx_irqsafe(hw, skb, lqi); return; drop: rcu_read_unlock(); kfree_skb(skb); } static int hwsim_hw_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) { struct hwsim_phy *current_phy = hw->priv; struct hwsim_pib *current_pib, *endpoint_pib; struct hwsim_edge_info *einfo; struct hwsim_edge *e; WARN_ON(current_phy->suspended); rcu_read_lock(); current_pib = rcu_dereference(current_phy->pib); list_for_each_entry_rcu(e, ¤t_phy->edges, list) { /* Can be changed later in rx_irqsafe, but this is only a * performance tweak. 
Received radio should drop the frame * in mac802154 stack anyway... so we don't need to be * 100% of locking here to check on suspended */ if (e->endpoint->suspended) continue; endpoint_pib = rcu_dereference(e->endpoint->pib); if (current_pib->page == endpoint_pib->page && current_pib->channel == endpoint_pib->channel) { struct sk_buff *newskb = pskb_copy(skb, GFP_ATOMIC); einfo = rcu_dereference(e->info); if (newskb) hwsim_hw_receive(e->endpoint->hw, newskb, einfo->lqi); } } rcu_read_unlock(); ieee802154_xmit_complete(hw, skb, false); return 0; } static int hwsim_hw_start(struct ieee802154_hw *hw) { struct hwsim_phy *phy = hw->priv; phy->suspended = false; return 0; } static void hwsim_hw_stop(struct ieee802154_hw *hw) { struct hwsim_phy *phy = hw->priv; phy->suspended = true; } static int hwsim_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) { enum ieee802154_filtering_level filt_level; struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; if (on) filt_level = IEEE802154_FILTERING_NONE; else filt_level = IEEE802154_FILTERING_4_FRAME_FIELDS; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, pib->page, pib->channel, &pib->filt, filt_level); rcu_read_unlock(); return ret; } static const struct ieee802154_ops hwsim_ops = { .owner = THIS_MODULE, .xmit_async = hwsim_hw_xmit, .ed = hwsim_hw_ed, .set_channel = hwsim_hw_channel, .start = hwsim_hw_start, .stop = hwsim_hw_stop, .set_promiscuous_mode = hwsim_set_promiscuous_mode, .set_hw_addr_filt = hwsim_hw_addr_filt, }; static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { return hwsim_add_one(info, &mac802154hwsim_dev->dev, false); } static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_phy *phy, *tmp; s64 idx = -1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]) return -EINVAL; idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) { if (idx == phy->idx) { hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); return 0; } } mutex_unlock(&hwsim_phys_lock); return -ENODEV; } static int append_radio_msg(struct sk_buff *skb, struct hwsim_phy *phy) { struct nlattr *nl_edges, *nl_edge; struct hwsim_edge_info *einfo; struct hwsim_edge *e; int ret; ret = nla_put_u32(skb, MAC802154_HWSIM_ATTR_RADIO_ID, phy->idx); if (ret < 0) return ret; rcu_read_lock(); if (list_empty(&phy->edges)) { rcu_read_unlock(); return 0; } nl_edges = nla_nest_start_noflag(skb, MAC802154_HWSIM_ATTR_RADIO_EDGES); if (!nl_edges) { rcu_read_unlock(); return -ENOBUFS; } list_for_each_entry_rcu(e, &phy->edges, list) { nl_edge = nla_nest_start_noflag(skb, MAC802154_HWSIM_ATTR_RADIO_EDGE); if (!nl_edge) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edges); return -ENOBUFS; } ret = nla_put_u32(skb, MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID, e->endpoint->idx); if (ret < 0) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edge); nla_nest_cancel(skb, nl_edges); return ret; } einfo = rcu_dereference(e->info); ret = nla_put_u8(skb, MAC802154_HWSIM_EDGE_ATTR_LQI, einfo->lqi); if (ret < 0) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edge); nla_nest_cancel(skb, nl_edges); return ret; } nla_nest_end(skb, nl_edge); } rcu_read_unlock(); nla_nest_end(skb, nl_edges); return 0; } static int hwsim_get_radio(struct sk_buff *skb, struct hwsim_phy *phy, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; int res; hdr = genlmsg_put(skb, portid, seq, &hwsim_genl_family, flags, 
MAC802154_HWSIM_CMD_GET_RADIO); if (!hdr) return -EMSGSIZE; if (cb) genl_dump_check_consistent(cb, hdr); res = append_radio_msg(skb, phy); if (res < 0) goto out_err; genlmsg_end(skb, hdr); return 0; out_err: genlmsg_cancel(skb, hdr); return res; } static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_phy *phy; struct sk_buff *skb; int idx, res = -ENODEV; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]) return -EINVAL; idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); mutex_lock(&hwsim_phys_lock); list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx != idx) continue; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) { res = -ENOMEM; goto out_err; } res = hwsim_get_radio(skb, phy, info->snd_portid, info->snd_seq, NULL, 0); if (res < 0) { nlmsg_free(skb); goto out_err; } res = genlmsg_reply(skb, info); break; } out_err: mutex_unlock(&hwsim_phys_lock); return res; } static int hwsim_dump_radio_nl(struct sk_buff *skb, struct netlink_callback *cb) { int idx = cb->args[0]; struct hwsim_phy *phy; int res; mutex_lock(&hwsim_phys_lock); if (idx == hwsim_radio_idx) goto done; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx < idx) continue; res = hwsim_get_radio(skb, phy, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (res < 0) break; idx = phy->idx + 1; } cb->args[0] = idx; done: mutex_unlock(&hwsim_phys_lock); return skb->len; } /* caller need to held hwsim_phys_lock */ static struct hwsim_phy *hwsim_get_radio_by_id(uint32_t idx) { struct hwsim_phy *phy; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx == idx) return phy; } return NULL; } static const struct nla_policy hwsim_edge_policy[MAC802154_HWSIM_EDGE_ATTR_MAX + 1] = { [MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_EDGE_ATTR_LQI] = { .type = NLA_U8 }, }; static struct hwsim_edge *hwsim_alloc_edge(struct hwsim_phy *endpoint, u8 lqi) { struct hwsim_edge_info *einfo; struct hwsim_edge *e; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) return NULL; einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { kfree(e); return NULL; } einfo->lqi = 0xff; rcu_assign_pointer(e->info, einfo); e->endpoint = endpoint; return e; } static void hwsim_free_edge(struct hwsim_edge *e) { struct hwsim_edge_info *einfo; rcu_read_lock(); einfo = rcu_dereference(e->info); rcu_read_unlock(); kfree_rcu(einfo, rcu); kfree_rcu(e, rcu); } static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0, *phy_v1; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); if (v0 == v1) return -EINVAL; mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } phy_v1 = hwsim_get_radio_by_id(v1); if (!phy_v1) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { mutex_unlock(&hwsim_phys_lock); rcu_read_unlock(); return -EEXIST; } } 
rcu_read_unlock(); e = hwsim_alloc_edge(phy_v1, 0xff); if (!e) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } list_add_rcu(&e->list, &phy_v0->edges); /* wait until changes are done under hwsim_phys_lock lock * should prevent of calling this function twice while * edges list has not the changes yet. */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { rcu_read_unlock(); list_del_rcu(&e->list); hwsim_free_edge(e); /* same again - wait until list changes are done */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_edge_info *einfo, *einfo_old; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; u8 lqi; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] || !edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); lqi = nla_get_u8(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { einfo->lqi = lqi; einfo_old = rcu_replace_pointer(e->info, einfo, lockdep_is_held(&hwsim_phys_lock)); rcu_read_unlock(); kfree_rcu(einfo_old, rcu); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); kfree(einfo); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } /* MAC802154_HWSIM netlink policy */ static const struct nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = { [MAC802154_HWSIM_ATTR_RADIO_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_ATTR_RADIO_EDGE] = { .type = NLA_NESTED }, [MAC802154_HWSIM_ATTR_RADIO_EDGES] = { .type = NLA_NESTED }, }; /* Generic Netlink operations array */ static const struct genl_small_ops hwsim_nl_ops[] = { { .cmd = MAC802154_HWSIM_CMD_NEW_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = 
MAC802154_HWSIM_CMD_DEL_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, }; static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC802154_HWSIM", .version = 1, .maxattr = MAC802154_HWSIM_ATTR_MAX, .policy = hwsim_genl_policy, .module = THIS_MODULE, .small_ops = hwsim_nl_ops, .n_small_ops = ARRAY_SIZE(hwsim_nl_ops), .resv_start_op = MAC802154_HWSIM_CMD_NEW_EDGE + 1, .mcgrps = hwsim_mcgrps, .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb, struct genl_info *info) { if (info) genl_notify(&hwsim_genl_family, mcast_skb, info, HWSIM_MCGRP_CONFIG, GFP_KERNEL); else genlmsg_multicast(&hwsim_genl_family, mcast_skb, 0, HWSIM_MCGRP_CONFIG, GFP_KERNEL); } static void hwsim_mcast_new_radio(struct genl_info *info, struct hwsim_phy *phy) { struct sk_buff *mcast_skb; void *data; mcast_skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!mcast_skb) return; data = genlmsg_put(mcast_skb, 0, 0, &hwsim_genl_family, 0, MAC802154_HWSIM_CMD_NEW_RADIO); if (!data) goto out_err; if (append_radio_msg(mcast_skb, phy) < 0) goto out_err; genlmsg_end(mcast_skb, data); hwsim_mcast_config_msg(mcast_skb, info); return; out_err: genlmsg_cancel(mcast_skb, data); nlmsg_free(mcast_skb); } static void hwsim_edge_unsubscribe_me(struct hwsim_phy *phy) { struct hwsim_phy *tmp; struct hwsim_edge *e; rcu_read_lock(); /* going to all phy edges and remove phy from it */ list_for_each_entry(tmp, &hwsim_phys, list) { list_for_each_entry_rcu(e, &tmp->edges, list) { if (e->endpoint->idx == phy->idx) { list_del_rcu(&e->list); hwsim_free_edge(e); } } } rcu_read_unlock(); synchronize_rcu(); } static int hwsim_subscribe_all_others(struct hwsim_phy *phy) { struct hwsim_phy *sub; struct hwsim_edge *e; list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(sub, 0xff); if (!e) goto me_fail; list_add_rcu(&e->list, &phy->edges); } list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(phy, 0xff); if (!e) goto sub_fail; list_add_rcu(&e->list, &sub->edges); } return 0; sub_fail: hwsim_edge_unsubscribe_me(phy); me_fail: rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } rcu_read_unlock(); return -ENOMEM; } static int hwsim_add_one(struct genl_info *info, struct device *dev, bool init) { struct ieee802154_hw *hw; struct hwsim_phy *phy; struct hwsim_pib *pib; int idx; int err; idx = hwsim_radio_idx++; hw = ieee802154_alloc_hw(sizeof(*phy), &hwsim_ops); if (!hw) return -ENOMEM; phy = hw->priv; phy->hw = hw; /* 868 MHz BPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 1; /* 915 MHz BPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7fe; /* 2.4 GHz O-QPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7FFF800; /* 868 MHz ASK 802.15.4-2006 */ 
hw->phy->supported.channels[1] |= 1; /* 915 MHz ASK 802.15.4-2006 */ hw->phy->supported.channels[1] |= 0x7fe; /* 868 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 1; /* 915 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 0x7fe; /* 2.4 GHz CSS 802.15.4a-2007 */ hw->phy->supported.channels[3] |= 0x3fff; /* UWB Sub-gigahertz 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 1; /* UWB Low band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0x1e; /* UWB High band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0xffe0; /* 750 MHz O-QPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf; /* 750 MHz MPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf0; /* 950 MHz BPSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ff; /* 950 MHz GFSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ffc00; ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); /* hwsim phy channel 13 as default */ hw->phy->current_channel = 13; pib = kzalloc(sizeof(*pib), GFP_KERNEL); if (!pib) { err = -ENOMEM; goto err_pib; } pib->channel = 13; pib->filt.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); pib->filt.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); hw->flags = IEEE802154_HW_PROMISCUOUS; hw->parent = dev; err = ieee802154_register_hw(hw); if (err) goto err_reg; mutex_lock(&hwsim_phys_lock); if (init) { err = hwsim_subscribe_all_others(phy); if (err < 0) { mutex_unlock(&hwsim_phys_lock); goto err_subscribe; } } list_add_tail(&phy->list, &hwsim_phys); mutex_unlock(&hwsim_phys_lock); hwsim_mcast_new_radio(info, phy); return idx; err_subscribe: ieee802154_unregister_hw(phy->hw); err_reg: kfree(pib); err_pib: ieee802154_free_hw(phy->hw); return err; } static void hwsim_del(struct hwsim_phy *phy) { struct hwsim_pib *pib; struct hwsim_edge *e; hwsim_edge_unsubscribe_me(phy); list_del(&phy->list); rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } pib = rcu_dereference(phy->pib); rcu_read_unlock(); kfree_rcu(pib, rcu); ieee802154_unregister_hw(phy->hw); ieee802154_free_hw(phy->hw); } static int hwsim_probe(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; int err, i; for (i = 0; i < 2; i++) { err = hwsim_add_one(NULL, &pdev->dev, true); if (err < 0) goto err_slave; } dev_info(&pdev->dev, "Added 2 mac802154 hwsim hardware radios\n"); return 0; err_slave: mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); return err; } static void hwsim_remove(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); } static struct platform_driver mac802154hwsim_driver = { .probe = hwsim_probe, .remove_new = hwsim_remove, .driver = { .name = "mac802154_hwsim", }, }; static __init int hwsim_init_module(void) { int rc; rc = genl_register_family(&hwsim_genl_family); if (rc) return rc; mac802154hwsim_dev = platform_device_register_simple("mac802154_hwsim", -1, NULL, 0); if (IS_ERR(mac802154hwsim_dev)) { rc = PTR_ERR(mac802154hwsim_dev); goto platform_dev; } rc = platform_driver_register(&mac802154hwsim_driver); if (rc < 0) goto platform_drv; return 0; platform_drv: platform_device_unregister(mac802154hwsim_dev); platform_dev: genl_unregister_family(&hwsim_genl_family); return rc; } static __exit void 
hwsim_remove_module(void)
{
	genl_unregister_family(&hwsim_genl_family);
	platform_driver_unregister(&mac802154hwsim_driver);
	platform_device_unregister(mac802154hwsim_dev);
}

module_init(hwsim_init_module);
module_exit(hwsim_remove_module);
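/*
 * Illustrative sketch (not part of this driver): how a userspace tool could
 * drive the "MAC802154_HWSIM" generic netlink family above to wire radio
 * from_idx to radio to_idx. It assumes libnl-3/libnl-genl-3 and that the
 * command/attribute enums from mac802154_hwsim.h are visible to userspace;
 * hwsim_wire_edge() is a hypothetical helper and error handling is minimal.
 * MAC802154_HWSIM_CMD_NEW_EDGE is flagged GENL_UNS_ADMIN_PERM, so this
 * needs CAP_NET_ADMIN.
 */
#include <errno.h>
#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include "mac802154_hwsim.h"

static int hwsim_wire_edge(uint32_t from_idx, uint32_t to_idx)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	struct nlattr *edge;
	int family, err = -1;

	sk = nl_socket_alloc();
	if (!sk)
		return -ENOMEM;
	if (genl_connect(sk) < 0)
		goto out_free_sock;

	/* Resolve the family registered as hwsim_genl_family. */
	family = genl_ctrl_resolve(sk, "MAC802154_HWSIM");
	if (family < 0)
		goto out_free_sock;

	msg = nlmsg_alloc();
	if (!msg)
		goto out_free_sock;
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    MAC802154_HWSIM_CMD_NEW_EDGE, 1);

	/* Top-level radio id plus a nested edge carrying the endpoint id,
	 * which is the layout hwsim_new_edge_nl() parses. */
	nla_put_u32(msg, MAC802154_HWSIM_ATTR_RADIO_ID, from_idx);
	edge = nla_nest_start(msg, MAC802154_HWSIM_ATTR_RADIO_EDGE);
	nla_put_u32(msg, MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID, to_idx);
	nla_nest_end(msg, edge);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
out_free_sock:
	nl_socket_free(sk);
	return err < 0 ? err : 0;
}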
// SPDX-License-Identifier: GPL-2.0
/*
 * The USB Monitor, inspired by Dave Harding's USBMon.
 *
 * This is a binary format reader.
 *
 * Copyright (C) 2006 Paolo Abeni (paolo.abeni@email.it)
 * Copyright (C) 2006,2007 Pete Zaitcev (zaitcev@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/export.h>
#include <linux/usb.h>
#include <linux/poll.h>
#include <linux/compat.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/time64.h>

#include <linux/uaccess.h>

#include "usb_mon.h"

/*
 * Defined by USB 2.0 clause 9.3, table 9.2.
*/ #define SETUP_LEN 8 /* ioctl macros */ #define MON_IOC_MAGIC 0x92 #define MON_IOCQ_URB_LEN _IO(MON_IOC_MAGIC, 1) /* #2 used to be MON_IOCX_URB, removed before it got into Linus tree */ #define MON_IOCG_STATS _IOR(MON_IOC_MAGIC, 3, struct mon_bin_stats) #define MON_IOCT_RING_SIZE _IO(MON_IOC_MAGIC, 4) #define MON_IOCQ_RING_SIZE _IO(MON_IOC_MAGIC, 5) #define MON_IOCX_GET _IOW(MON_IOC_MAGIC, 6, struct mon_bin_get) #define MON_IOCX_MFETCH _IOWR(MON_IOC_MAGIC, 7, struct mon_bin_mfetch) #define MON_IOCH_MFLUSH _IO(MON_IOC_MAGIC, 8) /* #9 was MON_IOCT_SETAPI */ #define MON_IOCX_GETX _IOW(MON_IOC_MAGIC, 10, struct mon_bin_get) #ifdef CONFIG_COMPAT #define MON_IOCX_GET32 _IOW(MON_IOC_MAGIC, 6, struct mon_bin_get32) #define MON_IOCX_MFETCH32 _IOWR(MON_IOC_MAGIC, 7, struct mon_bin_mfetch32) #define MON_IOCX_GETX32 _IOW(MON_IOC_MAGIC, 10, struct mon_bin_get32) #endif /* * Some architectures have enormous basic pages (16KB for ia64, 64KB for ppc). * But it's all right. Just use a simple way to make sure the chunk is never * smaller than a page. * * N.B. An application does not know our chunk size. * * Woops, get_zeroed_page() returns a single page. I guess we're stuck with * page-sized chunks for the time being. */ #define CHUNK_SIZE PAGE_SIZE #define CHUNK_ALIGN(x) (((x)+CHUNK_SIZE-1) & ~(CHUNK_SIZE-1)) /* * The magic limit was calculated so that it allows the monitoring * application to pick data once in two ticks. This way, another application, * which presumably drives the bus, gets to hog CPU, yet we collect our data. * If HZ is 100, a 480 mbit/s bus drives 614 KB every jiffy. USB has an * enormous overhead built into the bus protocol, so we need about 1000 KB. * * This is still too much for most cases, where we just snoop a few * descriptor fetches for enumeration. So, the default is a "reasonable" * amount for systems with HZ=250 and incomplete bus saturation. * * XXX What about multi-megabyte URBs which take minutes to transfer? */ #define BUFF_MAX CHUNK_ALIGN(1200*1024) #define BUFF_DFL CHUNK_ALIGN(300*1024) #define BUFF_MIN CHUNK_ALIGN(8*1024) /* * The per-event API header (2 per URB). * * This structure is seen in userland as defined by the documentation. */ struct mon_bin_hdr { u64 id; /* URB ID - from submission to callback */ unsigned char type; /* Same as in text API; extensible. */ unsigned char xfer_type; /* ISO, Intr, Control, Bulk */ unsigned char epnum; /* Endpoint number and transfer direction */ unsigned char devnum; /* Device address */ unsigned short busnum; /* Bus number */ char flag_setup; char flag_data; s64 ts_sec; /* ktime_get_real_ts64 */ s32 ts_usec; /* ktime_get_real_ts64 */ int status; unsigned int len_urb; /* Length of data (submitted or actual) */ unsigned int len_cap; /* Delivered length */ union { unsigned char setup[SETUP_LEN]; /* Only for Control S-type */ struct iso_rec { int error_count; int numdesc; } iso; } s; int interval; int start_frame; unsigned int xfer_flags; unsigned int ndesc; /* Actual number of ISO descriptors */ }; /* * ISO vector, packed into the head of data stream. * This has to take 16 bytes to make sure that the end of buffer * wrap is not happening in the middle of a descriptor. */ struct mon_bin_isodesc { int iso_status; unsigned int iso_off; unsigned int iso_len; u32 _pad; }; /* per file statistic */ struct mon_bin_stats { u32 queued; u32 dropped; }; struct mon_bin_get { struct mon_bin_hdr __user *hdr; /* Can be 48 bytes or 64. 
*/ void __user *data; size_t alloc; /* Length of data (can be zero) */ }; struct mon_bin_mfetch { u32 __user *offvec; /* Vector of events fetched */ u32 nfetch; /* Number of events to fetch (out: fetched) */ u32 nflush; /* Number of events to flush */ }; #ifdef CONFIG_COMPAT struct mon_bin_get32 { u32 hdr32; u32 data32; u32 alloc32; }; struct mon_bin_mfetch32 { u32 offvec32; u32 nfetch32; u32 nflush32; }; #endif /* Having these two values same prevents wrapping of the mon_bin_hdr */ #define PKT_ALIGN 64 #define PKT_SIZE 64 #define PKT_SZ_API0 48 /* API 0 (2.6.20) size */ #define PKT_SZ_API1 64 /* API 1 size: extra fields */ #define ISODESC_MAX 128 /* Same number as usbfs allows, 2048 bytes. */ /* max number of USB bus supported */ #define MON_BIN_MAX_MINOR 128 /* * The buffer: map of used pages. */ struct mon_pgmap { struct page *pg; unsigned char *ptr; /* XXX just use page_to_virt everywhere? */ }; /* * This gets associated with an open file struct. */ struct mon_reader_bin { /* The buffer: one per open. */ spinlock_t b_lock; /* Protect b_cnt, b_in */ unsigned int b_size; /* Current size of the buffer - bytes */ unsigned int b_cnt; /* Bytes used */ unsigned int b_in, b_out; /* Offsets into buffer - bytes */ unsigned int b_read; /* Amount of read data in curr. pkt. */ struct mon_pgmap *b_vec; /* The map array */ wait_queue_head_t b_wait; /* Wait for data here */ struct mutex fetch_lock; /* Protect b_read, b_out */ int mmap_active; /* A list of these is needed for "bus 0". Some time later. */ struct mon_reader r; /* Stats */ unsigned int cnt_lost; }; static inline struct mon_bin_hdr *MON_OFF2HDR(const struct mon_reader_bin *rp, unsigned int offset) { return (struct mon_bin_hdr *) (rp->b_vec[offset / CHUNK_SIZE].ptr + offset % CHUNK_SIZE); } #define MON_RING_EMPTY(rp) ((rp)->b_cnt == 0) static unsigned char xfer_to_pipe[4] = { PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT }; static const struct class mon_bin_class = { .name = "usbmon", }; static dev_t mon_bin_dev0; static struct cdev mon_bin_cdev; static void mon_buff_area_fill(const struct mon_reader_bin *rp, unsigned int offset, unsigned int size); static int mon_bin_wait_event(struct file *file, struct mon_reader_bin *rp); static int mon_alloc_buff(struct mon_pgmap *map, int npages); static void mon_free_buff(struct mon_pgmap *map, int npages); /* * This is a "chunked memcpy". It does not manipulate any counters. */ static unsigned int mon_copy_to_buff(const struct mon_reader_bin *this, unsigned int off, const unsigned char *from, unsigned int length) { unsigned int step_len; unsigned char *buf; unsigned int in_page; while (length) { /* * Determine step_len. */ step_len = length; in_page = CHUNK_SIZE - (off & (CHUNK_SIZE-1)); if (in_page < step_len) step_len = in_page; /* * Copy data and advance pointers. */ buf = this->b_vec[off / CHUNK_SIZE].ptr + off % CHUNK_SIZE; memcpy(buf, from, step_len); if ((off += step_len) >= this->b_size) off = 0; from += step_len; length -= step_len; } return off; } /* * This is a little worse than the above because it's "chunked copy_to_user". * The return value is an error code, not an offset. */ static int copy_from_buf(const struct mon_reader_bin *this, unsigned int off, char __user *to, int length) { unsigned int step_len; unsigned char *buf; unsigned int in_page; while (length) { /* * Determine step_len. */ step_len = length; in_page = CHUNK_SIZE - (off & (CHUNK_SIZE-1)); if (in_page < step_len) step_len = in_page; /* * Copy data and advance pointers. 
*/ buf = this->b_vec[off / CHUNK_SIZE].ptr + off % CHUNK_SIZE; if (copy_to_user(to, buf, step_len)) return -EINVAL; if ((off += step_len) >= this->b_size) off = 0; to += step_len; length -= step_len; } return 0; } /* * Allocate an (aligned) area in the buffer. * This is called under b_lock. * Returns ~0 on failure. */ static unsigned int mon_buff_area_alloc(struct mon_reader_bin *rp, unsigned int size) { unsigned int offset; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if (rp->b_cnt + size > rp->b_size) return ~0; offset = rp->b_in; rp->b_cnt += size; if ((rp->b_in += size) >= rp->b_size) rp->b_in -= rp->b_size; return offset; } /* * This is the same thing as mon_buff_area_alloc, only it does not allow * buffers to wrap. This is needed by applications which pass references * into mmap-ed buffers up their stacks (libpcap can do that). * * Currently, we always have the header stuck with the data, although * it is not strictly speaking necessary. * * When a buffer would wrap, we place a filler packet to mark the space. */ static unsigned int mon_buff_area_alloc_contiguous(struct mon_reader_bin *rp, unsigned int size) { unsigned int offset; unsigned int fill_size; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if (rp->b_cnt + size > rp->b_size) return ~0; if (rp->b_in + size > rp->b_size) { /* * This would wrap. Find if we still have space after * skipping to the end of the buffer. If we do, place * a filler packet and allocate a new packet. */ fill_size = rp->b_size - rp->b_in; if (rp->b_cnt + size + fill_size > rp->b_size) return ~0; mon_buff_area_fill(rp, rp->b_in, fill_size); offset = 0; rp->b_in = size; rp->b_cnt += size + fill_size; } else if (rp->b_in + size == rp->b_size) { offset = rp->b_in; rp->b_in = 0; rp->b_cnt += size; } else { offset = rp->b_in; rp->b_in += size; rp->b_cnt += size; } return offset; } /* * Return a few (kilo-)bytes to the head of the buffer. * This is used if a data fetch fails. */ static void mon_buff_area_shrink(struct mon_reader_bin *rp, unsigned int size) { /* size &= ~(PKT_ALIGN-1); -- we're called with aligned size */ rp->b_cnt -= size; if (rp->b_in < size) rp->b_in += rp->b_size; rp->b_in -= size; } /* * This has to be called under both b_lock and fetch_lock, because * it accesses both b_cnt and b_out. 
*/ static void mon_buff_area_free(struct mon_reader_bin *rp, unsigned int size) { size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); rp->b_cnt -= size; if ((rp->b_out += size) >= rp->b_size) rp->b_out -= rp->b_size; } static void mon_buff_area_fill(const struct mon_reader_bin *rp, unsigned int offset, unsigned int size) { struct mon_bin_hdr *ep; ep = MON_OFF2HDR(rp, offset); memset(ep, 0, PKT_SIZE); ep->type = '@'; ep->len_cap = size - PKT_SIZE; } static inline char mon_bin_get_setup(unsigned char *setupb, const struct urb *urb, char ev_type) { if (urb->setup_packet == NULL) return 'Z'; memcpy(setupb, urb->setup_packet, SETUP_LEN); return 0; } static unsigned int mon_bin_get_data(const struct mon_reader_bin *rp, unsigned int offset, struct urb *urb, unsigned int length, char *flag) { int i; struct scatterlist *sg; unsigned int this_len; *flag = 0; if (urb->num_sgs == 0) { if (urb->transfer_buffer == NULL) { *flag = 'Z'; return length; } mon_copy_to_buff(rp, offset, urb->transfer_buffer, length); length = 0; } else { /* If IOMMU coalescing occurred, we cannot trust sg_page */ if (urb->transfer_flags & URB_DMA_SG_COMBINED) { *flag = 'D'; return length; } /* Copy up to the first non-addressable segment */ for_each_sg(urb->sg, sg, urb->num_sgs, i) { if (length == 0 || PageHighMem(sg_page(sg))) break; this_len = min_t(unsigned int, sg->length, length); offset = mon_copy_to_buff(rp, offset, sg_virt(sg), this_len); length -= this_len; } if (i == 0) *flag = 'D'; } return length; } /* * This is the look-ahead pass in case of 'C Zi', when actual_length cannot * be used to determine the length of the whole contiguous buffer. */ static unsigned int mon_bin_collate_isodesc(const struct mon_reader_bin *rp, struct urb *urb, unsigned int ndesc) { struct usb_iso_packet_descriptor *fp; unsigned int length; length = 0; fp = urb->iso_frame_desc; while (ndesc-- != 0) { if (fp->actual_length != 0) { if (fp->offset + fp->actual_length > length) length = fp->offset + fp->actual_length; } fp++; } return length; } static void mon_bin_get_isodesc(const struct mon_reader_bin *rp, unsigned int offset, struct urb *urb, char ev_type, unsigned int ndesc) { struct mon_bin_isodesc *dp; struct usb_iso_packet_descriptor *fp; fp = urb->iso_frame_desc; while (ndesc-- != 0) { dp = (struct mon_bin_isodesc *) (rp->b_vec[offset / CHUNK_SIZE].ptr + offset % CHUNK_SIZE); dp->iso_status = fp->status; dp->iso_off = fp->offset; dp->iso_len = (ev_type == 'S') ? fp->length : fp->actual_length; dp->_pad = 0; if ((offset += sizeof(struct mon_bin_isodesc)) >= rp->b_size) offset = 0; fp++; } } static void mon_bin_event(struct mon_reader_bin *rp, struct urb *urb, char ev_type, int status) { const struct usb_endpoint_descriptor *epd = &urb->ep->desc; struct timespec64 ts; unsigned long flags; unsigned int urb_length; unsigned int offset; unsigned int length; unsigned int delta; unsigned int ndesc, lendesc; unsigned char dir; struct mon_bin_hdr *ep; char data_tag = 0; ktime_get_real_ts64(&ts); spin_lock_irqsave(&rp->b_lock, flags); /* * Find the maximum allowable length, then allocate space. */ urb_length = (ev_type == 'S') ? 
urb->transfer_buffer_length : urb->actual_length; length = urb_length; if (usb_endpoint_xfer_isoc(epd)) { if (urb->number_of_packets < 0) { ndesc = 0; } else if (urb->number_of_packets >= ISODESC_MAX) { ndesc = ISODESC_MAX; } else { ndesc = urb->number_of_packets; } if (ev_type == 'C' && usb_urb_dir_in(urb)) length = mon_bin_collate_isodesc(rp, urb, ndesc); } else { ndesc = 0; } lendesc = ndesc*sizeof(struct mon_bin_isodesc); /* not an issue unless there's a subtle bug in a HCD somewhere */ if (length >= urb->transfer_buffer_length) length = urb->transfer_buffer_length; if (length >= rp->b_size/5) length = rp->b_size/5; if (usb_urb_dir_in(urb)) { if (ev_type == 'S') { length = 0; data_tag = '<'; } /* Cannot rely on endpoint number in case of control ep.0 */ dir = USB_DIR_IN; } else { if (ev_type == 'C') { length = 0; data_tag = '>'; } dir = 0; } if (rp->mmap_active) { offset = mon_buff_area_alloc_contiguous(rp, length + PKT_SIZE + lendesc); } else { offset = mon_buff_area_alloc(rp, length + PKT_SIZE + lendesc); } if (offset == ~0) { rp->cnt_lost++; spin_unlock_irqrestore(&rp->b_lock, flags); return; } ep = MON_OFF2HDR(rp, offset); if ((offset += PKT_SIZE) >= rp->b_size) offset = 0; /* * Fill the allocated area. */ memset(ep, 0, PKT_SIZE); ep->type = ev_type; ep->xfer_type = xfer_to_pipe[usb_endpoint_type(epd)]; ep->epnum = dir | usb_endpoint_num(epd); ep->devnum = urb->dev->devnum; ep->busnum = urb->dev->bus->busnum; ep->id = (unsigned long) urb; ep->ts_sec = ts.tv_sec; ep->ts_usec = ts.tv_nsec / NSEC_PER_USEC; ep->status = status; ep->len_urb = urb_length; ep->len_cap = length + lendesc; ep->xfer_flags = urb->transfer_flags; if (usb_endpoint_xfer_int(epd)) { ep->interval = urb->interval; } else if (usb_endpoint_xfer_isoc(epd)) { ep->interval = urb->interval; ep->start_frame = urb->start_frame; ep->s.iso.error_count = urb->error_count; ep->s.iso.numdesc = urb->number_of_packets; } if (usb_endpoint_xfer_control(epd) && ev_type == 'S') { ep->flag_setup = mon_bin_get_setup(ep->s.setup, urb, ev_type); } else { ep->flag_setup = '-'; } if (ndesc != 0) { ep->ndesc = ndesc; mon_bin_get_isodesc(rp, offset, urb, ev_type, ndesc); if ((offset += lendesc) >= rp->b_size) offset -= rp->b_size; } if (length != 0) { length = mon_bin_get_data(rp, offset, urb, length, &ep->flag_data); if (length > 0) { delta = (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); ep->len_cap -= length; delta -= (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); mon_buff_area_shrink(rp, delta); } } else { ep->flag_data = data_tag; } spin_unlock_irqrestore(&rp->b_lock, flags); wake_up(&rp->b_wait); } static void mon_bin_submit(void *data, struct urb *urb) { struct mon_reader_bin *rp = data; mon_bin_event(rp, urb, 'S', -EINPROGRESS); } static void mon_bin_complete(void *data, struct urb *urb, int status) { struct mon_reader_bin *rp = data; mon_bin_event(rp, urb, 'C', status); } static void mon_bin_error(void *data, struct urb *urb, int error) { struct mon_reader_bin *rp = data; struct timespec64 ts; unsigned long flags; unsigned int offset; struct mon_bin_hdr *ep; ktime_get_real_ts64(&ts); spin_lock_irqsave(&rp->b_lock, flags); offset = mon_buff_area_alloc(rp, PKT_SIZE); if (offset == ~0) { /* Not incrementing cnt_lost. Just because. */ spin_unlock_irqrestore(&rp->b_lock, flags); return; } ep = MON_OFF2HDR(rp, offset); memset(ep, 0, PKT_SIZE); ep->type = 'E'; ep->xfer_type = xfer_to_pipe[usb_endpoint_type(&urb->ep->desc)]; ep->epnum = usb_urb_dir_in(urb) ? 
USB_DIR_IN : 0; ep->epnum |= usb_endpoint_num(&urb->ep->desc); ep->devnum = urb->dev->devnum; ep->busnum = urb->dev->bus->busnum; ep->id = (unsigned long) urb; ep->ts_sec = ts.tv_sec; ep->ts_usec = ts.tv_nsec / NSEC_PER_USEC; ep->status = error; ep->flag_setup = '-'; ep->flag_data = 'E'; spin_unlock_irqrestore(&rp->b_lock, flags); wake_up(&rp->b_wait); } static int mon_bin_open(struct inode *inode, struct file *file) { struct mon_bus *mbus; struct mon_reader_bin *rp; size_t size; int rc; mutex_lock(&mon_lock); mbus = mon_bus_lookup(iminor(inode)); if (mbus == NULL) { mutex_unlock(&mon_lock); return -ENODEV; } if (mbus != &mon_bus0 && mbus->u_bus == NULL) { printk(KERN_ERR TAG ": consistency error on open\n"); mutex_unlock(&mon_lock); return -ENODEV; } rp = kzalloc(sizeof(struct mon_reader_bin), GFP_KERNEL); if (rp == NULL) { rc = -ENOMEM; goto err_alloc; } spin_lock_init(&rp->b_lock); init_waitqueue_head(&rp->b_wait); mutex_init(&rp->fetch_lock); rp->b_size = BUFF_DFL; size = sizeof(struct mon_pgmap) * (rp->b_size/CHUNK_SIZE); if ((rp->b_vec = kzalloc(size, GFP_KERNEL)) == NULL) { rc = -ENOMEM; goto err_allocvec; } if ((rc = mon_alloc_buff(rp->b_vec, rp->b_size/CHUNK_SIZE)) < 0) goto err_allocbuff; rp->r.m_bus = mbus; rp->r.r_data = rp; rp->r.rnf_submit = mon_bin_submit; rp->r.rnf_error = mon_bin_error; rp->r.rnf_complete = mon_bin_complete; mon_reader_add(mbus, &rp->r); file->private_data = rp; mutex_unlock(&mon_lock); return 0; err_allocbuff: kfree(rp->b_vec); err_allocvec: kfree(rp); err_alloc: mutex_unlock(&mon_lock); return rc; } /* * Extract an event from buffer and copy it to user space. * Wait if there is no event ready. * Returns zero or error. */ static int mon_bin_get_event(struct file *file, struct mon_reader_bin *rp, struct mon_bin_hdr __user *hdr, unsigned int hdrbytes, void __user *data, unsigned int nbytes) { unsigned long flags; struct mon_bin_hdr *ep; size_t step_len; unsigned int offset; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } ep = MON_OFF2HDR(rp, rp->b_out); if (copy_to_user(hdr, ep, hdrbytes)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } step_len = min(ep->len_cap, nbytes); if ((offset = rp->b_out + PKT_SIZE) >= rp->b_size) offset = 0; if (copy_from_buf(rp, offset, data, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } spin_lock_irqsave(&rp->b_lock, flags); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; mutex_unlock(&rp->fetch_lock); return 0; } static int mon_bin_release(struct inode *inode, struct file *file) { struct mon_reader_bin *rp = file->private_data; struct mon_bus* mbus = rp->r.m_bus; mutex_lock(&mon_lock); if (mbus->nreaders <= 0) { printk(KERN_ERR TAG ": consistency error on close\n"); mutex_unlock(&mon_lock); return 0; } mon_reader_del(mbus, &rp->r); mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); kfree(rp->b_vec); kfree(rp); mutex_unlock(&mon_lock); return 0; } static ssize_t mon_bin_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct mon_reader_bin *rp = file->private_data; unsigned int hdrbytes = PKT_SZ_API0; unsigned long flags; struct mon_bin_hdr *ep; unsigned int offset; size_t step_len; char *ptr; ssize_t done = 0; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } ep = MON_OFF2HDR(rp, rp->b_out); if (rp->b_read < hdrbytes) { step_len = min(nbytes, (size_t)(hdrbytes - 
rp->b_read)); ptr = ((char *)ep) + rp->b_read; if (step_len && copy_to_user(buf, ptr, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nbytes -= step_len; buf += step_len; rp->b_read += step_len; done += step_len; } if (rp->b_read >= hdrbytes) { step_len = ep->len_cap; step_len -= rp->b_read - hdrbytes; if (step_len > nbytes) step_len = nbytes; offset = rp->b_out + PKT_SIZE; offset += rp->b_read - hdrbytes; if (offset >= rp->b_size) offset -= rp->b_size; if (copy_from_buf(rp, offset, buf, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nbytes -= step_len; buf += step_len; rp->b_read += step_len; done += step_len; } /* * Check if whole packet was read, and if so, jump to the next one. */ if (rp->b_read >= hdrbytes + ep->len_cap) { spin_lock_irqsave(&rp->b_lock, flags); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; } mutex_unlock(&rp->fetch_lock); return done; } /* * Remove at most nevents from chunked buffer. * Returns the number of removed events. */ static int mon_bin_flush(struct mon_reader_bin *rp, unsigned nevents) { unsigned long flags; struct mon_bin_hdr *ep; int i; mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); for (i = 0; i < nevents; ++i) { if (MON_RING_EMPTY(rp)) break; ep = MON_OFF2HDR(rp, rp->b_out); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); } spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; mutex_unlock(&rp->fetch_lock); return i; } /* * Fetch at most max event offsets into the buffer and put them into vec. * The events are usually freed later with mon_bin_flush. * Return the effective number of events fetched. */ static int mon_bin_fetch(struct file *file, struct mon_reader_bin *rp, u32 __user *vec, unsigned int max) { unsigned int cur_out; unsigned int bytes, avail; unsigned int size; unsigned int nevents; struct mon_bin_hdr *ep; unsigned long flags; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } spin_lock_irqsave(&rp->b_lock, flags); avail = rp->b_cnt; spin_unlock_irqrestore(&rp->b_lock, flags); cur_out = rp->b_out; nevents = 0; bytes = 0; while (bytes < avail) { if (nevents >= max) break; ep = MON_OFF2HDR(rp, cur_out); if (put_user(cur_out, &vec[nevents])) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nevents++; size = ep->len_cap + PKT_SIZE; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if ((cur_out += size) >= rp->b_size) cur_out -= rp->b_size; bytes += size; } mutex_unlock(&rp->fetch_lock); return nevents; } /* * Count events. This is almost the same as the above mon_bin_fetch, * only we do not store offsets into user vector, and we have no limit. 
*/ static int mon_bin_queued(struct mon_reader_bin *rp) { unsigned int cur_out; unsigned int bytes, avail; unsigned int size; unsigned int nevents; struct mon_bin_hdr *ep; unsigned long flags; mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); avail = rp->b_cnt; spin_unlock_irqrestore(&rp->b_lock, flags); cur_out = rp->b_out; nevents = 0; bytes = 0; while (bytes < avail) { ep = MON_OFF2HDR(rp, cur_out); nevents++; size = ep->len_cap + PKT_SIZE; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if ((cur_out += size) >= rp->b_size) cur_out -= rp->b_size; bytes += size; } mutex_unlock(&rp->fetch_lock); return nevents; } /* */ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct mon_reader_bin *rp = file->private_data; // struct mon_bus* mbus = rp->r.m_bus; int ret = 0; struct mon_bin_hdr *ep; unsigned long flags; switch (cmd) { case MON_IOCQ_URB_LEN: /* * N.B. This only returns the size of data, without the header. */ spin_lock_irqsave(&rp->b_lock, flags); if (!MON_RING_EMPTY(rp)) { ep = MON_OFF2HDR(rp, rp->b_out); ret = ep->len_cap; } spin_unlock_irqrestore(&rp->b_lock, flags); break; case MON_IOCQ_RING_SIZE: mutex_lock(&rp->fetch_lock); ret = rp->b_size; mutex_unlock(&rp->fetch_lock); break; case MON_IOCT_RING_SIZE: /* * Changing the buffer size will flush its contents; the new * buffer is allocated before releasing the old one to be sure * the device will stay functional also in case of memory * pressure. */ { int size; struct mon_pgmap *vec; if (arg < BUFF_MIN || arg > BUFF_MAX) return -EINVAL; size = CHUNK_ALIGN(arg); vec = kcalloc(size / CHUNK_SIZE, sizeof(struct mon_pgmap), GFP_KERNEL); if (vec == NULL) { ret = -ENOMEM; break; } ret = mon_alloc_buff(vec, size/CHUNK_SIZE); if (ret < 0) { kfree(vec); break; } mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); if (rp->mmap_active) { mon_free_buff(vec, size/CHUNK_SIZE); kfree(vec); ret = -EBUSY; } else { mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); kfree(rp->b_vec); rp->b_vec = vec; rp->b_size = size; rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; rp->cnt_lost = 0; } spin_unlock_irqrestore(&rp->b_lock, flags); mutex_unlock(&rp->fetch_lock); } break; case MON_IOCH_MFLUSH: ret = mon_bin_flush(rp, arg); break; case MON_IOCX_GET: case MON_IOCX_GETX: { struct mon_bin_get getb; if (copy_from_user(&getb, (void __user *)arg, sizeof(struct mon_bin_get))) return -EFAULT; if (getb.alloc > 0x10000000) /* Want to cast to u32 */ return -EINVAL; ret = mon_bin_get_event(file, rp, getb.hdr, (cmd == MON_IOCX_GET)?
PKT_SZ_API0: PKT_SZ_API1, getb.data, (unsigned int)getb.alloc); } break; case MON_IOCX_MFETCH: { struct mon_bin_mfetch mfetch; struct mon_bin_mfetch __user *uptr; uptr = (struct mon_bin_mfetch __user *)arg; if (copy_from_user(&mfetch, uptr, sizeof(mfetch))) return -EFAULT; if (mfetch.nflush) { ret = mon_bin_flush(rp, mfetch.nflush); if (ret < 0) return ret; if (put_user(ret, &uptr->nflush)) return -EFAULT; } ret = mon_bin_fetch(file, rp, mfetch.offvec, mfetch.nfetch); if (ret < 0) return ret; if (put_user(ret, &uptr->nfetch)) return -EFAULT; ret = 0; } break; case MON_IOCG_STATS: { struct mon_bin_stats __user *sp; unsigned int nevents; unsigned int ndropped; spin_lock_irqsave(&rp->b_lock, flags); ndropped = rp->cnt_lost; rp->cnt_lost = 0; spin_unlock_irqrestore(&rp->b_lock, flags); nevents = mon_bin_queued(rp); sp = (struct mon_bin_stats __user *)arg; if (put_user(ndropped, &sp->dropped)) return -EFAULT; if (put_user(nevents, &sp->queued)) return -EFAULT; } break; default: return -ENOTTY; } return ret; } #ifdef CONFIG_COMPAT static long mon_bin_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct mon_reader_bin *rp = file->private_data; int ret; switch (cmd) { case MON_IOCX_GET32: case MON_IOCX_GETX32: { struct mon_bin_get32 getb; if (copy_from_user(&getb, (void __user *)arg, sizeof(struct mon_bin_get32))) return -EFAULT; ret = mon_bin_get_event(file, rp, compat_ptr(getb.hdr32), (cmd == MON_IOCX_GET32)? PKT_SZ_API0: PKT_SZ_API1, compat_ptr(getb.data32), getb.alloc32); if (ret < 0) return ret; } return 0; case MON_IOCX_MFETCH32: { struct mon_bin_mfetch32 mfetch; struct mon_bin_mfetch32 __user *uptr; uptr = (struct mon_bin_mfetch32 __user *) compat_ptr(arg); if (copy_from_user(&mfetch, uptr, sizeof(mfetch))) return -EFAULT; if (mfetch.nflush32) { ret = mon_bin_flush(rp, mfetch.nflush32); if (ret < 0) return ret; if (put_user(ret, &uptr->nflush32)) return -EFAULT; } ret = mon_bin_fetch(file, rp, compat_ptr(mfetch.offvec32), mfetch.nfetch32); if (ret < 0) return ret; if (put_user(ret, &uptr->nfetch32)) return -EFAULT; } return 0; case MON_IOCG_STATS: return mon_bin_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); case MON_IOCQ_URB_LEN: case MON_IOCQ_RING_SIZE: case MON_IOCT_RING_SIZE: case MON_IOCH_MFLUSH: return mon_bin_ioctl(file, cmd, arg); default: ; } return -ENOTTY; } #endif /* CONFIG_COMPAT */ static __poll_t mon_bin_poll(struct file *file, struct poll_table_struct *wait) { struct mon_reader_bin *rp = file->private_data; __poll_t mask = 0; unsigned long flags; if (file->f_mode & FMODE_READ) poll_wait(file, &rp->b_wait, wait); spin_lock_irqsave(&rp->b_lock, flags); if (!MON_RING_EMPTY(rp)) mask |= EPOLLIN | EPOLLRDNORM; /* readable */ spin_unlock_irqrestore(&rp->b_lock, flags); return mask; } /* * open and close: just keep track of how many times the device is * mapped, to use the proper memory allocation function. */ static void mon_bin_vma_open(struct vm_area_struct *vma) { struct mon_reader_bin *rp = vma->vm_private_data; unsigned long flags; spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active++; spin_unlock_irqrestore(&rp->b_lock, flags); } static void mon_bin_vma_close(struct vm_area_struct *vma) { unsigned long flags; struct mon_reader_bin *rp = vma->vm_private_data; spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active--; spin_unlock_irqrestore(&rp->b_lock, flags); } /* * Map ring pages to user space. 
*/ static vm_fault_t mon_bin_vma_fault(struct vm_fault *vmf) { struct mon_reader_bin *rp = vmf->vma->vm_private_data; unsigned long offset, chunk_idx; struct page *pageptr; unsigned long flags; spin_lock_irqsave(&rp->b_lock, flags); offset = vmf->pgoff << PAGE_SHIFT; if (offset >= rp->b_size) { spin_unlock_irqrestore(&rp->b_lock, flags); return VM_FAULT_SIGBUS; } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); vmf->page = pageptr; spin_unlock_irqrestore(&rp->b_lock, flags); return 0; } static const struct vm_operations_struct mon_bin_vm_ops = { .open = mon_bin_vma_open, .close = mon_bin_vma_close, .fault = mon_bin_vma_fault, }; static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; if (vma->vm_flags & VM_WRITE) return -EPERM; vm_flags_mod(vma, VM_DONTEXPAND | VM_DONTDUMP, VM_MAYWRITE); vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); return 0; } static const struct file_operations mon_fops_binary = { .owner = THIS_MODULE, .open = mon_bin_open, .read = mon_bin_read, /* .write = mon_text_write, */ .poll = mon_bin_poll, .unlocked_ioctl = mon_bin_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mon_bin_compat_ioctl, #endif .release = mon_bin_release, .mmap = mon_bin_mmap, }; static int mon_bin_wait_event(struct file *file, struct mon_reader_bin *rp) { DECLARE_WAITQUEUE(waita, current); unsigned long flags; add_wait_queue(&rp->b_wait, &waita); set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&rp->b_lock, flags); while (MON_RING_EMPTY(rp)) { spin_unlock_irqrestore(&rp->b_lock, flags); if (file->f_flags & O_NONBLOCK) { set_current_state(TASK_RUNNING); remove_wait_queue(&rp->b_wait, &waita); return -EWOULDBLOCK; /* Same as EAGAIN in Linux */ } schedule(); if (signal_pending(current)) { remove_wait_queue(&rp->b_wait, &waita); return -EINTR; } set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&rp->b_lock, flags); } spin_unlock_irqrestore(&rp->b_lock, flags); set_current_state(TASK_RUNNING); remove_wait_queue(&rp->b_wait, &waita); return 0; } static int mon_alloc_buff(struct mon_pgmap *map, int npages) { int n; unsigned long vaddr; for (n = 0; n < npages; n++) { vaddr = get_zeroed_page(GFP_KERNEL); if (vaddr == 0) { while (n-- != 0) free_page((unsigned long) map[n].ptr); return -ENOMEM; } map[n].ptr = (unsigned char *) vaddr; map[n].pg = virt_to_page((void *) vaddr); } return 0; } static void mon_free_buff(struct mon_pgmap *map, int npages) { int n; for (n = 0; n < npages; n++) free_page((unsigned long) map[n].ptr); } int mon_bin_add(struct mon_bus *mbus, const struct usb_bus *ubus) { struct device *dev; unsigned minor = ubus? ubus->busnum: 0; if (minor >= MON_BIN_MAX_MINOR) return 0; dev = device_create(&mon_bin_class, ubus ? 
ubus->controller : NULL, MKDEV(MAJOR(mon_bin_dev0), minor), NULL, "usbmon%d", minor); if (IS_ERR(dev)) return 0; mbus->classdev = dev; return 1; } void mon_bin_del(struct mon_bus *mbus) { device_destroy(&mon_bin_class, mbus->classdev->devt); } int __init mon_bin_init(void) { int rc; rc = class_register(&mon_bin_class); if (rc) goto err_class; rc = alloc_chrdev_region(&mon_bin_dev0, 0, MON_BIN_MAX_MINOR, "usbmon"); if (rc < 0) goto err_dev; cdev_init(&mon_bin_cdev, &mon_fops_binary); mon_bin_cdev.owner = THIS_MODULE; rc = cdev_add(&mon_bin_cdev, mon_bin_dev0, MON_BIN_MAX_MINOR); if (rc < 0) goto err_add; return 0; err_add: unregister_chrdev_region(mon_bin_dev0, MON_BIN_MAX_MINOR); err_dev: class_unregister(&mon_bin_class); err_class: return rc; } void mon_bin_exit(void) { cdev_del(&mon_bin_cdev); unregister_chrdev_region(mon_bin_dev0, MON_BIN_MAX_MINOR); class_unregister(&mon_bin_class); }
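/*
 * Hedged userspace sketch (not part of this driver): one plausible way to
 * consume the character device registered above, using only open(),
 * poll() and read() so it exercises mon_bin_open(), mon_bin_poll() and
 * mon_bin_read(). The node path /dev/usbmon0 and the 4 KiB buffer are
 * illustrative assumptions; the exact event header layout is left opaque
 * here on purpose.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];			/* assumed large enough for one event */
	struct pollfd pfd;
	ssize_t n;
	int fd;

	fd = open("/dev/usbmon0", O_RDONLY);	/* minor 0: the "all buses" reader */
	if (fd < 0) {
		perror("open /dev/usbmon0");
		return 1;
	}
	pfd.fd = fd;
	pfd.events = POLLIN;
	/* mon_bin_poll() reports POLLIN once the ring buffer holds an event */
	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN)) {
		/* mon_bin_read() yields the API0 header followed by captured data */
		n = read(fd, buf, sizeof(buf));
		if (n > 0)
			printf("read %zd bytes of one captured event\n", n);
	}
	close(fd);
	return 0;
}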
// SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * rtl871x_recv.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved.
* * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _RTL871X_RECV_C_ #include <linux/ip.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/ieee80211.h> #include <net/cfg80211.h> #include "osdep_service.h" #include "drv_types.h" #include "recv_osdep.h" #include "mlme_osdep.h" #include "ethernet.h" #include "usb_ops.h" #include "wifi.h" static const u8 SNAP_ETH_TYPE_IPX[2] = {0x81, 0x37}; /* Datagram Delivery Protocol */ static const u8 SNAP_ETH_TYPE_APPLETALK_AARP[2] = {0x80, 0xf3}; void _r8712_init_sta_recv_priv(struct sta_recv_priv *psta_recvpriv) { memset((u8 *)psta_recvpriv, 0, sizeof(struct sta_recv_priv)); spin_lock_init(&psta_recvpriv->lock); _init_queue(&psta_recvpriv->defrag_q); } int _r8712_init_recv_priv(struct recv_priv *precvpriv, struct _adapter *padapter) { int ret; sint i; union recv_frame *precvframe; memset((unsigned char *)precvpriv, 0, sizeof(struct recv_priv)); spin_lock_init(&precvpriv->lock); _init_queue(&precvpriv->free_recv_queue); _init_queue(&precvpriv->recv_pending_queue); precvpriv->adapter = padapter; precvpriv->free_recvframe_cnt = NR_RECVFRAME; precvpriv->pallocated_frame_buf = kzalloc(NR_RECVFRAME * sizeof(union recv_frame) + RXFRAME_ALIGN_SZ, GFP_ATOMIC); if (!precvpriv->pallocated_frame_buf) return -ENOMEM; precvpriv->precv_frame_buf = precvpriv->pallocated_frame_buf + RXFRAME_ALIGN_SZ - ((addr_t)(precvpriv->pallocated_frame_buf) & (RXFRAME_ALIGN_SZ - 1)); precvframe = (union recv_frame *)precvpriv->precv_frame_buf; for (i = 0; i < NR_RECVFRAME; i++) { INIT_LIST_HEAD(&(precvframe->u.list)); list_add_tail(&(precvframe->u.list), &(precvpriv->free_recv_queue.queue)); r8712_os_recv_resource_alloc(padapter, precvframe); precvframe->u.hdr.adapter = padapter; precvframe++; } precvpriv->rx_pending_cnt = 1; ret = r8712_init_recv_priv(precvpriv, padapter); if (ret) kfree(precvpriv->pallocated_frame_buf); return ret; } void _r8712_free_recv_priv(struct recv_priv *precvpriv) { kfree(precvpriv->pallocated_frame_buf); r8712_free_recv_priv(precvpriv); } union recv_frame *r8712_alloc_recvframe(struct __queue *pfree_recv_queue) { unsigned long irqL; union recv_frame *precvframe; struct _adapter *padapter; struct recv_priv *precvpriv; spin_lock_irqsave(&pfree_recv_queue->lock, irqL); precvframe = list_first_entry_or_null(&pfree_recv_queue->queue, union recv_frame, u.hdr.list); if (precvframe) { list_del_init(&precvframe->u.hdr.list); padapter = precvframe->u.hdr.adapter; if (padapter) { precvpriv = &padapter->recvpriv; if (pfree_recv_queue == &precvpriv->free_recv_queue) precvpriv->free_recvframe_cnt--; } } spin_unlock_irqrestore(&pfree_recv_queue->lock, irqL); return precvframe; } /* * caller : defrag; recvframe_chk_defrag in recv_thread (passive) * pframequeue: defrag_queue : will be accessed in recv_thread (passive) * using spin_lock to protect */ void r8712_free_recvframe_queue(struct __queue *pframequeue, struct __queue *pfree_recv_queue) { union recv_frame *precvframe; struct list_head *plist, *phead; spin_lock(&pframequeue->lock); phead = &pframequeue->queue; plist = phead->next; while (!end_of_queue_search(phead, plist)) { precvframe = container_of(plist, union recv_frame, u.list); plist = plist->next; r8712_free_recvframe(precvframe, pfree_recv_queue); } spin_unlock(&pframequeue->lock); } sint r8712_recvframe_chkmic(struct _adapter *adapter, union recv_frame *precvframe) { sint i, res = _SUCCESS; u32 
datalen; u8 miccode[8]; u8 bmic_err = false; u8 *pframe, *payload, *pframemic; u8 *mickey, idx, *iv; struct sta_info *stainfo; struct rx_pkt_attrib *prxattrib = &precvframe->u.hdr.attrib; struct security_priv *psecuritypriv = &adapter->securitypriv; stainfo = r8712_get_stainfo(&adapter->stapriv, &prxattrib->ta[0]); if (prxattrib->encrypt == _TKIP_) { /* calculate mic code */ if (stainfo) { if (is_multicast_ether_addr(prxattrib->ra)) { iv = precvframe->u.hdr.rx_data + prxattrib->hdrlen; idx = iv[3]; mickey = &psecuritypriv->XGrprxmickey[(((idx >> 6) & 0x3)) - 1].skey[0]; if (!psecuritypriv->binstallGrpkey) return _FAIL; } else { mickey = &stainfo->tkiprxmickey.skey[0]; } /*icv_len included the mic code*/ datalen = precvframe->u.hdr.len - prxattrib->hdrlen - prxattrib->iv_len - prxattrib->icv_len - 8; pframe = precvframe->u.hdr.rx_data; payload = pframe + prxattrib->hdrlen + prxattrib->iv_len; seccalctkipmic(mickey, pframe, payload, datalen, &miccode[0], (unsigned char)prxattrib->priority); pframemic = payload + datalen; bmic_err = false; for (i = 0; i < 8; i++) { if (miccode[i] != *(pframemic + i)) bmic_err = true; } if (bmic_err) { if (prxattrib->bdecrypted) r8712_handle_tkip_mic_err(adapter, (u8)is_multicast_ether_addr(prxattrib->ra)); res = _FAIL; } else { /* mic checked ok */ if (!psecuritypriv->bcheck_grpkey && is_multicast_ether_addr(prxattrib->ra)) psecuritypriv->bcheck_grpkey = true; } recvframe_pull_tail(precvframe, 8); } } return res; } /* decrypt and set the ivlen,icvlen of the recv_frame */ union recv_frame *r8712_decryptor(struct _adapter *padapter, union recv_frame *precv_frame) { struct rx_pkt_attrib *prxattrib = &precv_frame->u.hdr.attrib; struct security_priv *psecuritypriv = &padapter->securitypriv; union recv_frame *return_packet = precv_frame; if ((prxattrib->encrypt > 0) && ((prxattrib->bdecrypted == 0) || psecuritypriv->sw_decrypt)) { psecuritypriv->hw_decrypted = false; switch (prxattrib->encrypt) { case _WEP40_: case _WEP104_: r8712_wep_decrypt(padapter, (u8 *)precv_frame); break; case _TKIP_: r8712_tkip_decrypt(padapter, (u8 *)precv_frame); break; case _AES_: r8712_aes_decrypt(padapter, (u8 *)precv_frame); break; default: break; } } else if (prxattrib->bdecrypted == 1) { psecuritypriv->hw_decrypted = true; } return return_packet; } /*###set the security information in the recv_frame */ union recv_frame *r8712_portctrl(struct _adapter *adapter, union recv_frame *precv_frame) { u8 *psta_addr, *ptr; uint auth_alg; struct recv_frame_hdr *pfhdr; struct sta_info *psta; struct sta_priv *pstapriv; union recv_frame *prtnframe; u16 ether_type; pstapriv = &adapter->stapriv; ptr = precv_frame->u.hdr.rx_data; pfhdr = &precv_frame->u.hdr; psta_addr = pfhdr->attrib.ta; psta = r8712_get_stainfo(pstapriv, psta_addr); auth_alg = adapter->securitypriv.AuthAlgrthm; if (auth_alg == 2) { /* get ether_type */ ptr = ptr + pfhdr->attrib.hdrlen + LLC_HEADER_SIZE; ether_type = get_unaligned_be16(ptr); if (psta && psta->ieee8021x_blocked) { /* blocked * only accept EAPOL frame */ if (ether_type == 0x888e) { prtnframe = precv_frame; } else { /*free this frame*/ r8712_free_recvframe(precv_frame, &adapter->recvpriv.free_recv_queue); prtnframe = NULL; } } else { /* allowed * check decryption status, and decrypt the * frame if needed */ prtnframe = precv_frame; /* check is the EAPOL frame or not (Rekey) */ if (ether_type == 0x888e) { /* check Rekey */ prtnframe = precv_frame; } } } else { prtnframe = precv_frame; } return prtnframe; } static sint recv_decache(union recv_frame *precv_frame, u8 
bretry, struct stainfo_rxcache *prxcache) { sint tid = precv_frame->u.hdr.attrib.priority; u16 seq_ctrl = ((precv_frame->u.hdr.attrib.seq_num & 0xffff) << 4) | (precv_frame->u.hdr.attrib.frag_num & 0xf); if (tid > 15) return _FAIL; if (seq_ctrl == prxcache->tid_rxseq[tid]) return _FAIL; prxcache->tid_rxseq[tid] = seq_ctrl; return _SUCCESS; } static sint sta2sta_data_frame(struct _adapter *adapter, union recv_frame *precv_frame, struct sta_info **psta) { u8 *ptr = precv_frame->u.hdr.rx_data; sint ret = _SUCCESS; struct rx_pkt_attrib *pattrib = &precv_frame->u.hdr.attrib; struct sta_priv *pstapriv = &adapter->stapriv; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; u8 *mybssid = get_bssid(pmlmepriv); u8 *myhwaddr = myid(&adapter->eeprompriv); u8 *sta_addr = NULL; bool bmcast = is_multicast_ether_addr(pattrib->dst); if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)) { /* filter packets that SA is myself or multicast or broadcast */ if (!memcmp(myhwaddr, pattrib->src, ETH_ALEN)) return _FAIL; if ((memcmp(myhwaddr, pattrib->dst, ETH_ALEN)) && (!bmcast)) return _FAIL; if (is_zero_ether_addr(pattrib->bssid) || is_zero_ether_addr(mybssid) || (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) return _FAIL; sta_addr = pattrib->src; } else if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) { /* For Station mode, sa and bssid should always be BSSID, * and DA is my mac-address */ if (memcmp(pattrib->bssid, pattrib->src, ETH_ALEN)) return _FAIL; sta_addr = pattrib->bssid; } else if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { if (bmcast) { /* For AP mode, if DA == MCAST, then BSSID should * be also MCAST */ if (!is_multicast_ether_addr(pattrib->bssid)) return _FAIL; } else { /* not mc-frame */ /* For AP mode, if DA is non-MCAST, then it must be * BSSID, and bssid == BSSID */ if (memcmp(pattrib->bssid, pattrib->dst, ETH_ALEN)) return _FAIL; sta_addr = pattrib->src; } } else if (check_fwstate(pmlmepriv, WIFI_MP_STATE)) { memcpy(pattrib->dst, GetAddr1Ptr(ptr), ETH_ALEN); memcpy(pattrib->src, GetAddr2Ptr(ptr), ETH_ALEN); memcpy(pattrib->bssid, GetAddr3Ptr(ptr), ETH_ALEN); memcpy(pattrib->ra, pattrib->dst, ETH_ALEN); memcpy(pattrib->ta, pattrib->src, ETH_ALEN); sta_addr = mybssid; } else { ret = _FAIL; } if (bmcast) *psta = r8712_get_bcmc_stainfo(adapter); else *psta = r8712_get_stainfo(pstapriv, sta_addr); /* get ap_info */ if (!*psta) { if (check_fwstate(pmlmepriv, WIFI_MP_STATE)) adapter->mppriv.rx_pktloss++; return _FAIL; } return ret; } static sint ap2sta_data_frame(struct _adapter *adapter, union recv_frame *precv_frame, struct sta_info **psta) { u8 *ptr = precv_frame->u.hdr.rx_data; struct rx_pkt_attrib *pattrib = &precv_frame->u.hdr.attrib; struct sta_priv *pstapriv = &adapter->stapriv; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; u8 *mybssid = get_bssid(pmlmepriv); u8 *myhwaddr = myid(&adapter->eeprompriv); bool bmcast = is_multicast_ether_addr(pattrib->dst); if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) && check_fwstate(pmlmepriv, _FW_LINKED)) { /* if NULL-frame, drop packet */ if ((GetFrameSubType(ptr)) == (IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC)) return _FAIL; /* drop QoS-SubType Data, including QoS NULL, * excluding QoS-Data */ if ((GetFrameSubType(ptr) & WIFI_QOS_DATA_TYPE) == WIFI_QOS_DATA_TYPE) { if (GetFrameSubType(ptr) & (BIT(4) | BIT(5) | BIT(6))) return _FAIL; } /* filter packets that SA is myself or multicast or broadcast */ if (!memcmp(myhwaddr, pattrib->src, ETH_ALEN)) return _FAIL; /* da should be for me */ if 
((memcmp(myhwaddr, pattrib->dst, ETH_ALEN)) && (!bmcast)) return _FAIL; /* check BSSID */ if (is_zero_ether_addr(pattrib->bssid) || is_zero_ether_addr(mybssid) || (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) return _FAIL; if (bmcast) *psta = r8712_get_bcmc_stainfo(adapter); else *psta = r8712_get_stainfo(pstapriv, pattrib->bssid); if (!*psta) return _FAIL; } else if (check_fwstate(pmlmepriv, WIFI_MP_STATE) && check_fwstate(pmlmepriv, _FW_LINKED)) { memcpy(pattrib->dst, GetAddr1Ptr(ptr), ETH_ALEN); memcpy(pattrib->src, GetAddr2Ptr(ptr), ETH_ALEN); memcpy(pattrib->bssid, GetAddr3Ptr(ptr), ETH_ALEN); memcpy(pattrib->ra, pattrib->dst, ETH_ALEN); memcpy(pattrib->ta, pattrib->src, ETH_ALEN); memcpy(pattrib->bssid, mybssid, ETH_ALEN); *psta = r8712_get_stainfo(pstapriv, pattrib->bssid); if (!*psta) return _FAIL; } else { return _FAIL; } return _SUCCESS; } static sint sta2ap_data_frame(struct _adapter *adapter, union recv_frame *precv_frame, struct sta_info **psta) { struct rx_pkt_attrib *pattrib = &precv_frame->u.hdr.attrib; struct sta_priv *pstapriv = &adapter->stapriv; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; unsigned char *mybssid = get_bssid(pmlmepriv); if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /* For AP mode, if DA is non-MCAST, then it must be BSSID, * and bssid == BSSID * For AP mode, RA=BSSID, TX=STA(SRC_ADDR), A3=DST_ADDR */ if (memcmp(pattrib->bssid, mybssid, ETH_ALEN)) return _FAIL; *psta = r8712_get_stainfo(pstapriv, pattrib->src); if (!*psta) return _FAIL; } return _SUCCESS; } static sint validate_recv_ctrl_frame(struct _adapter *adapter, union recv_frame *precv_frame) { return _FAIL; } static sint validate_recv_mgnt_frame(struct _adapter *adapter, union recv_frame *precv_frame) { return _FAIL; } static sint validate_recv_data_frame(struct _adapter *adapter, union recv_frame *precv_frame) { int res; u8 bretry; u8 *psa, *pda, *pbssid; struct sta_info *psta = NULL; u8 *ptr = precv_frame->u.hdr.rx_data; struct rx_pkt_attrib *pattrib = &precv_frame->u.hdr.attrib; struct security_priv *psecuritypriv = &adapter->securitypriv; bretry = GetRetry(ptr); pda = ieee80211_get_DA((struct ieee80211_hdr *)ptr); psa = ieee80211_get_SA((struct ieee80211_hdr *)ptr); pbssid = get_hdr_bssid(ptr); if (!pbssid) return _FAIL; memcpy(pattrib->dst, pda, ETH_ALEN); memcpy(pattrib->src, psa, ETH_ALEN); memcpy(pattrib->bssid, pbssid, ETH_ALEN); switch (pattrib->to_fr_ds) { case 0: memcpy(pattrib->ra, pda, ETH_ALEN); memcpy(pattrib->ta, psa, ETH_ALEN); res = sta2sta_data_frame(adapter, precv_frame, &psta); break; case 1: memcpy(pattrib->ra, pda, ETH_ALEN); memcpy(pattrib->ta, pbssid, ETH_ALEN); res = ap2sta_data_frame(adapter, precv_frame, &psta); break; case 2: memcpy(pattrib->ra, pbssid, ETH_ALEN); memcpy(pattrib->ta, psa, ETH_ALEN); res = sta2ap_data_frame(adapter, precv_frame, &psta); break; case 3: memcpy(pattrib->ra, GetAddr1Ptr(ptr), ETH_ALEN); memcpy(pattrib->ta, GetAddr2Ptr(ptr), ETH_ALEN); return _FAIL; default: return _FAIL; } if (res == _FAIL) return _FAIL; if (!psta) return _FAIL; precv_frame->u.hdr.psta = psta; pattrib->amsdu = 0; /* parsing QC field */ if (pattrib->qos == 1) { pattrib->priority = GetPriority((ptr + 24)); pattrib->ack_policy = GetAckpolicy((ptr + 24)); pattrib->amsdu = GetAMsdu((ptr + 24)); pattrib->hdrlen = pattrib->to_fr_ds == 3 ? 32 : 26; } else { pattrib->priority = 0; pattrib->hdrlen = (pattrib->to_fr_ds == 3) ? 
30 : 24; } if (pattrib->order)/*HT-CTRL 11n*/ pattrib->hdrlen += 4; precv_frame->u.hdr.preorder_ctrl = &psta->recvreorder_ctrl[pattrib->priority]; /* decache, drop duplicate recv packets */ if (recv_decache(precv_frame, bretry, &psta->sta_recvpriv.rxcache) == _FAIL) return _FAIL; if (pattrib->privacy) { GET_ENCRY_ALGO(psecuritypriv, psta, pattrib->encrypt, is_multicast_ether_addr(pattrib->ra)); SET_ICE_IV_LEN(pattrib->iv_len, pattrib->icv_len, pattrib->encrypt); } else { pattrib->encrypt = 0; pattrib->iv_len = pattrib->icv_len = 0; } return _SUCCESS; } sint r8712_validate_recv_frame(struct _adapter *adapter, union recv_frame *precv_frame) { /*shall check frame subtype, to / from ds, da, bssid */ /*then call check if rx seq/frag. duplicated.*/ u8 type; u8 subtype; sint retval = _SUCCESS; struct rx_pkt_attrib *pattrib = &precv_frame->u.hdr.attrib; u8 *ptr = precv_frame->u.hdr.rx_data; u8 ver = (unsigned char)(*ptr) & 0x3; /*add version chk*/ if (ver != 0) return _FAIL; type = GetFrameType(ptr); subtype = GetFrameSubType(ptr); /*bit(7)~bit(2)*/ pattrib->to_fr_ds = get_tofr_ds(ptr); pattrib->frag_num = GetFragNum(ptr); pattrib->seq_num = GetSequence(ptr); pattrib->pw_save = GetPwrMgt(ptr); pattrib->mfrag = GetMFrag(ptr); pattrib->mdata = GetMData(ptr); pattrib->privacy = GetPrivacy(ptr); pattrib->order = GetOrder(ptr); switch (type) { case IEEE80211_FTYPE_MGMT: retval = validate_recv_mgnt_frame(adapter, precv_frame); break; case IEEE80211_FTYPE_CTL: retval = validate_recv_ctrl_frame(adapter, precv_frame); break; case IEEE80211_FTYPE_DATA: pattrib->qos = (subtype & BIT(7)) ? 1 : 0; retval = validate_recv_data_frame(adapter, precv_frame); break; default: return _FAIL; } return retval; } int r8712_wlanhdr_to_ethhdr(union recv_frame *precvframe) { /*remove the wlanhdr and add the eth_hdr*/ sint rmv_len; u16 len; u8 bsnaphdr; u8 *psnap_type; struct ieee80211_snap_hdr *psnap; struct _adapter *adapter = precvframe->u.hdr.adapter; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; u8 *ptr = precvframe->u.hdr.rx_data; /*point to frame_ctrl field*/ struct rx_pkt_attrib *pattrib = &precvframe->u.hdr.attrib; if (pattrib->encrypt) recvframe_pull_tail(precvframe, pattrib->icv_len); psnap = (struct ieee80211_snap_hdr *)(ptr + pattrib->hdrlen + pattrib->iv_len); psnap_type = ptr + pattrib->hdrlen + pattrib->iv_len + SNAP_SIZE; /* convert hdr + possible LLC headers into Ethernet header */ if ((!memcmp(psnap, (void *)rfc1042_header, SNAP_SIZE) && (memcmp(psnap_type, (void *)SNAP_ETH_TYPE_IPX, 2)) && (memcmp(psnap_type, (void *)SNAP_ETH_TYPE_APPLETALK_AARP, 2))) || !memcmp(psnap, (void *)bridge_tunnel_header, SNAP_SIZE)) { /* remove RFC1042 or Bridge-Tunnel encapsulation and * replace EtherType */ bsnaphdr = true; } else { /* Leave Ethernet header part of hdr and full payload */ bsnaphdr = false; } rmv_len = pattrib->hdrlen + pattrib->iv_len + (bsnaphdr ? SNAP_SIZE : 0); len = precvframe->u.hdr.len - rmv_len; if (check_fwstate(pmlmepriv, WIFI_MP_STATE)) { ptr += rmv_len; *ptr = 0x87; *(ptr + 1) = 0x12; /* append rx status for mp test packets */ ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + 2) - 24); if (!ptr) return -ENOMEM; memcpy(ptr, get_rxmem(precvframe), 24); ptr += 24; } else { ptr = recvframe_pull(precvframe, (rmv_len - sizeof(struct ethhdr) + (bsnaphdr ? 
2 : 0))); if (!ptr) return -ENOMEM; } memcpy(ptr, pattrib->dst, ETH_ALEN); memcpy(ptr + ETH_ALEN, pattrib->src, ETH_ALEN); if (!bsnaphdr) { __be16 be_tmp = htons(len); memcpy(ptr + 12, &be_tmp, 2); } return 0; } void r8712_recv_entry(union recv_frame *precvframe) { struct _adapter *padapter; struct recv_priv *precvpriv; s32 ret = _SUCCESS; padapter = precvframe->u.hdr.adapter; precvpriv = &(padapter->recvpriv); padapter->ledpriv.LedControlHandler(padapter, LED_CTL_RX); ret = recv_func(padapter, precvframe); if (ret == _FAIL) goto _recv_entry_drop; precvpriv->rx_pkts++; precvpriv->rx_bytes += (uint)(precvframe->u.hdr.rx_tail - precvframe->u.hdr.rx_data); return; _recv_entry_drop: precvpriv->rx_drop++; padapter->mppriv.rx_pktloss = precvpriv->rx_drop; }
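/*
 * Hedged illustration (not driver code): a standalone restatement of the
 * LLC/SNAP decision that r8712_wlanhdr_to_ethhdr() makes above. Payloads
 * starting with the RFC 1042 SNAP header (unless the encapsulated type is
 * IPX or AppleTalk AARP) or with the 802.1h bridge-tunnel header get the
 * SNAP bytes stripped so the EtherType can be reused; everything else keeps
 * the raw LLC payload. The sketch_* names are assumptions for this sketch;
 * the driver itself relies on the kernel's rfc1042_header and
 * bridge_tunnel_header tables.
 */
#include <stdbool.h>
#include <string.h>

enum { SKETCH_SNAP_SIZE = 6 };

static const unsigned char sketch_rfc1042_hdr[SKETCH_SNAP_SIZE] = {
	0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00
};
static const unsigned char sketch_bridge_tunnel_hdr[SKETCH_SNAP_SIZE] = {
	0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8
};
static const unsigned char sketch_ethtype_ipx[2]  = { 0x81, 0x37 };
static const unsigned char sketch_ethtype_aarp[2] = { 0x80, 0xf3 };

/* Mirror of the bsnaphdr test: true means "strip the SNAP header and
 * reuse its EtherType when rebuilding the Ethernet header". */
static bool sketch_strip_snap(const unsigned char *llc)
{
	const unsigned char *ethtype = llc + SKETCH_SNAP_SIZE;

	if (!memcmp(llc, sketch_rfc1042_hdr, SKETCH_SNAP_SIZE) &&
	    memcmp(ethtype, sketch_ethtype_ipx, 2) &&
	    memcmp(ethtype, sketch_ethtype_aarp, 2))
		return true;
	return !memcmp(llc, sketch_bridge_tunnel_hdr, SKETCH_SNAP_SIZE);
}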
// SPDX-License-Identifier: GPL-2.0-or-later /* * UDP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/ipv4/udp.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file.
*/ #include <linux/bpf-cgroup.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/indirect_call_wrapper.h> #include <trace/events/udp.h> #include <net/addrconf.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/raw.h> #include <net/seg6.h> #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/busy_poll.h> #include <net/sock_reuseport.h> #include <net/gro.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <trace/events/skb.h> #include "udp_impl.h" static void udpv6_destruct_sock(struct sock *sk) { udp_destruct_common(sk); inet6_sock_destruct(sk); } int udpv6_init_sock(struct sock *sk) { udp_lib_init_sock(sk); sk->sk_destruct = udpv6_destruct_sock; set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } INDIRECT_CALLABLE_SCOPE u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; net_get_random_once(&udp6_ehash_secret, sizeof(udp6_ehash_secret)); net_get_random_once(&udp_ipv6_hash_secret, sizeof(udp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, udp6_ehash_secret + net_hash_mix(net)); } int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; return udp_lib_get_port(sk, snum, hash2_nulladdr); } void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); } static int compute_score(struct sock *sk, const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, int dif, int sdif) { int bound_dev_if, score; struct inet_sock *inet; bool dev_match; if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || sk->sk_family != PF_INET6) return -1; if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; inet = inet_sk(sk); if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; score++; } if (!ipv6_addr_any(&sk->sk_v6_daddr)) { if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif); if (!dev_match) return -1; if (bound_dev_if) score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; int score, badness; bool need_rescore; result = NULL; 
badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { need_rescore = false; rescore: score = compute_score(need_rescore ? result : sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; if (need_rescore) continue; if (sk->sk_state == TCP_ESTABLISHED) { result = sk; continue; } result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, udp6_ehashfn); if (!result) { result = sk; continue; } /* Fall back to scoring if group has connections */ if (!reuseport_has_conns(sk)) return result; /* Reuseport logic returned an error, keep original score. */ if (IS_ERR(result)) continue; /* compute_score is too long of a function to be * inlined, and calling it again here yields * measureable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. */ need_rescore = true; goto rescore; } } return result; } /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; struct sock *result, *sk; hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) goto done; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && udptable == net->ipv4.udp_table) { sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp6_ehashfn); if (sk) { result = sk; goto done; } } /* Got non-wildcard socket or error on first lookup */ if (result) goto done; /* Lookup wildcard sockets */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, sdif, hslot2, skb); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), inet6_sdif(skb), udptable, skb); } struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, &iph->daddr, dport, iif, sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). * Does increment socket refcount. 
*/ #if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6) struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { struct sock *sk; sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, net->ipv4.udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif /* do not use the scratch area len for jumbograms: their length exceeds the * scratch area space; note that the IP6CB flags is still in the first * cacheline, so checking for jumbograms is cheap */ static int udp6_skb_len(struct sk_buff *skb) { return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); } /* * This should be easy, if there is something there we * return it, otherwise we block. */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); struct udp_mib __percpu *mib; bool checksum_valid = false; int is_udp4; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); mib = __UDPX_MIB(sk, is_udp4); /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial * coverage checksum (UDP-Lite), do it before the copy. */ if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || udp_skb_csum_unnecessary(skb)) { if (udp_skb_is_linear(skb)) err = copy_linear_skb(skb, copied, off, &msg->msg_iter); else err = skb_copy_datagram_msg(skb, off, msg, copied); } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } if (unlikely(err)) { if (!peeking) { atomic_inc(&sk->sk_drops); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); sock_recv_cmsgs(msg, sk, skb); /* Copy the address.
*/ if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); sin6->sin6_scope_id = 0; } else { sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); } *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, (struct sockaddr *)sin6, addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); if (is_udp4) { if (inet_cmsg_flags(inet)) ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); } err = copied; if (flags & MSG_TRUNC) err = ulen; skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); } EXPORT_SYMBOL(udpv6_encap_enable); /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { int i; for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); const struct ip6_tnl_encap_ops *encap; encap = rcu_dereference(ip6tun_encaps[i]); if (!encap) continue; handler = encap->err_handler; if (handler && !handler(skb, opt, type, code, offset, info)) return 0; } return -ENOENT; } /* Try to match ICMP errors to UDP tunnels by looking up a socket without * reversing source and destination port: this will match tunnels that force the * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that * lwtunnels might actually break this assumption by being configured with * different destination ports on endpoints, in this case we won't be able to * trace ICMP messages back to them. * * If this doesn't match any socket, probe tunnels with arbitrary destination * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port * we've sent packets to won't necessarily match the local destination port. * * Then ask the tunnel implementation to match the error against a valid * association. * * Return an error if we can't find a match, the socket if we need further * processing, zero otherwise. 
*/ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); /* Network header needs to point to the outer IPv6 header inside ICMP */ skb_reset_network_header(skb); /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (lookup && lookup(sk, skb)) sk = NULL; goto out; } sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); } skb_set_transport_header(skb, transport_offset); skb_set_network_header(skb, network_offset); return sk; } int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); bool tunnel = false; struct sock *sk; int harderr; int err; struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, udptable, sk, skb, opt, type, code, info); if (!sk) return 0; } else sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return PTR_ERR(sk); } tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), READ_ONCE(sk->sk_mark), sk->sk_uid); } else { ip6_sk_redirect(skb, sk); } goto out; } /* Tunnels don't have an application socket: don't pass errors back */ if (tunnel) { if (udp_sk(sk)->encap_err_rcv) udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); goto out; } if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); } sk->sk_err = err; sk_error_report(sk); out: return 0; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } else { sk_mark_napi_id_once(sk, skb); } rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); enum skb_drop_reason drop_reason; /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { 
UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { UDP6_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); drop_reason = SKB_DROP_REASON_PROTO_MEM; } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } return 0; } static __inline__ int udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { return __udp6_lib_err(skb, opt, type, code, offset, info, dev_net(skb->dev)->ipv4.udp_table); } static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; } nf_reset_ct(skb); if (static_branch_unlikely(&udpv6_encap_needed_key) && READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* * This is an encapsulation socket so pass the skb to * the socket's udp_encap_rcv() hook. Otherwise, just * fall through and pass this up the UDP socket. * up->encap_rcv() returns the following value: * =0 if skb was successfully passed to the encap * handler or was discarded by it. * >0 if skb should be passed on to UDP. * <0 if skb should be resubmitted as proto -N */ /* if we're overly short, let UDP handle it */ encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ if (udp_lib_checksum_complete(skb)) goto csum_error; ret = encap_rcv(sk, skb); if (ret <= 0) { __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } } /* FALLTHROUGH -- it's a UDP Packet */ } /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 
*/ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { u16 pcrlen = READ_ONCE(up->pcrlen); if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; } udp_csum_pull_header(skb); skb_dst_drop(skb); return __udpv6_queue_rcv_skb(sk, skb); csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff *next, *segs; int ret; if (likely(!udp_unexpected_gso(sk, skb))) return udpv6_queue_rcv_one_skb(sk, skb); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, false); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); udp_post_segment_fix_csum(skb); ret = udpv6_queue_rcv_one_skb(sk, skb); if (ret > 0) ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, true); } return 0; } static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif, unsigned short hnum) { const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) return false; if (udp_sk(sk)->udp_port_hash != hnum || sk->sk_family != PF_INET6 || (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; if (!inet6_mc_check(sk, loc_addr, rmt_addr)) return false; return true; } static void udp6_csum_zero_error(struct sk_buff *skb) { /* RFC 2460 section 8.1 says that we SHOULD log * this error. Well, it is reasonable. */ net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. 
*/ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable, int proto) { struct sock *sk, *first = NULL; const struct udphdr *uh = udp_hdr(skb); unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); int dif = inet6_iif(skb); int sdif = inet6_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; if (use_hash2) { hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & udptable->mask; hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, uh->source, saddr, dif, sdif, hnum)) continue; /* If zero checksum and no_check is not on for * the socket then skip it. */ if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; continue; } nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { atomic_inc(&sk->sk_drops); __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); __UDP6_INC_STATS(net, UDP_MIB_INERRORS, IS_UDPLITE(sk)); continue; } if (udpv6_queue_rcv_skb(sk, nskb) > 0) consume_skb(nskb); } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { hash2 = hash2_any; goto start_lookup; } if (first) { if (udpv6_queue_rcv_skb(first, skb) > 0) consume_skb(skb); } else { kfree_skb(skb); __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, proto == IPPROTO_UDPLITE); } return 0; } static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { if (udp_sk_rx_dst_set(sk, dst)) sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and * return code conversion for ip layer consumption */ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, struct udphdr *uh) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); /* a return value > 0 means to resubmit the input */ if (ret > 0) return ret; return 0; } int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); struct sock *sk = NULL; struct udphdr *uh; bool refcounted; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto discard; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; uh = udp_hdr(skb); ulen = ntohs(uh->len); if (ulen > skb->len) goto short_packet; if (proto == IPPROTO_UDP) { /* UDP validates ulen. */ /* Check for jumbo payload */ if (ulen == 0) ulen = skb->len; if (ulen < sizeof(*uh)) goto short_packet; if (ulen < skb->len) { if (pskb_trim_rcsum(skb, ulen)) goto short_packet; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; uh = udp_hdr(skb); } } if (udp6_csum_init(skb, uh, proto)) goto csum_error; /* Check if the socket is already available, e.g. 
due to early demux */ sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest, &refcounted, udp6_ehashfn); if (IS_ERR(sk)) goto no_sk; if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; } ret = udp6_unicast_rcv_skb(sk, skb, uh); if (refcounted) sock_put(sk); return ret; } /* * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, saddr, daddr, udptable, proto); /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } no_sk: reason = SKB_DROP_REASON_NO_SOCKET; if (!uh->check) goto report_csum_error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; nf_reset_ct(skb); if (udp_lib_checksum_complete(skb)) goto csum_error; __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); sk_skb_reason_drop(sk, skb, reason); return 0; short_packet: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", saddr, ntohs(uh->source), ulen, skb->len, daddr, ntohs(uh->dest)); goto discard; report_csum_error: udp6_csum_zero_error(skb); csum_error: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); sk_skb_reason_drop(sk, skb, reason); return 0; } static struct sock *__udp6_lib_demux_lookup(struct net *net, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif) { struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); unsigned int hash2, slot2; struct udp_hslot *hslot2; __portpair ports; struct sock *sk; hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; ports = INET_COMBINED_PORTS(rmt_port, hnum); udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; } return NULL; } void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; int sdif = inet6_sdif(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; uh = udp_hdr(skb); if (skb->pkt_type == PACKET_HOST) sk = __udp6_lib_demux_lookup(net, uh->dest, &ipv6_hdr(skb)->daddr, uh->source, &ipv6_hdr(skb)->saddr, dif, sdif); else return; if (!sk) return; skb->sk = sk; DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk)); skb->destructor = sock_pfree; dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst) { /* set noref for now. 
* any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); } } INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } /* * Throw away all pending data and cancel the corking. Socket is locked. */ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET) udp_flush_pending_frames(sk); else if (up->pending) { up->len = 0; WRITE_ONCE(up->pending, 0); ip6_flush_pending_frames(sk); } } static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { if (addr_len < offsetofend(struct sockaddr, sa_family)) return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. */ if (uaddr->sa_family == AF_INET) { if (ipv6_only_sock(sk)) return -EAFNOSUPPORT; return udp_pre_connect(sk, uaddr, addr_len); } if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** * udp6_hwcsum_outgoing - handle outgoing HW checksumming * @sk: socket we are sending on * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) * @saddr: source address * @daddr: destination address * @len: length of packet */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len) { unsigned int offset; struct udphdr *uh = udp_hdr(skb); struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); csum = skb->csum; skb->ip_summed = CHECKSUM_NONE; do { csum = csum_add(csum, frags->csum); } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } /* * Sending */ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, struct inet_cork *cork) { struct sock *sk = skb->sk; struct udphdr *uh; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; int offset = skb_transport_offset(skb); int len = skb->len - offset; int datalen = len - sizeof(*uh); /* * Create a UDP header */ uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; uh->len = htons(len); uh->check = 0; if (cork->gso_size) { const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); if (hlen + cork->gso_size > cork->fragsize) { kfree_skb(skb); return -EINVAL; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } if (datalen > cork->gso_size) { skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, cork->gso_size); /* Don't checksum the payload, skb will get segmented */ goto csum_partial; } } if 
(is_udplite) csum = udplite_csum(skb); else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } } else { UDP6_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); } return err; } static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); skb = ip6_finish_skb(sk); if (!skb) goto out; err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, &inet_sk(sk)->cork.base); out: up->len = 0; WRITE_ONCE(up->pending, 0); return err; } int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct inet_cork_full cork; struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); ipcm6_init(&ipc6); ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); /* destination address check */ if (sin6) { if (addr_len < offsetof(struct sockaddr, sa_data)) return -EINVAL; switch (sin6->sin6_family) { case AF_INET6: if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; if (ipv6_addr_any(daddr) && ipv6_addr_v4mapped(&np->saddr)) ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), daddr); break; case AF_INET: goto do_udp_sendmsg; case AF_UNSPEC: msg->msg_name = sin6 = NULL; msg->msg_namelen = addr_len = 0; daddr = NULL; break; default: return -EINVAL; } } else if (!READ_ONCE(up->pending)) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; } else daddr = NULL; if (daddr) { if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport; sin.sin_addr.s_addr = daddr->s6_addr32[3]; msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: err = ipv6_only_sock(sk) ? -ENETUNREACH : udp_sendmsg(sk, msg, len); msg->msg_name = sin6; msg->msg_namelen = addr_len; return err; } } /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (READ_ONCE(up->pending)) { if (READ_ONCE(up->pending) == AF_INET) return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. 
*/ lock_sock(sk); if (likely(up->pending)) { if (unlikely(up->pending != AF_INET6)) { release_sock(sk); return -EAFNOSUPPORT; } dst = NULL; goto do_append_data; } release_sock(sk); } ulen += sizeof(struct udphdr); memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } } /* * Otherwise it will be difficult to maintain * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; fl6->flowlabel = np->flow_label; connected = true; } if (!fl6->flowi6_oif) fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); connected = false; } if (err < 0) { fl6_sock_release(flowlabel); return err; } if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } if (!opt) { opt = txopt_get(np); opt_to_free = opt; } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; fl6->flowi6_proto = sk->sk_protocol; fl6->flowi6_mark = ipc6.sockc.mark; fl6->daddr = *daddr; if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) fl6->saddr = np->saddr; fl6->fl6_sport = inet->inet_sport; if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, &addr_len, &fl6->saddr); if (err) goto out_no_dst; if (sin6) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { /* BPF program rewrote IPv6-only by IPv4-mapped * IPv6. It's currently unsupported. */ err = -ENOTSUPP; goto out_no_dst; } if (sin6->sin6_port == 0) { /* BPF program set invalid port. Reject it. 
*/ err = -EINVAL; goto out_no_dst; } fl6->fl6_dport = sin6->sin6_port; fl6->daddr = sin6->sin6_addr; } } if (ipv6_addr_any(&fl6->daddr)) fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { fl6->flowi6_oif = READ_ONCE(np->mcast_oif); connected = false; } else if (!fl6->flowi6_oif) fl6->flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto out; } if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, dst_rt6_info(dst), msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, fl6, &cork.base); /* ip6_make_skb steals dst reference */ goto out_no_dst; } lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ /* ... which is an evident application bug. --ANK */ release_sock(sk); net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } WRITE_ONCE(up->pending, AF_INET6); do_append_data: if (ipc6.dontfrag < 0) ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) WRITE_ONCE(up->pending, 0); if (err > 0) err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: dst_release(dst); out_no_dst: fl6_sock_release(flowlabel); txopt_put(opt_to_free); if (!err) return len; /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise * we don't have a good statistic (IpOutDiscards but it can be too many * things). We could add another new stat but at least for now that * seems like overkill. 
*/ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); } return err; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(dst, &fl6->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; goto out; } EXPORT_SYMBOL(udpv6_sendmsg); static void udpv6_splice_eof(struct socket *sock) { struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk)) return; lock_sock(sk); if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); /* protects from races with udp_abort() */ sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); if (static_branch_unlikely(&udpv6_encap_needed_key)) { if (up->encap_type) { void (*encap_destroy)(struct sock *sk); encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); } } } /* * Socket option code for UDP */ int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct udp_iter_state *)seq->private)->bucket; const struct inet_sock *inet = inet_sk((const struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); __ip6_dgram_sock_seq_show(seq, v, srcp, destp, udp_rqueue_get(v), bucket); } return 0; } const struct seq_operations udp6_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, .show = udp6_seq_show, }; EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, .udp_table = NULL, }; int __net_init udp6_proc_init(struct net *net) { if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops, sizeof(struct udp_iter_state), &udp6_seq_afinfo)) return -ENOMEM; return 0; } void udp6_proc_exit(struct net *net) { remove_proc_entry("udp6", net->proc_net); } #endif /* CONFIG_PROC_FS */ /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, .close = udp_lib_close, .pre_connect = udpv6_pre_connect, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .splice_eof = udpv6_splice_eof, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .put_port = udp_lib_unhash, #ifdef 
CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif .memory_allocated = &udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), .h.udp_table = NULL, .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }; int __init udpv6_init(void) { int ret; net_hotdata.udpv6_protocol = (struct inet6_protocol) { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); if (ret) goto out; ret = inet6_register_protosw(&udpv6_protosw); if (ret) goto out_udpv6_protocol; out: return ret; out_udpv6_protocol: inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); goto out; } void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); } |
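/*
 * A minimal userspace sketch (not part of udp.c) of the transmit path
 * above: a single sendto() on an unconnected AF_INET6 socket goes
 * through udpv6_sendmsg() and, because UDP_SEGMENT is set, hits the
 * cork->gso_size branch in udp_v6_send_skb(), so the 4000-byte payload
 * is segmented into 1000-byte datagrams.  The address, port, payload
 * and segment sizes are arbitrary; UDP_SEGMENT (103) comes from
 * include/uapi/linux/udp.h and may be missing from older libc headers.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif

int main(void)
{
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port = htons(9999) };
	char payload[4000];
	int gso_size = 1000;
	int fd;

	inet_pton(AF_INET6, "::1", &dst.sin6_addr);
	memset(payload, 'x', sizeof(payload));

	fd = socket(AF_INET6, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;

	/* IPPROTO_UDP == SOL_UDP (17); glibc does not always expose SOL_UDP */
	if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
		       &gso_size, sizeof(gso_size)) < 0)
		perror("UDP_SEGMENT");

	if (sendto(fd, payload, sizeof(payload), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}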
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * V9FS FID Management
 *
 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
 */
#ifndef FS_9P_FID_H
#define FS_9P_FID_H
#include <linux/list.h>
#include "v9fs.h"

struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
				   kuid_t uid, bool any);
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
{
	return v9fs_fid_lookup(dentry->d_parent);
}
void v9fs_fid_add(struct dentry *dentry, struct p9_fid **fid);
void v9fs_open_fid_add(struct inode *inode, struct p9_fid **fid);

static inline struct p9_fid *clone_fid(struct p9_fid *fid)
{
	return IS_ERR(fid) ? fid : p9_client_walk(fid, 0, NULL, 1);
}

static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
{
	struct p9_fid *fid, *nfid;

	fid = v9fs_fid_lookup(dentry);
	if (!fid || IS_ERR(fid))
		return fid;

	nfid = clone_fid(fid);
	p9_fid_put(fid);
	return nfid;
}

/**
 * v9fs_fid_add_modes - add cache flags to fid mode (for client use only)
 * @fid: fid to augment
 * @s_flags: session info mount flags
 * @s_cache: session info cache flags
 * @f_flags: unix open flags
 *
 * make sure mode reflects flags of underlying mounts
 * also qid.version == 0 reflects a synthetic or legacy file system
 * NOTE: these are set after open so only reflect 9p client not
 * underlying file system on server.
 */
static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags,
	unsigned int s_cache, unsigned int f_flags)
{
	if ((!s_cache) ||
	    ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
	    (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
		fid->mode |= P9L_DIRECT; /* no read or write cache */
	} else if ((!(s_cache & CACHE_WRITEBACK)) || (f_flags & O_DSYNC) ||
		   (s_flags & V9FS_SYNC)) {
		fid->mode |= P9L_NOWRITECACHE;
	}
}
#endif
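/*
 * A standalone mirror (not part of this header) of the
 * v9fs_fid_add_modes() decision above, so the cache-mode selection can
 * be run and inspected on its own.  The *_STANDIN values below are
 * placeholders for the real V9FS_*, CACHE_* and open-flag bits, which
 * live in v9fs.h and the fcntl headers; only the shape of the
 * predicate is meant to match.
 */
#include <stdio.h>

#define IGNORE_QV_STANDIN	0x01	/* stand-in for V9FS_IGNORE_QV */
#define DIRECT_IO_STANDIN	0x02	/* stand-in for V9FS_DIRECT_IO */
#define SYNC_STANDIN		0x04	/* stand-in for V9FS_SYNC */
#define WRITEBACK_STANDIN	0x01	/* stand-in for CACHE_WRITEBACK */
#define O_DIRECT_STANDIN	0x01	/* stand-in for O_DIRECT */
#define O_DSYNC_STANDIN		0x02	/* stand-in for O_DSYNC */

static const char *fid_cache_mode(unsigned int s_flags, unsigned int s_cache,
				  unsigned int f_flags, unsigned int qid_version)
{
	if (!s_cache ||
	    (qid_version == 0 && !(s_flags & IGNORE_QV_STANDIN)) ||
	    (s_flags & DIRECT_IO_STANDIN) || (f_flags & O_DIRECT_STANDIN))
		return "P9L_DIRECT (no read or write cache)";
	if (!(s_cache & WRITEBACK_STANDIN) || (f_flags & O_DSYNC_STANDIN) ||
	    (s_flags & SYNC_STANDIN))
		return "P9L_NOWRITECACHE";
	return "fully cached";
}

int main(void)
{
	/* writeback-capable cache, regular file, plain open */
	printf("%s\n", fid_cache_mode(0, WRITEBACK_STANDIN, 0, 7));
	/* synthetic file (qid.version == 0) falls back to direct I/O */
	printf("%s\n", fid_cache_mode(0, WRITEBACK_STANDIN, 0, 0));
	/* O_DSYNC keeps the read cache but skips the write cache */
	printf("%s\n", fid_cache_mode(0, WRITEBACK_STANDIN, O_DSYNC_STANDIN, 7));
	return 0;
}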
// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> #include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/route.h> /* don't try to find route from mcast/bcast/zeronet */ static __be32 get_saddr(__be32 addr) { if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || ipv4_is_zeronet(addr)) return 0; return addr; } void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dst = &regs->data[priv->dreg]; const struct net_device *dev = NULL; struct iphdr *iph, _iph; __be32 addr; if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } if (priv->flags & NFTA_FIB_F_DADDR) addr = iph->daddr; else addr = iph->saddr; *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dest = &regs->data[priv->dreg]; struct iphdr *iph, _iph; struct fib_result res; struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; const struct net_device *oif; const struct net_device *found; /* * Do not set flowi4_oif, it restricts results (for example, asking * for oif 3 will get RTN_UNICAST result even if the daddr exits * on another interface. * * Search results for the desired outinterface instead.
*/ if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); else if (priv->flags & NFTA_FIB_F_IIF) oif = nft_in(pkt); else oif = NULL; if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); return; } iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { nft_fib_store_result(dest, priv, pkt->skb->dev); return; } } if (priv->flags & NFTA_FIB_F_MARK) fl4.flowi4_mark = pkt->skb->mark; fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; if (priv->flags & NFTA_FIB_F_DADDR) { fl4.daddr = iph->daddr; fl4.saddr = get_saddr(iph->saddr); } else { if (nft_hook(pkt) == NF_INET_FORWARD && priv->flags & NFTA_FIB_F_IIF) fl4.flowi4_iif = nft_out(pkt)->ifindex; fl4.daddr = iph->saddr; fl4.saddr = get_saddr(iph->daddr); } *dest = 0; if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return; switch (res.type) { case RTN_UNICAST: break; case RTN_LOCAL: /* Should not see RTN_LOCAL here */ return; default: break; } if (!oif) { found = FIB_RES_DEV(res); } else { if (!fib_info_nh_uses_dev(res.fi, oif)) return; found = oif; } nft_fib_store_result(dest, priv, found); } EXPORT_SYMBOL_GPL(nft_fib4_eval); static struct nft_expr_type nft_fib4_type; static const struct nft_expr_ops nft_fib4_type_ops = { .type = &nft_fib4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib4_eval_type, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib4_ops = { .type = &nft_fib4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib4_eval, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * nft_fib4_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { enum nft_fib_result result; if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: return &nft_fib4_ops; case NFT_FIB_RESULT_OIFNAME: return &nft_fib4_ops; case NFT_FIB_RESULT_ADDRTYPE: return &nft_fib4_type_ops; default: return ERR_PTR(-EOPNOTSUPP); } } static struct nft_expr_type nft_fib4_type __read_mostly = { .name = "fib", .select_ops = nft_fib4_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV4, .owner = THIS_MODULE, }; static int __init nft_fib4_module_init(void) { return nft_register_expr(&nft_fib4_type); } static void __exit nft_fib4_module_exit(void) { nft_unregister_expr(&nft_fib4_type); } module_init(nft_fib4_module_init); module_exit(nft_fib4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); MODULE_DESCRIPTION("nftables fib / ip route lookup support"); |
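/*
 * A toy userspace sketch (not part of this module) of the reverse-path
 * idea behind nft_fib4_eval(): when asked about the source direction,
 * the lookup key uses the packet's *source* address as the destination
 * (see the daddr/saddr swap above), and the device the FIB returns is
 * compared with the interface the packet arrived on -- the usual
 * "fib saddr . iif oif" anti-spoofing pattern.  The three-entry table
 * and the addresses are made up; the kernel uses fib_lookup() against
 * the real FIB.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

struct toy_route { uint32_t prefix; int plen; const char *dev; };

static const struct toy_route toy_fib[] = {
	{ 0xc0a80100, 24, "eth0" },	/* 192.168.1.0/24 */
	{ 0x0a000000,  8, "eth1" },	/* 10.0.0.0/8 */
	{ 0x00000000,  0, "wan0" },	/* default route */
};

static const char *toy_lookup(uint32_t addr)
{
	for (size_t i = 0; i < sizeof(toy_fib) / sizeof(toy_fib[0]); i++) {
		uint32_t mask = toy_fib[i].plen ?
				~0u << (32 - toy_fib[i].plen) : 0;

		if ((addr & mask) == toy_fib[i].prefix)
			return toy_fib[i].dev;
	}
	return NULL;
}

int main(void)
{
	uint32_t saddr = ntohl(inet_addr("10.1.2.3"));
	const char *iif = "eth0";		/* interface the packet came in on */
	const char *oif = toy_lookup(saddr);	/* route back to the sender */

	printf("route to source via %s, packet arrived on %s: %s\n",
	       oif ? oif : "(none)", iif,
	       oif && !strcmp(oif, iif) ? "pass" : "reverse path mismatch");
	return 0;
}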
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 Thomas Gleixner. * Copyright (C) 2016-2017 Christoph Hellwig.
*/ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/sort.h> #include <linux/group_cpus.h> #ifdef CONFIG_SMP static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, unsigned int cpus_per_grp) { const struct cpumask *siblmsk; int cpu, sibl; for ( ; cpus_per_grp > 0; ) { cpu = cpumask_first(nmsk); /* Should not happen, but I'm too lazy to think about it */ if (cpu >= nr_cpu_ids) return; cpumask_clear_cpu(cpu, nmsk); cpumask_set_cpu(cpu, irqmsk); cpus_per_grp--; /* If the cpu has siblings, use them first */ siblmsk = topology_sibling_cpumask(cpu); for (sibl = -1; cpus_per_grp > 0; ) { sibl = cpumask_next(sibl, siblmsk); if (sibl >= nr_cpu_ids) break; if (!cpumask_test_and_clear_cpu(sibl, nmsk)) continue; cpumask_set_cpu(sibl, irqmsk); cpus_per_grp--; } } } static cpumask_var_t *alloc_node_to_cpumask(void) { cpumask_var_t *masks; int node; masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); if (!masks) return NULL; for (node = 0; node < nr_node_ids; node++) { if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) goto out_unwind; } return masks; out_unwind: while (--node >= 0) free_cpumask_var(masks[node]); kfree(masks); return NULL; } static void free_node_to_cpumask(cpumask_var_t *masks) { int node; for (node = 0; node < nr_node_ids; node++) free_cpumask_var(masks[node]); kfree(masks); } static void build_node_to_cpumask(cpumask_var_t *masks) { int cpu; for_each_possible_cpu(cpu) cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); } static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, const struct cpumask *mask, nodemask_t *nodemsk) { int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ for_each_node(n) { if (cpumask_intersects(mask, node_to_cpumask[n])) { node_set(n, *nodemsk); nodes++; } } return nodes; } struct node_groups { unsigned id; union { unsigned ngroups; unsigned ncpus; }; }; static int ncpus_cmp_func(const void *l, const void *r) { const struct node_groups *ln = l; const struct node_groups *rn = r; return ln->ncpus - rn->ncpus; } /* * Allocate group number for each node, so that for each node: * * 1) the allocated number is >= 1 * * 2) the allocated number is <= active CPU number of this node * * The actual allocated total groups may be less than @numgrps when * active total CPU number is less than @numgrps. * * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' * for each node. */ static void alloc_nodes_groups(unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, const nodemask_t nodemsk, struct cpumask *nmsk, struct node_groups *node_groups) { unsigned n, remaining_ncpus = 0; for (n = 0; n < nr_node_ids; n++) { node_groups[n].id = n; node_groups[n].ncpus = UINT_MAX; } for_each_node_mask(n, nodemsk) { unsigned ncpus; cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); ncpus = cpumask_weight(nmsk); if (!ncpus) continue; remaining_ncpus += ncpus; node_groups[n].ncpus = ncpus; } numgrps = min_t(unsigned, remaining_ncpus, numgrps); sort(node_groups, nr_node_ids, sizeof(node_groups[0]), ncpus_cmp_func, NULL); /* * Allocate groups for each node according to the ratio of this * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is * bigger than number of active numa nodes. Always start the * allocation from the node with minimized nr_cpus. 
* * This way guarantees that each active node gets allocated at * least one group, and the theory is simple: over-allocation * is only done when this node is assigned by one group, so * other nodes will be allocated >= 1 groups, since 'numgrps' is * bigger than number of numa nodes. * * One perfect invariant is that number of allocated groups for * each node is <= CPU count of this node: * * 1) suppose there are two nodes: A and B * ncpu(X) is CPU count of node X * grps(X) is the group count allocated to node X via this * algorithm * * ncpu(A) <= ncpu(B) * ncpu(A) + ncpu(B) = N * grps(A) + grps(B) = G * * grps(A) = max(1, round_down(G * ncpu(A) / N)) * grps(B) = G - grps(A) * * both N and G are integer, and 2 <= G <= N, suppose * G = N - delta, and 0 <= delta <= N - 2 * * 2) obviously grps(A) <= ncpu(A) because: * * if grps(A) is 1, then grps(A) <= ncpu(A) given * ncpu(A) >= 1 * * otherwise, * grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N * * 3) prove how grps(B) <= ncpu(B): * * if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be * over-allocated, so grps(B) <= ncpu(B), * * otherwise: * * grps(A) = * round_down(G * ncpu(A) / N) = * round_down((N - delta) * ncpu(A) / N) = * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= * round_down((N * ncpu(A) - delta * N) / N) = * cpu(A) - delta * * then: * * grps(A) - G >= ncpu(A) - delta - G * => * G - grps(A) <= G + delta - ncpu(A) * => * grps(B) <= N - ncpu(A) * => * grps(B) <= cpu(B) * * For nodes >= 3, it can be thought as one node and another big * node given that is exactly what this algorithm is implemented, * and we always re-calculate 'remaining_ncpus' & 'numgrps', and * finally for each node X: grps(X) <= ncpu(X). * */ for (n = 0; n < nr_node_ids; n++) { unsigned ngroups, ncpus; if (node_groups[n].ncpus == UINT_MAX) continue; WARN_ON_ONCE(numgrps == 0); ncpus = node_groups[n].ncpus; ngroups = max_t(unsigned, 1, numgrps * ncpus / remaining_ncpus); WARN_ON_ONCE(ngroups > ncpus); node_groups[n].ngroups = ngroups; remaining_ncpus -= ncpus; numgrps -= ngroups; } } static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; unsigned int last_grp = numgrps; unsigned int curgrp = startgrp; nodemask_t nodemsk = NODE_MASK_NONE; struct node_groups *node_groups; if (cpumask_empty(cpu_mask)) return 0; nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); /* * If the number of nodes in the mask is greater than or equal the * number of groups we just spread the groups across the nodes. 
*/ if (numgrps <= nodes) { for_each_node_mask(n, nodemsk) { /* Ensure that only CPUs which are in both masks are set */ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); cpumask_or(&masks[curgrp], &masks[curgrp], nmsk); if (++curgrp == last_grp) curgrp = 0; } return numgrps; } node_groups = kcalloc(nr_node_ids, sizeof(struct node_groups), GFP_KERNEL); if (!node_groups) return -ENOMEM; /* allocate group number for each node */ alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, nodemsk, nmsk, node_groups); for (i = 0; i < nr_node_ids; i++) { unsigned int ncpus, v; struct node_groups *nv = &node_groups[i]; if (nv->ngroups == UINT_MAX) continue; /* Get the cpus on this node which are in the mask */ cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); ncpus = cpumask_weight(nmsk); if (!ncpus) continue; WARN_ON_ONCE(nv->ngroups > ncpus); /* Account for rounding errors */ extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); /* Spread allocated groups on CPUs of the current node */ for (v = 0; v < nv->ngroups; v++, curgrp++) { cpus_per_grp = ncpus / nv->ngroups; /* Account for extra groups to compensate rounding errors */ if (extra_grps) { cpus_per_grp++; --extra_grps; } /* * wrapping has to be considered given 'startgrp' * may start anywhere */ if (curgrp >= last_grp) curgrp = 0; grp_spread_init_one(&masks[curgrp], nmsk, cpus_per_grp); } done += nv->ngroups; } kfree(node_groups); return done; } /** * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality * @numgrps: number of groups * * Return: cpumask array if successful, NULL otherwise. And each element * includes CPUs assigned to this group * * Try to put close CPUs from viewpoint of CPU and NUMA locality into * same group, and run two-stage grouping: * 1) allocate present CPUs on these groups evenly first * 2) allocate other possible CPUs on these groups evenly * * We guarantee in the resulted grouping that all CPUs are covered, and * no same CPU is assigned to multiple groups */ struct cpumask *group_cpus_evenly(unsigned int numgrps) { unsigned int curgrp = 0, nr_present = 0, nr_others = 0; cpumask_var_t *node_to_cpumask; cpumask_var_t nmsk, npresmsk; int ret = -ENOMEM; struct cpumask *masks = NULL; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) goto fail_nmsk; node_to_cpumask = alloc_node_to_cpumask(); if (!node_to_cpumask) goto fail_npresmsk; masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) goto fail_node_to_cpumask; build_node_to_cpumask(node_to_cpumask); /* * Make a local cache of 'cpu_present_mask', so the two stages * spread can observe consistent 'cpu_present_mask' without holding * cpu hotplug lock, then we can reduce deadlock risk with cpu * hotplug code. * * Here CPU hotplug may happen when reading `cpu_present_mask`, and * we can live with the case because it only affects that hotplug * CPU is handled in the 1st or 2nd stage, and either way is correct * from API user viewpoint since 2-stage spread is sort of * optimization. */ cpumask_copy(npresmsk, data_race(cpu_present_mask)); /* grouping present CPUs first */ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret < 0) goto fail_build_affinity; nr_present = ret; /* * Allocate non present CPUs starting from the next group to be * handled. If the grouping of present CPUs already exhausted the * group space, assign the non present CPUs to the already * allocated out groups. 
*/ if (nr_present >= numgrps) curgrp = 0; else curgrp = nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk); ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret >= 0) nr_others = ret; fail_build_affinity: if (ret >= 0) WARN_ON(nr_present + nr_others < numgrps); fail_node_to_cpumask: free_node_to_cpumask(node_to_cpumask); fail_npresmsk: free_cpumask_var(npresmsk); fail_nmsk: free_cpumask_var(nmsk); if (ret < 0) { kfree(masks); return NULL; } return masks; } #else /* CONFIG_SMP */ struct cpumask *group_cpus_evenly(unsigned int numgrps) { struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; /* assign all CPUs(cpu 0) to the 1st group only */ cpumask_copy(&masks[0], cpu_possible_mask); return masks; } #endif /* CONFIG_SMP */ EXPORT_SYMBOL_GPL(group_cpus_evenly); |
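/*
 * A userspace walk-through (not part of this file) of the proportional
 * split done in alloc_nodes_groups() above: nodes are visited in
 * ascending order of CPU count and each gets
 * max(1, numgrps * ncpus / remaining_ncpus) groups, with the remainders
 * re-computed as it goes -- the quantity the long proof above shows
 * never exceeds the node's CPU count.  The node CPU counts and the
 * group count are made up for the example.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_node { unsigned int id, ncpus; };

static int ncpus_cmp(const void *l, const void *r)
{
	const struct toy_node *ln = l, *rn = r;

	return (int)ln->ncpus - (int)rn->ncpus;
}

int main(void)
{
	struct toy_node nodes[] = { { 0, 8 }, { 1, 2 }, { 2, 6 } };
	unsigned int numgrps = 8, remaining = 0, i;

	for (i = 0; i < 3; i++)
		remaining += nodes[i].ncpus;
	if (numgrps > remaining)
		numgrps = remaining;

	qsort(nodes, 3, sizeof(nodes[0]), ncpus_cmp);

	for (i = 0; i < 3; i++) {
		unsigned int ncpus = nodes[i].ncpus;
		unsigned int ngrps = numgrps * ncpus / remaining;

		if (ngrps < 1)
			ngrps = 1;
		/* prints 1, 3 and 4 groups for 2, 6 and 8 CPUs */
		printf("node %u: %u cpus -> %u groups\n",
		       nodes[i].id, ncpus, ngrps);
		remaining -= ncpus;
		numgrps -= ngrps;
	}
	return 0;
}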
// SPDX-License-Identifier: GPL-2.0+ /* * Driver for SCM Microsystems (a.k.a. Shuttle) USB-ATAPI cable * * Current development and maintenance by: * (c) 2000, 2001 Robert Baruch (autophile@starband.net) * (c) 2004, 2005 Daniel Drake <dsd@gentoo.org> * * Developed with the assistance of: * (c) 2002 Alan Stern <stern@rowland.org> * * Flash support based on earlier work by: * (c) 2002 Thomas Kreiling <usbdev@sm04.de> * * Many originally ATAPI devices were slightly modified to meet the USB * market by using some kind of translation from ATAPI to USB on the host, * and the peripheral would translate from USB back to ATAPI. * * SCM Microsystems (www.scmmicro.com) makes a device, sold to OEM's only, * which does the USB-to-ATAPI conversion. By obtaining the data sheet on * their device under nondisclosure agreement, I have been able to write * this driver for Linux. * * The chip used in the device can also be used for EPP and ISA translation * as well. This driver is only guaranteed to work with the ATAPI * translation. * * See the Kconfig help text for a list of devices known to be supported by * this driver. */ #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/cdrom.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-usbat" MODULE_DESCRIPTION("Driver for SCM Microsystems (a.k.a.
Shuttle) USB-ATAPI cable"); MODULE_AUTHOR("Daniel Drake <dsd@gentoo.org>, Robert Baruch <autophile@starband.net>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(USB_STORAGE); /* Supported device types */ #define USBAT_DEV_HP8200 0x01 #define USBAT_DEV_FLASH 0x02 #define USBAT_EPP_PORT 0x10 #define USBAT_EPP_REGISTER 0x30 #define USBAT_ATA 0x40 #define USBAT_ISA 0x50 /* Commands (need to be logically OR'd with an access type */ #define USBAT_CMD_READ_REG 0x00 #define USBAT_CMD_WRITE_REG 0x01 #define USBAT_CMD_READ_BLOCK 0x02 #define USBAT_CMD_WRITE_BLOCK 0x03 #define USBAT_CMD_COND_READ_BLOCK 0x04 #define USBAT_CMD_COND_WRITE_BLOCK 0x05 #define USBAT_CMD_WRITE_REGS 0x07 /* Commands (these don't need an access type) */ #define USBAT_CMD_EXEC_CMD 0x80 #define USBAT_CMD_SET_FEAT 0x81 #define USBAT_CMD_UIO 0x82 /* Methods of accessing UIO register */ #define USBAT_UIO_READ 1 #define USBAT_UIO_WRITE 0 /* Qualifier bits */ #define USBAT_QUAL_FCQ 0x20 /* full compare */ #define USBAT_QUAL_ALQ 0x10 /* auto load subcount */ /* USBAT Flash Media status types */ #define USBAT_FLASH_MEDIA_NONE 0 #define USBAT_FLASH_MEDIA_CF 1 /* USBAT Flash Media change types */ #define USBAT_FLASH_MEDIA_SAME 0 #define USBAT_FLASH_MEDIA_CHANGED 1 /* USBAT ATA registers */ #define USBAT_ATA_DATA 0x10 /* read/write data (R/W) */ #define USBAT_ATA_FEATURES 0x11 /* set features (W) */ #define USBAT_ATA_ERROR 0x11 /* error (R) */ #define USBAT_ATA_SECCNT 0x12 /* sector count (R/W) */ #define USBAT_ATA_SECNUM 0x13 /* sector number (R/W) */ #define USBAT_ATA_LBA_ME 0x14 /* cylinder low (R/W) */ #define USBAT_ATA_LBA_HI 0x15 /* cylinder high (R/W) */ #define USBAT_ATA_DEVICE 0x16 /* head/device selection (R/W) */ #define USBAT_ATA_STATUS 0x17 /* device status (R) */ #define USBAT_ATA_CMD 0x17 /* device command (W) */ #define USBAT_ATA_ALTSTATUS 0x0E /* status (no clear IRQ) (R) */ /* USBAT User I/O Data registers */ #define USBAT_UIO_EPAD 0x80 /* Enable Peripheral Control Signals */ #define USBAT_UIO_CDT 0x40 /* Card Detect (Read Only) */ /* CDT = ACKD & !UI1 & !UI0 */ #define USBAT_UIO_1 0x20 /* I/O 1 */ #define USBAT_UIO_0 0x10 /* I/O 0 */ #define USBAT_UIO_EPP_ATA 0x08 /* 1=EPP mode, 0=ATA mode */ #define USBAT_UIO_UI1 0x04 /* Input 1 */ #define USBAT_UIO_UI0 0x02 /* Input 0 */ #define USBAT_UIO_INTR_ACK 0x01 /* Interrupt (ATA/ISA)/Acknowledge (EPP) */ /* USBAT User I/O Enable registers */ #define USBAT_UIO_DRVRST 0x80 /* Reset Peripheral */ #define USBAT_UIO_ACKD 0x40 /* Enable Card Detect */ #define USBAT_UIO_OE1 0x20 /* I/O 1 set=output/clr=input */ /* If ACKD=1, set OE1 to 1 also. 
*/ #define USBAT_UIO_OE0 0x10 /* I/O 0 set=output/clr=input */ #define USBAT_UIO_ADPRST 0x01 /* Reset SCM chip */ /* USBAT Features */ #define USBAT_FEAT_ETEN 0x80 /* External trigger enable */ #define USBAT_FEAT_U1 0x08 #define USBAT_FEAT_U0 0x04 #define USBAT_FEAT_ET1 0x02 #define USBAT_FEAT_ET2 0x01 struct usbat_info { int devicetype; /* Used for Flash readers only */ unsigned long sectors; /* total sector count */ unsigned long ssize; /* sector size in bytes */ unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ }; #define short_pack(LSB,MSB) ( ((u16)(LSB)) | ( ((u16)(MSB))<<8 ) ) #define LSB_of(s) ((s)&0xFF) #define MSB_of(s) ((s)>>8) static int transferred = 0; static int usbat_flash_transport(struct scsi_cmnd * srb, struct us_data *us); static int usbat_hp8200e_transport(struct scsi_cmnd *srb, struct us_data *us); static int init_usbat_cd(struct us_data *us); static int init_usbat_flash(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id usbat_usb_ids[] = { # include "unusual_usbat.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usbat_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev usbat_unusual_dev_list[] = { # include "unusual_usbat.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* * Convenience function to produce an ATA read/write sectors command * Use cmd=0x20 for read, cmd=0x30 for write */ static void usbat_pack_ata_sector_cmd(unsigned char *buf, unsigned char thistime, u32 sector, unsigned char cmd) { buf[0] = 0; buf[1] = thistime; buf[2] = sector & 0xFF; buf[3] = (sector >> 8) & 0xFF; buf[4] = (sector >> 16) & 0xFF; buf[5] = 0xE0 | ((sector >> 24) & 0x0F); buf[6] = cmd; } /* * Convenience function to get the device type (flash or hp8200) */ static int usbat_get_device_type(struct us_data *us) { return ((struct usbat_info*)us->extra)->devicetype; } /* * Read a register from the device */ static int usbat_read(struct us_data *us, unsigned char access, unsigned char reg, unsigned char *content) { return usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, access | USBAT_CMD_READ_REG, 0xC0, (u16)reg, 0, content, 1); } /* * Write to a register on the device */ static int usbat_write(struct us_data *us, unsigned char access, unsigned char reg, unsigned char content) { return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, access | USBAT_CMD_WRITE_REG, 0x40, short_pack(reg, content), 0, NULL, 0); } /* * Convenience function to perform a bulk read */ static int usbat_bulk_read(struct us_data *us, void* buf, unsigned int len, int use_sg) { if (len == 0) return USB_STOR_XFER_GOOD; usb_stor_dbg(us, "len = %d\n", len); return usb_stor_bulk_transfer_sg(us, us->recv_bulk_pipe, buf, len, use_sg, NULL); } /* * Convenience function to perform a bulk write */ static int usbat_bulk_write(struct us_data *us, void* buf, unsigned int len, int use_sg) { if (len == 0) return 
USB_STOR_XFER_GOOD; usb_stor_dbg(us, "len = %d\n", len); return usb_stor_bulk_transfer_sg(us, us->send_bulk_pipe, buf, len, use_sg, NULL); } /* * Some USBAT-specific commands can only be executed over a command transport * This transport allows one (len=8) or two (len=16) vendor-specific commands * to be executed. */ static int usbat_execute_command(struct us_data *us, unsigned char *commands, unsigned int len) { return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, USBAT_CMD_EXEC_CMD, 0x40, 0, 0, commands, len); } /* * Read the status register */ static int usbat_get_status(struct us_data *us, unsigned char *status) { int rc; rc = usbat_read(us, USBAT_ATA, USBAT_ATA_STATUS, status); usb_stor_dbg(us, "0x%02X\n", *status); return rc; } /* * Check the device status */ static int usbat_check_status(struct us_data *us) { unsigned char *reply = us->iobuf; int rc; rc = usbat_get_status(us, reply); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_FAILED; /* error/check condition (0x51 is ok) */ if (*reply & 0x01 && *reply != 0x51) return USB_STOR_TRANSPORT_FAILED; /* device fault */ if (*reply & 0x20) return USB_STOR_TRANSPORT_FAILED; return USB_STOR_TRANSPORT_GOOD; } /* * Stores critical information in internal registers in preparation for the execution * of a conditional usbat_read_blocks or usbat_write_blocks call. */ static int usbat_set_shuttle_features(struct us_data *us, unsigned char external_trigger, unsigned char epp_control, unsigned char mask_byte, unsigned char test_pattern, unsigned char subcountH, unsigned char subcountL) { unsigned char *command = us->iobuf; command[0] = 0x40; command[1] = USBAT_CMD_SET_FEAT; /* * The only bit relevant to ATA access is bit 6 * which defines 8 bit data access (set) or 16 bit (unset) */ command[2] = epp_control; /* * If FCQ is set in the qualifier (defined in R/W cmd), then bits U0, U1, * ET1 and ET2 define an external event to be checked for on event of a * _read_blocks or _write_blocks operation. The read/write will not take * place unless the defined trigger signal is active. */ command[3] = external_trigger; /* * The resultant byte of the mask operation (see mask_byte) is compared for * equivalence with this test pattern. If equal, the read/write will take * place. */ command[4] = test_pattern; /* * This value is logically ANDed with the status register field specified * in the read/write command. */ command[5] = mask_byte; /* * If ALQ is set in the qualifier, this field contains the address of the * registers where the byte count should be read for transferring the data. * If ALQ is not set, then this field contains the number of bytes to be * transferred. */ command[6] = subcountL; command[7] = subcountH; return usbat_execute_command(us, command, 8); } /* * Block, waiting for an ATA device to become not busy or to report * an error condition. */ static int usbat_wait_not_busy(struct us_data *us, int minutes) { int i; int result; unsigned char *status = us->iobuf; /* * Synchronizing cache on a CDR could take a heck of a long time, * but probably not more than 10 minutes or so. On the other hand, * doing a full blank on a CDRW at speed 1 will take about 75 * minutes! 
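	 *
	 * The loop below polls quickly at first and then backs off: 10 ms
	 * steps for the first 5 seconds, 50 ms steps for the next 10 seconds,
	 * 100 ms steps for the next 50 seconds, and 1-second steps for the
	 * remainder of the requested timeout.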
*/ for (i=0; i<1200+minutes*60; i++) { result = usbat_get_status(us, status); if (result!=USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (*status & 0x01) { /* check condition */ result = usbat_read(us, USBAT_ATA, 0x10, status); return USB_STOR_TRANSPORT_FAILED; } if (*status & 0x20) /* device fault */ return USB_STOR_TRANSPORT_FAILED; if ((*status & 0x80)==0x00) { /* not busy */ usb_stor_dbg(us, "Waited not busy for %d steps\n", i); return USB_STOR_TRANSPORT_GOOD; } if (i<500) msleep(10); /* 5 seconds */ else if (i<700) msleep(50); /* 10 seconds */ else if (i<1200) msleep(100); /* 50 seconds */ else msleep(1000); /* X minutes */ } usb_stor_dbg(us, "Waited not busy for %d minutes, timing out\n", minutes); return USB_STOR_TRANSPORT_FAILED; } /* * Read block data from the data register */ static int usbat_read_block(struct us_data *us, void* buf, unsigned short len, int use_sg) { int result; unsigned char *command = us->iobuf; if (!len) return USB_STOR_TRANSPORT_GOOD; command[0] = 0xC0; command[1] = USBAT_ATA | USBAT_CMD_READ_BLOCK; command[2] = USBAT_ATA_DATA; command[3] = 0; command[4] = 0; command[5] = 0; command[6] = LSB_of(len); command[7] = MSB_of(len); result = usbat_execute_command(us, command, 8); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; result = usbat_bulk_read(us, buf, len, use_sg); return (result == USB_STOR_XFER_GOOD ? USB_STOR_TRANSPORT_GOOD : USB_STOR_TRANSPORT_ERROR); } /* * Write block data via the data register */ static int usbat_write_block(struct us_data *us, unsigned char access, void* buf, unsigned short len, int minutes, int use_sg) { int result; unsigned char *command = us->iobuf; if (!len) return USB_STOR_TRANSPORT_GOOD; command[0] = 0x40; command[1] = access | USBAT_CMD_WRITE_BLOCK; command[2] = USBAT_ATA_DATA; command[3] = 0; command[4] = 0; command[5] = 0; command[6] = LSB_of(len); command[7] = MSB_of(len); result = usbat_execute_command(us, command, 8); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; result = usbat_bulk_write(us, buf, len, use_sg); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; return usbat_wait_not_busy(us, minutes); } /* * Process read and write requests */ static int usbat_hp8200e_rw_block_test(struct us_data *us, unsigned char access, unsigned char *registers, unsigned char *data_out, unsigned short num_registers, unsigned char data_reg, unsigned char status_reg, unsigned char timeout, unsigned char qualifier, int direction, void *buf, unsigned short len, int use_sg, int minutes) { int result; unsigned int pipe = (direction == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; unsigned char *command = us->iobuf; int i, j; int cmdlen; unsigned char *data = us->iobuf; unsigned char *status = us->iobuf; BUG_ON(num_registers > US_IOBUF_SIZE/2); for (i=0; i<20; i++) { /* * The first time we send the full command, which consists * of downloading the SCSI command followed by downloading * the data via a write-and-test. Any other time we only * send the command to download the data -- the SCSI command * is still 'active' in some sense in the device. * * We're only going to try sending the data 10 times. After * that, we just return a failure. */ if (i==0) { cmdlen = 16; /* * Write to multiple registers * Not really sure the 0x07, 0x17, 0xfc, 0xe7 is * necessary here, but that's what came out of the * trace every single time. 
*/ command[0] = 0x40; command[1] = access | USBAT_CMD_WRITE_REGS; command[2] = 0x07; command[3] = 0x17; command[4] = 0xFC; command[5] = 0xE7; command[6] = LSB_of(num_registers*2); command[7] = MSB_of(num_registers*2); } else cmdlen = 8; /* Conditionally read or write blocks */ command[cmdlen-8] = (direction==DMA_TO_DEVICE ? 0x40 : 0xC0); command[cmdlen-7] = access | (direction==DMA_TO_DEVICE ? USBAT_CMD_COND_WRITE_BLOCK : USBAT_CMD_COND_READ_BLOCK); command[cmdlen-6] = data_reg; command[cmdlen-5] = status_reg; command[cmdlen-4] = timeout; command[cmdlen-3] = qualifier; command[cmdlen-2] = LSB_of(len); command[cmdlen-1] = MSB_of(len); result = usbat_execute_command(us, command, cmdlen); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (i==0) { for (j=0; j<num_registers; j++) { data[j<<1] = registers[j]; data[1+(j<<1)] = data_out[j]; } result = usbat_bulk_write(us, data, num_registers*2, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; } result = usb_stor_bulk_transfer_sg(us, pipe, buf, len, use_sg, NULL); /* * If we get a stall on the bulk download, we'll retry * the bulk download -- but not the SCSI command because * in some sense the SCSI command is still 'active' and * waiting for the data. Don't ask me why this should be; * I'm only following what the Windoze driver did. * * Note that a stall for the test-and-read/write command means * that the test failed. In this case we're testing to make * sure that the device is error-free * (i.e. bit 0 -- CHK -- of status is 0). The most likely * hypothesis is that the USBAT chip somehow knows what * the device will accept, but doesn't give the device any * data until all data is received. Thus, the device would * still be waiting for the first byte of data if a stall * occurs, even if the stall implies that some data was * transferred. */ if (result == USB_STOR_XFER_SHORT || result == USB_STOR_XFER_STALLED) { /* * If we're reading and we stalled, then clear * the bulk output pipe only the first time. */ if (direction==DMA_FROM_DEVICE && i==0) { if (usb_stor_clear_halt(us, us->send_bulk_pipe) < 0) return USB_STOR_TRANSPORT_ERROR; } /* * Read status: is the device angry, or just busy? */ result = usbat_read(us, USBAT_ATA, direction==DMA_TO_DEVICE ? USBAT_ATA_STATUS : USBAT_ATA_ALTSTATUS, status); if (result!=USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (*status & 0x01) /* check condition */ return USB_STOR_TRANSPORT_FAILED; if (*status & 0x20) /* device fault */ return USB_STOR_TRANSPORT_FAILED; usb_stor_dbg(us, "Redoing %s\n", direction == DMA_TO_DEVICE ? "write" : "read"); } else if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; else return usbat_wait_not_busy(us, minutes); } usb_stor_dbg(us, "Bummer! %s bulk data 20 times failed\n", direction == DMA_TO_DEVICE ? "Writing" : "Reading"); return USB_STOR_TRANSPORT_FAILED; } /* * Write to multiple registers: * Allows us to write specific data to any registers. The data to be written * gets packed in this sequence: reg0, data0, reg1, data1, ..., regN, dataN * which gets sent through bulk out. * Not designed for large transfers of data! 
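 *
 * For example, the IDENTIFY DEVICE setup in usbat_flash_get_sector_count()
 *	registers[] = { USBAT_ATA_SECCNT, USBAT_ATA_DEVICE, USBAT_ATA_CMD }
 *	data_out[]  = { 0x01, 0xA0, 0xEC }
 * goes out on the bulk pipe as SECCNT, 0x01, DEVICE, 0xA0, CMD, 0xEC.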
*/ static int usbat_multiple_write(struct us_data *us, unsigned char *registers, unsigned char *data_out, unsigned short num_registers) { int i, result; unsigned char *data = us->iobuf; unsigned char *command = us->iobuf; BUG_ON(num_registers > US_IOBUF_SIZE/2); /* Write to multiple registers, ATA access */ command[0] = 0x40; command[1] = USBAT_ATA | USBAT_CMD_WRITE_REGS; /* No relevance */ command[2] = 0; command[3] = 0; command[4] = 0; command[5] = 0; /* Number of bytes to be transferred (incl. addresses and data) */ command[6] = LSB_of(num_registers*2); command[7] = MSB_of(num_registers*2); /* The setup command */ result = usbat_execute_command(us, command, 8); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* Create the reg/data, reg/data sequence */ for (i=0; i<num_registers; i++) { data[i<<1] = registers[i]; data[1+(i<<1)] = data_out[i]; } /* Send the data */ result = usbat_bulk_write(us, data, num_registers*2, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_get_device_type(us) == USBAT_DEV_HP8200) return usbat_wait_not_busy(us, 0); else return USB_STOR_TRANSPORT_GOOD; } /* * Conditionally read blocks from device: * Allows us to read blocks from a specific data register, based upon the * condition that a status register can be successfully masked with a status * qualifier. If this condition is not initially met, the read will wait * up until a maximum amount of time has elapsed, as specified by timeout. * The read will start when the condition is met, otherwise the command aborts. * * The qualifier defined here is not the value that is masked, it defines * conditions for the write to take place. The actual masked qualifier (and * other related details) are defined beforehand with _set_shuttle_features(). */ static int usbat_read_blocks(struct us_data *us, void* buffer, int len, int use_sg) { int result; unsigned char *command = us->iobuf; command[0] = 0xC0; command[1] = USBAT_ATA | USBAT_CMD_COND_READ_BLOCK; command[2] = USBAT_ATA_DATA; command[3] = USBAT_ATA_STATUS; command[4] = 0xFD; /* Timeout (ms); */ command[5] = USBAT_QUAL_FCQ; command[6] = LSB_of(len); command[7] = MSB_of(len); /* Multiple block read setup command */ result = usbat_execute_command(us, command, 8); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_FAILED; /* Read the blocks we just asked for */ result = usbat_bulk_read(us, buffer, len, use_sg); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_FAILED; return USB_STOR_TRANSPORT_GOOD; } /* * Conditionally write blocks to device: * Allows us to write blocks to a specific data register, based upon the * condition that a status register can be successfully masked with a status * qualifier. If this condition is not initially met, the write will wait * up until a maximum amount of time has elapsed, as specified by timeout. * The read will start when the condition is met, otherwise the command aborts. * * The qualifier defined here is not the value that is masked, it defines * conditions for the write to take place. The actual masked qualifier (and * other related details) are defined beforehand with _set_shuttle_features(). 
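 *
 * (As with usbat_read_blocks(), the data phase only begins once the masked
 * status register matches the test pattern configured beforehand by
 * usbat_set_shuttle_features().)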
*/ static int usbat_write_blocks(struct us_data *us, void* buffer, int len, int use_sg) { int result; unsigned char *command = us->iobuf; command[0] = 0x40; command[1] = USBAT_ATA | USBAT_CMD_COND_WRITE_BLOCK; command[2] = USBAT_ATA_DATA; command[3] = USBAT_ATA_STATUS; command[4] = 0xFD; /* Timeout (ms) */ command[5] = USBAT_QUAL_FCQ; command[6] = LSB_of(len); command[7] = MSB_of(len); /* Multiple block write setup command */ result = usbat_execute_command(us, command, 8); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_FAILED; /* Write the data */ result = usbat_bulk_write(us, buffer, len, use_sg); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_FAILED; return USB_STOR_TRANSPORT_GOOD; } /* * Read the User IO register */ static int usbat_read_user_io(struct us_data *us, unsigned char *data_flags) { int result; result = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, USBAT_CMD_UIO, 0xC0, 0, 0, data_flags, USBAT_UIO_READ); usb_stor_dbg(us, "UIO register reads %02X\n", *data_flags); return result; } /* * Write to the User IO register */ static int usbat_write_user_io(struct us_data *us, unsigned char enable_flags, unsigned char data_flags) { return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, USBAT_CMD_UIO, 0x40, short_pack(enable_flags, data_flags), 0, NULL, USBAT_UIO_WRITE); } /* * Reset the device * Often needed on media change. */ static int usbat_device_reset(struct us_data *us) { int rc; /* * Reset peripheral, enable peripheral control signals * (bring reset signal up) */ rc = usbat_write_user_io(us, USBAT_UIO_DRVRST | USBAT_UIO_OE1 | USBAT_UIO_OE0, USBAT_UIO_EPAD | USBAT_UIO_1); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* * Enable peripheral control signals * (bring reset signal down) */ rc = usbat_write_user_io(us, USBAT_UIO_OE1 | USBAT_UIO_OE0, USBAT_UIO_EPAD | USBAT_UIO_1); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } /* * Enable card detect */ static int usbat_device_enable_cdt(struct us_data *us) { int rc; /* Enable peripheral control signals and card detect */ rc = usbat_write_user_io(us, USBAT_UIO_ACKD | USBAT_UIO_OE1 | USBAT_UIO_OE0, USBAT_UIO_EPAD | USBAT_UIO_1); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } /* * Determine if media is present. 
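 * The UI0 input of the User I/O register reflects card detect: it reads
 * as set while no CompactFlash card is inserted.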
*/ static int usbat_flash_check_media_present(struct us_data *us, unsigned char *uio) { if (*uio & USBAT_UIO_UI0) { usb_stor_dbg(us, "no media detected\n"); return USBAT_FLASH_MEDIA_NONE; } return USBAT_FLASH_MEDIA_CF; } /* * Determine if media has changed since last operation */ static int usbat_flash_check_media_changed(struct us_data *us, unsigned char *uio) { if (*uio & USBAT_UIO_0) { usb_stor_dbg(us, "media change detected\n"); return USBAT_FLASH_MEDIA_CHANGED; } return USBAT_FLASH_MEDIA_SAME; } /* * Check for media change / no media and handle the situation appropriately */ static int usbat_flash_check_media(struct us_data *us, struct usbat_info *info) { int rc; unsigned char *uio = us->iobuf; rc = usbat_read_user_io(us, uio); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* Check for media existence */ rc = usbat_flash_check_media_present(us, uio); if (rc == USBAT_FLASH_MEDIA_NONE) { info->sense_key = 0x02; info->sense_asc = 0x3A; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } /* Check for media change */ rc = usbat_flash_check_media_changed(us, uio); if (rc == USBAT_FLASH_MEDIA_CHANGED) { /* Reset and re-enable card detect */ rc = usbat_device_reset(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; rc = usbat_device_enable_cdt(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; msleep(50); rc = usbat_read_user_io(us, uio); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; info->sense_key = UNIT_ATTENTION; info->sense_asc = 0x28; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } return USB_STOR_TRANSPORT_GOOD; } /* * Determine whether we are controlling a flash-based reader/writer, * or a HP8200-based CD drive. * Sets transport functions as appropriate. */ static int usbat_identify_device(struct us_data *us, struct usbat_info *info) { int rc; unsigned char status; if (!us || !info) return USB_STOR_TRANSPORT_ERROR; rc = usbat_device_reset(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; msleep(500); /* * In attempt to distinguish between HP CDRW's and Flash readers, we now * execute the IDENTIFY PACKET DEVICE command. On ATA devices (i.e. flash * readers), this command should fail with error. On ATAPI devices (i.e. * CDROM drives), it should succeed. 
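	 * (The 0xA1 written to the command register below is the ATA opcode
	 * for IDENTIFY PACKET DEVICE.)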
*/ rc = usbat_write(us, USBAT_ATA, USBAT_ATA_CMD, 0xA1); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; rc = usbat_get_status(us, &status); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* Check for error bit, or if the command 'fell through' */ if (status == 0xA1 || !(status & 0x01)) { /* Device is HP 8200 */ usb_stor_dbg(us, "Detected HP8200 CDRW\n"); info->devicetype = USBAT_DEV_HP8200; } else { /* Device is a CompactFlash reader/writer */ usb_stor_dbg(us, "Detected Flash reader/writer\n"); info->devicetype = USBAT_DEV_FLASH; } return USB_STOR_TRANSPORT_GOOD; } /* * Set the transport function based on the device type */ static int usbat_set_transport(struct us_data *us, struct usbat_info *info, int devicetype) { if (!info->devicetype) info->devicetype = devicetype; if (!info->devicetype) usbat_identify_device(us, info); switch (info->devicetype) { default: return USB_STOR_TRANSPORT_ERROR; case USBAT_DEV_HP8200: us->transport = usbat_hp8200e_transport; break; case USBAT_DEV_FLASH: us->transport = usbat_flash_transport; break; } return 0; } /* * Read the media capacity */ static int usbat_flash_get_sector_count(struct us_data *us, struct usbat_info *info) { unsigned char registers[3] = { USBAT_ATA_SECCNT, USBAT_ATA_DEVICE, USBAT_ATA_CMD, }; unsigned char command[3] = { 0x01, 0xA0, 0xEC }; unsigned char *reply; unsigned char status; int rc; if (!us || !info) return USB_STOR_TRANSPORT_ERROR; reply = kmalloc(512, GFP_NOIO); if (!reply) return USB_STOR_TRANSPORT_ERROR; /* ATA command : IDENTIFY DEVICE */ rc = usbat_multiple_write(us, registers, command, 3); if (rc != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Gah! identify_device failed\n"); rc = USB_STOR_TRANSPORT_ERROR; goto leave; } /* Read device status */ if (usbat_get_status(us, &status) != USB_STOR_XFER_GOOD) { rc = USB_STOR_TRANSPORT_ERROR; goto leave; } msleep(100); /* Read the device identification data */ rc = usbat_read_block(us, reply, 512, 0); if (rc != USB_STOR_TRANSPORT_GOOD) goto leave; info->sectors = ((u32)(reply[117]) << 24) | ((u32)(reply[116]) << 16) | ((u32)(reply[115]) << 8) | ((u32)(reply[114]) ); rc = USB_STOR_TRANSPORT_GOOD; leave: kfree(reply); return rc; } /* * Read data from device */ static int usbat_flash_read_data(struct us_data *us, struct usbat_info *info, u32 sector, u32 sectors) { unsigned char registers[7] = { USBAT_ATA_FEATURES, USBAT_ATA_SECCNT, USBAT_ATA_SECNUM, USBAT_ATA_LBA_ME, USBAT_ATA_LBA_HI, USBAT_ATA_DEVICE, USBAT_ATA_STATUS, }; unsigned char command[7]; unsigned char *buffer; unsigned char thistime; unsigned int totallen, alloclen; int len, result; unsigned int sg_offset = 0; struct scatterlist *sg = NULL; result = usbat_flash_check_media(us, info); if (result != USB_STOR_TRANSPORT_GOOD) return result; /* * we're working in LBA mode. according to the ATA spec, * we can support up to 28-bit addressing. I don't know if Jumpshot * supports beyond 24-bit addressing. It's kind of hard to test * since it requires > 8GB CF card. */ if (sector > 0x0FFFFFFF) return USB_STOR_TRANSPORT_ERROR; totallen = sectors * info->ssize; /* * Since we don't read more than 64 KB at a time, we have to create * a bounce buffer and move the data a piece at a time between the * bounce buffer and the actual transfer buffer. 
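	 * With 512-byte sectors this means at most 128 sectors per pass; a
	 * 256-sector request, for instance, is serviced as two 64 KiB reads.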
*/ alloclen = min(totallen, 65536u); buffer = kmalloc(alloclen, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; do { /* * loop, never allocate or transfer more than 64k at once * (min(128k, 255*info->ssize) is the real limit) */ len = min(totallen, alloclen); thistime = (len / info->ssize) & 0xff; /* ATA command 0x20 (READ SECTORS) */ usbat_pack_ata_sector_cmd(command, thistime, sector, 0x20); /* Write/execute ATA read command */ result = usbat_multiple_write(us, registers, command, 7); if (result != USB_STOR_TRANSPORT_GOOD) goto leave; /* Read the data we just requested */ result = usbat_read_blocks(us, buffer, len, 0); if (result != USB_STOR_TRANSPORT_GOOD) goto leave; usb_stor_dbg(us, "%d bytes\n", len); /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &sg_offset, TO_XFER_BUF); sector += thistime; totallen -= len; } while (totallen > 0); kfree(buffer); return USB_STOR_TRANSPORT_GOOD; leave: kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } /* * Write data to device */ static int usbat_flash_write_data(struct us_data *us, struct usbat_info *info, u32 sector, u32 sectors) { unsigned char registers[7] = { USBAT_ATA_FEATURES, USBAT_ATA_SECCNT, USBAT_ATA_SECNUM, USBAT_ATA_LBA_ME, USBAT_ATA_LBA_HI, USBAT_ATA_DEVICE, USBAT_ATA_STATUS, }; unsigned char command[7]; unsigned char *buffer; unsigned char thistime; unsigned int totallen, alloclen; int len, result; unsigned int sg_offset = 0; struct scatterlist *sg = NULL; result = usbat_flash_check_media(us, info); if (result != USB_STOR_TRANSPORT_GOOD) return result; /* * we're working in LBA mode. according to the ATA spec, * we can support up to 28-bit addressing. I don't know if the device * supports beyond 24-bit addressing. It's kind of hard to test * since it requires > 8GB media. */ if (sector > 0x0FFFFFFF) return USB_STOR_TRANSPORT_ERROR; totallen = sectors * info->ssize; /* * Since we don't write more than 64 KB at a time, we have to create * a bounce buffer and move the data a piece at a time between the * bounce buffer and the actual transfer buffer. 
*/ alloclen = min(totallen, 65536u); buffer = kmalloc(alloclen, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; do { /* * loop, never allocate or transfer more than 64k at once * (min(128k, 255*info->ssize) is the real limit) */ len = min(totallen, alloclen); thistime = (len / info->ssize) & 0xff; /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &sg_offset, FROM_XFER_BUF); /* ATA command 0x30 (WRITE SECTORS) */ usbat_pack_ata_sector_cmd(command, thistime, sector, 0x30); /* Write/execute ATA write command */ result = usbat_multiple_write(us, registers, command, 7); if (result != USB_STOR_TRANSPORT_GOOD) goto leave; /* Write the data */ result = usbat_write_blocks(us, buffer, len, 0); if (result != USB_STOR_TRANSPORT_GOOD) goto leave; sector += thistime; totallen -= len; } while (totallen > 0); kfree(buffer); return result; leave: kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } /* * Squeeze a potentially huge (> 65535 byte) read10 command into * a little ( <= 65535 byte) ATAPI pipe */ static int usbat_hp8200e_handle_read10(struct us_data *us, unsigned char *registers, unsigned char *data, struct scsi_cmnd *srb) { int result = USB_STOR_TRANSPORT_GOOD; unsigned char *buffer; unsigned int len; unsigned int sector; unsigned int sg_offset = 0; struct scatterlist *sg = NULL; usb_stor_dbg(us, "transfersize %d\n", srb->transfersize); if (scsi_bufflen(srb) < 0x10000) { result = usbat_hp8200e_rw_block_test(us, USBAT_ATA, registers, data, 19, USBAT_ATA_DATA, USBAT_ATA_STATUS, 0xFD, (USBAT_QUAL_FCQ | USBAT_QUAL_ALQ), DMA_FROM_DEVICE, scsi_sglist(srb), scsi_bufflen(srb), scsi_sg_count(srb), 1); return result; } /* * Since we're requesting more data than we can handle in * a single read command (max is 64k-1), we will perform * multiple reads, but each read must be in multiples of * a sector. Luckily the sector size is in srb->transfersize * (see linux/drivers/scsi/sr.c). */ if (data[7+0] == GPCMD_READ_CD) { len = short_pack(data[7+9], data[7+8]); len <<= 16; len |= data[7+7]; usb_stor_dbg(us, "GPCMD_READ_CD: len %d\n", len); srb->transfersize = scsi_bufflen(srb)/len; } if (!srb->transfersize) { srb->transfersize = 2048; /* A guess */ usb_stor_dbg(us, "transfersize 0, forcing %d\n", srb->transfersize); } /* * Since we only read in one block at a time, we have to create * a bounce buffer and move the data a piece at a time between the * bounce buffer and the actual transfer buffer. */ len = (65535/srb->transfersize) * srb->transfersize; usb_stor_dbg(us, "Max read is %d bytes\n", len); len = min(len, scsi_bufflen(srb)); buffer = kmalloc(len, GFP_NOIO); if (buffer == NULL) /* bloody hell! 
*/ return USB_STOR_TRANSPORT_FAILED; sector = short_pack(data[7+3], data[7+2]); sector <<= 16; sector |= short_pack(data[7+5], data[7+4]); transferred = 0; while (transferred != scsi_bufflen(srb)) { if (len > scsi_bufflen(srb) - transferred) len = scsi_bufflen(srb) - transferred; data[3] = len&0xFF; /* (cylL) = expected length (L) */ data[4] = (len>>8)&0xFF; /* (cylH) = expected length (H) */ /* Fix up the SCSI command sector and num sectors */ data[7+2] = MSB_of(sector>>16); /* SCSI command sector */ data[7+3] = LSB_of(sector>>16); data[7+4] = MSB_of(sector&0xFFFF); data[7+5] = LSB_of(sector&0xFFFF); if (data[7+0] == GPCMD_READ_CD) data[7+6] = 0; data[7+7] = MSB_of(len / srb->transfersize); /* SCSI command */ data[7+8] = LSB_of(len / srb->transfersize); /* num sectors */ result = usbat_hp8200e_rw_block_test(us, USBAT_ATA, registers, data, 19, USBAT_ATA_DATA, USBAT_ATA_STATUS, 0xFD, (USBAT_QUAL_FCQ | USBAT_QUAL_ALQ), DMA_FROM_DEVICE, buffer, len, 0, 1); if (result != USB_STOR_TRANSPORT_GOOD) break; /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, srb, &sg, &sg_offset, TO_XFER_BUF); /* Update the amount transferred and the sector number */ transferred += len; sector += len / srb->transfersize; } /* while transferred != scsi_bufflen(srb) */ kfree(buffer); return result; } static int usbat_select_and_test_registers(struct us_data *us) { int selector; unsigned char *status = us->iobuf; /* try device = master, then device = slave. */ for (selector = 0xA0; selector <= 0xB0; selector += 0x10) { if (usbat_write(us, USBAT_ATA, USBAT_ATA_DEVICE, selector) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_STATUS, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_DEVICE, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_ME, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_HI, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_write(us, USBAT_ATA, USBAT_ATA_LBA_ME, 0x55) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_write(us, USBAT_ATA, USBAT_ATA_LBA_HI, 0xAA) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_ME, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_ME, status) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; } return USB_STOR_TRANSPORT_GOOD; } /* * Initialize the USBAT processor and the storage device */ static int init_usbat(struct us_data *us, int devicetype) { int rc; struct usbat_info *info; unsigned char subcountH = USBAT_ATA_LBA_HI; unsigned char subcountL = USBAT_ATA_LBA_ME; unsigned char *status = us->iobuf; us->extra = kzalloc(sizeof(struct usbat_info), GFP_NOIO); if (!us->extra) return -ENOMEM; info = (struct usbat_info *) (us->extra); /* Enable peripheral control signals */ rc = usbat_write_user_io(us, USBAT_UIO_OE1 | USBAT_UIO_OE0, USBAT_UIO_EPAD | USBAT_UIO_1); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 1\n"); msleep(2000); rc = usbat_read_user_io(us, status); if (rc != USB_STOR_TRANSPORT_GOOD) return -EIO; usb_stor_dbg(us, "INIT 2\n"); rc = usbat_read_user_io(us, status); if (rc != USB_STOR_XFER_GOOD) return -EIO; rc = usbat_read_user_io(us, status); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 3\n"); rc = 
usbat_select_and_test_registers(us); if (rc != USB_STOR_TRANSPORT_GOOD) return -EIO; usb_stor_dbg(us, "INIT 4\n"); rc = usbat_read_user_io(us, status); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 5\n"); /* Enable peripheral control signals and card detect */ rc = usbat_device_enable_cdt(us); if (rc != USB_STOR_TRANSPORT_GOOD) return -EIO; usb_stor_dbg(us, "INIT 6\n"); rc = usbat_read_user_io(us, status); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 7\n"); msleep(1400); rc = usbat_read_user_io(us, status); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 8\n"); rc = usbat_select_and_test_registers(us); if (rc != USB_STOR_TRANSPORT_GOOD) return -EIO; usb_stor_dbg(us, "INIT 9\n"); /* At this point, we need to detect which device we are using */ if (usbat_set_transport(us, info, devicetype)) return -EIO; usb_stor_dbg(us, "INIT 10\n"); if (usbat_get_device_type(us) == USBAT_DEV_FLASH) { subcountH = 0x02; subcountL = 0x00; } rc = usbat_set_shuttle_features(us, (USBAT_FEAT_ETEN | USBAT_FEAT_ET2 | USBAT_FEAT_ET1), 0x00, 0x88, 0x08, subcountH, subcountL); if (rc != USB_STOR_XFER_GOOD) return -EIO; usb_stor_dbg(us, "INIT 11\n"); return 0; } /* * Transport for the HP 8200e */ static int usbat_hp8200e_transport(struct scsi_cmnd *srb, struct us_data *us) { int result; unsigned char *status = us->iobuf; unsigned char registers[32]; unsigned char data[32]; unsigned int len; int i; len = scsi_bufflen(srb); /* * Send A0 (ATA PACKET COMMAND). * Note: I guess we're never going to get any of the ATA * commands... just ATA Packet Commands. */ registers[0] = USBAT_ATA_FEATURES; registers[1] = USBAT_ATA_SECCNT; registers[2] = USBAT_ATA_SECNUM; registers[3] = USBAT_ATA_LBA_ME; registers[4] = USBAT_ATA_LBA_HI; registers[5] = USBAT_ATA_DEVICE; registers[6] = USBAT_ATA_CMD; data[0] = 0x00; data[1] = 0x00; data[2] = 0x00; data[3] = len&0xFF; /* (cylL) = expected length (L) */ data[4] = (len>>8)&0xFF; /* (cylH) = expected length (H) */ data[5] = 0xB0; /* (device sel) = slave */ data[6] = 0xA0; /* (command) = ATA PACKET COMMAND */ for (i=7; i<19; i++) { registers[i] = 0x10; data[i] = (i-7 >= srb->cmd_len) ? 0 : srb->cmnd[i-7]; } result = usbat_get_status(us, status); usb_stor_dbg(us, "Status = %02X\n", *status); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (srb->cmnd[0] == TEST_UNIT_READY) transferred = 0; if (srb->sc_data_direction == DMA_TO_DEVICE) { result = usbat_hp8200e_rw_block_test(us, USBAT_ATA, registers, data, 19, USBAT_ATA_DATA, USBAT_ATA_STATUS, 0xFD, (USBAT_QUAL_FCQ | USBAT_QUAL_ALQ), DMA_TO_DEVICE, scsi_sglist(srb), len, scsi_sg_count(srb), 10); if (result == USB_STOR_TRANSPORT_GOOD) { transferred += len; usb_stor_dbg(us, "Wrote %08X bytes\n", transferred); } return result; } else if (srb->cmnd[0] == READ_10 || srb->cmnd[0] == GPCMD_READ_CD) { return usbat_hp8200e_handle_read10(us, registers, data, srb); } if (len > 0xFFFF) { usb_stor_dbg(us, "Error: len = %08X... what do I do now?\n", len); return USB_STOR_TRANSPORT_ERROR; } result = usbat_multiple_write(us, registers, data, 7); if (result != USB_STOR_TRANSPORT_GOOD) return result; /* * Write the 12-byte command header. * * If the command is BLANK then set the timer for 75 minutes. * Otherwise set it for 10 minutes. * * NOTE: THE 8200 DOCUMENTATION STATES THAT BLANKING A CDRW * AT SPEED 4 IS UNRELIABLE!!! */ result = usbat_write_block(us, USBAT_ATA, srb->cmnd, 12, srb->cmnd[0] == GPCMD_BLANK ? 
75 : 10, 0); if (result != USB_STOR_TRANSPORT_GOOD) return result; /* If there is response data to be read in then do it here. */ if (len != 0 && (srb->sc_data_direction == DMA_FROM_DEVICE)) { /* How many bytes to read in? Check cylL register */ if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_ME, status) != USB_STOR_XFER_GOOD) { return USB_STOR_TRANSPORT_ERROR; } if (len > 0xFF) { /* need to read cylH also */ len = *status; if (usbat_read(us, USBAT_ATA, USBAT_ATA_LBA_HI, status) != USB_STOR_XFER_GOOD) { return USB_STOR_TRANSPORT_ERROR; } len += ((unsigned int) *status)<<8; } else len = *status; result = usbat_read_block(us, scsi_sglist(srb), len, scsi_sg_count(srb)); } return result; } /* * Transport for USBAT02-based CompactFlash and similar storage devices */ static int usbat_flash_transport(struct scsi_cmnd * srb, struct us_data *us) { int rc; struct usbat_info *info = (struct usbat_info *) (us->extra); unsigned long block, blocks; unsigned char *ptr = us->iobuf; static unsigned char inquiry_response[36] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; if (srb->cmnd[0] == INQUIRY) { usb_stor_dbg(us, "INQUIRY - Returning bogus response\n"); memcpy(ptr, inquiry_response, sizeof(inquiry_response)); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_CAPACITY) { rc = usbat_flash_check_media(us, info); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; rc = usbat_flash_get_sector_count(us, info); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; /* hard coded 512 byte sectors as per ATA spec */ info->ssize = 0x200; usb_stor_dbg(us, "READ_CAPACITY: %ld sectors, %ld bytes per sector\n", info->sectors, info->ssize); /* * build the reply * note: must return the sector number of the last sector, * *not* the total number of sectors */ ((__be32 *) ptr)[0] = cpu_to_be32(info->sectors - 1); ((__be32 *) ptr)[1] = cpu_to_be32(info->ssize); usb_stor_set_xfer_buf(ptr, 8, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == MODE_SELECT_10) { usb_stor_dbg(us, "Gah! 
MODE_SELECT_10\n"); return USB_STOR_TRANSPORT_ERROR; } if (srb->cmnd[0] == READ_10) { block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[7]) << 8) | ((u32)(srb->cmnd[8])); usb_stor_dbg(us, "READ_10: read block 0x%04lx count %ld\n", block, blocks); return usbat_flash_read_data(us, info, block, blocks); } if (srb->cmnd[0] == READ_12) { /* * I don't think we'll ever see a READ_12 but support it anyway */ block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[6]) << 24) | ((u32)(srb->cmnd[7]) << 16) | ((u32)(srb->cmnd[8]) << 8) | ((u32)(srb->cmnd[9])); usb_stor_dbg(us, "READ_12: read block 0x%04lx count %ld\n", block, blocks); return usbat_flash_read_data(us, info, block, blocks); } if (srb->cmnd[0] == WRITE_10) { block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[7]) << 8) | ((u32)(srb->cmnd[8])); usb_stor_dbg(us, "WRITE_10: write block 0x%04lx count %ld\n", block, blocks); return usbat_flash_write_data(us, info, block, blocks); } if (srb->cmnd[0] == WRITE_12) { /* * I don't think we'll ever see a WRITE_12 but support it anyway */ block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[6]) << 24) | ((u32)(srb->cmnd[7]) << 16) | ((u32)(srb->cmnd[8]) << 8) | ((u32)(srb->cmnd[9])); usb_stor_dbg(us, "WRITE_12: write block 0x%04lx count %ld\n", block, blocks); return usbat_flash_write_data(us, info, block, blocks); } if (srb->cmnd[0] == TEST_UNIT_READY) { usb_stor_dbg(us, "TEST_UNIT_READY\n"); rc = usbat_flash_check_media(us, info); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; return usbat_check_status(us); } if (srb->cmnd[0] == REQUEST_SENSE) { usb_stor_dbg(us, "REQUEST_SENSE\n"); memset(ptr, 0, 18); ptr[0] = 0xF0; ptr[2] = info->sense_key; ptr[7] = 11; ptr[12] = info->sense_asc; ptr[13] = info->sense_ascq; usb_stor_set_xfer_buf(ptr, 18, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { /* * sure. whatever. not like we can stop the user from popping * the media out of the device (no locking doors, etc) */ return USB_STOR_TRANSPORT_GOOD; } usb_stor_dbg(us, "Gah! Unknown command: %d (0x%x)\n", srb->cmnd[0], srb->cmnd[0]); info->sense_key = 0x05; info->sense_asc = 0x20; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } static int init_usbat_cd(struct us_data *us) { return init_usbat(us, USBAT_DEV_HP8200); } static int init_usbat_flash(struct us_data *us) { return init_usbat(us, USBAT_DEV_FLASH); } static struct scsi_host_template usbat_host_template; static int usbat_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - usbat_usb_ids) + usbat_unusual_dev_list, &usbat_host_template); if (result) return result; /* * The actual transport will be determined later by the * initialization routine; this is just a placeholder. 
	 */
	us->transport_name = "Shuttle USBAT";
	us->transport = usbat_flash_transport;
	us->transport_reset = usb_stor_CB_reset;
	us->max_lun = 0;

	result = usb_stor_probe2(us);
	return result;
}

static struct usb_driver usbat_driver = {
	.name =		DRV_NAME,
	.probe =	usbat_probe,
	.disconnect =	usb_stor_disconnect,
	.suspend =	usb_stor_suspend,
	.resume =	usb_stor_resume,
	.reset_resume =	usb_stor_reset_resume,
	.pre_reset =	usb_stor_pre_reset,
	.post_reset =	usb_stor_post_reset,
	.id_table =	usbat_usb_ids,
	.soft_unbind =	1,
	.no_dynamic_id = 1,
};

module_usb_stor_driver(usbat_driver, usbat_host_template, DRV_NAME);

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Xbox gamepad driver
 *
 * Copyright (c) 2002
Marko Friedemann <mfr@bmx-chemnitz.de> * 2004 Oliver Schwartz <Oliver.Schwartz@gmx.de>, * Steven Toth <steve@toth.demon.co.uk>, * Franz Lehner <franz@caos.at>, * Ivan Hawkes <blackhawk@ivanhawkes.com> * 2005 Dominic Cerquetti <binary1230@yahoo.com> * 2006 Adam Buchbinder <adam.buchbinder@gmail.com> * 2007 Jan Kratochvil <honza@jikos.cz> * 2010 Christoph Fritz <chf.fritz@googlemail.com> * * This driver is based on: * - information from http://euc.jp/periphs/xbox-controller.ja.html * - the iForce driver drivers/char/joystick/iforce.c * - the skeleton-driver drivers/usb/usb-skeleton.c * - Xbox 360 information http://www.free60.org/wiki/Gamepad * - Xbox One information https://github.com/quantus/xbox-one-controller-protocol * * Thanks to: * - ITO Takayuki for providing essential xpad information on his website * - Vojtech Pavlik - iforce driver / input subsystem * - Greg Kroah-Hartman - usb-skeleton driver * - Xbox Linux project - extra USB IDs * - Pekka Pöyry (quantus) - Xbox One controller reverse-engineering * * TODO: * - fine tune axes (especially trigger axes) * - fix "analog" buttons (reported as digital now) * - get rumble working * - need USB IDs for other dance pads * * History: * * 2002-06-27 - 0.0.1 : first version, just said "XBOX HID controller" * * 2002-07-02 - 0.0.2 : basic working version * - all axes and 9 of the 10 buttons work (german InterAct device) * - the black button does not work * * 2002-07-14 - 0.0.3 : rework by Vojtech Pavlik * - indentation fixes * - usb + input init sequence fixes * * 2002-07-16 - 0.0.4 : minor changes, merge with Vojtech's v0.0.3 * - verified the lack of HID and report descriptors * - verified that ALL buttons WORK * - fixed d-pad to axes mapping * * 2002-07-17 - 0.0.5 : simplified d-pad handling * * 2004-10-02 - 0.0.6 : DDR pad support * - borrowed from the Xbox Linux kernel * - USB id's for commonly used dance pads are present * - dance pads will map D-PAD to buttons, not axes * - pass the module paramater 'dpad_to_buttons' to force * the D-PAD to map to buttons if your pad is not detected * * Later changes can be tracked in SCM. 
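 *
 * Note: dpad_to_buttons (like the other mapping options declared below) can
 * be set at module load time, e.g. "modprobe xpad dpad_to_buttons=1".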
*/ #include <linux/bits.h> #include <linux/kernel.h> #include <linux/input.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/usb/quirks.h> #define XPAD_PKT_LEN 64 /* * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ #define MAP_DPAD_TO_BUTTONS (1 << 0) #define MAP_TRIGGERS_TO_BUTTONS (1 << 1) #define MAP_STICKS_TO_NULL (1 << 2) #define MAP_SELECT_BUTTON (1 << 3) #define MAP_PADDLES (1 << 4) #define MAP_PROFILE_BUTTON (1 << 5) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) #define XTYPE_XBOX 0 #define XTYPE_XBOX360 1 #define XTYPE_XBOX360W 2 #define XTYPE_XBOXONE 3 #define XTYPE_UNKNOWN 4 /* Send power-off packet to xpad360w after holding the mode button for this many * seconds */ #define XPAD360W_POWEROFF_TIMEOUT 5 #define PKT_XB 0 #define PKT_XBE1 1 #define PKT_XBE2_FW_OLD 2 #define PKT_XBE2_FW_5_EARLY 3 #define PKT_XBE2_FW_5_11 4 static bool dpad_to_buttons; module_param(dpad_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads"); static bool triggers_to_buttons; module_param(triggers_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads"); static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); static bool auto_poweroff = true; module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); static const struct xpad_device { u16 idVendor; u16 idProduct; char *name; u8 mapping; u8 xtype; } xpad_device[] = { /* Please keep this list sorted by vendor and product ID. */ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. 
Controller", 0, XTYPE_XBOX }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, { 0x045e, 0x0288, "Microsoft Xbox Controller S v2", 0, XTYPE_XBOX }, { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, { 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 }, { 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX }, { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, { 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX }, { 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 }, { 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 }, { 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX }, { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4522, "Mad Catz LumiCON", 0, XTYPE_XBOX }, { 0x0738, 0x4526, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4530, "Mad Catz Universal MC2 Racing Wheel and Pedals", 0, XTYPE_XBOX }, { 0x0738, 0x4536, "Mad Catz MicroCON", 0, XTYPE_XBOX }, { 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4586, "Mad Catz MicroCon Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4588, "Mad Catz Blaster", 0, XTYPE_XBOX }, { 0x0738, 0x45ff, "Mad Catz Beat Pad (w/ Handle)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4718, "Mad Catz Street Fighter IV FightStick SE", 0, XTYPE_XBOX360 }, { 0x0738, 0x4726, "Mad Catz 
Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4736, "Mad Catz MicroCon Gamepad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4743, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4758, "Mad Catz Arcade Game Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4a01, "Mad Catz FightStick TE 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX }, { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x3510, "Radica Gamester", 0, XTYPE_XBOX }, { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0008, "After Glow Pro Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0131, "PDP EA Sports Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0133, "Xbox 360 Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x013a, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP
Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0164, "PDP Battlefield One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0165, "PDP Titanfall 2", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0501, "PDP Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0xf900, "PDP Afterglow AX.1", 0, XTYPE_XBOX360 }, { 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX }, { 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX }, { 0x0f0d, 0x000a, "Hori Co. 
DOA4 FightStick", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000c, "Hori PadEX Turbo", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x001b, "Hori Real Arcade Pro VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0063, "Hori Real Arcade Pro Hayabusa (USA) Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x1532, 0x0a29, "Razer Wolverine V2", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 }, { 0x1bad, 
0xf025, "Mad Catz Call Of Duty", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf027, "Mad Catz FPS Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf030, "Mad Catz Xbox 360 MC2 MicroCon Racing Wheel", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf036, "Mad Catz MicroCon GamePad Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf039, "Mad Catz MvC2 TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03d, "Street Fighter IV Arcade Stick TE - Chun Li", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03e, "Mad Catz MLG FightStick TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03f, "Mad Catz FightStick SoulCaliber", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf042, "Mad Catz FightStick TES+", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf080, "Mad Catz FightStick TE2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf501, "HoriPad EX2 Turbo", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf502, "Hori Real Arcade Pro.VX SA", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf503, "Hori Fighting Stick VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf504, "Hori Real Arcade Pro. EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf505, "Hori Fighting Stick EX2B", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf506, "Hori Real Arcade Pro.EX Premium VLX", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf904, "PDP Versus Fighting Pad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf906, "MortalKombat FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x2345, 0xe00b, "Machenike G5 Pro Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x530a, "Xbox 360 Pro EX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE }, { 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550e, "Hori 
Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; /* buttons shared with xbox and xbox360 */ static const signed short xpad_common_btn[] = { BTN_A, BTN_B, BTN_X, BTN_Y, /* "analog" buttons */ BTN_START, BTN_SELECT, BTN_THUMBL, BTN_THUMBR, /* start/back/sticks */ -1 /* terminating entry */ }; /* original xbox controllers only */ static const signed short xpad_btn[] = { BTN_C, BTN_Z, /* "analog" buttons */ -1 /* terminating entry */ }; /* used when dpad is mapped to buttons */ static const signed short xpad_btn_pad[] = { BTN_TRIGGER_HAPPY1, BTN_TRIGGER_HAPPY2, /* d-pad left, right */ BTN_TRIGGER_HAPPY3, BTN_TRIGGER_HAPPY4, /* d-pad up, down */ -1 /* terminating entry */ }; /* used when triggers are mapped to buttons */ static const signed short xpad_btn_triggers[] = { BTN_TL2, BTN_TR2, /* triggers left/right */ -1 }; static const signed short xpad360_btn[] = { /* buttons for x360 controller */ BTN_TL, BTN_TR, /* Button LB/RB */ BTN_MODE, /* The big X button */ -1 }; static const signed short xpad_abs[] = { ABS_X, ABS_Y, /* left stick */ ABS_RX, ABS_RY, /* right stick */ -1 /* terminating entry */ }; /* used when dpad is mapped to axes */ static const signed short xpad_abs_pad[] = { ABS_HAT0X, ABS_HAT0Y, /* d-pad axes */ -1 /* terminating entry */ }; /* used when triggers are mapped to axes */ static const signed short xpad_abs_triggers[] = { ABS_Z, ABS_RZ, /* triggers left/right */ -1 }; /* used when the controller has extra paddle buttons */ static const signed short xpad_btn_paddles[] = { BTN_TRIGGER_HAPPY5, BTN_TRIGGER_HAPPY6, /* paddle upper right, lower right */ BTN_TRIGGER_HAPPY7, BTN_TRIGGER_HAPPY8, /* paddle upper left, lower left */ -1 /* terminating entry */ }; /* * Xbox 360 has a vendor-specific class, so we cannot match it with only * 
USB_INTERFACE_INFO (also specifically refused by USB subsystem), so we * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. */ #define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { /* * Please keep this list sorted by vendor ID. Note that there are 2 * macros - XPAD_XBOX360_VENDOR and XPAD_XBOXONE_VENDOR. */ { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* Bigben Interactive controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike 
Controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ { } }; MODULE_DEVICE_TABLE(usb, xpad_table); struct xboxone_init_packet { u16 idVendor; u16 idProduct; const u8 *data; u8 len; }; #define XBOXONE_INIT_PKT(_vid, _pid, _data) \ { \ .idVendor = (_vid), \ .idProduct = (_pid), \ .data = (_data), \ .len = ARRAY_SIZE(_data), \ } /* * starting with xbox one, the game input protocol is used * magic numbers are taken from * - https://github.com/xpadneo/gip-dissector/blob/main/src/gip-dissector.lua * - https://github.com/medusalix/xone/blob/master/bus/protocol.c */ #define GIP_CMD_ACK 0x01 #define GIP_CMD_IDENTIFY 0x04 #define GIP_CMD_POWER 0x05 #define GIP_CMD_AUTHENTICATE 0x06 #define GIP_CMD_VIRTUAL_KEY 0x07 #define GIP_CMD_RUMBLE 0x09 #define GIP_CMD_LED 0x0a #define GIP_CMD_FIRMWARE 0x0c #define GIP_CMD_INPUT 0x20 #define GIP_SEQ0 0x00 #define GIP_OPT_ACK 0x10 #define GIP_OPT_INTERNAL 0x20 /* * length of the command payload encoded with * https://en.wikipedia.org/wiki/LEB128 * which is a no-op for N < 128 */ #define GIP_PL_LEN(N) (N) /* * payload specific defines */ #define GIP_PWR_ON 0x00 #define GIP_LED_ON 0x01 #define GIP_MOTOR_R BIT(0) #define GIP_MOTOR_L BIT(1) #define GIP_MOTOR_RT BIT(2) #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) #define GIP_WIRED_INTF_DATA 0 #define GIP_WIRED_INTF_AUDIO 1 /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). */ static const u8 xboxone_power_on[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(1), GIP_PWR_ON }; /* * This packet is required for Xbox One S (0x045e:0x02ea) * and Xbox One Elite Series 2 (0x045e:0x0b00) pads to * initialize the controller that was previously used in * Bluetooth mode. */ static const u8 xboxone_s_init[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, 0x0f, 0x06 }; /* * This packet is required to get additional input data * from Xbox One Elite Series 2 (0x045e:0x0b00) pads. * We mostly do this right now to get paddle data */ static const u8 extra_input_packet_init[] = { 0x4d, 0x10, 0x01, 0x02, 0x07, 0x00 }; /* * This packet is required for the Titanfall 2 Xbox One pads * (0x0e6f:0x0165) to finish initialization and for Hori pads * (0x0f0d:0x0067) to make the analog sticks work. */ static const u8 xboxone_hori_ack_id[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_IDENTIFY, GIP_OPT_INTERNAL, 0x3a, 0x00, 0x00, 0x00, 0x80, 0x00 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). 
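* (Both this LED packet and the authentication packet below are sent to every * 0x0e6f device: the xboxone_init_packets[] entries for them use an idProduct of * 0x0000, which xpad_prepare_next_init_packet() treats as a wildcard.)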
*/ static const u8 xboxone_pdp_led_on[] = { GIP_CMD_LED, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(3), 0x00, GIP_LED_ON, 0x14 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_auth[] = { GIP_CMD_AUTHENTICATE, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(2), 0x01, 0x00 }; /* * A specific rumble packet is required for some PowerA pads to start * sending input reports. One of those pads is (0x24c6:0x543a). */ static const u8 xboxone_rumblebegin_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x1D, 0x1D, 0xFF, 0x00, 0x00 }; /* * A rumble packet with zero FF intensity will immediately * terminate the rumbling required to init PowerA pads. * This should happen fast enough that the motors don't * spin up to enough speed to actually vibrate the gamepad. */ static const u8 xboxone_rumbleend_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* * This specifies the selection of init packets that a gamepad * will be sent on init *and* the order in which they will be * sent. The correct sequence number will be added when the * packet is going to be sent. */ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_power_on), XBOXONE_INIT_PKT(0x045e, 0x02ea, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumbleend_init), }; struct xpad_output_packet { u8 data[XPAD_PKT_LEN]; u8 len; bool pending; }; #define XPAD_OUT_CMD_IDX 0 #define XPAD_OUT_FF_IDX 1 #define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF)) #define XPAD_NUM_OUT_PACKETS (1 + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS)) struct usb_xpad { struct input_dev *dev; /* input device interface */ struct input_dev __rcu *x360w_dev; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* usb interface */ bool pad_present; bool input_created; struct urb *irq_in; /* urb for interrupt in report */ unsigned char *idata; /* input data */ dma_addr_t idata_dma; struct urb *irq_out; /* urb for interrupt out report */ struct usb_anchor irq_out_anchor; bool irq_out_active; /* we must not use an active URB */ u8 odata_serial; /* serial number for xbox one protocol */ unsigned char *odata; /* output data */ dma_addr_t odata_dma; spinlock_t odata_lock; struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS]; int last_out_packet; int init_seq; #if defined(CONFIG_JOYSTICK_XPAD_LEDS) struct xpad_led *led; #endif char phys[64]; /* physical device path */ int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ int packet_type; /* type of the extended packet */ int pad_nr; /* the order x360 pads were attached */ const char *name; /* name of the 
device */ struct work_struct work; /* init/remove device from callback */ time64_t mode_btn_down_ts; }; static int xpad_init_input(struct usb_xpad *xpad); static void xpad_deinit_input(struct usb_xpad *xpad); static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num); static void xpad360w_poweroff_controller(struct usb_xpad *xpad); /* * xpad_process_packet * * Completes a request by converting the data into events for the * input subsystem. * * The used report descriptor was taken from ITO Takayuki's website: * http://euc.jp/periphs/xbox-controller.ja.html */ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 12))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 14))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 16))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 18))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[10]); input_report_key(dev, BTN_TR2, data[11]); } else { input_report_abs(dev, ABS_Z, data[10]); input_report_abs(dev, ABS_RZ, data[11]); } /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons and stick press left/right */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* "analog" buttons A, B, X, Y */ input_report_key(dev, BTN_A, data[4]); input_report_key(dev, BTN_B, data[5]); input_report_key(dev, BTN_X, data[6]); input_report_key(dev, BTN_Y, data[7]); /* "analog" buttons black, white */ input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); input_sync(dev); } /* * xpad360_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 controller * * The used report descriptor was taken from: * http://www.free60.org/wiki/Gamepad */ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev, u16 cmd, unsigned char *data) { /* valid pad data */ if (data[0] != 0x00) return; /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. 
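* As a result, an xbox360w pad with MAP_DPAD_TO_BUTTONS set (as the wireless * receiver entries in the device table have) reports the d-pad both as * BTN_TRIGGER_HAPPY1..4 above and as ABS_HAT0X/ABS_HAT0Y below.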
*/ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* buttons A,B,X,Y,TL,TR and MODE */ input_report_key(dev, BTN_A, data[3] & BIT(4)); input_report_key(dev, BTN_B, data[3] & BIT(5)); input_report_key(dev, BTN_X, data[3] & BIT(6)); input_report_key(dev, BTN_Y, data[3] & BIT(7)); input_report_key(dev, BTN_TL, data[3] & BIT(0)); input_report_key(dev, BTN_TR, data[3] & BIT(1)); input_report_key(dev, BTN_MODE, data[3] & BIT(2)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 8))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[4]); input_report_key(dev, BTN_TR2, data[5]); } else { input_report_abs(dev, ABS_Z, data[4]); input_report_abs(dev, ABS_RZ, data[5]); } input_sync(dev); /* XBOX360W controllers can't be turned off without driver assistance */ if (xpad->xtype == XTYPE_XBOX360W) { if (xpad->mode_btn_down_ts > 0 && xpad->pad_present && ((ktime_get_seconds() - xpad->mode_btn_down_ts) >= XPAD360W_POWEROFF_TIMEOUT)) { xpad360w_poweroff_controller(xpad); xpad->mode_btn_down_ts = 0; return; } /* mode button down/up */ if (data[3] & BIT(2)) xpad->mode_btn_down_ts = ktime_get_seconds(); else xpad->mode_btn_down_ts = 0; } } static void xpad_presence_work(struct work_struct *work) { struct usb_xpad *xpad = container_of(work, struct usb_xpad, work); int error; if (xpad->pad_present) { error = xpad_init_input(xpad); if (error) { /* complain only, not much else we can do here */ dev_err(&xpad->dev->dev, "unable to init device: %d\n", error); } else { rcu_assign_pointer(xpad->x360w_dev, xpad->dev); } } else { RCU_INIT_POINTER(xpad->x360w_dev, NULL); synchronize_rcu(); /* * Now that we are sure xpad360w_process_packet is not * using input device we can get rid of it. */ xpad_deinit_input(xpad); } } /* * xpad360w_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 wireless controller. * * Byte.Bit * 00.1 - Status change: The controller or headset has connected/disconnected * Bits 01.7 and 01.6 are valid * 01.7 - Controller present * 01.6 - Headset present * 01.1 - Pad state (Bytes 4+) valid * */ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev; bool present; /* Presence change */ if (data[0] & 0x08) { present = (data[1] & 0x80) != 0; if (xpad->pad_present != present) { xpad->pad_present = present; schedule_work(&xpad->work); } } /* Valid pad data */ if (data[1] != 0x1) return; rcu_read_lock(); dev = rcu_dereference(xpad->x360w_dev); if (dev) xpad360_process_packet(xpad, dev, cmd, &data[4]); rcu_read_unlock(); } /* * xpadone_process_packet * * Completes a request by converting the data into events for the * input subsystem. 
This version is for the Xbox One controller. * * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; bool do_sync = false; /* the xbox button has its own special report */ if (data[0] == GIP_CMD_VIRTUAL_KEY) { /* * The Xbox One S controller requires these reports to be * acked otherwise it continues sending them forever and * won't report further mode button events. */ if (data[1] == (GIP_OPT_ACK | GIP_OPT_INTERNAL)) xpadone_ack_mode_report(xpad, data[2]); input_report_key(dev, BTN_MODE, data[4] & GENMASK(1, 0)); input_sync(dev); do_sync = true; } else if (data[0] == GIP_CMD_FIRMWARE) { /* Some packet formats force us to use this separate to poll paddle inputs */ if (xpad->packet_type == PKT_XBE2_FW_5_11) { /* Mute paddles if controller is in a custom profile slot * Checked by looking at the active profile slot to * verify it's the default slot */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 split packet paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); do_sync = true; } } else if (data[0] == GIP_CMD_INPUT) { /* The main valid packet type for inputs */ /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); if (xpad->mapping & MAP_SELECT_BUTTON) input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); input_report_key(dev, BTN_B, data[4] & BIT(5)); input_report_key(dev, BTN_X, data[4] & BIT(6)); input_report_key(dev, BTN_Y, data[4] & BIT(7)); /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[5] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[5] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[5] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[5] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[5] & 0x08) - !!(data[5] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[5] & 0x02) - !!(data[5] & 0x01)); } /* TL/TR */ input_report_key(dev, BTN_TL, data[5] & BIT(4)); input_report_key(dev, BTN_TR, data[5] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[5] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[5] & BIT(7)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 14))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 16))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_key(dev, BTN_TR2, (__u16) le16_to_cpup((__le16 *)(data + 8))); } else { input_report_abs(dev, ABS_Z, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_RZ, (__u16) le16_to_cpup((__le16 *)(data + 8))); } /* Profile button has a value of 0-3, so it is reported as an axis */ if (xpad->mapping & MAP_PROFILE_BUTTON) input_report_abs(dev, 
ABS_PROFILE, data[34]); /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { if (xpad->packet_type == PKT_XBE1) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (memcmp(&data[4], &data[18], 2) != 0) data[32] = 0; /* OG Elite Series Controller paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[32] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[32] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[32] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[32] & BIT(2)); } else if (xpad->packet_type == PKT_XBE2_FW_OLD) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 4.x firmware paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); } else if (xpad->packet_type == PKT_XBE2_FW_5_EARLY) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[23] != 0) data[22] = 0; /* Elite Series 2 5.x firmware paddle bits * (before the packet was split) */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[22] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[22] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[22] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[22] & BIT(3)); } } do_sync = true; } if (do_sync) input_sync(dev); } static void xpad_irq_in(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int retval, status; status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } switch (xpad->xtype) { case XTYPE_XBOX360: xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata); break; case XTYPE_XBOX360W: xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: xpadone_process_packet(xpad, 0, xpad->idata); break; default: xpad_process_packet(xpad, 0, xpad->idata); } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_init_packet(struct usb_xpad *xpad) { const struct xboxone_init_packet *init_packet; if (xpad->xtype != XTYPE_XBOXONE) return false; /* Perform initialization sequence for Xbox One pads that require it */ while (xpad->init_seq < ARRAY_SIZE(xboxone_init_packets)) { init_packet = &xboxone_init_packets[xpad->init_seq++]; if (init_packet->idVendor != 0 && init_packet->idVendor != xpad->dev->id.vendor) continue; if (init_packet->idProduct != 0 && init_packet->idProduct != xpad->dev->id.product) continue; /* This packet applies to our device, so prepare to send it */ memcpy(xpad->odata, init_packet->data, init_packet->len); xpad->irq_out->transfer_buffer_length = init_packet->len; /* Update packet with current sequence number */ 
xpad->odata[2] = xpad->odata_serial++; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad) { struct xpad_output_packet *pkt, *packet = NULL; int i; /* We may have init packets to send before we can send user commands */ if (xpad_prepare_next_init_packet(xpad)) return true; for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) { if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS) xpad->last_out_packet = 0; pkt = &xpad->out_packets[xpad->last_out_packet]; if (pkt->pending) { dev_dbg(&xpad->intf->dev, "%s - found pending output packet %d\n", __func__, xpad->last_out_packet); packet = pkt; break; } } if (packet) { memcpy(xpad->odata, packet->data, packet->len); xpad->irq_out->transfer_buffer_length = packet->len; packet->pending = false; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) { int error; if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) { usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { dev_err(&xpad->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(xpad->irq_out); return -EIO; } xpad->irq_out_active = true; } return 0; } static void xpad_irq_out(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int status = urb->status; int error; unsigned long flags; spin_lock_irqsave(&xpad->odata_lock, flags); switch (status) { case 0: /* success */ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); xpad->irq_out_active = false; break; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } if (xpad->irq_out_active) { usb_anchor_urb(urb, &xpad->irq_out_anchor); error = usb_submit_urb(urb, GFP_ATOMIC); if (error) { dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(urb); xpad->irq_out_active = false; } } spin_unlock_irqrestore(&xpad->odata_lock, flags); } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad, struct usb_endpoint_descriptor *ep_irq_out) { int error; if (xpad->xtype == XTYPE_UNKNOWN) return 0; init_usb_anchor(&xpad->irq_out_anchor); xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->odata_dma); if (!xpad->odata) return -ENOMEM; spin_lock_init(&xpad->odata_lock); xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_out) { error = -ENOMEM; goto err_free_coherent; } usb_fill_int_urb(xpad->irq_out, xpad->udev, usb_sndintpipe(xpad->udev, ep_irq_out->bEndpointAddress), xpad->odata, XPAD_PKT_LEN, xpad_irq_out, xpad, ep_irq_out->bInterval); xpad->irq_out->transfer_dma = xpad->odata_dma; xpad->irq_out->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; err_free_coherent: usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); return error; } static void xpad_stop_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor, 5000)) { dev_warn(&xpad->intf->dev, "timed out waiting for output URB to complete, killing\n"); usb_kill_anchored_urbs(&xpad->irq_out_anchor); } } } static void xpad_deinit_output(struct usb_xpad *xpad) { if (xpad->xtype != 
XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); } } static int xpad_inquiry_pad_presence(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; unsigned long flags; int retval; spin_lock_irqsave(&xpad->odata_lock, flags); packet->data[0] = 0x08; packet->data[1] = 0x00; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out presence first */ xpad->last_out_packet = -1; retval = xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static int xpad_start_xbox_one(struct usb_xpad *xpad) { unsigned long flags; int retval; if (usb_ifnum_to_if(xpad->udev, GIP_WIRED_INTF_AUDIO)) { /* * Explicitly disable the audio interface. This is needed * for some controllers, such as the PowerA Enhanced Wired * Controller for Series X|S (0x20d6:0x200e) to report the * guide button. */ retval = usb_set_interface(xpad->udev, GIP_WIRED_INTF_AUDIO, 0); if (retval) dev_warn(&xpad->dev->dev, "unable to disable audio interface: %d\n", retval); } spin_lock_irqsave(&xpad->odata_lock, flags); /* * Begin the init sequence by attempting to send a packet. * We will cycle through the init packet sequence before * sending any packets from the output ring. */ xpad->init_seq = 0; retval = xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num) { unsigned long flags; struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; static const u8 mode_report_ack[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_VIRTUAL_KEY, GIP_OPT_INTERNAL, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; spin_lock_irqsave(&xpad->odata_lock, flags); packet->len = sizeof(mode_report_ack); memcpy(packet->data, mode_report_ack, packet->len); packet->data[2] = seq_num; packet->pending = true; /* Reset the sequence so we send out the ack now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } #ifdef CONFIG_JOYSTICK_XPAD_FF static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct usb_xpad *xpad = input_get_drvdata(dev); struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX]; __u16 strong; __u16 weak; int retval; unsigned long flags; if (effect->type != FF_RUMBLE) return 0; strong = effect->u.rumble.strong_magnitude; weak = effect->u.rumble.weak_magnitude; spin_lock_irqsave(&xpad->odata_lock, flags); switch (xpad->xtype) { case XTYPE_XBOX: packet->data[0] = 0x00; packet->data[1] = 0x06; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator */ packet->data[4] = 0x00; packet->data[5] = weak / 256; /* right actuator */ packet->len = 6; packet->pending = true; break; case XTYPE_XBOX360: packet->data[0] = 0x00; packet->data[1] = 0x08; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator? */ packet->data[4] = weak / 256; /* right actuator? 
*/ packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->len = 8; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x01; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = strong / 256; packet->data[6] = weak / 256; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; case XTYPE_XBOXONE: packet->data[0] = GIP_CMD_RUMBLE; /* activate rumble */ packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; packet->data[3] = GIP_PL_LEN(9); packet->data[4] = 0x00; packet->data[5] = GIP_MOTOR_ALL; packet->data[6] = 0x00; /* left trigger */ packet->data[7] = 0x00; /* right trigger */ packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ packet->data[10] = 0xFF; /* on period */ packet->data[11] = 0x00; /* off period */ packet->data[12] = 0xFF; /* repeat count */ packet->len = 13; packet->pending = true; break; default: dev_dbg(&xpad->dev->dev, "%s - rumble command sent to unsupported xpad type: %d\n", __func__, xpad->xtype); retval = -EINVAL; goto out; } retval = xpad_try_sending_next_out_packet(xpad); out: spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static int xpad_init_ff(struct usb_xpad *xpad) { if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(xpad->dev, NULL, xpad_play_effect); } #else static int xpad_init_ff(struct usb_xpad *xpad) { return 0; } #endif #if defined(CONFIG_JOYSTICK_XPAD_LEDS) #include <linux/leds.h> #include <linux/idr.h> static DEFINE_IDA(xpad_pad_seq); struct xpad_led { char name[16]; struct led_classdev led_cdev; struct usb_xpad *xpad; }; /* * set the LEDs on Xbox 360 / Wireless Controllers * @param command * 0: off * 1: all blink, then previous setting * 2: 1/top-left blink, then on * 3: 2/top-right blink, then on * 4: 3/bottom-left blink, then on * 5: 4/bottom-right blink, then on * 6: 1/top-left on * 7: 2/top-right on * 8: 3/bottom-left on * 9: 4/bottom-right on * 10: rotate * 11: blink, based on previous setting * 12: slow blink, based on previous setting * 13: rotate with two lights * 14: persistent slow all blink * 15: blink once, then previous setting */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_LED_IDX]; unsigned long flags; command %= 16; spin_lock_irqsave(&xpad->odata_lock, flags); switch (xpad->xtype) { case XTYPE_XBOX360: packet->data[0] = 0x01; packet->data[1] = 0x03; packet->data[2] = command; packet->len = 3; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0x40 + command; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; } xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } /* * Light up the segment corresponding to the pad number on * Xbox 360 Controllers. 
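* (pad_nr % 4) + 2 picks LED command 2..5, i.e. "blink, then on" for quadrant * 1..4 in the command list above.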
*/ static void xpad_identify_controller(struct usb_xpad *xpad) { led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { struct xpad_led *xpad_led = container_of(led_cdev, struct xpad_led, led_cdev); xpad_send_led_command(xpad_led->xpad, value); } static int xpad_led_probe(struct usb_xpad *xpad) { struct xpad_led *led; struct led_classdev *led_cdev; int error; if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(*led), GFP_KERNEL); if (!led) return -ENOMEM; xpad->pad_nr = ida_alloc(&xpad_pad_seq, GFP_KERNEL); if (xpad->pad_nr < 0) { error = xpad->pad_nr; goto err_free_mem; } snprintf(led->name, sizeof(led->name), "xpad%d", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; led_cdev->name = led->name; led_cdev->brightness_set = xpad_led_set; led_cdev->flags = LED_CORE_SUSPENDRESUME; error = led_classdev_register(&xpad->udev->dev, led_cdev); if (error) goto err_free_id; xpad_identify_controller(xpad); return 0; err_free_id: ida_free(&xpad_pad_seq, xpad->pad_nr); err_free_mem: kfree(led); xpad->led = NULL; return error; } static void xpad_led_disconnect(struct usb_xpad *xpad) { struct xpad_led *xpad_led = xpad->led; if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); ida_free(&xpad_pad_seq, xpad->pad_nr); kfree(xpad_led); } } #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } #endif static int xpad_start_input(struct usb_xpad *xpad) { int error; if (usb_submit_urb(xpad->irq_in, GFP_KERNEL)) return -EIO; if (xpad->xtype == XTYPE_XBOXONE) { error = xpad_start_xbox_one(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } } if (xpad->xtype == XTYPE_XBOX360) { /* * Some third-party controllers Xbox 360-style controllers * require this message to finish initialization. */ u8 dummy[20]; error = usb_control_msg_recv(xpad->udev, 0, /* bRequest */ 0x01, /* bmRequestType */ USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, /* wValue */ 0x100, /* wIndex */ 0x00, dummy, sizeof(dummy), 25, GFP_KERNEL); if (error) dev_warn(&xpad->dev->dev, "unable to receive magic message: %d\n", error); } return 0; } static void xpad_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); } static void xpad360w_poweroff_controller(struct usb_xpad *xpad) { unsigned long flags; struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; spin_lock_irqsave(&xpad->odata_lock, flags); packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out poweroff now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } static int xpad360w_start_input(struct usb_xpad *xpad) { int error; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) return -EIO; /* * Send presence packet. * This will force the controller to resend connection packets. * This is useful in the case we activate the module after the * adapter has been plugged in, as it won't automatically * send us info about the controllers. 
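* The reply is handled by xpad360w_process_packet(), which schedules * xpad_presence_work() to create or remove the corresponding input device.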
*/ error = xpad_inquiry_pad_presence(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } return 0; } static void xpad360w_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); /* Make sure we are done with presence work if it was scheduled */ flush_work(&xpad->work); } static int xpad_open(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); return xpad_start_input(xpad); } static void xpad_close(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); xpad_stop_input(xpad); } static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); switch (abs) { case ABS_X: case ABS_Y: case ABS_RX: case ABS_RY: /* the two sticks */ input_set_abs_params(input_dev, abs, -32768, 32767, 16, 128); break; case ABS_Z: case ABS_RZ: /* the triggers (if mapped to axes) */ if (xpad->xtype == XTYPE_XBOXONE) input_set_abs_params(input_dev, abs, 0, 1023, 0, 0); else input_set_abs_params(input_dev, abs, 0, 255, 0, 0); break; case ABS_HAT0X: case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; case ABS_PROFILE: /* 4 value profile button (such as on XAC) */ input_set_abs_params(input_dev, abs, 0, 4, 0, 0); break; default: input_set_abs_params(input_dev, abs, 0, 0, 0, 0); break; } } static void xpad_deinit_input(struct usb_xpad *xpad) { if (xpad->input_created) { xpad->input_created = false; xpad_led_disconnect(xpad); input_unregister_device(xpad->dev); } } static int xpad_init_input(struct usb_xpad *xpad) { struct input_dev *input_dev; int i, error; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; xpad->dev = input_dev; input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); if (xpad->xtype == XTYPE_XBOX360W) { /* x360w controllers and the receiver have different ids */ input_dev->id.product = 0x02a1; } input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); if (xpad->xtype != XTYPE_XBOX360W) { input_dev->open = xpad_open; input_dev->close = xpad_close; } if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); } /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); if (xpad->mapping & MAP_SELECT_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_pad[i]); } /* set up paddles if the controller has them */ if (xpad->mapping & MAP_PADDLES) { for (i = 0; xpad_btn_paddles[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_paddles[i]); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. 
*/ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { for (i = 0; xpad_abs_pad[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_pad[i]); } if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); } /* setup profile button as an axis with 4 possible values */ if (xpad->mapping & MAP_PROFILE_BUTTON) xpad_set_up_abs(input_dev, ABS_PROFILE); error = xpad_init_ff(xpad); if (error) goto err_free_input; error = xpad_led_probe(xpad); if (error) goto err_destroy_ff; error = input_register_device(xpad->dev); if (error) goto err_disconnect_led; xpad->input_created = true; return 0; err_disconnect_led: xpad_led_disconnect(xpad); err_destroy_ff: input_ff_destroy(input_dev); err_free_input: input_free_device(input_dev); return error; } static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_xpad *xpad; struct usb_endpoint_descriptor *ep_irq_in, *ep_irq_out; int i, error; if (intf->cur_altsetting->desc.bNumEndpoints != 2) return -ENODEV; for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) break; } xpad = kzalloc(sizeof(*xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->idata_dma); if (!xpad->idata) { error = -ENOMEM; goto err_free_mem; } xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_in) { error = -ENOMEM; goto err_free_idata; } xpad->udev = udev; xpad->intf = intf; xpad->mapping = xpad_device[i].mapping; xpad->xtype = xpad_device[i].xtype; xpad->name = xpad_device[i].name; xpad->packet_type = PKT_XB; INIT_WORK(&xpad->work, xpad_presence_work); if (xpad->xtype == XTYPE_UNKNOWN) { if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { xpad->xtype = XTYPE_XBOX; } if (dpad_to_buttons) xpad->mapping |= MAP_DPAD_TO_BUTTONS; if (triggers_to_buttons) xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; if (sticks_to_null) xpad->mapping |= MAP_STICKS_TO_NULL; } if (xpad->xtype == XTYPE_XBOXONE && intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by * interface number. 
*/ error = -ENODEV; goto err_free_in_urb; } ep_irq_in = ep_irq_out = NULL; for (i = 0; i < 2; i++) { struct usb_endpoint_descriptor *ep = &intf->cur_altsetting->endpoint[i].desc; if (usb_endpoint_xfer_int(ep)) { if (usb_endpoint_dir_in(ep)) ep_irq_in = ep; else ep_irq_out = ep; } } if (!ep_irq_in || !ep_irq_out) { error = -ENODEV; goto err_free_in_urb; } error = xpad_init_output(intf, xpad, ep_irq_out); if (error) goto err_free_in_urb; usb_fill_int_urb(xpad->irq_in, udev, usb_rcvintpipe(udev, ep_irq_in->bEndpointAddress), xpad->idata, XPAD_PKT_LEN, xpad_irq_in, xpad, ep_irq_in->bInterval); xpad->irq_in->transfer_dma = xpad->idata_dma; xpad->irq_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_set_intfdata(intf, xpad); /* Packet type detection */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x045e) { /* Microsoft controllers */ if (le16_to_cpu(udev->descriptor.idProduct) == 0x02e3) { /* The original elite controller always uses the oldest * type of extended packet */ xpad->packet_type = PKT_XBE1; } else if (le16_to_cpu(udev->descriptor.idProduct) == 0x0b00) { /* The elite 2 controller has seen multiple packet * revisions. These are tied to specific firmware * versions */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x0500) { /* This is the format that the Elite 2 used * prior to the BLE update */ xpad->packet_type = PKT_XBE2_FW_OLD; } else if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x050b) { /* This is the format that the Elite 2 used * prior to the update that split the packet */ xpad->packet_type = PKT_XBE2_FW_5_EARLY; } else { /* The split packet format that was introduced * in firmware v5.11 */ xpad->packet_type = PKT_XBE2_FW_5_11; } } } if (xpad->xtype == XTYPE_XBOX360W) { /* * Submit the int URB immediately rather than waiting for open * because we get status messages from the device whether * or not any controllers are attached. In fact, it's * exactly the message that a controller has arrived that * we're waiting for. */ error = xpad360w_start_input(xpad); if (error) goto err_deinit_output; /* * Wireless controllers require RESET_RESUME to work properly * after suspend. Ideally this quirk should be in usb core * quirk list, but we have too many vendors producing these * controllers and we'd need to maintain 2 identical lists * here in this driver and in usb core. */ udev->quirks |= USB_QUIRK_RESET_RESUME; } else { error = xpad_init_input(xpad); if (error) goto err_deinit_output; } return 0; err_deinit_output: xpad_deinit_output(xpad); err_free_in_urb: usb_free_urb(xpad->irq_in); err_free_idata: usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); err_free_mem: kfree(xpad); return error; } static void xpad_disconnect(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); if (xpad->xtype == XTYPE_XBOX360W) xpad360w_stop_input(xpad); xpad_deinit_input(xpad); /* * Now that both input device and LED device are gone we can * stop output URB. */ xpad_stop_output(xpad); xpad_deinit_output(xpad); usb_free_urb(xpad->irq_in); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); kfree(xpad); usb_set_intfdata(intf, NULL); } static int xpad_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; if (xpad->xtype == XTYPE_XBOX360W) { /* * Wireless controllers always listen to input so * they are notified when controller shows up * or goes away. 
*/ xpad360w_stop_input(xpad); /* * The wireless adapter is going off now, so the * gamepads are going to become disconnected. * Unless explicitly disabled, power them down * so they don't just sit there flashing. */ if (auto_poweroff && xpad->pad_present) xpad360w_poweroff_controller(xpad); } else { mutex_lock(&input->mutex); if (input_device_enabled(input)) xpad_stop_input(xpad); mutex_unlock(&input->mutex); } xpad_stop_output(xpad); return 0; } static int xpad_resume(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; int retval = 0; if (xpad->xtype == XTYPE_XBOX360W) { retval = xpad360w_start_input(xpad); } else { mutex_lock(&input->mutex); if (input_device_enabled(input)) { retval = xpad_start_input(xpad); } else if (xpad->xtype == XTYPE_XBOXONE) { /* * Even if there are no users, we'll send Xbox One pads * the startup sequence so they don't sit there and * blink until somebody opens the input device again. */ retval = xpad_start_xbox_one(xpad); } mutex_unlock(&input->mutex); } return retval; } static struct usb_driver xpad_driver = { .name = "xpad", .probe = xpad_probe, .disconnect = xpad_disconnect, .suspend = xpad_suspend, .resume = xpad_resume, .id_table = xpad_table, }; module_usb_driver(xpad_driver); MODULE_AUTHOR("Marko Friedemann <mfr@bmx-chemnitz.de>"); MODULE_DESCRIPTION("Xbox pad driver"); MODULE_LICENSE("GPL"); |
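/*
 * Illustrative sketch (not part of the driver above): input_ff_create_memless(),
 * used in xpad_init_ff(), expects a play_effect callback of this shape.  The
 * 16-bit FF_RUMBLE magnitudes it receives are what xpad_play_effect() scales
 * down to the byte-sized packet fields shown earlier: strong/256 and weak/256
 * for Xbox 360/360W (0..255), strong/512 and weak/512 for the Xbox One GIP
 * rumble command (0..127).  All names below are hypothetical.
 */
#include <linux/input.h>
#include <linux/printk.h>

static int example_play_effect(struct input_dev *dev, void *data,
			       struct ff_effect *effect)
{
	unsigned int strong, weak;

	if (effect->type != FF_RUMBLE)
		return 0;

	strong = effect->u.rumble.strong_magnitude;	/* 0..65535 */
	weak = effect->u.rumble.weak_magnitude;		/* 0..65535 */

	pr_info("rumble: x360 bytes %u/%u, xbone bytes %u/%u\n",
		strong / 256, weak / 256, strong / 512, weak / 512);
	return 0;
}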
55 16 16 16 16 24 42 16 54 39 39 35 5 35 34 25 24 24 824 39 220 25 826 827 193 12 190 232 234 827 817 42 231 227 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | // SPDX-License-Identifier: GPL-2.0 /* * Ldisc rw semaphore * * The ldisc semaphore is semantically a rw_semaphore but which enforces * an alternate policy, namely: * 1) Supports lock wait timeouts * 2) Write waiter has priority * 3) Downgrading is not supported * * Implementation notes: * 1) Upper half of semaphore count is a wait count (differs from rwsem * in that rwsem normalizes the upper half to the wait bias) * 2) Lacks overflow checking * * The generic counting was copied and modified from include/asm-generic/rwsem.h * by Paul Mackerras <paulus@samba.org>. * * The scheduling policy was copied and modified from lib/rwsem.c * Written by David Howells (dhowells@redhat.com). * * This implementation incorporates the write lock stealing work of * Michel Lespinasse <walken@google.com>. 
* * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/tty.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task.h> #if BITS_PER_LONG == 64 # define LDSEM_ACTIVE_MASK 0xffffffffL #else # define LDSEM_ACTIVE_MASK 0x0000ffffL #endif #define LDSEM_UNLOCKED 0L #define LDSEM_ACTIVE_BIAS 1L #define LDSEM_WAIT_BIAS (-LDSEM_ACTIVE_MASK-1) #define LDSEM_READ_BIAS LDSEM_ACTIVE_BIAS #define LDSEM_WRITE_BIAS (LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS) struct ldsem_waiter { struct list_head list; struct task_struct *task; }; /* * Initialize an ldsem: */ void __init_ldsem(struct ld_semaphore *sem, const char *name, struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make sure we are not reinitializing a held semaphore: */ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif atomic_long_set(&sem->count, LDSEM_UNLOCKED); sem->wait_readers = 0; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->read_wait); INIT_LIST_HEAD(&sem->write_wait); } static void __ldsem_wake_readers(struct ld_semaphore *sem) { struct ldsem_waiter *waiter, *next; struct task_struct *tsk; long adjust, count; /* * Try to grant read locks to all readers on the read wait list. * Note the 'active part' of the count is incremented by * the number of readers before waking any processes up. */ adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); count = atomic_long_add_return(adjust, &sem->count); do { if (count > 0) break; if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust)) return; } while (1); list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { tsk = waiter->task; smp_store_release(&waiter->task, NULL); wake_up_process(tsk); put_task_struct(tsk); } INIT_LIST_HEAD(&sem->read_wait); sem->wait_readers = 0; } static inline int writer_trylock(struct ld_semaphore *sem) { /* * Only wake this writer if the active part of the count can be * transitioned from 0 -> 1 */ long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count); do { if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) return 1; if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS)) return 0; } while (1); } static void __ldsem_wake_writer(struct ld_semaphore *sem) { struct ldsem_waiter *waiter; waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); wake_up_process(waiter->task); } /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) * - the spinlock must be held by the caller * - woken process blocks are discarded from the list after having task zeroed */ static void __ldsem_wake(struct ld_semaphore *sem) { if (!list_empty(&sem->write_wait)) __ldsem_wake_writer(sem); else if (!list_empty(&sem->read_wait)) __ldsem_wake_readers(sem); } static void ldsem_wake(struct ld_semaphore *sem) { unsigned long flags; raw_spin_lock_irqsave(&sem->wait_lock, flags); __ldsem_wake(sem); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* * wait for the read lock to be granted */ static struct ld_semaphore __sched * down_read_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; /* set up my own style of 
waitqueue */ raw_spin_lock_irq(&sem->wait_lock); /* * Try to reverse the lock attempt but if the count has changed * so that reversing fails, check if there are no waiters, * and early-out if not */ do { if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) { count += adjust; break; } if (count > 0) { raw_spin_unlock_irq(&sem->wait_lock); return sem; } } while (1); list_add_tail(&waiter.list, &sem->read_wait); sem->wait_readers++; waiter.task = current; get_task_struct(current); /* if there are no active locks, wake the new lock owner(s) */ if ((count & LDSEM_ACTIVE_MASK) == 0) __ldsem_wake(sem); raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!smp_load_acquire(&waiter.task)) break; if (!timeout) break; timeout = schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); if (!timeout) { /* * Lock timed out but check if this task was just * granted lock ownership - if so, pretend there * was no timeout; otherwise, cleanup lock wait. */ raw_spin_lock_irq(&sem->wait_lock); if (waiter.task) { atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); sem->wait_readers--; list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); put_task_struct(waiter.task); return NULL; } raw_spin_unlock_irq(&sem->wait_lock); } return sem; } /* * wait for the write lock to be granted */ static struct ld_semaphore __sched * down_write_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; long adjust = -LDSEM_ACTIVE_BIAS; int locked = 0; /* set up my own style of waitqueue */ raw_spin_lock_irq(&sem->wait_lock); /* * Try to reverse the lock attempt but if the count has changed * so that reversing fails, check if the lock is now owned, * and early-out if so. */ do { if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) break; if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { raw_spin_unlock_irq(&sem->wait_lock); return sem; } } while (1); list_add_tail(&waiter.list, &sem->write_wait); waiter.task = current; set_current_state(TASK_UNINTERRUPTIBLE); for (;;) { if (!timeout) break; raw_spin_unlock_irq(&sem->wait_lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->wait_lock); set_current_state(TASK_UNINTERRUPTIBLE); locked = writer_trylock(sem); if (locked) break; } if (!locked) atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); list_del(&waiter.list); /* * In case of timeout, wake up every reader who gave the right of way * to writer. Prevent separation readers into two groups: * one that helds semaphore and another that sleeps. 
* (in case of no contention with a writer) */ if (!locked && list_empty(&sem->write_wait)) __ldsem_wake_readers(sem); raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); /* lock wait may have timed out */ if (!locked) return NULL; return sem; } static int __ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { long count; rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count); if (count <= 0) { lock_contended(&sem->dep_map, _RET_IP_); if (!down_read_failed(sem, count, timeout)) { rwsem_release(&sem->dep_map, _RET_IP_); return 0; } } lock_acquired(&sem->dep_map, _RET_IP_); return 1; } static int __ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { long count; rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count); if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { lock_contended(&sem->dep_map, _RET_IP_); if (!down_write_failed(sem, count, timeout)) { rwsem_release(&sem->dep_map, _RET_IP_); return 0; } } lock_acquired(&sem->dep_map, _RET_IP_); return 1; } /* * lock for reading -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, 0, timeout); } /* * trylock for reading -- returns 1 if successful, 0 if contention */ int ldsem_down_read_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while (count >= 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * lock for writing -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, 0, timeout); } /* * trylock for writing -- returns 1 if successful, 0 if contention */ int ldsem_down_write_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while ((count & LDSEM_ACTIVE_MASK) == 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * release a read lock */ void ldsem_up_read(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) ldsem_wake(sem); } /* * release a write lock */ void ldsem_up_write(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); if (count < 0) ldsem_wake(sem); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, subclass, timeout); } int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, subclass, timeout); } #endif |
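/*
 * Usage sketch only (not part of this file): per the comments above,
 * ldsem_down_read()/ldsem_down_write() return 1 on success and 0 on timeout,
 * so callers must handle the timed-out case explicitly.  The semaphore,
 * timeout and error code below are hypothetical.
 */
#include <linux/tty.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static int example_with_ldsem(struct ld_semaphore *sem)
{
	/* Wait up to five seconds for the read side. */
	if (!ldsem_down_read(sem, 5 * HZ))
		return -EBUSY;		/* timed out, lock not held */

	/* ... read-side critical section ... */

	ldsem_up_read(sem);
	return 0;
}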
// SPDX-License-Identifier: GPL-2.0
/*
 * x86 specific code for irq_work
 *
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 */

#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
#include <asm/idtentry.h>
#include <asm/trace/irq_vectors.h>
#include <linux/interrupt.h>

#ifdef CONFIG_X86_LOCAL_APIC
DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
{
	apic_eoi();
	trace_irq_work_entry(IRQ_WORK_VECTOR);
	inc_irq_stat(apic_irq_work_irqs);
	irq_work_run();
	trace_irq_work_exit(IRQ_WORK_VECTOR);
}

void arch_irq_work_raise(void)
{
	if (!arch_irq_work_has_interrupt())
		return;

	__apic_send_IPI_self(IRQ_WORK_VECTOR);
	apic_wait_icr_idle();
}
#endif
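/*
 * Illustrative sketch (not part of this file): the self-IPI raised by
 * arch_irq_work_raise() above is what services the generic irq_work API.
 * A user initializes an irq_work with a callback and queues it from a
 * context where the work cannot run directly (e.g. NMI); the callback then
 * runs from sysvec_irq_work -> irq_work_run().  Names below are hypothetical.
 */
#include <linux/irq_work.h>
#include <linux/printk.h>

static void example_irq_work_fn(struct irq_work *work)
{
	pr_info("irq_work callback ran in hard interrupt context\n");
}

static struct irq_work example_work;

static void example_setup_and_raise(void)
{
	init_irq_work(&example_work, example_irq_work_fn);

	/* Returns false if this work item was already queued. */
	irq_work_queue(&example_work);
}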
1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/sysctl.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/in.h> #include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/slab.h> #include <linux/uaccess.h> #include <asm/io.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/arp.h> #include <net/ax25.h> #include <net/netrom.h> /* * Only allow IP over NET/ROM frames through if the netrom device is up. */ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; if (!netif_running(dev)) { stats->rx_dropped++; return 0; } stats->rx_packets++; stats->rx_bytes += skb->len; skb->protocol = htons(ETH_P_IP); /* Spoof incoming device */ skb->dev = dev; skb->mac_header = skb->network_header; skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; netif_rx(skb); return 1; } static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); memcpy(buff, (saddr != NULL) ? 
saddr : dev->dev_addr, dev->addr_len); buff[6] &= ~AX25_CBIT; buff[6] &= ~AX25_EBIT; buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; if (daddr != NULL) memcpy(buff, daddr, dev->addr_len); buff[6] &= ~AX25_CBIT; buff[6] |= AX25_EBIT; buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; *buff++ = 0; *buff++ = 0; *buff++ = NR_PROTOEXT; if (daddr != NULL) return 37; return -37; } static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; int err; if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) return 0; if (dev->flags & IFF_UP) { err = ax25_listen_register((ax25_address *)sa->sa_data, NULL); if (err) return err; ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); } dev_addr_set(dev, sa->sa_data); return 0; } static int nr_open(struct net_device *dev) { int err; err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); if (err) return err; netif_start_queue(dev); return 0; } static int nr_close(struct net_device *dev) { ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); netif_stop_queue(dev); return 0; } static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; unsigned int len = skb->len; if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); stats->tx_errors++; return NETDEV_TX_OK; } stats->tx_packets++; stats->tx_bytes += len; return NETDEV_TX_OK; } static const struct header_ops nr_header_ops = { .create = nr_header, }; static const struct net_device_ops nr_netdev_ops = { .ndo_open = nr_open, .ndo_stop = nr_close, .ndo_start_xmit = nr_xmit, .ndo_set_mac_address = nr_set_mac_address, }; void nr_setup(struct net_device *dev) { dev->mtu = NR_MAX_PACKET_SIZE; dev->netdev_ops = &nr_netdev_ops; dev->header_ops = &nr_header_ops; dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; /* New-style flags. */ dev->flags = IFF_NOARP; } |
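/*
 * Illustrative sketch (not part of this file): nr_setup() above is written as
 * an alloc_netdev() setup callback; it fills in the MTU, header ops, address
 * length and ARPHRD_NETROM type.  A caller would create and register such a
 * device roughly as below; the name template and error handling are only for
 * demonstration.
 */
#include <linux/netdevice.h>
#include <net/netrom.h>

static struct net_device *example_create_nr_dev(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "nr%d", NET_NAME_UNKNOWN, nr_setup);
	if (!dev)
		return NULL;

	if (register_netdev(dev)) {
		free_netdev(dev);
		return NULL;
	}
	return dev;
}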
/*
 * User address space access functions.
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/instrumented.h>

#include <asm/tlbflush.h>

/**
 * copy_from_user_nmi - NMI safe copy from user
 * @to: Pointer to the destination buffer
 * @from: Pointer to a user space address of the current task
 * @n: Number of bytes to copy
 *
 * Returns: The number of not copied bytes. 0 is success, i.e. all bytes copied
 *
 * Contrary to other copy_from_user() variants this function can be called
 * from NMI context. Despite the name it is not restricted to be called
 * from NMI context. It is safe to be called from any other context as
 * well. It disables pagefaults across the copy which means a fault will
 * abort the copy.
 *
 * For NMI context invocations this relies on the nested NMI work to allow
 * atomic faults from the NMI path; the nested NMI paths are careful to
 * preserve CR2.
 */
unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long ret;

	if (!__access_ok(from, n))
		return n;

	if (!nmi_uaccess_okay())
		return n;

	/*
	 * Even though this function is typically called from NMI/IRQ context
	 * disable pagefaults so that its behaviour is consistent even when
	 * called from other contexts.
	 */
	pagefault_disable();
	instrument_copy_from_user_before(to, from, n);
	ret = raw_copy_from_user(to, from, n);
	instrument_copy_from_user_after(to, from, n, ret);
	pagefault_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
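/*
 * Usage sketch only (not part of this file): copy_from_user_nmi() returns the
 * number of bytes NOT copied, so 0 means the whole buffer was read.  A typical
 * caller (e.g. sampling user memory from an NMI handler) must treat a partial
 * or failed copy as an error rather than assuming the read worked.  The helper
 * name below is hypothetical.
 */
#include <linux/uaccess.h>
#include <linux/errno.h>

static int example_peek_user(const void __user *uptr, void *buf,
			     unsigned long len)
{
	unsigned long not_copied;

	not_copied = copy_from_user_nmi(buf, uptr, len);
	if (not_copied)
		return -EFAULT;	/* some or all bytes were not copied */

	return 0;
}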
37 19 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/dccp/minisocks.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/dccp.h> #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/timer.h> #include <net/sock.h> #include <net/xfrm.h> #include <net/inet_timewait_sock.h> #include <net/rstreason.h> #include "ackvec.h" #include "ccid.h" #include "dccp.h" #include "feat.h" struct inet_timewait_death_row dccp_death_row = { .tw_refcount = REFCOUNT_INIT(1), .sysctl_max_tw_buckets = NR_FILE * 2, .hashinfo = &dccp_hashinfo, }; EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw; tw = inet_twsk_alloc(sk, &dccp_death_row, state); if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = sk->sk_ipv6only; } #endif /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than * non-graceful socket closings. 
*/ DCCP_WARN("time wait bucket table overflow\n"); } dccp_done(sk); } struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb) { /* * Step 3: Process LISTEN state * * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair */ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; newdp->dccps_hc_rx_ccid = NULL; newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; INIT_LIST_HEAD(&newdp->dccps_featneg); /* * Step 3: Process LISTEN state * * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR from packet (or Init Cookies) * * Setting AWL/AWH and SWL/SWH happens as part of the feature * activation below, as these windows all depend on the local * and remote Sequence Window feature values (7.5.2). */ newdp->dccps_iss = dreq->dreq_iss; newdp->dccps_gss = dreq->dreq_gss; newdp->dccps_gar = newdp->dccps_iss; newdp->dccps_isr = dreq->dreq_isr; newdp->dccps_gsr = dreq->dreq_gsr; /* * Activate features: initialise CCIDs, sequence windows etc. */ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { sk_free_unlock_clone(newsk); return NULL; } dccp_init_xmit_timers(newsk); __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); } return newsk; } EXPORT_SYMBOL_GPL(dccp_create_openreq_child); /* * Process an incoming packet for RESPOND sockets represented * as an request_sock. */ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; /* TCP/DCCP listeners became lockless. * DCCP stores complex state in its request_sock, so we need * a protection for them, now this code runs without being protected * by the parent (listener) lock. */ spin_lock_bh(&dreq->dreq_lock); /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) { dccp_pr_debug("Retransmitted REQUEST\n"); dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq; /* * Send another RESPONSE packet * To protect against Request floods, increment retrans * counter (backoff, monitored by dccp_response_timer). 
*/ inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) goto drop; /* Invalid ACK */ if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dreq->dreq_iss, dreq->dreq_gss)) { dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " "dreq_iss=%llu, dreq_gss=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long) dreq->dreq_iss, (unsigned long long) dreq->dreq_gss); goto drop; } if (dccp_parse_options(sk, dreq, skb)) goto drop; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); if (child) { child = inet_csk_complete_hashdance(sk, child, req, own_req); goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_csk_reqsk_queue_drop(sk, req); out: spin_unlock_bh(&dreq->dreq_lock); return child; } EXPORT_SYMBOL_GPL(dccp_check_req); /* * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. */ int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) __releases(child) { int ret = 0; const int state = child->sk_state; if (!sock_owned_by_user(child)) { ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening * socket does not protect us more. */ __sk_add_backlog(child, skb); } bh_unlock_sock(child); sock_put(child); return ret; } EXPORT_SYMBOL_GPL(dccp_child_process); void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk) { DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); } EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); int dccp_reqsk_init(struct request_sock *req, struct dccp_sock const *dp, struct sk_buff const *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init); |
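/*
 * Worked example (not part of this file): dccp_time_wait() above floors the
 * TIME_WAIT timeout at
 *
 *	rto = (icsk_rto << 2) - (icsk_rto >> 1)
 *
 * i.e. 4*RTO - RTO/2 = 3.5*RTO.  The hypothetical helper below spells out the
 * same arithmetic.
 */
static inline int example_timewait_floor(int icsk_rto)
{
	/* e.g. icsk_rto = 200 jiffies -> 800 - 100 = 700 jiffies (3.5x) */
	return (icsk_rto << 2) - (icsk_rto >> 1);
}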
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * VMware vSockets Driver * * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. */ #ifndef __AF_VSOCK_H__ #define __AF_VSOCK_H__ #include <linux/kernel.h> #include <linux/workqueue.h> #include <net/sock.h> #include <uapi/linux/vm_sockets.h> #include "vsock_addr.h" #define LAST_RESERVED_PORT 1023 #define VSOCK_HASH_SIZE 251 extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; extern spinlock_t vsock_table_lock; #define vsock_sk(__sk) ((struct vsock_sock *)__sk) #define sk_vsock(__vsk) (&(__vsk)->sk) struct vsock_sock { /* sk must be the first member. */ struct sock sk; const struct vsock_transport *transport; struct sockaddr_vm local_addr; struct sockaddr_vm remote_addr; /* Links for the global tables of bound and connected sockets. */ struct list_head bound_table; struct list_head connected_table; /* Accessed without the socket lock held. This means it can never be * modified outsided of socket create or destruct. */ bool trusted; bool cached_peer_allow_dgram; /* Dgram communication allowed to * cached peer? */ u32 cached_peer; /* Context ID of last dgram destination check. */ const struct cred *owner; /* Rest are SOCK_STREAM only. */ long connect_timeout; /* Listening socket that this came from. */ struct sock *listener; /* Used for pending list and accept queue during connection handshake. * The listening socket is the head for both lists. Sockets created * for connection requests are placed in the pending list until they * are connected, at which point they are put in the accept queue list * so they can be accepted in accept(). If accept() cannot accept the * connection, it is marked as rejected so the cleanup function knows * to clean up the socket. */ struct list_head pending_links; struct list_head accept_queue; bool rejected; struct delayed_work connect_work; struct delayed_work pending_work; struct delayed_work close_work; bool close_work_scheduled; u32 peer_shutdown; bool sent_request; bool ignore_connecting_rst; /* Protected by lock_sock(sk) */ u64 buffer_size; u64 buffer_min_size; u64 buffer_max_size; /* Private to transport. */ void *trans; }; s64 vsock_connectible_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); void vsock_data_ready(struct sock *sk); /**** TRANSPORT ****/ struct vsock_transport_recv_notify_data { u64 data1; /* Transport-defined. */ u64 data2; /* Transport-defined. 
*/ bool notify_on_block; }; struct vsock_transport_send_notify_data { u64 data1; /* Transport-defined. */ u64 data2; /* Transport-defined. */ }; /* Transport features flags */ /* Transport provides host->guest communication */ #define VSOCK_TRANSPORT_F_H2G 0x00000001 /* Transport provides guest->host communication */ #define VSOCK_TRANSPORT_F_G2H 0x00000002 /* Transport provides DGRAM communication */ #define VSOCK_TRANSPORT_F_DGRAM 0x00000004 /* Transport provides local (loopback) communication */ #define VSOCK_TRANSPORT_F_LOCAL 0x00000008 struct vsock_transport { struct module *module; /* Initialize/tear-down socket. */ int (*init)(struct vsock_sock *, struct vsock_sock *); void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); /* Cancel all pending packets sent on vsock. */ int (*cancel_pkt)(struct vsock_sock *vsk); /* Connections. */ int (*connect)(struct vsock_sock *); /* DGRAM. */ int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); bool (*dgram_allow)(u32 cid, u32 port); /* STREAM. */ /* TODO: stream_bind() */ ssize_t (*stream_dequeue)(struct vsock_sock *, struct msghdr *, size_t len, int flags); ssize_t (*stream_enqueue)(struct vsock_sock *, struct msghdr *, size_t len); s64 (*stream_has_data)(struct vsock_sock *); s64 (*stream_has_space)(struct vsock_sock *); u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, int flags); int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len); bool (*seqpacket_allow)(u32 remote_cid); u32 (*seqpacket_has_data)(struct vsock_sock *vsk); /* Notification. */ int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); int (*notify_recv_init)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_pre_block)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, ssize_t, bool, struct vsock_transport_recv_notify_data *); int (*notify_send_init)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_pre_block)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_pre_enqueue)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, struct vsock_transport_send_notify_data *); /* sk_lock held by the caller */ void (*notify_buffer_size)(struct vsock_sock *, u64 *); int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); /* SIOCOUTQ ioctl */ ssize_t (*unsent_bytes)(struct vsock_sock *vsk); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); /* Addressing. */ u32 (*get_local_cid)(void); /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); /* Zero-copy. 
*/ bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ int vsock_core_register(const struct vsock_transport *t, int features); void vsock_core_unregister(const struct vsock_transport *t); /* The transport may downcast this to access transport-specific functions */ const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk); /**** UTILS ****/ /* vsock_table_lock must be held */ static inline bool __vsock_in_bound_table(struct vsock_sock *vsk) { return !list_empty(&vsk->bound_table); } /* vsock_table_lock must be held */ static inline bool __vsock_in_connected_table(struct vsock_sock *vsk) { return !list_empty(&vsk->connected_table); } void vsock_add_pending(struct sock *listener, struct sock *pending); void vsock_remove_pending(struct sock *listener, struct sock *pending); void vsock_enqueue_accept(struct sock *listener, struct sock *connected); void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); /**** TAP ****/ struct vsock_tap { struct net_device *dev; struct module *module; struct list_head list; }; int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); #ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else static inline void __init vsock_bpf_build_proto(void) {} #endif static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) { return t->msgzerocopy_allow && t->msgzerocopy_allow(); } #endif /* __AF_VSOCK_H__ */ |
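/*
 * Illustrative sketch (not part of this header): a transport module fills in a
 * struct vsock_transport and registers it with one of the VSOCK_TRANSPORT_F_*
 * feature flags declared above, then unregisters it on module exit.  Only one
 * callback is shown and all names are hypothetical; a real transport must
 * provide the full set of operations it advertises.  Assumes the declarations
 * from this header are in scope.
 */
#include <linux/module.h>

static u32 example_get_local_cid(void)
{
	return 42;	/* arbitrary demo CID */
}

static struct vsock_transport example_transport = {
	.module		= THIS_MODULE,
	.get_local_cid	= example_get_local_cid,
	/* ... init, destruct, release, stream/dgram ops, notifications ... */
};

static int __init example_vsock_init(void)
{
	/* Advertise guest->host communication. */
	return vsock_core_register(&example_transport, VSOCK_TRANSPORT_F_G2H);
}

static void __exit example_vsock_exit(void)
{
	vsock_core_unregister(&example_transport);
}

module_init(example_vsock_init);
module_exit(example_vsock_exit);
MODULE_LICENSE("GPL");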
857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 | // SPDX-License-Identifier: GPL-2.0-only /* * Fd transport layer. Includes deprecated socket layer. * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/kthread.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/un.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/file.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 #define MAX_SOCK_BUF (1024*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; static struct p9_trans_module p9_fd_trans; /** * struct p9_fd_opts - per-transport options * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) * @privport: port is privileged */ struct p9_fd_opts { int rfd; int wfd; u16 port; bool privport; }; /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options */ enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, /* Options that take no arguments */ Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_privport, "privport"}, {Opt_err, NULL}, }; enum { Rworksched = 1, /* read work scheduled or running */ Rpending = 2, /* can read */ Wworksched = 4, /* write work scheduled or running */ Wpending = 8, /* can write */ }; struct p9_poll_wait { struct p9_conn *conn; 
wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state * @req_lock: lock protecting req_list and requests statuses * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads * @pt: poll state * @rq: current read work * @wq: current write work * @wsched: ???? * */ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; spinlock_t req_lock; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; char *wbuf; struct list_head poll_pending_link; struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; /** * struct p9_trans_fd - transport state * @rd: reference to file to read from * @wr: reference of file to write to * @conn: connection state reference * */ struct p9_trans_fd { struct file *rd; struct file *wr; struct p9_conn conn; }; static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { struct p9_poll_wait *pwait = &m->poll_wait[i]; if (pwait->wait_addr) { remove_wait_queue(pwait->wait_addr, &pwait->wait); pwait->wait_addr = NULL; } } spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); flush_work(&p9_poll_work); } /** * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code * */ static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock(&m->req_lock); if (m->err) { spin_unlock(&m->req_lock); return; } m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } spin_unlock(&m->req_lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) ts = client->trans; if (!ts) { if (err) *err = -EREMOTEIO; return EPOLLERR; } ret = vfs_poll(ts->rd, pt); if 
(ts->rd != ts->wr) ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } /** * p9_fd_read- read from a fd * @client: client instance * @v: buffer to receive data into * @len: size of receive buffer * */ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; loff_t pos; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); pos = ts->rd->f_pos; ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_read_work - called when there is some data to be read from a transport * @work: container of work to be done * */ static void p9_read_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; m = container_of(work, struct p9_conn, rq); if (m->err < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", m, m->rc.offset, m->rc.capacity, m->rc.capacity - m->rc.offset); err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, m->rc.capacity - m->rc.offset); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err <= 0) goto error; m->rc.offset += err; /* header read in */ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, "error parsing header: %d\n", err); goto error; } p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); m->rreq = p9_tag_lookup(m->client, m->rc.tag); if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } if (m->rc.size > m->rreq->rc.capacity) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d for tag %d with capacity %zd\n", m->rc.size, m->rc.tag, m->rreq->rc.capacity); err = -EIO; goto error; } if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); p9_req_put(m->client, m->rreq); m->rreq = NULL; err = -EIO; goto error; } m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } /* packet is read in * not an else because some packets (like clunk) have no payload */ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->req_lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); } else if (m->rreq->status == REQ_STATUS_FLSHD) { /* Ignore replies associated with a cancelled request. 
*/ p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->req_lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; p9_req_put(m->client, m->rreq); m->rreq = NULL; } end_clear: clear_bit(Rworksched, &m->wsched); if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } return; error: p9_conn_cancel(m, err); clear_bit(Rworksched, &m->wsched); } /** * p9_fd_write - write to a socket * @client: client instance * @v: buffer to send data from * @len: size of send buffer * */ static int p9_fd_write(struct p9_client *client, void *v, int len) { ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_write_work - called when a transport can send some data * @work: container for work to be done * */ static void p9_write_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); if (m->err < 0) { clear_bit(Wworksched, &m->wsched); return; } if (!m->wsize) { spin_lock(&m->req_lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); spin_unlock(&m->req_lock); return; } req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); WRITE_ONCE(req->status, REQ_STATUS_SENT); p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; p9_req_get(req); m->wreq = req; spin_unlock(&m->req_lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err < 0) goto error; else if (err == 0) { err = -EREMOTEIO; goto error; } m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; p9_req_put(m->client, m->wreq); m->wreq = NULL; } end_clear: clear_bit(Wworksched, &m->wsched); if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } return; error: p9_conn_cancel(m, err); clear_bit(Wworksched, &m->wsched); } static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); schedule_work(&p9_poll_work); return 1; } /** * p9_pollwait - add poll task 
to the wait queue * @filp: file pointer being polled * @wait_address: wait_q to block on * @p: poll state * * called by files poll operation to add v9fs-poll task to files wait queue */ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct p9_conn *m = container_of(p, struct p9_conn, pt); struct p9_poll_wait *pwait = NULL; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { if (m->poll_wait[i].wait_addr == NULL) { pwait = &m->poll_wait[i]; break; } } if (!pwait) { p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } pwait->conn = m; pwait->wait_addr = wait_address; init_waitqueue_func_entry(&pwait->wait, p9_pollwake); add_wait_queue(wait_address, &pwait->wait); } /** * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ static void p9_conn_create(struct p9_client *client) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; spin_lock_init(&m->req_lock); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); INIT_LIST_HEAD(&m->poll_pending_link); init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } } /** * p9_poll_mux - polls a mux and schedules read or write works if necessary * @m: connection to poll * */ static void p9_poll_mux(struct p9_conn *m) { __poll_t n; int err = -ECONNRESET; if (m->err < 0) return; n = p9_fd_poll(m->client, NULL, &err); if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } } /** * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not * a guarantee that the request is sent successfully. 
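 * The request is queued on the connection's unsent_req_list and the write
 * worker is scheduled as soon as the descriptor reports it is writable.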
* * @client: client instance * @req: request to be sent * */ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); spin_lock(&m->req_lock); if (m->err < 0) { spin_unlock(&m->req_lock); return m->err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; } static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); p9_req_put(client, req); ret = 0; } spin_unlock(&m->req_lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { spin_unlock(&m->req_lock); return 0; } /* we haven't received a response for oldreq, * remove it from the list. */ list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); spin_unlock(&m->req_lock); p9_req_put(client, req); return 0; } static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; opts->privport = false; if (!params) return 0; tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { p9_debug(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; int r; if (!*p) continue; token = match_token(p, tokens, args); if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { case Opt_port: opts->port = option; break; case Opt_rfdno: opts->rfd = option; break; case Opt_wfdno: opts->wfd = option; break; case Opt_privport: opts->privport = true; break; default: continue; } } kfree(tmp_options); return 0; } static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; 
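	/*
	 * Resolve both descriptors to struct file references and check their
	 * access modes before wiring up the transport: the read end must be
	 * readable and the write end writable.
	 */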
ts->rd = fget(rfd); if (!ts->rd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; /* Prevent workers from hanging on IO when fd is a pipe. * It's technically possible for userspace or concurrent mounts to * modify this flag concurrently, which will likely result in a * broken filesystem. However, just having bad flags here should * not crash the kernel or cause any other sort of bug, so mark this * particular data race as intentional so that tooling (like KCSAN) * can allow it and detect further problems. */ data_race(ts->rd->f_flags |= O_NONBLOCK); ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; data_race(ts->wr->f_flags |= O_NONBLOCK); client->trans = ts; client->status = Connected; return 0; out_put_wr: fput(ts->wr); out_put_rd: fput(ts->rd); out_free_ts: kfree(ts); return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) { sock_release(csocket); return -ENOMEM; } csocket->sk->sk_allocation = GFP_NOIO; csocket->sk->sk_use_task_frag = false; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); kfree(p); return PTR_ERR(file); } get_file(file); p->wr = p->rd = file; client->trans = p; client->status = Connected; p->rd->f_flags |= O_NONBLOCK; p9_conn_create(client); return 0; } /** * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { p9_req_put(m->client, m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { p9_req_put(m->client, m->wreq); m->wreq = NULL; } p9_conn_cancel(m, -ECONNRESET); m->client = NULL; } /** * p9_fd_close - shutdown file descriptor transport * @client: client instance * */ static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; if (!client) return; ts = client->trans; if (!ts) return; client->status = Disconnected; p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); kfree(ts); } /* * stolen from NFS - maybe should be made a generic function? 
*/ static inline int valid_ipaddr4(const char *buf) { int rc, count, in[4]; rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); if (rc != 4) return -EINVAL; for (count = 0; count < 4; count++) { if (in[count] > 255) return -EINVAL; } return 0; } static int p9_bind_privport(struct socket *sock) { struct sockaddr_in cl; int port, err = -EINVAL; memset(&cl, 0, sizeof(cl)); cl.sin_family = AF_INET; cl.sin_addr.s_addr = htonl(INADDR_ANY); for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { cl.sin_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); if (err != -EADDRINUSE) break; } return err; } static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } if (opts.privport) { err = p9_bind_privport(csocket); if (err < 0) { pr_err("%s (%d): problem binding to privport\n", __func__, task_pid_nr(current)); sock_release(csocket); return err; } } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; csocket = NULL; if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; if (opts.rfd == ~0 || opts.wfd == ~0) { pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) return err; p9_conn_create(client); return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .pooled_rbuffers = false, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = 
					THIS_MODULE,
};
MODULE_ALIAS_9P("tcp");

static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("unix");

static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("fd");

/**
 * p9_poll_workfn - poll worker thread
 * @work: work queue
 *
 * polls all v9fs transports for new events and queues the appropriate
 * work to the work queue
 *
 */
static void p9_poll_workfn(struct work_struct *work)
{
	unsigned long flags;

	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);

	spin_lock_irqsave(&p9_poll_lock, flags);
	while (!list_empty(&p9_poll_pending_list)) {
		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
							struct p9_conn,
							poll_pending_link);
		list_del_init(&conn->poll_pending_link);
		spin_unlock_irqrestore(&p9_poll_lock, flags);

		p9_poll_mux(conn);

		spin_lock_irqsave(&p9_poll_lock, flags);
	}
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	p9_debug(P9_DEBUG_TRANS, "finish\n");
}

static int __init p9_trans_fd_init(void)
{
	v9fs_register_trans(&p9_tcp_trans);
	v9fs_register_trans(&p9_unix_trans);
	v9fs_register_trans(&p9_fd_trans);

	return 0;
}

static void __exit p9_trans_fd_exit(void)
{
	flush_work(&p9_poll_work);
	v9fs_unregister_trans(&p9_tcp_trans);
	v9fs_unregister_trans(&p9_unix_trans);
	v9fs_unregister_trans(&p9_fd_trans);
}

module_init(p9_trans_fd_init);
module_exit(p9_trans_fd_exit);

MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
MODULE_LICENSE("GPL");
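For illustration only (this block is not part of trans_fd.c): a minimal userspace sketch of how the options parsed above are typically used. It assumes a 9P server is already listening at the example address and that the mount point exists; the program establishes the connection itself and hands the descriptor to the kernel through the rfdno=/wfdno= tokens handled by parse_opts(), selecting this transport via its "fd" module name. The address, mount point, and source string below are placeholders.

/* Hypothetical userspace helper; error handling kept minimal on purpose. */
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mount.h>

int main(void)
{
	struct sockaddr_in sa = {
		.sin_family = AF_INET,
		.sin_port = htons(564),		/* P9_PORT, as defined above */
	};
	char opts[128];
	int fd;

	/* Example address only (TEST-NET-1); a real 9P server must listen here. */
	if (inet_pton(AF_INET, "192.0.2.1", &sa.sin_addr) != 1)
		return 1;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0 || connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("connect");
		return 1;
	}

	/*
	 * rfdno= and wfdno= are the tokens accepted by parse_opts(); reusing
	 * one descriptor for both directions is fine because p9_fd_open()
	 * takes a separate reference for each side.
	 */
	snprintf(opts, sizeof(opts), "trans=fd,rfdno=%d,wfdno=%d", fd, fd);
	if (mount("nodev", "/mnt/9p", "9p", 0, opts) < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}

The same pair of options also works with a pipe or socketpair connected to a local server process; p9_fd_create() only checks that both descriptor numbers are supplied.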
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
* Written by Pierre Peiffer <pierre.peiffer@bull.net> */ /* * Extents support for EXT4 * * TODO: * - ext4*_error() should be used in some situations * - analyze all BUG()/BUG_ON(), use -EIO where appropriate * - smart tree reduction */ #include <linux/fs.h> #include <linux/time.h> #include <linux/jbd2.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/fiemap.h> #include <linux/iomap.h> #include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" #include <trace/events/ext4.h> /* * used by extent splitting. */ #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ due to ENOSPC */ #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 csum; csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, EXT4_EXTENT_TAIL_OFFSET(eh)); return cpu_to_le32(csum); } static int ext4_extent_block_csum_verify(struct inode *inode, struct ext4_extent_header *eh) { struct ext4_extent_tail *et; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; et = find_ext4_extent_tail(eh); if (et->et_checksum != ext4_extent_block_csum(inode, eh)) return 0; return 1; } static void ext4_extent_block_csum_set(struct inode *inode, struct ext4_extent_header *eh) { struct ext4_extent_tail *et; if (!ext4_has_metadata_csum(inode->i_sb)) return; et = find_ext4_extent_tail(eh); et->et_checksum = ext4_extent_block_csum(inode, eh); } static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t split, int split_flag, int flags); static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { /* * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; } static inline void ext4_ext_path_brelse(struct ext4_ext_path *path) { brelse(path->p_bh); path->p_bh = NULL; } static void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth, i; if (IS_ERR_OR_NULL(path)) return; depth = path->p_depth; for (i = 0; i <= depth; i++, path++) ext4_ext_path_brelse(path); } void ext4_free_ext_path(struct ext4_ext_path *path) { if (IS_ERR_OR_NULL(path)) return; ext4_ext_drop_refs(path); kfree(path); } /* * Make sure 'handle' has at least 'check_cred' credits. If not, restart * transaction with 'restart_cred' credits. The function drops i_data_sem * when restarting transaction and gets it after transaction is restarted. * * The function returns 0 on success, 1 if transaction had to be restarted, * and < 0 in case of fatal error. 
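 * Note that i_data_sem is dropped while the transaction is restarted, so
 * callers must not rely on extent paths or other tree state cached before
 * the call.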
*/ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, int check_cred, int restart_cred, int revoke_cred) { int ret; int dropped = 0; ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred, revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped)); if (dropped) down_write(&EXT4_I(inode)->i_data_sem); return ret; } /* * could return: * - EROFS * - ENOMEM */ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { int err = 0; if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode->i_sb, path->p_bh, EXT4_JTR_NONE); /* * The extent buffer's verified bit will be set again in * __ext4_ext_dirty(). We could leave an inconsistent * buffer if the extents updating procudure break off du * to some error happens, force to check it again. */ if (!err) clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ return err; } /* * could return: * - EROFS * - ENOMEM * - EIO */ static int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { int err; WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); if (path->p_bh) { ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); /* Extents updating done, re-set verified flag */ if (!err) set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); } return err; } #define ext4_ext_dirty(handle, inode, path) \ __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { if (path) { int depth = path->p_depth; struct ext4_extent *ex; /* * Try to predict block placement assuming that we are * filling in a file which will eventually be * non-sparse --- i.e., in the case of libbfd writing * an ELF object sections out-of-order but in a way * the eventually results in a contiguous object or * executable file, or some database extending a table * space file. However, this is actually somewhat * non-ideal if we are writing a sparse file such as * qemu or KVM writing a raw image file that is going * to stay fairly sparse, since it will end up * fragmenting the file system's free space. Maybe we * should have some hueristics or some way to allow * userspace to pass a hint to file system, * especially if the latter case turns out to be * common. */ ex = path[depth].p_ext; if (ex) { ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); if (block > ext_block) return ext_pblk + (block - ext_block); else return ext_pblk - (ext_block - block); } /* it looks like index is empty; * try to find starting block from index itself */ if (path[depth].p_bh) return path[depth].p_bh->b_blocknr; } /* OK. 
use inode's group */ return ext4_inode_to_goal_block(inode); } /* * Allocation for a meta data block */ static ext4_fsblk_t ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex, int *err, unsigned int flags) { ext4_fsblk_t goal, newblock; goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); newblock = ext4_new_meta_blocks(handle, inode, goal, flags, NULL, err); return newblock; } static inline int ext4_ext_space_block(struct inode *inode, int check) { int size; size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent); #ifdef AGGRESSIVE_TEST if (!check && size > 6) size = 6; #endif return size; } static inline int ext4_ext_space_block_idx(struct inode *inode, int check) { int size; size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent_idx); #ifdef AGGRESSIVE_TEST if (!check && size > 5) size = 5; #endif return size; } static inline int ext4_ext_space_root(struct inode *inode, int check) { int size; size = sizeof(EXT4_I(inode)->i_data); size -= sizeof(struct ext4_extent_header); size /= sizeof(struct ext4_extent); #ifdef AGGRESSIVE_TEST if (!check && size > 3) size = 3; #endif return size; } static inline int ext4_ext_space_root_idx(struct inode *inode, int check) { int size; size = sizeof(EXT4_I(inode)->i_data); size -= sizeof(struct ext4_extent_header); size /= sizeof(struct ext4_extent_idx); #ifdef AGGRESSIVE_TEST if (!check && size > 4) size = 4; #endif return size; } static inline struct ext4_ext_path * ext4_force_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t lblk, int nofail) { int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; if (nofail) flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; return ext4_split_extent_at(handle, inode, path, lblk, unwritten ? EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, flags); } static int ext4_ext_max_entries(struct inode *inode, int depth) { int max; if (depth == ext_depth(inode)) { if (depth == 0) max = ext4_ext_space_root(inode, 1); else max = ext4_ext_space_root_idx(inode, 1); } else { if (depth == 0) max = ext4_ext_space_block(inode, 1); else max = ext4_ext_space_block_idx(inode, 1); } return max; } static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); /* * We allow neither: * - zero length * - overflow/wrap-around */ if (lblock + len <= lblock) return 0; return ext4_inode_block_valid(inode, block, len); } static int ext4_valid_extent_idx(struct inode *inode, struct ext4_extent_idx *ext_idx) { ext4_fsblk_t block = ext4_idx_pblock(ext_idx); return ext4_inode_block_valid(inode, block, 1); } static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, ext4_lblk_t lblk, ext4_fsblk_t *pblk, int depth) { unsigned short entries; ext4_lblk_t lblock = 0; ext4_lblk_t cur = 0; if (eh->eh_entries == 0) return 1; entries = le16_to_cpu(eh->eh_entries); if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); /* * The logical block in the first entry should equal to * the number in the index block. 
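		 * (For example, if the index entry in the parent block says
		 * this leaf starts at logical block 4096, the first extent
		 * recorded here must also start at 4096; any mismatch is
		 * treated as on-disk corruption.)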
*/ if (depth != ext_depth(inode) && lblk != le32_to_cpu(ext->ee_block)) return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); if (lblock < cur) { *pblk = ext4_ext_pblock(ext); return 0; } cur = lblock + ext4_ext_get_actual_len(ext); ext++; entries--; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); /* * The logical block in the first entry should equal to * the number in the parent index block. */ if (depth != ext_depth(inode) && lblk != le32_to_cpu(ext_idx->ei_block)) return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; /* Check for overlapping index extents */ lblock = le32_to_cpu(ext_idx->ei_block); if (lblock < cur) { *pblk = ext4_idx_pblock(ext_idx); return 0; } ext_idx++; entries--; cur = lblock + 1; } } return 1; } static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { error_msg = "invalid magic"; goto corrupted; } if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) { error_msg = "unexpected eh_depth"; goto corrupted; } if (unlikely(eh->eh_max == 0)) { error_msg = "invalid eh_max"; goto corrupted; } max = ext4_ext_max_entries(inode, depth); if (unlikely(le16_to_cpu(eh->eh_max) > max)) { error_msg = "too large eh_max"; goto corrupted; } if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { error_msg = "invalid eh_entries"; goto corrupted; } if (unlikely((eh->eh_entries == 0) && (depth > 0))) { error_msg = "eh_entries is 0 but eh_depth is > 0"; goto corrupted; } if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } if (unlikely(depth > 32)) { error_msg = "too large eh_depth"; goto corrupted; } /* Verify checksum on non-root extent tree nodes */ if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) { error_msg = "extent tree corrupted"; err = -EFSBADCRC; goto corrupted; } return 0; corrupted: ext4_error_inode_err(inode, function, line, 0, -err, "pblk %llu bad header/extent: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", (unsigned long long) pblk, error_msg, le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); return err; } #define ext4_ext_check(inode, eh, depth, pblk) \ __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); } static void ext4_cache_extents(struct inode *inode, struct ext4_extent_header *eh) { struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); ext4_lblk_t prev = 0; int i; for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { unsigned int status = EXTENT_STATUS_WRITTEN; ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); int len = ext4_ext_get_actual_len(ex); if (prev && (prev != lblk)) ext4_es_cache_extent(inode, prev, lblk - prev, ~0, EXTENT_STATUS_HOLE); if (ext4_ext_is_unwritten(ex)) status = EXTENT_STATUS_UNWRITTEN; ext4_es_cache_extent(inode, lblk, len, ext4_ext_pblock(ex), status); prev = lblk + len; } } static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_idx *idx, int depth, int flags) { struct buffer_head *bh; int err; 
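	/*
	 * Extent tree buffers are allocated GFP_NOFS because the tree may be
	 * traversed while a journal handle is held; __GFP_NOFAIL is added
	 * below only when the caller passed EXT4_EX_NOFAIL.
	 */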
gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; ext4_fsblk_t pblk; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); if (!bh_uptodate_or_lock(bh)) { trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); err = ext4_read_bh(bh, 0, NULL); if (err < 0) goto errout; } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries */ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { struct ext4_extent_header *eh = ext_block_hdr(bh); ext4_cache_extents(inode, eh); } return bh; errout: put_bh(bh); return ERR_PTR(err); } #define read_extent_tree_block(inode, idx, depth, flags) \ __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* * This function is called to cache a file's extent information in the * extent status tree */ int ext4_ext_precache(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_ext_path *path = NULL; struct buffer_head *bh; int i = 0, depth, ret = 0; if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return 0; /* not an extent-mapped inode */ down_read(&ei->i_data_sem); depth = ext_depth(inode); /* Don't cache anything if there are no external extent blocks */ if (!depth) { up_read(&ei->i_data_sem); return ret; } path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), GFP_NOFS); if (path == NULL) { up_read(&ei->i_data_sem); return -ENOMEM; } path[0].p_hdr = ext_inode_hdr(inode); ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0); if (ret) goto out; path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr); while (i >= 0) { /* * If this is a leaf block or we've reached the end of * the index block, go up */ if ((i == depth) || path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) { ext4_ext_path_brelse(path + i); i--; continue; } bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { ret = PTR_ERR(bh); break; } i++; path[i].p_bh = bh; path[i].p_hdr = ext_block_hdr(bh); path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr); } ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED); out: up_read(&ei->i_data_sem); ext4_free_ext_path(path); return ret; } #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { int k, l = path->p_depth; ext_debug(inode, "path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { ext_debug(inode, " %d->%llu", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); } else if (path->p_ext) { ext_debug(inode, " %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext4_ext_pblock(path->p_ext)); } else ext_debug(inode, " []"); } ext_debug(inode, "\n"); } static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) { int depth = ext_depth(inode); struct ext4_extent_header *eh; struct ext4_extent *ex; int i; if (IS_ERR_OR_NULL(path)) return; eh = path[depth].p_hdr; ex = EXT_FIRST_EXTENT(eh); ext_debug(inode, "Displaying leaf extents\n"); for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); } ext_debug(inode, "\n"); } static void ext4_ext_show_move(struct 
inode *inode, struct ext4_ext_path *path, ext4_fsblk_t newblock, int level) { int depth = ext_depth(inode); struct ext4_extent *ex; if (depth != level) { struct ext4_extent_idx *idx; idx = path[level].p_idx; while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { ext_debug(inode, "%d: move %d:%llu in new index %llu\n", level, le32_to_cpu(idx->ei_block), ext4_idx_pblock(idx), newblock); idx++; } return; } ex = path[depth].p_ext; while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), newblock); ex++; } } #else #define ext4_ext_show_path(inode, path) #define ext4_ext_show_leaf(inode, path) #define ext4_ext_show_move(inode, path, newblock, level) #endif /* * ext4_ext_binsearch_idx: * binary search for the closest index of the given block * the header must be checked before calling this */ static void ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_idx *r, *l, *m; ext_debug(inode, "binsearch for %u(idx): ", block); l = EXT_FIRST_INDEX(eh) + 1; r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), r, le32_to_cpu(r->ei_block)); if (block < le32_to_cpu(m->ei_block)) r = m - 1; else l = m + 1; } path->p_idx = l - 1; ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH { struct ext4_extent_idx *chix, *ix; int k; chix = ix = EXT_FIRST_INDEX(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { if (k != 0 && le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { printk(KERN_DEBUG "k=%d, ix=0x%p, " "first=0x%p\n", k, ix, EXT_FIRST_INDEX(eh)); printk(KERN_DEBUG "%u <= %u\n", le32_to_cpu(ix->ei_block), le32_to_cpu(ix[-1].ei_block)); } BUG_ON(k && le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)); if (block < le32_to_cpu(ix->ei_block)) break; chix = ix; } BUG_ON(chix != path->p_idx); } #endif } /* * ext4_ext_binsearch: * binary search for closest extent of the given block * the header must be checked before calling this */ static void ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent *r, *l, *m; if (eh->eh_entries == 0) { /* * this leaf is empty: * we get such a leaf in split/add case */ return; } ext_debug(inode, "binsearch for %u: ", block); l = EXT_FIRST_EXTENT(eh) + 1; r = EXT_LAST_EXTENT(eh); while (l <= r) { m = l + (r - l) / 2; ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), r, le32_to_cpu(r->ee_block)); if (block < le32_to_cpu(m->ee_block)) r = m - 1; else l = m + 1; } path->p_ext = l - 1; ext_debug(inode, " -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_pblock(path->p_ext), ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH { struct ext4_extent *chex, *ex; int k; chex = ex = EXT_FIRST_EXTENT(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { BUG_ON(k && le32_to_cpu(ex->ee_block) <= le32_to_cpu(ex[-1].ee_block)); if (block < le32_to_cpu(ex->ee_block)) break; chex = ex; } BUG_ON(chex != path->p_ext); } #endif } void ext4_ext_tree_init(handle_t *handle, struct inode *inode) { struct ext4_extent_header *eh; eh = 
ext_inode_hdr(inode); eh->eh_depth = 0; eh->eh_entries = 0; eh->eh_magic = EXT4_EXT_MAGIC; eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); eh->eh_generation = 0; ext4_mark_inode_dirty(handle, inode); } struct ext4_ext_path * ext4_find_extent(struct inode *inode, ext4_lblk_t block, struct ext4_ext_path *path, int flags) { struct ext4_extent_header *eh; struct buffer_head *bh; short int depth, i, ppos = 0; int ret; gfp_t gfp_flags = GFP_NOFS; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; eh = ext_inode_hdr(inode); depth = ext_depth(inode); if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) { EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d", depth); ret = -EFSCORRUPTED; goto err; } if (path) { ext4_ext_drop_refs(path); if (depth > path[0].p_maxdepth) { kfree(path); path = NULL; } } if (!path) { /* account possible depth increase */ path = kcalloc(depth + 2, sizeof(struct ext4_ext_path), gfp_flags); if (unlikely(!path)) return ERR_PTR(-ENOMEM); path[0].p_maxdepth = depth + 1; } path[0].p_hdr = eh; path[0].p_bh = NULL; i = depth; if (!(flags & EXT4_EX_NOCACHE) && depth == 0) ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { ext_debug(inode, "depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); ext4_ext_binsearch_idx(inode, path + ppos, block); path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); path[ppos].p_depth = i; path[ppos].p_ext = NULL; bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; } eh = ext_block_hdr(bh); ppos++; path[ppos].p_bh = bh; path[ppos].p_hdr = eh; } path[ppos].p_depth = i; path[ppos].p_ext = NULL; path[ppos].p_idx = NULL; /* find extent */ ext4_ext_binsearch(inode, path + ppos, block); /* if not an empty leaf */ if (path[ppos].p_ext) path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); ext4_ext_show_path(inode, path); return path; err: ext4_free_ext_path(path); return ERR_PTR(ret); } /* * ext4_ext_insert_index: * insert new index [@logical;@ptr] into the block at @curp; * check where to insert: before @curp or after @curp */ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, struct ext4_ext_path *curp, int logical, ext4_fsblk_t ptr) { struct ext4_extent_idx *ix; int len, err; err = ext4_ext_get_access(handle, inode, curp); if (err) return err; if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { EXT4_ERROR_INODE(inode, "logical %d == ei_block %d!", logical, le32_to_cpu(curp->p_idx->ei_block)); return -EFSCORRUPTED; } if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) >= le16_to_cpu(curp->p_hdr->eh_max))) { EXT4_ERROR_INODE(inode, "eh_entries %d >= eh_max %d!", le16_to_cpu(curp->p_hdr->eh_entries), le16_to_cpu(curp->p_hdr->eh_max)); return -EFSCORRUPTED; } if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ ext_debug(inode, "insert new index %d after: %llu\n", logical, ptr); ix = curp->p_idx + 1; } else { /* insert before */ ext_debug(inode, "insert new index %d before: %llu\n", logical, ptr); ix = curp->p_idx; } if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); return -EFSCORRUPTED; } len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; BUG_ON(len < 0); if (len > 0) { ext_debug(inode, "insert new index %d: " "move %d indices from 0x%p to 0x%p\n", logical, len, ix, ix + 1); memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); } ix->ei_block = cpu_to_le32(logical); ext4_idx_store_pblock(ix, ptr); 
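	/*
	 * The new slot now holds the child's logical start and physical block
	 * number; bump eh_entries so that lookups see it.
	 */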
le16_add_cpu(&curp->p_hdr->eh_entries, 1); if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); return -EFSCORRUPTED; } err = ext4_ext_dirty(handle, inode, curp); ext4_std_error(inode->i_sb, err); return err; } /* * ext4_ext_split: * inserts new subtree into the path, using free index entry * at depth @at: * - allocates all needed blocks (new leaf and all intermediate index blocks) * - makes decision where to split * - moves remaining extents and index entries (right to the split point) * into the newly allocated blocks * - initializes subtree */ static int ext4_ext_split(handle_t *handle, struct inode *inode, unsigned int flags, struct ext4_ext_path *path, struct ext4_extent *newext, int at) { struct buffer_head *bh = NULL; int depth = ext_depth(inode); struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; int i = at, k, m, a; ext4_fsblk_t newblock, oldblock; __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ gfp_t gfp_flags = GFP_NOFS; int err = 0; size_t ext_size = 0; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ /* if current leaf will be split, then we should use * border from split point */ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); return -EFSCORRUPTED; } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; ext_debug(inode, "leaf will be split." " next leaf starts at %d\n", le32_to_cpu(border)); } else { border = newext->ee_block; ext_debug(inode, "leaf will be added." " next leaf starts at %d\n", le32_to_cpu(border)); } /* * If error occurs, then we break processing * and mark filesystem read-only. index won't * be inserted and tree will be in consistent * state. Next mount will repair buffers too. */ /* * Get array to track all allocated blocks. * We need this to handle errors and free blocks * upon them. 
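	 * (For example, splitting a depth-3 tree at @at == 1 allocates
	 * depth - at == 2 new blocks: one new leaf plus one intermediate
	 * index block. If any later step fails, every block recorded in
	 * ablocks[] is freed again in the cleanup path.)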
*/ ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags); if (!ablocks) return -ENOMEM; /* allocate all needed blocks */ ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err, flags); if (newblock == 0) goto cleanup; ablocks[a] = newblock; } /* initialize new leaf */ newblock = ablocks[--a]; if (unlikely(newblock == 0)) { EXT4_ERROR_INODE(inode, "newblock == 0!"); err = -EFSCORRUPTED; goto cleanup; } bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; } lock_buffer(bh); err = ext4_journal_get_create_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (err) goto cleanup; neh = ext_block_hdr(bh); neh->eh_entries = 0; neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; neh->eh_generation = 0; /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max)) { EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", path[depth].p_hdr->eh_entries, path[depth].p_hdr->eh_max); err = -EFSCORRUPTED; goto cleanup; } /* start copy from next extent */ m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; ext4_ext_show_move(inode, path, newblock, depth); if (m) { struct ext4_extent *ex; ex = EXT_FIRST_EXTENT(neh); memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); le16_add_cpu(&neh->eh_entries, m); } /* zero out unused area in the extent block */ ext_size = sizeof(struct ext4_extent_header) + sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries); memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto cleanup; brelse(bh); bh = NULL; /* correct old leaf */ if (m) { err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto cleanup; le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto cleanup; } /* create intermediate indexes */ k = depth - at - 1; if (unlikely(k < 0)) { EXT4_ERROR_INODE(inode, "k %d < 0!", k); err = -EFSCORRUPTED; goto cleanup; } if (k) ext_debug(inode, "create %d intermediate indices\n", k); /* insert new index into current index block */ /* current depth stored in i var */ i = depth - 1; while (k--) { oldblock = newblock; newblock = ablocks[--a]; bh = sb_getblk(inode->i_sb, newblock); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; } lock_buffer(bh); err = ext4_journal_get_create_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (err) goto cleanup; neh = ext_block_hdr(bh); neh->eh_entries = cpu_to_le16(1); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); neh->eh_depth = cpu_to_le16(depth - i); neh->eh_generation = 0; fidx = EXT_FIRST_INDEX(neh); fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); /* move remainder of path[i] to the new index block */ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr))) { EXT4_ERROR_INODE(inode, "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", le32_to_cpu(path[i].p_ext->ee_block)); err = -EFSCORRUPTED; goto cleanup; } /* start copy indexes */ m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; 
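		/*
		 * m is the number of index entries to the right of the split
		 * point that will be copied into the new index block; the
		 * levels being split below @at are expected to be full, so
		 * EXT_MAX_INDEX() coincides with the last live entry here.
		 */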
ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx, EXT_MAX_INDEX(path[i].p_hdr)); ext4_ext_show_move(inode, path, newblock, i); if (m) { memmove(++fidx, path[i].p_idx, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } /* zero out unused area in the extent block */ ext_size = sizeof(struct ext4_extent_header) + (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries)); memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto cleanup; brelse(bh); bh = NULL; /* correct old index */ if (m) { err = ext4_ext_get_access(handle, inode, path + i); if (err) goto cleanup; le16_add_cpu(&path[i].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + i); if (err) goto cleanup; } i--; } /* insert new index */ err = ext4_ext_insert_index(handle, inode, path + at, le32_to_cpu(border), newblock); cleanup: if (bh) { if (buffer_locked(bh)) unlock_buffer(bh); brelse(bh); } if (err) { /* free all allocated blocks in error case */ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, EXT4_FREE_BLOCKS_METADATA); } } kfree(ablocks); return err; } /* * ext4_ext_grow_indepth: * implements tree growing procedure: * - allocates new block * - moves top-level data (index block or leaf) into the new block * - initializes new top-level, creating index that points to the * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, unsigned int flags) { struct ext4_extent_header *neh; struct buffer_head *bh; ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; size_t ext_size = 0; /* Try to prepend new index to old one */ if (ext_depth(inode)) goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); if (goal > le32_to_cpu(es->s_first_data_block)) { flags |= EXT4_MB_HINT_TRY_GOAL; goal--; } else goal = ext4_inode_to_goal_block(inode); newblock = ext4_new_meta_blocks(handle, inode, goal, flags, NULL, &err); if (newblock == 0) return err; bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); err = ext4_journal_get_create_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (err) { unlock_buffer(bh); goto out; } ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */ memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); /* zero out unused area in the extent block */ memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); /* set size of new block */ neh = ext_block_hdr(bh); /* old root could have indexes or leaves * so calculate e_max right way */ if (ext_depth(inode)) neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); else neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); set_buffer_verified(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto out; /* Update top-level index: num,max,pointer */ neh = ext_inode_hdr(inode); neh->eh_entries = cpu_to_le16(1); ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); if (neh->eh_depth == 0) { /* Root extent block becomes index block */ neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); EXT_FIRST_INDEX(neh)->ei_block = EXT_FIRST_EXTENT(neh)->ee_block; } 
ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n", le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), ext4_idx_pblock(EXT_FIRST_INDEX(neh))); le16_add_cpu(&neh->eh_depth, 1); err = ext4_mark_inode_dirty(handle, inode); out: brelse(bh); return err; } /* * ext4_ext_create_new_leaf: * finds empty index and adds new leaf. * if no free index is found, then it requests in-depth growing. */ static struct ext4_ext_path * ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, unsigned int mb_flags, unsigned int gb_flags, struct ext4_ext_path *path, struct ext4_extent *newext) { struct ext4_ext_path *curp; int depth, i, err = 0; ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block); repeat: i = depth = ext_depth(inode); /* walk up to the tree and look for free index entry */ curp = path + depth; while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { i--; curp--; } /* we use already allocated block for index block, * so subsequent data blocks should be contiguous */ if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); if (err) goto errout; /* refill path */ path = ext4_find_extent(inode, ee_block, path, gb_flags); return path; } /* tree is full, time to grow in depth */ err = ext4_ext_grow_indepth(handle, inode, mb_flags); if (err) goto errout; /* refill path */ path = ext4_find_extent(inode, ee_block, path, gb_flags); if (IS_ERR(path)) return path; /* * only first (depth 0 -> 1) produces free space; * in all other cases we have to split the grown tree */ depth = ext_depth(inode); if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { /* now we need to split */ goto repeat; } return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } /* * search the closest allocated block to the left for *logical * and returns it at @logical + it's physical address at @phys * if *logical is the smallest allocated block, the function * returns 0 at @phys * return value contains 0 (success) or error code */ static int ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *logical, ext4_fsblk_t *phys) { struct ext4_extent_idx *ix; struct ext4_extent *ex; int depth, ee_len; if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; if (depth == 0 && path->p_ext == NULL) return 0; /* usually extent in the path covers blocks smaller * then *logical, but it can be that extent is the * first one in the file */ ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { EXT4_ERROR_INODE(inode, "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", *logical, le32_to_cpu(ex->ee_block)); return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", ix != NULL ? 
le32_to_cpu(ix->ei_block) : 0, le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block), depth); return -EFSCORRUPTED; } } return 0; } if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); return -EFSCORRUPTED; } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; *phys = ext4_ext_pblock(ex) + ee_len - 1; return 0; } /* * Search the closest allocated block to the right for *logical * and returns it at @logical + it's physical address at @phys. * If not exists, return 0 and @phys is set to 0. We will return * 1 which means we found an allocated block and ret_ex is valid. * Or return a (< 0) error code. */ static int ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *logical, ext4_fsblk_t *phys, struct ext4_extent *ret_ex) { struct buffer_head *bh = NULL; struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; if (depth == 0 && path->p_ext == NULL) return 0; /* usually extent in the path covers blocks smaller * then *logical, but it can be that extent is the * first one in the file */ ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { EXT4_ERROR_INODE(inode, "first_extent(path[%d].p_hdr) != ex", depth); return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "ix != EXT_FIRST_INDEX *logical %d!", *logical); return -EFSCORRUPTED; } } goto found_extent; } if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); return -EFSCORRUPTED; } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { /* next allocated block in this leaf */ ex++; goto found_extent; } /* go up and search for index to the right */ while (--depth >= 0) { ix = path[depth].p_idx; if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) goto got_index; } /* we've gone up to the root and found no index to the right */ return 0; got_index: /* we've found index to the right, let's * follow it and find the closest allocated * block to the right */ ix++; while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); put_bh(bh); } bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ex = EXT_FIRST_EXTENT(eh); found_extent: *logical = le32_to_cpu(ex->ee_block); *phys = ext4_ext_pblock(ex); if (ret_ex) *ret_ex = *ex; if (bh) put_bh(bh); return 1; } /* * ext4_ext_next_allocated_block: * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. * NOTE: it considers block number from index entry as * allocated block. Thus, index entries have to be consistent * with leaves. 
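 * (Example: when the path already sits on the last extent of its leaf,
 * the returned block comes from the nearest level above that still has
 * a following index entry, i.e. le32_to_cpu(p_idx[1].ei_block) at that
 * level, which by the above assumption is the first block mapped by the
 * next subtree.)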
*/ ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path) { int depth; BUG_ON(path == NULL); depth = path->p_depth; if (depth == 0 && path->p_ext == NULL) return EXT_MAX_BLOCKS; while (depth >= 0) { struct ext4_ext_path *p = &path[depth]; if (depth == path->p_depth) { /* leaf */ if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr)) return le32_to_cpu(p->p_ext[1].ee_block); } else { /* index */ if (p->p_idx != EXT_LAST_INDEX(p->p_hdr)) return le32_to_cpu(p->p_idx[1].ei_block); } depth--; } return EXT_MAX_BLOCKS; } /* * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCKS */ static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) { int depth; BUG_ON(path == NULL); depth = path->p_depth; /* zero-tree has no leaf blocks at all */ if (depth == 0) return EXT_MAX_BLOCKS; /* go to index block */ depth--; while (depth >= 0) { if (path[depth].p_idx != EXT_LAST_INDEX(path[depth].p_hdr)) return (ext4_lblk_t) le32_to_cpu(path[depth].p_idx[1].ei_block); depth--; } return EXT_MAX_BLOCKS; } /* * ext4_ext_correct_indexes: * if leaf gets modified and modified extent is first in the leaf, * then we have to correct all indexes above. * TODO: do we need to correct tree in all cases? */ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { struct ext4_extent_header *eh; int depth = ext_depth(inode); struct ext4_extent *ex; __le32 border; int k, err = 0; eh = path[depth].p_hdr; ex = path[depth].p_ext; if (unlikely(ex == NULL || eh == NULL)) { EXT4_ERROR_INODE(inode, "ex %p == NULL or eh %p == NULL", ex, eh); return -EFSCORRUPTED; } if (depth == 0) { /* there is no tree at all */ return 0; } if (ex != EXT_FIRST_EXTENT(eh)) { /* we correct tree if first leaf got modified only */ return 0; } /* * TODO: we need correction if border is smaller than current one */ k = depth - 1; border = path[depth].p_ext->ee_block; err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) return err; while (k--) { /* change all left-side indexes */ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) break; err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) goto clean; } return 0; clean: /* * The path[k].p_bh is either unmodified or with no verified bit * set (see ext4_ext_get_access()). So just clear the verified bit * of the successfully modified extents buffers, which will force * these extents to be checked to avoid using inconsistent data. */ while (++k < depth) clear_buffer_verified(path[k].p_bh); return err; } static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { unsigned short ext1_ee_len, ext2_ee_len; if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) return 0; ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != le32_to_cpu(ex2->ee_block)) return 0; if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; if (ext4_ext_is_unwritten(ex1) && ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) return 0; #endif if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) return 1; return 0; } /* * This function tries to merge the "ex" extent to the next extent in the tree. 
* It always tries to merge towards right. If you want to merge towards * left, pass "ex - 1" as argument instead of "ex". * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns * 1 if they got merged. */ static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { struct ext4_extent_header *eh; unsigned int depth, len; int merge_done = 0, unwritten; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); eh = path[depth].p_hdr; while (ex < EXT_LAST_EXTENT(eh)) { if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); if (unwritten) ext4_ext_mark_unwritten(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) * sizeof(struct ext4_extent); memmove(ex + 1, ex + 2, len); } le16_add_cpu(&eh->eh_entries, -1); merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); } return merge_done; } /* * This function does a very simple check to see if we can collapse * an extent tree with a single extent tree leaf block into the inode. */ static void ext4_ext_try_to_merge_up(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { size_t s; unsigned max_root = ext4_ext_space_root(inode, 0); ext4_fsblk_t blk; if ((path[0].p_depth != 1) || (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) return; /* * We need to modify the block allocation bitmap and the block * group descriptor to release the extent tree block. If we * can't get the journal credits, give up. */ if (ext4_journal_extend(handle, 2, ext4_free_metadata_revoke_credits(inode->i_sb, 1))) return; /* * Copy the extent data up to the inode */ blk = ext4_idx_pblock(path[0].p_idx); s = le16_to_cpu(path[1].p_hdr->eh_entries) * sizeof(struct ext4_extent_idx); s += sizeof(struct ext4_extent_header); path[1].p_maxdepth = path[0].p_maxdepth; memcpy(path[0].p_hdr, path[1].p_hdr, s); path[0].p_depth = 0; path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); path[0].p_hdr->eh_max = cpu_to_le16(max_root); ext4_ext_path_brelse(path + 1); ext4_free_blocks(handle, inode, NULL, blk, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); } /* * This function tries to merge the @ex extent to neighbours in the tree, then * tries to collapse the extent tree into the inode. */ static void ext4_ext_try_to_merge(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { struct ext4_extent_header *eh; unsigned int depth; int merge_done = 0; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); eh = path[depth].p_hdr; if (ex > EXT_FIRST_EXTENT(eh)) merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); if (!merge_done) (void) ext4_ext_try_to_merge_right(inode, path, ex); ext4_ext_try_to_merge_up(handle, inode, path); } /* * check if a portion of the "newext" extent overlaps with an * existing extent. * * If there is an overlap discovered, it updates the length of the newext * such that there will be no overlap, and then returns 1. * If there is no overlap found, it returns 0. 
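 * (Worked example, ignoring bigalloc cluster rounding: if @newext maps
 * logical blocks 100..119 and the next allocated block is 110, ee_len is
 * trimmed to 10 so the extent covers 100..109 only, and 1 is returned.)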
*/ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, struct inode *inode, struct ext4_extent *newext, struct ext4_ext_path *path) { ext4_lblk_t b1, b2; unsigned int depth, len1; unsigned int ret = 0; b1 = le32_to_cpu(newext->ee_block); len1 = ext4_ext_get_actual_len(newext); depth = ext_depth(inode); if (!path[depth].p_ext) goto out; b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); /* * get the next allocated block if the extent in the path * is before the requested block(s) */ if (b2 < b1) { b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out; b2 = EXT4_LBLK_CMASK(sbi, b2); } /* check for wrap through zero on extent logical start block*/ if (b1 + len1 < b1) { len1 = EXT_MAX_BLOCKS - b1; newext->ee_len = cpu_to_le16(len1); ret = 1; } /* check for overlap */ if (b1 + len1 > b2) { newext->ee_len = cpu_to_le16(b2 - b1); ret = 1; } out: return ret; } /* * ext4_ext_insert_extent: * tries to merge requested extent into the existing extent or * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ struct ext4_ext_path * ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *newext, int gb_flags) { struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ int depth, len, err = 0; ext4_lblk_t next; int mb_flags = 0, unwritten; if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); err = -EFSCORRUPTED; goto errout; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); err = -EFSCORRUPTED; goto errout; } /* try to insert block into found extent and return */ if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) { /* * Try to see whether we should rather test the extent on * right from ex, or from the left of ex. This is because * ext4_find_extent() can return either extent on the * left, or on the right from the searched position. This * will make merging more effective. 
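		 * (Concretely: if the returned extent ends before the new
		 * range even starts, its right-hand neighbour is tried as a
		 * prepend target; if the new range ends before the returned
		 * extent begins, the left-hand neighbour is tried as an
		 * append target instead.)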
*/ if (ex < EXT_LAST_EXTENT(eh) && (le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) < le32_to_cpu(newext->ee_block))) { ex += 1; goto prepend; } else if ((ex > EXT_FIRST_EXTENT(eh)) && (le32_to_cpu(newext->ee_block) + ext4_ext_get_actual_len(newext) < le32_to_cpu(ex->ee_block))) ex -= 1; /* Try to append newex to the ex */ if (ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug(inode, "append [%d]%d block to %u:[%d]%d" "(from %llu)\n", ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); nearex = ex; goto merge; } prepend: /* Try to prepend newex to the ex */ if (ext4_can_extents_be_merged(inode, newext, ex)) { ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n", le32_to_cpu(newext->ee_block), ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); nearex = ex; goto merge; } } depth = ext_depth(inode); eh = path[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) goto has_space; /* probably next leaf has space for us? */ fex = EXT_LAST_EXTENT(eh); next = EXT_MAX_BLOCKS; if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { struct ext4_ext_path *npath; ext_debug(inode, "next leaf block - %u\n", next); npath = ext4_find_extent(inode, next, NULL, gb_flags); if (IS_ERR(npath)) { err = PTR_ERR(npath); goto errout; } BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { ext_debug(inode, "next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); ext4_free_ext_path(path); path = npath; goto has_space; } ext_debug(inode, "next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); ext4_free_ext_path(npath); } /* * There is no free space in the found leaf. * We're gonna add a new leaf in the tree. 
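	 * (ext4_ext_create_new_leaf() below either splits the tree at the
	 * nearest level that still has a free index slot, or grows the tree
	 * one level deeper first when every level is already full.)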
*/ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) mb_flags |= EXT4_MB_USE_RESERVED; path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, path, newext); if (IS_ERR(path)) return path; depth = ext_depth(inode); eh = path[depth].p_hdr; has_space: nearex = path[depth].p_ext; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; if (!nearex) { /* there is no extent in this leaf, create first one */ ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext)); nearex = EXT_FIRST_EXTENT(eh); } else { if (le32_to_cpu(newext->ee_block) > le32_to_cpu(nearex->ee_block)) { /* Insert after */ ext_debug(inode, "insert %u:%llu:[%d]%d before: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); nearex++; } else { /* Insert before */ BUG_ON(newext->ee_block == nearex->ee_block); ext_debug(inode, "insert %u:%llu:[%d]%d after: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); } len = EXT_LAST_EXTENT(eh) - nearex + 1; if (len > 0) { ext_debug(inode, "insert %u:%llu:[%d]%d: " "move %d extents from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), len, nearex, nearex + 1); memmove(nearex + 1, nearex, len * sizeof(struct ext4_extent)); } } le16_add_cpu(&eh->eh_entries, 1); path[depth].p_ext = nearex; nearex->ee_block = newext->ee_block; ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); nearex->ee_len = newext->ee_len; merge: /* try to merge extents */ if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(handle, inode, path, nearex); /* time to correct all indexes above */ err = ext4_ext_correct_indexes(handle, inode, path); if (err) goto errout; err = ext4_ext_dirty(handle, inode, path + path->p_depth); if (err) goto errout; return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } static int ext4_fill_es_cache_info(struct inode *inode, ext4_lblk_t block, ext4_lblk_t num, struct fiemap_extent_info *fieinfo) { ext4_lblk_t next, end = block + num - 1; struct extent_status es; unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; unsigned int flags; int err; while (block <= end) { next = 0; flags = 0; if (!ext4_es_lookup_extent(inode, block, &next, &es)) break; if (ext4_es_is_unwritten(&es)) flags |= FIEMAP_EXTENT_UNWRITTEN; if (ext4_es_is_delayed(&es)) flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); if (ext4_es_is_hole(&es)) flags |= EXT4_FIEMAP_EXTENT_HOLE; if (next == 0) flags |= FIEMAP_EXTENT_LAST; if (flags & (FIEMAP_EXTENT_DELALLOC| EXT4_FIEMAP_EXTENT_HOLE)) es.es_pblk = 0; else es.es_pblk = ext4_es_pblock(&es); err = fiemap_fill_next_extent(fieinfo, (__u64)es.es_lblk << blksize_bits, (__u64)es.es_pblk << blksize_bits, (__u64)es.es_len << blksize_bits, flags); if (next == 0) break; block = next; if (err < 0) return err; if (err == 1) return 0; } return 0; } /* * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole * * Determine hole length (and start if easily possible) around given logical * block. 
We don't try too hard to find the beginning of the hole but @path * actually points to extent before @lblk, we provide it. * * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; ext4_lblk_t len; ex = path[depth].p_ext; if (ex == NULL) { /* there is no extent yet, so gap is [0;-] */ *lblk = 0; len = EXT_MAX_BLOCKS; } else if (*lblk < le32_to_cpu(ex->ee_block)) { len = le32_to_cpu(ex->ee_block) - *lblk; } else if (*lblk >= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)) { ext4_lblk_t next; *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); next = ext4_ext_next_allocated_block(path); BUG_ON(next == *lblk); len = next - *lblk; } else { BUG(); } return len; } /* * ext4_ext_rm_idx: * removes index from the index block. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; int k = depth - 1; /* free index block */ leaf = ext4_idx_pblock(path[k].p_idx); if (unlikely(path[k].p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k); return -EFSCORRUPTED; } err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) { int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx; len *= sizeof(struct ext4_extent_idx); memmove(path[k].p_idx, path[k].p_idx + 1, len); } le16_add_cpu(&path[k].p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path + k); if (err) return err; ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); trace_ext4_ext_rm_idx(inode, leaf); ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); while (--k >= 0) { if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr)) break; err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block; err = ext4_ext_dirty(handle, inode, path + k); if (err) goto clean; } return 0; clean: /* * The path[k].p_bh is either unmodified or with no verified bit * set (see ext4_ext_get_access()). So just clear the verified bit * of the successfully modified extents buffers, which will force * these extents to be checked to avoid using inconsistent data. */ while (++k < depth) clear_buffer_verified(path[k].p_bh); return err; } /* * ext4_ext_calc_credits_for_single_extent: * This routine returns max. credits that needed to insert an extent * to the extent tree. * When pass the actual path, the caller should calculate credits * under i_data_sem. */ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, struct ext4_ext_path *path) { if (path) { int depth = ext_depth(inode); int ret = 0; /* probably there is space in leaf? */ if (le16_to_cpu(path[depth].p_hdr->eh_entries) < le16_to_cpu(path[depth].p_hdr->eh_max)) { /* * There are some space in the leaf tree, no * need to account for leaf block credit * * bitmaps and block group descriptor blocks * and other metadata blocks still need to be * accounted. */ /* 1 bitmap, 1 block group descriptor */ ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); return ret; } } return ext4_chunk_trans_blocks(inode, nrblocks); } /* * How many index/leaf blocks need to change/allocate to add @extents extents? 
* * If we add a single extent, then in the worse case, each tree level * index/leaf need to be changed in case of the tree split. * * If more extents are inserted, they could cause the whole tree split more * than once, but this is really rare. */ int ext4_ext_index_trans_blocks(struct inode *inode, int extents) { int index; int depth; /* If we are converting the inline data, only one is needed here. */ if (ext4_has_inline_data(inode)) return 1; depth = ext_depth(inode); if (extents <= 1) index = depth * 2; else index = depth * 3; return index; } static inline int get_default_free_blocks_flags(struct inode *inode) { if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; else if (ext4_should_journal_data(inode)) return EXT4_FREE_BLOCKS_FORGET; return 0; } /* * ext4_rereserve_cluster - increment the reserved cluster count when * freeing a cluster with a pending reservation * * @inode - file containing the cluster * @lblk - logical block in cluster to be reserved * * Increments the reserved cluster count and adjusts quota in a bigalloc * file system when freeing a partial cluster containing at least one * delayed and unwritten block. A partial cluster meeting that * requirement will have a pending reservation. If so, the * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to * defer reserved and allocated space accounting to a subsequent call * to this function. */ static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); dquot_reclaim_block(inode, EXT4_C2B(sbi, 1)); spin_lock(&ei->i_block_reservation_lock); ei->i_reserved_data_blocks++; percpu_counter_add(&sbi->s_dirtyclusters_counter, 1); spin_unlock(&ei->i_block_reservation_lock); percpu_counter_add(&sbi->s_freeclusters_counter, 1); ext4_remove_pending(inode, lblk); } static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, struct partial_cluster *partial, ext4_lblk_t from, ext4_lblk_t to) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned short ee_len = ext4_ext_get_actual_len(ex); ext4_fsblk_t last_pblk, pblk; ext4_lblk_t num; int flags; /* only extent tail removal is allowed */ if (from < le32_to_cpu(ex->ee_block) || to != le32_to_cpu(ex->ee_block) + ee_len - 1) { ext4_error(sbi->s_sb, "strange request: removal(2) %u-%u from %u:%u", from, to, le32_to_cpu(ex->ee_block), ee_len); return 0; } #ifdef EXTENTS_STATS spin_lock(&sbi->s_ext_stats_lock); sbi->s_ext_blocks += ee_len; sbi->s_ext_extents++; if (ee_len < sbi->s_ext_min) sbi->s_ext_min = ee_len; if (ee_len > sbi->s_ext_max) sbi->s_ext_max = ee_len; if (ext_depth(inode) > sbi->s_depth_max) sbi->s_depth_max = ext_depth(inode); spin_unlock(&sbi->s_ext_stats_lock); #endif trace_ext4_remove_blocks(inode, ex, from, to, partial); /* * if we have a partial cluster, and it's different from the * cluster of the last block in the extent, we free it */ last_pblk = ext4_ext_pblock(ex) + ee_len - 1; if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, last_pblk)) { if (partial->state == tofree) { flags = get_default_free_blocks_flags(inode); if (ext4_is_pending(inode, partial->lblk)) flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER; ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial->pclu), sbi->s_cluster_ratio, flags); if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) ext4_rereserve_cluster(inode, 
partial->lblk); } partial->state = initial; } num = le32_to_cpu(ex->ee_block) + ee_len - from; pblk = ext4_ext_pblock(ex) + ee_len - num; /* * We free the partial cluster at the end of the extent (if any), * unless the cluster is used by another extent (partial_cluster * state is nofree). If a partial cluster exists here, it must be * shared with the last block in the extent. */ flags = get_default_free_blocks_flags(inode); /* partial, left end cluster aligned, right end unaligned */ if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) && (EXT4_LBLK_CMASK(sbi, to) >= from) && (partial->state != nofree)) { if (ext4_is_pending(inode, to)) flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER; ext4_free_blocks(handle, inode, NULL, EXT4_PBLK_CMASK(sbi, last_pblk), sbi->s_cluster_ratio, flags); if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) ext4_rereserve_cluster(inode, to); partial->state = initial; flags = get_default_free_blocks_flags(inode); } flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; /* * For bigalloc file systems, we never free a partial cluster * at the beginning of the extent. Instead, we check to see if we * need to free it on a subsequent call to ext4_remove_blocks, * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space. */ flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; ext4_free_blocks(handle, inode, NULL, pblk, num, flags); /* reset the partial cluster if we've freed past it */ if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk)) partial->state = initial; /* * If we've freed the entire extent but the beginning is not left * cluster aligned and is not marked as ineligible for freeing we * record the partial cluster at the beginning of the extent. It * wasn't freed by the preceding ext4_free_blocks() call, and we * need to look farther to the left to determine if it's to be freed * (not shared with another extent). Else, reset the partial * cluster - we're either done freeing or the beginning of the * extent is left cluster aligned. */ if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) { if (partial->state == initial) { partial->pclu = EXT4_B2C(sbi, pblk); partial->lblk = from; partial->state = tofree; } } else { partial->state = initial; } return 0; } /* * ext4_ext_rm_leaf() Removes the extents associated with the * blocks appearing between "start" and "end". Both "start" * and "end" must appear in the same extent or EIO is returned. * * @handle: The journal handle * @inode: The files inode * @path: The path to the leaf * @partial_cluster: The cluster which we'll have to free if all extents * has been released from it. However, if this value is * negative, it's a cluster just to the right of the * punched region and it must not be freed. 
* @start: The first block to remove * @end: The last block to remove */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct partial_cluster *partial, ext4_lblk_t start, ext4_lblk_t end) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int err = 0, correct_index = 0; int depth = ext_depth(inode), credits, revoke_credits; struct ext4_extent_header *eh; ext4_lblk_t a, b; unsigned num; ext4_lblk_t ex_ee_block; unsigned short ex_ee_len; unsigned unwritten = 0; struct ext4_extent *ex; ext4_fsblk_t pblk; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug(inode, "truncate since %u in leaf to %u\n", start, end); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); return -EFSCORRUPTED; } /* find where to start removing */ ex = path[depth].p_ext; if (!ex) ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); trace_ext4_ext_rm_leaf(inode, start, ex, partial); while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { if (ext4_ext_is_unwritten(ex)) unwritten = 1; else unwritten = 0; ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block, unwritten, ex_ee_len); path[depth].p_ext = ex; a = max(ex_ee_block, start); b = min(ex_ee_block + ex_ee_len - 1, end); ext_debug(inode, " border %u:%u\n", a, b); /* If this extent is beyond the end of the hole, skip it */ if (end < ex_ee_block) { /* * We're going to skip this extent and move to another, * so note that its first cluster is in use to avoid * freeing it when removing blocks. Eventually, the * right edge of the truncated/punched region will * be just to the left. */ if (sbi->s_cluster_ratio > 1) { pblk = ext4_ext_pblock(ex); partial->pclu = EXT4_B2C(sbi, pblk); partial->state = nofree; } ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); continue; } else if (b != ex_ee_block + ex_ee_len - 1) { EXT4_ERROR_INODE(inode, "can not handle truncate %u:%u " "on extent %u:%u", start, end, ex_ee_block, ex_ee_block + ex_ee_len - 1); err = -EFSCORRUPTED; goto out; } else if (a != ex_ee_block) { /* remove tail of the extent */ num = a - ex_ee_block; } else { /* remove whole extent: excellent! */ num = 0; } /* * 3 for leaf, sb, and inode plus 2 (bmap and group * descriptor) for each block group; assume two block * groups plus ex_ee_len/blocks_per_block_group for * the worst case */ credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); if (ex == EXT_FIRST_EXTENT(eh)) { correct_index = 1; credits += (ext_depth(inode)) + 1; } credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); /* * We may end up freeing some index blocks and data from the * punched range. Note that partial clusters are accounted for * by ext4_free_data_revoke_credits(). 
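* For a rough feel for the numbers (illustrative only): on a typical 4k filesystem with 32768 blocks per group, trimming a 1000-block tail from a non-first extent costs 7 + 2*(1000/32768) = 7 credits plus EXT4_MAXQUOTAS_TRANS_BLOCKS(); only the first extent in a leaf adds the extra ext_depth() + 1 for index correction.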
*/ revoke_credits = ext4_free_metadata_revoke_credits(inode->i_sb, ext_depth(inode)) + ext4_free_data_revoke_credits(inode, b - a + 1); err = ext4_datasem_ensure_credits(handle, inode, credits, credits, revoke_credits); if (err) { if (err > 0) err = -EAGAIN; goto out; } err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; err = ext4_remove_blocks(handle, inode, ex, partial, a, b); if (err) goto out; if (num == 0) /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); ex->ee_len = cpu_to_le16(num); /* * Do not mark unwritten if all the blocks in the * extent have been removed. */ if (unwritten && num) ext4_ext_mark_unwritten(ex); /* * If the extent was completely released, * we need to remove it from the leaf */ if (num == 0) { if (end != EXT_MAX_BLOCKS - 1) { /* * For hole punching, we need to scoot all the * extents up when an extent is removed so that * we dont have blank extents in the middle */ memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * sizeof(struct ext4_extent)); /* Now get rid of the one at the end */ memset(EXT_LAST_EXTENT(eh), 0, sizeof(struct ext4_extent)); } le16_add_cpu(&eh->eh_entries, -1); } err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto out; ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num, ext4_ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); } if (correct_index && eh->eh_entries) err = ext4_ext_correct_indexes(handle, inode, path); /* * If there's a partial cluster and at least one extent remains in * the leaf, free the partial cluster if it isn't shared with the * current extent. If it is shared with the current extent * we reset the partial cluster because we've reached the start of the * truncated/punched region and we're done removing blocks. 
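* (Put differently: if the last block of the extent we stopped at falls in cluster partial->pclu, that cluster still contains in-use blocks and must be left alone.)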
*/ if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) { pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; if (partial->pclu != EXT4_B2C(sbi, pblk)) { int flags = get_default_free_blocks_flags(inode); if (ext4_is_pending(inode, partial->lblk)) flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER; ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial->pclu), sbi->s_cluster_ratio, flags); if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) ext4_rereserve_cluster(inode, partial->lblk); } partial->state = initial; } /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; } /* * ext4_ext_more_to_rm: * returns 1 if current index has to be freed (even partial) */ static int ext4_ext_more_to_rm(struct ext4_ext_path *path) { BUG_ON(path->p_idx == NULL); if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) return 0; /* * if truncate on deeper level happened, it wasn't partial, * so we have to consider current index for truncation */ if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) return 0; return 1; } int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int depth = ext_depth(inode); struct ext4_ext_path *path = NULL; struct partial_cluster partial; handle_t *handle; int i = 0, err = 0; partial.pclu = 0; partial.lblk = 0; partial.state = initial; ext_debug(inode, "truncate since %u to %u\n", start, end); /* probably first extent we're gonna free will be last in block */ handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE, depth + 1, ext4_free_metadata_revoke_credits(inode->i_sb, depth)); if (IS_ERR(handle)) return PTR_ERR(handle); again: trace_ext4_ext_remove_space(inode, start, end, depth); /* * Check if we are removing extents inside the extent tree. If that * is the case, we are going to punch a hole inside the extent tree * so we have to check whether we need to split the extent covering * the last block to remove so we can easily remove the part of it * in ext4_ext_rm_leaf(). */ if (end < EXT_MAX_BLOCKS - 1) { struct ext4_extent *ex; ext4_lblk_t ee_block, ex_end, lblk; ext4_fsblk_t pblk; /* find extent for or closest extent to this block */ path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE | EXT4_EX_NOFAIL); if (IS_ERR(path)) { ext4_journal_stop(handle); return PTR_ERR(path); } depth = ext_depth(inode); /* Leaf not may not exist only if inode has no blocks at all */ ex = path[depth].p_ext; if (!ex) { if (depth) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); err = -EFSCORRUPTED; } goto out; } ee_block = le32_to_cpu(ex->ee_block); ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1; /* * See if the last block is inside the extent, if so split * the extent at 'end' block so we can easily remove the * tail of the first part of the split extent in * ext4_ext_rm_leaf(). */ if (end >= ee_block && end < ex_end) { /* * If we're going to split the extent, note that * the cluster containing the block after 'end' is * in use to avoid freeing it when removing blocks. */ if (sbi->s_cluster_ratio > 1) { pblk = ext4_ext_pblock(ex) + end - ee_block + 1; partial.pclu = EXT4_B2C(sbi, pblk); partial.state = nofree; } /* * Split the extent in two so that 'end' is the last * block in the first new extent. Also we should not * fail removing space due to ENOSPC so try to use * reserved block if that happens. 
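* For example, punching blocks 10..19 out of an extent covering 5..29 splits it at block 20 first, so ext4_ext_rm_leaf() only ever has to remove a whole extent tail (5..19 becomes 5..9).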
*/ path = ext4_force_split_extent_at(handle, inode, path, end + 1, 1); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; } } else if (sbi->s_cluster_ratio > 1 && end >= ex_end && partial.state == initial) { /* * If we're punching, there's an extent to the right. * If the partial cluster hasn't been set, set it to * that extent's first cluster and its state to nofree * so it won't be freed should it contain blocks to be * removed. If it's already set (tofree/nofree), we're * retrying and keep the original partial cluster info * so a cluster marked tofree as a result of earlier * extent removal is not lost. */ lblk = ex_end + 1; err = ext4_ext_search_right(inode, path, &lblk, &pblk, NULL); if (err < 0) goto out; if (pblk) { partial.pclu = EXT4_B2C(sbi, pblk); partial.state = nofree; } } } /* * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ depth = ext_depth(inode); if (path) { int k = i = depth; while (--k > 0) path[k].p_block = le16_to_cpu(path[k].p_hdr->eh_entries)+1; } else { path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), GFP_NOFS | __GFP_NOFAIL); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; } path[0].p_maxdepth = path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) { err = -EFSCORRUPTED; goto out; } } err = 0; while (i >= 0 && err == 0) { if (i == depth) { /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, &partial, start, end); /* root level has p_bh == NULL, brelse() eats this */ ext4_ext_path_brelse(path + i); i--; continue; } /* this is index block */ if (!path[i].p_hdr) { ext_debug(inode, "initialize header\n"); path[i].p_hdr = ext_block_hdr(path[i].p_bh); } if (!path[i].p_idx) { /* this level hasn't been touched yet */ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n", path[i].p_hdr, le16_to_cpu(path[i].p_hdr->eh_entries)); } else { /* we were already here, see at next index */ path[i].p_idx--; } ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n", i, EXT_FIRST_INDEX(path[i].p_hdr), path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { struct buffer_head *bh; /* go to the next level */ ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); bh = read_extent_tree_block(inode, path[i].p_idx, depth - i - 1, EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); break; } /* Yield here to deal with large extent trees. * Should be a no-op if we did IO above. 
*/ cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EFSCORRUPTED; break; } path[i + 1].p_bh = bh; /* save actual number of indexes since this * number is changed at the next iteration */ path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); i++; } else { /* we finished processing this index, go up */ if (path[i].p_hdr->eh_entries == 0 && i > 0) { /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ ext4_ext_path_brelse(path + i); i--; ext_debug(inode, "return to level %d\n", i); } } trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial, path->p_hdr->eh_entries); /* * if there's a partial cluster and we have removed the first extent * in the file, then we also free the partial cluster, if any */ if (partial.state == tofree && err == 0) { int flags = get_default_free_blocks_flags(inode); if (ext4_is_pending(inode, partial.lblk)) flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER; ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial.pclu), sbi->s_cluster_ratio, flags); if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) ext4_rereserve_cluster(inode, partial.lblk); partial.state = initial; } /* TODO: flexible tree reduction should be here */ if (path->p_hdr->eh_entries == 0) { /* * truncate to zero freed all the tree, * so we need to correct eh_depth */ err = ext4_ext_get_access(handle, inode, path); if (err == 0) { ext_inode_hdr(inode)->eh_depth = 0; ext_inode_hdr(inode)->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); err = ext4_ext_dirty(handle, inode, path); } } out: ext4_free_ext_path(path); path = NULL; if (err == -EAGAIN) goto again; ext4_journal_stop(handle); return err; } /* * called at mount time */ void ext4_ext_init(struct super_block *sb) { /* * possible initialization would be here */ if (ext4_has_feature_extents(sb)) { #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) printk(KERN_INFO "EXT4-fs: file extents enabled" #ifdef AGGRESSIVE_TEST ", aggressive tests" #endif #ifdef CHECK_BINSEARCH ", check binsearch" #endif #ifdef EXTENTS_STATS ", stats" #endif "\n"); #endif #ifdef EXTENTS_STATS spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); EXT4_SB(sb)->s_ext_min = 1 << 30; EXT4_SB(sb)->s_ext_max = 0; #endif } } /* * called at umount time */ void ext4_ext_release(struct super_block *sb) { if (!ext4_has_feature_extents(sb)) return; #ifdef EXTENTS_STATS if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { struct ext4_sb_info *sbi = EXT4_SB(sb); printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", sbi->s_ext_blocks, sbi->s_ext_extents, sbi->s_ext_blocks / sbi->s_ext_extents); printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); } #endif } static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) { ext4_lblk_t ee_block; ext4_fsblk_t ee_pblock; unsigned int ee_len; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); if (ee_len == 0) return; ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, EXTENT_STATUS_WRITTEN, 0); } /* FIXME!! 
we need to try to merge to left or right after zero-out */ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock, ee_len); } /* * ext4_split_extent_at() splits an extent at given block. * * @handle: the journal handle * @inode: the file inode * @path: the path to the extent * @split: the logical block where the extent is splitted. * @split_flags: indicates if the extent could be zeroout if split fails, and * the states(init or unwritten) of new extents. * @flags: flags used to insert new extent to extent tree. * * * Splits extent [a, b] into two extents [a, @split) and [@split, b], states * of which are determined by split_flag. * * There are two cases: * a> the extent are splitted into two extent. * b> split is not needed, and just mark the extent. * * Return an extent path pointer on success, or an error pointer on failure. */ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t split, int split_flag, int flags) { ext4_fsblk_t newblock; ext4_lblk_t ee_block; struct ext4_extent *ex, newex, orig_ex, zero_ex; struct ext4_extent *ex2 = NULL; unsigned int ee_len, depth; int err = 0; BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); ext_debug(inode, "logical block %llu\n", (unsigned long long)split); ext4_ext_show_leaf(inode, path); depth = ext_depth(inode); ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); newblock = split - ee_block + ext4_ext_pblock(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); BUG_ON(!ext4_ext_is_unwritten(ex) && split_flag & (EXT4_EXT_MAY_ZEROOUT | EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; if (split == ee_block) { /* * case b: block @split is the block that the extent begins with * then we just change the state of the extent, and splitting * is not needed. */ if (split_flag & EXT4_EXT_MARK_UNWRIT2) ext4_ext_mark_unwritten(ex); else ext4_ext_mark_initialized(ex); if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(handle, inode, path, ex); err = ext4_ext_dirty(handle, inode, path + path->p_depth); goto out; } /* case a */ memcpy(&orig_ex, ex, sizeof(orig_ex)); ex->ee_len = cpu_to_le16(split - ee_block); if (split_flag & EXT4_EXT_MARK_UNWRIT1) ext4_ext_mark_unwritten(ex); /* * path may lead to new leaf, not to original leaf any more * after ext4_ext_insert_extent() returns, */ err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto fix_extent_len; ex2 = &newex; ex2->ee_block = cpu_to_le32(split); ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); ext4_ext_store_pblock(ex2, newblock); if (split_flag & EXT4_EXT_MARK_UNWRIT2) ext4_ext_mark_unwritten(ex2); path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (!IS_ERR(path)) goto out; err = PTR_ERR(path); if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) return path; /* * Get a new path to try to zeroout or fix the extent length. 
* Using EXT4_EX_NOFAIL guarantees that ext4_find_extent() * will not return -ENOMEM, otherwise -ENOMEM will cause a * retry in do_writepages(), and a WARN_ON may be triggered * in ext4_da_update_reserve_space() due to an incorrect * ee_len causing the i_reserved_data_blocks exception. */ path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL); if (IS_ERR(path)) { EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld", split, PTR_ERR(path)); return path; } depth = ext_depth(inode); ex = path[depth].p_ext; if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { err = ext4_ext_zeroout(inode, ex2); zero_ex.ee_block = ex2->ee_block; zero_ex.ee_len = cpu_to_le16( ext4_ext_get_actual_len(ex2)); ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex2)); } else { err = ext4_ext_zeroout(inode, ex); zero_ex.ee_block = ex->ee_block; zero_ex.ee_len = cpu_to_le16( ext4_ext_get_actual_len(ex)); ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); } } else { err = ext4_ext_zeroout(inode, &orig_ex); zero_ex.ee_block = orig_ex.ee_block; zero_ex.ee_len = cpu_to_le16( ext4_ext_get_actual_len(&orig_ex)); ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(&orig_ex)); } if (!err) { /* update the extent length and mark as initialized */ ex->ee_len = cpu_to_le16(ee_len); ext4_ext_try_to_merge(handle, inode, path, ex); err = ext4_ext_dirty(handle, inode, path + path->p_depth); if (!err) /* update extent status tree */ ext4_zeroout_es(inode, &zero_ex); /* If we failed at this point, we don't know in which * state the extent tree exactly is so don't try to fix * length of the original extent as it may do even more * damage. */ goto out; } } fix_extent_len: ex->ee_len = orig_ex.ee_len; /* * Ignore ext4_ext_dirty return value since we are already in error path * and err is a non-zero error code. */ ext4_ext_dirty(handle, inode, path + path->p_depth); out: if (err) { ext4_free_ext_path(path); path = ERR_PTR(err); } ext4_ext_show_leaf(inode, path); return path; } /* * ext4_split_extent() splits an extent and mark extent which is covered * by @map as split_flags indicates * * It may result in splitting the extent into multiple extents (up to three) * There are three possibilities: * a> There is no split required * b> Splits in two extents: Split is happening at either end of the extent * c> Splits in three extents: Somone is splitting in middle of the extent * */ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_map_blocks *map, int split_flag, int flags, unsigned int *allocated) { ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len, depth; int unwritten; int split_flag1, flags1; depth = ext_depth(inode); ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); unwritten = ext4_ext_is_unwritten(ex); if (map->m_lblk + map->m_len < ee_block + ee_len) { split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; if (unwritten) split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; path = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); if (IS_ERR(path)) return path; /* * Update path is required because previous ext4_split_extent_at * may result in split of original leaf or extent zeroout. 
*/ path = ext4_find_extent(inode, map->m_lblk, path, flags); if (IS_ERR(path)) return path; depth = ext_depth(inode); ex = path[depth].p_ext; if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); ext4_free_ext_path(path); return ERR_PTR(-EFSCORRUPTED); } unwritten = ext4_ext_is_unwritten(ex); } if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; if (unwritten) { split_flag1 |= EXT4_EXT_MARK_UNWRIT1; split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | EXT4_EXT_MARK_UNWRIT2); } path = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); if (IS_ERR(path)) return path; } if (allocated) { if (map->m_lblk + map->m_len > ee_block + ee_len) *allocated = ee_len - (map->m_lblk - ee_block); else *allocated = map->m_len; } ext4_ext_show_leaf(inode, path); return path; } /* * This function is called by ext4_ext_map_blocks() if someone tries to write * to an unwritten extent. It may result in splitting the unwritten * extent into multiple extents (up to three - one initialized and two * unwritten). * There are three possibilities: * a> There is no split required: Entire extent should be initialized * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * Pre-conditions: * - The extent pointed to by 'path' is unwritten. * - The extent pointed to by 'path' contains a superset * of the logical span [map->m_lblk, map->m_lblk + map->m_len). * * Post-conditions on success: * - the returned value is the number of blocks beyond map->l_lblk * that are allocated and initialized. * It is guaranteed to be >= map->m_len. */ static struct ext4_ext_path * ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, unsigned int *allocated) { struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; struct ext4_extent zero_ex1, zero_ex2; struct ext4_extent *ex, *abut_ex; ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; int err = 0; int split_flag = EXT4_EXT_DATA_VALID2; unsigned int max_zeroout = 0; ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); zero_ex1.ee_len = 0; zero_ex2.ee_len = 0; trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); /* Pre-conditions */ BUG_ON(!ext4_ext_is_unwritten(ex)); BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); /* * Attempt to transfer newly initialized blocks from the currently * unwritten extent to its neighbor. This is much cheaper * than an insertion followed by a merge as those involve costly * memmove() calls. Transferring to the left is the common case in * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) * followed by append writes. * * Limitations of the current logic: * - L1: we do not deal with writes covering the whole extent. * This would require removing the extent if the transfer * is possible. * - L2: we only attempt to merge with an extent stored in the * same extent tree node. 
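* For example, an append-style write that initializes the first 4 blocks of an unwritten extent whose left neighbour is initialized and physically contiguous just shifts the unwritten extent's start forward by 4 blocks and grows the neighbour by 4, avoiding an insert followed by a merge.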
*/ *allocated = 0; if ((map->m_lblk == ee_block) && /* See if we can merge left */ (map_len < ee_len) && /*L1*/ (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/ ext4_lblk_t prev_lblk; ext4_fsblk_t prev_pblk, ee_pblk; unsigned int prev_len; abut_ex = ex - 1; prev_lblk = le32_to_cpu(abut_ex->ee_block); prev_len = ext4_ext_get_actual_len(abut_ex); prev_pblk = ext4_ext_pblock(abut_ex); ee_pblk = ext4_ext_pblock(ex); /* * A transfer of blocks from 'ex' to 'abut_ex' is allowed * upon those conditions: * - C1: abut_ex is initialized, * - C2: abut_ex is logically abutting ex, * - C3: abut_ex is physically abutting ex, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((prev_lblk + prev_len) == ee_block) && /*C2*/ ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; trace_ext4_ext_convert_to_initialized_fastpath(inode, map, ex, abut_ex); /* Shift the start of ex by 'map_len' blocks */ ex->ee_block = cpu_to_le32(ee_block + map_len); ext4_ext_store_pblock(ex, ee_pblk + map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(prev_len + map_len); /* Result: number of initialized blocks past m_lblk */ *allocated = map_len; } } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) && (map_len < ee_len) && /*L1*/ ex < EXT_LAST_EXTENT(eh)) { /*L2*/ /* See if we can merge right */ ext4_lblk_t next_lblk; ext4_fsblk_t next_pblk, ee_pblk; unsigned int next_len; abut_ex = ex + 1; next_lblk = le32_to_cpu(abut_ex->ee_block); next_len = ext4_ext_get_actual_len(abut_ex); next_pblk = ext4_ext_pblock(abut_ex); ee_pblk = ext4_ext_pblock(ex); /* * A transfer of blocks from 'ex' to 'abut_ex' is allowed * upon those conditions: * - C1: abut_ex is initialized, * - C2: abut_ex is logically abutting ex, * - C3: abut_ex is physically abutting ex, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((map->m_lblk + map_len) == next_lblk) && /*C2*/ ((ee_pblk + ee_len) == next_pblk) && /*C3*/ (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; trace_ext4_ext_convert_to_initialized_fastpath(inode, map, ex, abut_ex); /* Shift the start of abut_ex by 'map_len' blocks */ abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); ext4_ext_store_pblock(abut_ex, next_pblk - map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(next_len + map_len); /* Result: number of initialized blocks past m_lblk */ *allocated = map_len; } } if (*allocated) { /* Mark the block containing both extents as dirty */ err = ext4_ext_dirty(handle, inode, path + depth); /* Update path to point to the right extent */ path[depth].p_ext = abut_ex; if (err) goto errout; goto out; } else *allocated = ee_len - (map->m_lblk - ee_block); WARN_ON(map->m_lblk < ee_block); /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully inside i_size or new_size. */ split_flag |= ee_block + ee_len <= eof_block ? 
EXT4_EXT_MAY_ZEROOUT : 0; if (EXT4_EXT_MAY_ZEROOUT & split_flag) max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); /* * five cases: * 1. split the extent into three extents. * 2. split the extent into two extents, zeroout the head of the first * extent. * 3. split the extent into two extents, zeroout the tail of the second * extent. * 4. split the extent into two extents with out zeroout. * 5. no splitting needed, just possibly zeroout the head and / or the * tail of the extent. */ split_map.m_lblk = map->m_lblk; split_map.m_len = map->m_len; if (max_zeroout && (*allocated > split_map.m_len)) { if (*allocated <= max_zeroout) { /* case 3 or 5 */ zero_ex1.ee_block = cpu_to_le32(split_map.m_lblk + split_map.m_len); zero_ex1.ee_len = cpu_to_le16(*allocated - split_map.m_len); ext4_ext_store_pblock(&zero_ex1, ext4_ext_pblock(ex) + split_map.m_lblk + split_map.m_len - ee_block); err = ext4_ext_zeroout(inode, &zero_ex1); if (err) goto fallback; split_map.m_len = *allocated; } if (split_map.m_lblk - ee_block + split_map.m_len < max_zeroout) { /* case 2 or 5 */ if (split_map.m_lblk != ee_block) { zero_ex2.ee_block = ex->ee_block; zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk - ee_block); ext4_ext_store_pblock(&zero_ex2, ext4_ext_pblock(ex)); err = ext4_ext_zeroout(inode, &zero_ex2); if (err) goto fallback; } split_map.m_len += split_map.m_lblk - ee_block; split_map.m_lblk = ee_block; *allocated = map->m_len; } } fallback: path = ext4_split_extent(handle, inode, path, &split_map, split_flag, flags, NULL); if (IS_ERR(path)) return path; out: /* If we have gotten a failure, don't zero out status tree */ ext4_zeroout_es(inode, &zero_ex1); ext4_zeroout_es(inode, &zero_ex2); return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } /* * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write * to an unwritten extent. * * Writing to an unwritten extent may result in splitting the unwritten * extent into multiple initialized/unwritten extents (up to three) * There are three possibilities: * a> There is no split required: Entire extent should be unwritten * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * This works the same way in the case of initialized -> unwritten conversion. * * One of more index blocks maybe needed if the extent tree grow after * the unwritten extent split. To prevent ENOSPC occur at the IO * complete, we need to split the unwritten extent before DIO submit * the IO. The unwritten extent called at this time will be split * into three unwritten extent(at most). After IO complete, the part * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). * * The size of unwritten extent to be written is passed to the caller via the * allocated pointer. Return an extent path pointer on success, or an error * pointer on failure. 
*/ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, unsigned int *allocated) { ext4_lblk_t eof_block; ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len; int split_flag = 0, depth; ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map->m_len); eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully inside i_size or new_size. */ depth = ext_depth(inode); ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); /* Convert to unwritten */ if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { split_flag |= EXT4_EXT_DATA_VALID1; /* Convert to initialized */ } else if (flags & EXT4_GET_BLOCKS_CONVERT) { split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); } flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags, allocated); } static struct ext4_ext_path * ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path) { struct ext4_extent *ex; ext4_lblk_t ee_block; unsigned int ee_len; int depth; int err = 0; depth = ext_depth(inode); ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); /* If extent is larger than requested it is a clear sign that we still * have some extent state machine issues left. So extent_split is still * required. * TODO: Once all related issues will be fixed this situation should be * illegal. 
*/ if (ee_block != map->m_lblk || ee_len > map->m_len) { #ifdef CONFIG_EXT4_DEBUG ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu," " len %u; IO logical block %llu, len %u", inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); #endif path = ext4_split_convert_extents(handle, inode, map, path, EXT4_GET_BLOCKS_CONVERT, NULL); if (IS_ERR(path)) return path; path = ext4_find_extent(inode, map->m_lblk, path, 0); if (IS_ERR(path)) return path; depth = ext_depth(inode); ex = path[depth].p_ext; } err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; /* first mark the extent as initialized */ ext4_ext_mark_initialized(ex); /* note: ext4_ext_correct_indexes() isn't needed here because * borders are not changed */ ext4_ext_try_to_merge(handle, inode, path, ex); /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + path->p_depth); if (err) goto errout; ext4_ext_show_leaf(inode, path); return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } static struct ext4_ext_path * convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, unsigned int *allocated) { struct ext4_extent *ex; ext4_lblk_t ee_block; unsigned int ee_len; int depth; int err = 0; /* * Make sure that the extent is no bigger than we support with * unwritten extent */ if (map->m_len > EXT_UNWRITTEN_MAX_LEN) map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; depth = ext_depth(inode); ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); if (ee_block != map->m_lblk || ee_len > map->m_len) { path = ext4_split_convert_extents(handle, inode, map, path, EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); if (IS_ERR(path)) return path; path = ext4_find_extent(inode, map->m_lblk, path, 0); if (IS_ERR(path)) return path; depth = ext_depth(inode); ex = path[depth].p_ext; if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); err = -EFSCORRUPTED; goto errout; } } err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto errout; /* first mark the extent as unwritten */ ext4_ext_mark_unwritten(ex); /* note: ext4_ext_correct_indexes() isn't needed here because * borders are not changed */ ext4_ext_try_to_merge(handle, inode, path, ex); /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + path->p_depth); if (err) goto errout; ext4_ext_show_leaf(inode, path); ext4_update_inode_fsync_trans(handle, inode, 1); map->m_flags |= EXT4_MAP_UNWRITTEN; if (*allocated > map->m_len) *allocated = map->m_len; map->m_len = *allocated; return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } static struct ext4_ext_path * ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, unsigned int *allocated, ext4_fsblk_t newblock) { int err = 0; ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", (unsigned long long)map->m_lblk, map->m_len, flags, *allocated); ext4_ext_show_leaf(inode, path); /* * When writing into unwritten space, we should not fail to * allocate metadata blocks for the new extent block if needed. 
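* (EXT4_GET_BLOCKS_METADATA_NOFAIL is turned into EXT4_MB_USE_RESERVED by ext4_ext_map_blocks(), so the block allocator may dip into the reserved pool for this.)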
*/ flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; trace_ext4_ext_handle_unwritten_extents(inode, map, flags, *allocated, newblock); /* get_block() before submitting IO, split the extent */ if (flags & EXT4_GET_BLOCKS_PRE_IO) { path = ext4_split_convert_extents(handle, inode, map, path, flags | EXT4_GET_BLOCKS_CONVERT, allocated); if (IS_ERR(path)) return path; /* * shouldn't get a 0 allocated when splitting an extent unless * m_len is 0 (bug) or extent has been corrupted */ if (unlikely(*allocated == 0)) { EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u", map->m_len); err = -EFSCORRUPTED; goto errout; } map->m_flags |= EXT4_MAP_UNWRITTEN; goto out; } /* IO end_io complete, convert the filled extent to written */ if (flags & EXT4_GET_BLOCKS_CONVERT) { path = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (IS_ERR(path)) return path; ext4_update_inode_fsync_trans(handle, inode, 1); goto map_out; } /* buffered IO cases */ /* * repeat fallocate creation request * we already have an unwritten extent */ if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { map->m_flags |= EXT4_MAP_UNWRITTEN; goto map_out; } /* buffered READ or buffered write_begin() lookup */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * We have blocks reserved already. We * return allocated blocks so that delalloc * won't do block reservation for us. But * the buffer head will be unmapped so that * a read from the block returns 0s. */ map->m_flags |= EXT4_MAP_UNWRITTEN; goto out1; } /* * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1. * For buffered writes, at writepage time, etc. Convert a * discovered unwritten extent to written. */ path = ext4_ext_convert_to_initialized(handle, inode, map, path, flags, allocated); if (IS_ERR(path)) return path; ext4_update_inode_fsync_trans(handle, inode, 1); /* * shouldn't get a 0 allocated when converting an unwritten extent * unless m_len is 0 (bug) or extent has been corrupted */ if (unlikely(*allocated == 0)) { EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u", map->m_len); err = -EFSCORRUPTED; goto errout; } out: map->m_flags |= EXT4_MAP_NEW; map_out: map->m_flags |= EXT4_MAP_MAPPED; out1: map->m_pblk = newblock; if (*allocated > map->m_len) *allocated = map->m_len; map->m_len = *allocated; ext4_ext_show_leaf(inode, path); return path; errout: ext4_free_ext_path(path); return ERR_PTR(err); } /* * get_implied_cluster_alloc - check to see if the requested * allocation (in the map structure) overlaps with a cluster already * allocated in an extent. * @sb The filesystem superblock structure * @map The requested lblk->pblk mapping * @ex The extent structure which might contain an implied * cluster allocation * * This function is called by ext4_ext_map_blocks() after we failed to * find blocks that were already in the inode's extent tree. Hence, * we know that the beginning of the requested region cannot overlap * the extent from the inode's extent tree. There are three cases we * want to catch. 
The first is this case: * * |--- cluster # N--| * |--- extent ---| |---- requested region ---| * |==========| * * The second case that we need to test for is this one: * * |--------- cluster # N ----------------| * |--- requested region --| |------- extent ----| * |=======================| * * The third case is when the requested region lies between two extents * within the same cluster: * |------------- cluster # N-------------| * |----- ex -----| |---- ex_right ----| * |------ requested region ------| * |================| * * In each of the above cases, we need to set the map->m_pblk and * map->m_len so it corresponds to the return the extent labelled as * "|====|" from cluster #N, since it is already in use for data in * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to * signal to ext4_ext_map_blocks() that map->m_pblk should be treated * as a new "allocated" block region. Otherwise, we will return 0 and * ext4_ext_map_blocks() will then allocate one or more new clusters * by calling ext4_mb_new_blocks(). */ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_map_blocks *map, struct ext4_extent *ex, struct ext4_ext_path *path) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext4_ext_pblock(ex); unsigned short ee_len = ext4_ext_get_actual_len(ex); /* The extent passed in that we are trying to match */ ex_cluster_start = EXT4_B2C(sbi, ee_block); ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1); /* The requested region passed into ext4_map_blocks() */ rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); if ((rr_cluster_start == ex_cluster_end) || (rr_cluster_start == ex_cluster_start)) { if (rr_cluster_start == ex_cluster_end) ee_start += ee_len - 1; map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; map->m_len = min(map->m_len, (unsigned) sbi->s_cluster_ratio - c_offset); /* * Check for and handle this case: * * |--------- cluster # N-------------| * |------- extent ----| * |--- requested region ---| * |===========| */ if (map->m_lblk < ee_block) map->m_len = min(map->m_len, ee_block - map->m_lblk); /* * Check for the case where there is already another allocated * block to the right of 'ex' but before the end of the cluster. * * |------------- cluster # N-------------| * |----- ex -----| |---- ex_right ----| * |------ requested region ------| * |================| */ if (map->m_lblk > ee_block) { ext4_lblk_t next = ext4_ext_next_allocated_block(path); map->m_len = min(map->m_len, next - map->m_lblk); } trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1); return 1; } trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0); return 0; } /* * Determine hole length around the given logical block, first try to * locate and expand the hole from the given @path, and then adjust it * if it's partially or completely converted to delayed extents, insert * it into the extent cache tree if it's indeed a hole, finally return * the length of the determined extent. 
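* For example, if the extent tree has a hole covering blocks 100..199 but a delalloc extent sits at 150..159, a lookup at block 100 reports (and caches) only the 50-block hole 100..149, while a lookup inside 150..159 just returns the remaining delalloc length without touching the extent status tree.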
*/ static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t lblk) { ext4_lblk_t hole_start, len; struct extent_status es; hole_start = lblk; len = ext4_ext_find_hole(inode, path, &hole_start); again: ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, hole_start + len - 1, &es); if (!es.es_len) goto insert_hole; /* * There's a delalloc extent in the hole, handle it if the delalloc * extent is in front of, behind and straddle the queried range. */ if (lblk >= es.es_lblk + es.es_len) { /* * The delalloc extent is in front of the queried range, * find again from the queried start block. */ len -= lblk - hole_start; hole_start = lblk; goto again; } else if (in_range(lblk, es.es_lblk, es.es_len)) { /* * The delalloc extent containing lblk, it must have been * added after ext4_map_blocks() checked the extent status * tree so we are not holding i_rwsem and delalloc info is * only stabilized by i_data_sem we are going to release * soon. Don't modify the extent status tree and report * extent as a hole, just adjust the length to the delalloc * extent's after lblk. */ len = es.es_lblk + es.es_len - lblk; return len; } else { /* * The delalloc extent is partially or completely behind * the queried range, update hole length until the * beginning of the delalloc extent. */ len = min(es.es_lblk - hole_start, len); } insert_hole: /* Put just found gap into cache to speed up subsequent requests */ ext_debug(inode, " -> %u:%u\n", hole_start, len); ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE, 0); /* Update hole_len to reflect hole size after lblk */ if (hole_start != lblk) len -= lblk - hole_start; return len; } /* * Block allocation/map/preallocation routine for extents based files * * * Need to be called with * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block * (ie, flags is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) * * return > 0, number of blocks already mapped/allocated * if flags doesn't contain EXT4_GET_BLOCKS_CREATE and these are pre-allocated blocks * buffer head is unmapped * otherwise blocks are mapped * * return = 0, if plain look up failed (blocks have not been allocated) * buffer head is unmapped * * return < 0, error case. 
*/ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; struct ext4_extent newex, *ex, ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0, pblk; int err = 0, depth; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; ext4_lblk_t cluster_offset; ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len); trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* find extent for this block */ path = ext4_find_extent(inode, map->m_lblk, NULL, 0); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; } depth = ext_depth(inode); /* * consistent leaf must not be empty; * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_find_extent() */ if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " "lblock: %lu, depth: %d pblock %lld", (unsigned long) map->m_lblk, depth, path[depth].p_block); err = -EFSCORRUPTED; goto out; } ex = path[depth].p_ext; if (ex) { ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext4_ext_pblock(ex); unsigned short ee_len; /* * unwritten extents are treated as holes, except that * we split out initialized portions during a write. */ ee_len = ext4_ext_get_actual_len(ex); trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len); /* if found extent covers block, simply return it */ if (in_range(map->m_lblk, ee_block, ee_len)) { newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (map->m_lblk - ee_block); ext_debug(inode, "%u fit into %u:%d -> %llu\n", map->m_lblk, ee_block, ee_len, newblock); /* * If the extent is initialized check whether the * caller wants to convert it to unwritten. */ if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { path = convert_initialized_extent(handle, inode, map, path, &allocated); if (IS_ERR(path)) err = PTR_ERR(path); goto out; } else if (!ext4_ext_is_unwritten(ex)) { map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = newblock; if (allocated > map->m_len) allocated = map->m_len; map->m_len = allocated; ext4_ext_show_leaf(inode, path); goto out; } path = ext4_ext_handle_unwritten_extents( handle, inode, map, path, flags, &allocated, newblock); if (IS_ERR(path)) err = PTR_ERR(path); goto out; } } /* * requested block isn't allocated yet; * we couldn't try to create block if flags doesn't contain EXT4_GET_BLOCKS_CREATE */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { ext4_lblk_t len; len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); map->m_pblk = 0; map->m_len = min_t(unsigned int, map->m_len, len); goto out; } /* * Okay, we need to do block allocation. */ newex.ee_block = cpu_to_le32(map->m_lblk); cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); /* * If we are doing bigalloc, check to see if the extent returned * by ext4_find_extent() implies a cluster we can use. 
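* For example, with a cluster ratio of 16, if a neighbouring extent already owns the cluster that contains map->m_lblk, the request is satisfied from that cluster's blocks and no new allocation is needed.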
*/ if (cluster_offset && ex && get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; goto got_allocated_blocks; } /* find neighbour allocated blocks */ ar.lleft = map->m_lblk; err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); if (err) goto out; ar.lright = map->m_lblk; err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); if (err < 0) goto out; /* Check if the extent after searching to the right implies a * cluster we can use. */ if ((sbi->s_cluster_ratio > 1) && err && get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; err = 0; goto got_allocated_blocks; } /* * See if request is beyond maximum number of blocks we can have in * a single extent. For an initialized extent this limit is * EXT_INIT_MAX_LEN and for an unwritten extent this limit is * EXT_UNWRITTEN_MAX_LEN. */ if (map->m_len > EXT_INIT_MAX_LEN && !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) map->m_len = EXT_INIT_MAX_LEN; else if (map->m_len > EXT_UNWRITTEN_MAX_LEN && (flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) map->m_len = EXT_UNWRITTEN_MAX_LEN; /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ newex.ee_len = cpu_to_le16(map->m_len); err = ext4_ext_check_overlap(sbi, inode, &newex, path); if (err) allocated = ext4_ext_get_actual_len(&newex); else allocated = map->m_len; /* allocate new block */ ar.inode = inode; ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); ar.logical = map->m_lblk; /* * We calculate the offset from the beginning of the cluster * for the logical block number, since when we allocate a * physical cluster, the physical block should start at the * same offset from the beginning of the cluster. This is * needed so that future calls to get_implied_cluster_alloc() * work correctly. */ offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ar.len = EXT4_NUM_B2C(sbi, offset+allocated); ar.goal -= offset; ar.logical -= offset; if (S_ISREG(inode->i_mode)) ar.flags = EXT4_MB_HINT_DATA; else /* disable in-core preallocation for non-regular files */ ar.flags = 0; if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) ar.flags |= EXT4_MB_HINT_NOPREALLOC; if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ar.flags |= EXT4_MB_DELALLOC_RESERVED; if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) ar.flags |= EXT4_MB_USE_RESERVED; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out; allocated_clusters = ar.len; ar.len = EXT4_C2B(sbi, ar.len) - offset; ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n", ar.goal, newblock, ar.len, allocated); if (ar.len > allocated) ar.len = allocated; got_allocated_blocks: /* try to insert new extent into found leaf and return */ pblk = newblock + offset; ext4_ext_store_pblock(&newex, pblk); newex.ee_len = cpu_to_le16(ar.len); /* Mark unwritten */ if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { ext4_ext_mark_unwritten(&newex); map->m_flags |= EXT4_MAP_UNWRITTEN; } path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (IS_ERR(path)) { err = PTR_ERR(path); if (allocated_clusters) { int fb_flags = 0; /* * free data blocks we just allocated. * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). 
*/ ext4_discard_preallocations(inode); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, EXT4_C2B(sbi, allocated_clusters), fb_flags); } goto out; } /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an unwritten extent. */ if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0) ext4_update_inode_fsync_trans(handle, inode, 1); else ext4_update_inode_fsync_trans(handle, inode, 0); map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED); map->m_pblk = pblk; map->m_len = ar.len; allocated = map->m_len; ext4_ext_show_leaf(inode, path); out: ext4_free_ext_path(path); trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); return err ? err : allocated; } int ext4_ext_truncate(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; ext4_lblk_t last_block; int err = 0; /* * TODO: optimization is possible here. * Probably we need not scan at all, * because page truncation is enough. */ /* we have to know where to truncate from in crash case */ EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_mark_inode_dirty(handle, inode); if (err) return err; last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); retry_remove_space: err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); if (err == -ENOMEM) { memalloc_retry_wait(GFP_ATOMIC); goto retry_remove_space; } return err; } static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ext4_lblk_t len, loff_t new_size, int flags) { struct inode *inode = file_inode(file); handle_t *handle; int ret = 0, ret2 = 0, ret3 = 0; int retries = 0; int depth = 0; struct ext4_map_blocks map; unsigned int credits; loff_t epos; BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); map.m_lblk = offset; map.m_len = len; /* * Don't normalize the request if it can fit in one extent so * that it doesn't get unnecessarily split into multiple * extents. */ if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; /* * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, len); depth = ext_depth(inode); retry: while (len) { /* * Recalculate credits when extent tree depth changes. */ if (depth != ext_depth(inode)) { credits = ext4_chunk_trans_blocks(inode, len); depth = ext_depth(inode); } handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } ret = ext4_map_blocks(handle, inode, &map, flags); if (ret <= 0) { ext4_debug("inode #%lu: block %u: len %u: " "ext4_ext_map_blocks returned %d", inode->i_ino, map.m_lblk, map.m_len, ret); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); break; } /* * allow a full retry cycle for any remaining allocations */ retries = 0; map.m_lblk += ret; map.m_len = len = len - ret; epos = (loff_t)map.m_lblk << inode->i_blkbits; inode_set_ctime_current(inode); if (new_size) { if (epos > new_size) epos = new_size; if (ext4_update_inode_size(inode, epos) & 0x1) inode_set_mtime_to_ts(inode, inode_get_ctime(inode)); } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); ret3 = ext4_journal_stop(handle); ret2 = ret3 ? ret3 : ret2; if (unlikely(ret2)) break; } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; return ret > 0 ? 
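/* some blocks were mapped: report any deferred journal/dirty error (ret2) instead of ret */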
ret2 : ret; } static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len); static int ext4_insert_range(struct file *file, loff_t offset, loff_t len); static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) { struct inode *inode = file_inode(file); struct address_space *mapping = file->f_mapping; handle_t *handle = NULL; unsigned int max_blocks; loff_t new_size = 0; int ret = 0; int flags; int credits; int partial_begin, partial_end; loff_t start, end; ext4_lblk_t lblk; unsigned int blkbits = inode->i_blkbits; trace_ext4_zero_range(inode, offset, len, mode); /* * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to * unwritten and possibly manually zero out unaligned parts of the * range. Here, start and partial_begin are inclusive, end and * partial_end are exclusive. */ start = round_up(offset, 1 << blkbits); end = round_down((offset + len), 1 << blkbits); if (start < offset || end > offset + len) return -EINVAL; partial_begin = offset & ((1 << blkbits) - 1); partial_end = (offset + len) & ((1 << blkbits) - 1); lblk = start >> blkbits; max_blocks = (end >> blkbits); if (max_blocks < lblk) max_blocks = 0; else max_blocks -= lblk; inode_lock(inode); /* * Indirect files do not support unwritten extents */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { ret = -EOPNOTSUPP; goto out_mutex; } if (!(mode & FALLOC_FL_KEEP_SIZE) && (offset + len > inode->i_size || offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) goto out_mutex; } flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); ret = file_modified(file); if (ret) goto out_mutex; /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, round_down(offset, 1 << blkbits) >> blkbits, (round_up((offset + len), 1 << blkbits) - round_down(offset, 1 << blkbits)) >> blkbits, new_size, flags); if (ret) goto out_mutex; } /* Zero range excluding the unaligned edges */ if (max_blocks > 0) { flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); /* * Prevent page faults from reinstantiating pages we have * released from page cache. */ filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) { filemap_invalidate_unlock(mapping); goto out_mutex; } ret = ext4_update_disksize_before_punch(inode, offset, len); if (ret) { filemap_invalidate_unlock(mapping); goto out_mutex; } /* * For journalled data we need to write (and checkpoint) pages * before discarding page cache to avoid inconsitent data on * disk in case of crash before zeroing trans is committed. 
*/ if (ext4_should_journal_data(inode)) { ret = filemap_write_and_wait_range(mapping, start, end - 1); if (ret) { filemap_invalidate_unlock(mapping); goto out_mutex; } } /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); filemap_invalidate_unlock(mapping); if (ret) goto out_mutex; } if (!partial_begin && !partial_end) goto out_mutex; /* * In worst case we have to writeout two nonadjacent unwritten * blocks and update the inode */ credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1; if (ext4_should_journal_data(inode)) credits += 2; handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); goto out_mutex; } inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (new_size) ext4_update_inode_size(inode, new_size); ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); if (file->f_flags & O_SYNC) ext4_handle_sync(handle); out_handle: ext4_journal_stop(handle); out_mutex: inode_unlock(inode); return ret; } /* * preallocate space for a file. This implements ext4's fallocate file * operation, which gets called from sys_fallocate system call. * For block-mapped files, posix_fallocate should fall back to the method * of writing zeroes to the required new blocks (the same behavior which is * expected for file systems which do not support fallocate() system call). */ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); loff_t new_size = 0; unsigned int max_blocks; int ret = 0; int flags; ext4_lblk_t lblk; unsigned int blkbits = inode->i_blkbits; /* * Encrypted inodes can't handle collapse range or insert * range since we would need to re-encrypt blocks with a * different IV or XTS tweak (which are based on the logical * block number). 
*/ if (IS_ENCRYPTED(inode) && (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; /* Return error if mode is not supported */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; inode_lock(inode); ret = ext4_convert_inline_data(inode); inode_unlock(inode); if (ret) goto exit; if (mode & FALLOC_FL_PUNCH_HOLE) { ret = ext4_punch_hole(file, offset, len); goto exit; } if (mode & FALLOC_FL_COLLAPSE_RANGE) { ret = ext4_collapse_range(file, offset, len); goto exit; } if (mode & FALLOC_FL_INSERT_RANGE) { ret = ext4_insert_range(file, offset, len); goto exit; } if (mode & FALLOC_FL_ZERO_RANGE) { ret = ext4_zero_range(file, offset, len, mode); goto exit; } trace_ext4_fallocate_enter(inode, offset, len, mode); lblk = offset >> blkbits; max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; inode_lock(inode); /* * We only support preallocation for extent-based files only */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { ret = -EOPNOTSUPP; goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && (offset + len > inode->i_size || offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) goto out; } /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); ret = file_modified(file); if (ret) goto out; ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out; if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, EXT4_I(inode)->i_sync_tid); } out: inode_unlock(inode); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); exit: return ret; } /* * This function convert a range of blocks to written extents * The caller of this function will pass the start offset and the size. * all unwritten extents within this range will be converted to * written extents. * * This function is called from the direct IO end io call back * function, to convert the fallocated extents after IO is completed. * Returns 0 on success. */ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, loff_t offset, ssize_t len) { unsigned int max_blocks; int ret = 0, ret2 = 0, ret3 = 0; struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; unsigned int credits = 0; map.m_lblk = offset >> blkbits; max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); if (!handle) { /* * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, max_blocks); } while (ret >= 0 && ret < max_blocks) { map.m_lblk += ret; map.m_len = (max_blocks -= ret); if (credits) { handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } } ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) ext4_warning(inode->i_sb, "inode #%lu: block %u: len %u: " "ext4_ext_map_blocks returned %d", inode->i_ino, map.m_lblk, map.m_len, ret); ret2 = ext4_mark_inode_dirty(handle, inode); if (credits) { ret3 = ext4_journal_stop(handle); if (unlikely(ret3)) ret2 = ret3; } if (ret <= 0 || ret2) break; } return ret > 0 ? ret2 : ret; } int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) { int ret = 0, err = 0; struct ext4_io_end_vec *io_end_vec; /* * This is somewhat ugly but the idea is clear: When transaction is * reserved, everything goes into it. 
Otherwise we rather start several * smaller transactions for conversion of each extent separately. */ if (handle) { handle = ext4_journal_start_reserved(handle, EXT4_HT_EXT_CONVERT); if (IS_ERR(handle)) return PTR_ERR(handle); } list_for_each_entry(io_end_vec, &io_end->list_vec, list) { ret = ext4_convert_unwritten_extents(handle, io_end->inode, io_end_vec->offset, io_end_vec->size); if (ret) break; } if (handle) err = ext4_journal_stop(handle); return ret < 0 ? ret : err; } static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap) { __u64 physical = 0; __u64 length = 0; int blockbits = inode->i_sb->s_blocksize_bits; int error = 0; u16 iomap_type; /* in-inode? */ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { struct ext4_iloc iloc; int offset; /* offset of xattr in inode */ error = ext4_get_inode_loc(inode, &iloc); if (error) return error; physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; brelse(iloc.bh); iomap_type = IOMAP_INLINE; } else if (EXT4_I(inode)->i_file_acl) { /* external block */ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; iomap_type = IOMAP_MAPPED; } else { /* no in-inode or external block for xattr, so return -ENOENT */ error = -ENOENT; goto out; } iomap->addr = physical; iomap->offset = 0; iomap->length = length; iomap->type = iomap_type; iomap->flags = 0; out: return error; } static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap, struct iomap *srcmap) { int error; error = ext4_iomap_xattr_fiemap(inode, iomap); if (error == 0 && (offset >= iomap->length)) error = -ENOENT; return error; } static const struct iomap_ops ext4_iomap_xattr_ops = { .iomap_begin = ext4_iomap_xattr_begin, }; static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len) { u64 maxbytes; if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) maxbytes = inode->i_sb->s_maxbytes; else maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; if (*len == 0) return -EINVAL; if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ if (*len > maxbytes || (maxbytes - *len) < start) *len = maxbytes - start; return 0; } int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int error = 0; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { error = ext4_ext_precache(inode); if (error) return error; fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } /* * For bitmap files the maximum size limit could be smaller than * s_maxbytes, so check len here manually instead of just relying on the * generic check. 
*/ error = ext4_fiemap_check_ranges(inode, start, &len); if (error) return error; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_xattr_ops); } return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops); } int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { ext4_lblk_t start_blk, len_blks; __u64 last_blk; int error = 0; if (ext4_has_inline_data(inode)) { int has_inline; down_read(&EXT4_I(inode)->xattr_sem); has_inline = ext4_has_inline_data(inode); up_read(&EXT4_I(inode)->xattr_sem); if (has_inline) return 0; } if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { error = ext4_ext_precache(inode); if (error) return error; fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } error = fiemap_prep(inode, fieinfo, start, &len, 0); if (error) return error; error = ext4_fiemap_check_ranges(inode, start, &len); if (error) return error; start_blk = start >> inode->i_sb->s_blocksize_bits; last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; if (last_blk >= EXT_MAX_BLOCKS) last_blk = EXT_MAX_BLOCKS-1; len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; /* * Walk the extent tree gathering extent information * and pushing extents back to the user. */ return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells * if it is right shift or left shift operation. */ static int ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, struct inode *inode, handle_t *handle, enum SHIFT_DIRECTION SHIFT) { int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { if (depth == path->p_depth) { ex_start = path[depth].p_ext; if (!ex_start) return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); /* leaf + sb + inode */ credits = 3; if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { update = true; /* extent tree + sb + inode */ credits = depth + 2; } restart_credits = ext4_writepage_trans_blocks(inode); err = ext4_datasem_ensure_credits(handle, inode, credits, restart_credits, 0); if (err) { if (err > 0) err = -EAGAIN; goto out; } err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { le32_add_cpu(&ex_start->ee_block, -shift); /* Try to merge to the left. 
*/ if ((ex_start > EXT_FIRST_EXTENT(path[depth].p_hdr)) && ext4_ext_try_to_merge_right(inode, path, ex_start - 1)) ex_last--; else ex_start++; } else { le32_add_cpu(&ex_last->ee_block, shift); ext4_ext_try_to_merge_right(inode, path, ex_last); ex_last--; } } err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto out; if (--depth < 0 || !update) break; } /* Update index too */ err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; if (SHIFT == SHIFT_LEFT) le32_add_cpu(&path[depth].p_idx->ei_block, -shift); else le32_add_cpu(&path[depth].p_idx->ei_block, shift); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto out; /* we are done if current index is not a starting index */ if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) break; depth--; } out: return err; } /* * ext4_ext_shift_extents: * All the extents which lies in the range from @start to the last allocated * block for the @inode are shifted either towards left or right (depending * upon @SHIFT) by @shift blocks. * On success, 0 is returned, error otherwise. */ static int ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_lblk_t start, ext4_lblk_t shift, enum SHIFT_DIRECTION SHIFT) { struct ext4_ext_path *path; int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; if (!extent) goto out; stop = le32_to_cpu(extent->ee_block); /* * For left shifts, make sure the hole on the left is big enough to * accommodate the shift. For right shifts, make sure the last extent * won't be shifted beyond EXT_MAX_BLOCKS. */ if (SHIFT == SHIFT_LEFT) { path = ext4_find_extent(inode, start - 1, path, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; if (extent) { ex_start = le32_to_cpu(extent->ee_block); ex_end = le32_to_cpu(extent->ee_block) + ext4_ext_get_actual_len(extent); } else { ex_start = 0; ex_end = 0; } if ((start == ex_start && shift > ex_start) || (shift > start - ex_end)) { ret = -EINVAL; goto out; } } else { if (shift > EXT_MAX_BLOCKS - (stop + ext4_ext_get_actual_len(extent))) { ret = -EINVAL; goto out; } } /* * In case of left shift, iterator points to start and it is increased * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ again: ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; if (tmp != EXT_MAX_BLOCKS) *iterator = tmp; /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator * becomes NULL to indicate the end of the loop. 
*/ while (iterator && start <= stop) { path = ext4_find_extent(inode, *iterator, path, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; if (!extent) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) *iterator); return -EFSCORRUPTED; } if (SHIFT == SHIFT_LEFT && *iterator > le32_to_cpu(extent->ee_block)) { /* Hole, move to the next extent */ if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { path[depth].p_ext++; } else { *iterator = ext4_ext_next_allocated_block(path); continue; } } tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; else { extent = EXT_LAST_EXTENT(path[depth].p_hdr); while (le32_to_cpu(extent->ee_block) >= start) extent--; if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) break; extent++; iterator = NULL; } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); /* iterator can be NULL which means we should break */ if (ret == -EAGAIN) goto again; if (ret) break; } out: ext4_free_ext_path(path); return ret; } /* * ext4_collapse_range: * This implements the fallocate's collapse range functionality for ext4 * Returns: 0 and non-zero on error. */ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; ext4_lblk_t punch_start, punch_stop; handle_t *handle; unsigned int credits; loff_t new_size, ioffset; int ret; /* * We need to test this early because xfstests assumes that a * collapse range of (0, 1) will return EOPNOTSUPP if the file * system does not support collapse range. */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; /* Collapse range works only on fs cluster size aligned regions. */ if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; trace_ext4_collapse_range(inode, offset, len); punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); inode_lock(inode); /* * There is no need to overlap collapse range with EOF, in which case * it is effectively a truncate operation */ if (offset + len >= inode->i_size) { ret = -EINVAL; goto out_mutex; } /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ret = -EOPNOTSUPP; goto out_mutex; } /* Wait for existing dio to complete */ inode_dio_wait(inode); ret = file_modified(file); if (ret) goto out_mutex; /* * Prevent page faults from reinstantiating pages we have released from * page cache. */ filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) goto out_mmap; /* * Need to round down offset to be aligned with page size boundary * for page size > block size. */ ioffset = round_down(offset, PAGE_SIZE); /* * Write tail of the last page before removed range since it will get * removed from the page cache below. */ ret = filemap_write_and_wait_range(mapping, ioffset, offset); if (ret) goto out_mmap; /* * Write data that will be shifted to preserve them when discarding * page cache below. We are also protected from pages becoming dirty * by i_rwsem and invalidate_lock. 
*/ ret = filemap_write_and_wait_range(mapping, offset + len, LLONG_MAX); if (ret) goto out_mmap; truncate_pagecache(inode, ioffset); credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out_mmap; } ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } new_size = inode->i_size - len; i_size_write(inode, new_size); EXT4_I(inode)->i_disksize = new_size; up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: inode_unlock(inode); return ret; } /* * ext4_insert_range: * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate. * The data blocks starting from @offset to the EOF are shifted by @len * towards right to create a hole in the @inode. Inode size is increased * by len bytes. * Returns 0 on success, error otherwise. */ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; handle_t *handle; struct ext4_ext_path *path; struct ext4_extent *extent; ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0; unsigned int credits, ee_len; int ret = 0, depth, split_flag = 0; loff_t ioffset; /* * We need to test this early because xfstests assumes that an * insert range of (0, 1) will return EOPNOTSUPP if the file * system does not support insert range. */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; /* Insert range works only on fs cluster size aligned regions. */ if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; trace_ext4_insert_range(inode, offset, len); offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb); inode_lock(inode); /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ret = -EOPNOTSUPP; goto out_mutex; } /* Check whether the maximum file size would be exceeded */ if (len > inode->i_sb->s_maxbytes - inode->i_size) { ret = -EFBIG; goto out_mutex; } /* Offset must be less than i_size */ if (offset >= inode->i_size) { ret = -EINVAL; goto out_mutex; } /* Wait for existing dio to complete */ inode_dio_wait(inode); ret = file_modified(file); if (ret) goto out_mutex; /* * Prevent page faults from reinstantiating pages we have released from * page cache. */ filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) goto out_mmap; /* * Need to round down to align start offset to page size boundary * for page size > block size. 
*/ ioffset = round_down(offset, PAGE_SIZE); /* Write out all dirty pages */ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, LLONG_MAX); if (ret) goto out_mmap; truncate_pagecache(inode, ioffset); credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out_mmap; } ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; EXT4_I(inode)->i_disksize += len; inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); if (ret) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { up_write(&EXT4_I(inode)->i_data_sem); ret = PTR_ERR(path); goto out_stop; } depth = ext_depth(inode); extent = path[depth].p_ext; if (extent) { ee_start_lblk = le32_to_cpu(extent->ee_block); ee_len = ext4_ext_get_actual_len(extent); /* * If offset_lblk is not the starting block of extent, split * the extent @offset_lblk */ if ((offset_lblk > ee_start_lblk) && (offset_lblk < (ee_start_lblk + ee_len))) { if (ext4_ext_is_unwritten(extent)) split_flag = EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2; path = ext4_split_extent_at(handle, inode, path, offset_lblk, split_flag, EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_METADATA_NOFAIL); } if (IS_ERR(path)) { up_write(&EXT4_I(inode)->i_data_sem); ret = PTR_ERR(path); goto out_stop; } } ext4_free_ext_path(path); ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk); /* * if offset_lblk lies in a hole which is at start of file, use * ee_start_lblk to shift extents */ ret = ext4_ext_shift_extents(inode, handle, max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT); up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: inode_unlock(inode); return ret; } /** * ext4_swap_extents() - Swap extents between two inodes * @handle: handle for this transaction * @inode1: First inode * @inode2: Second inode * @lblk1: Start block for first inode * @lblk2: Start block for second inode * @count: Number of blocks to swap * @unwritten: Mark second inode's extents as unwritten after swap * @erp: Pointer to save error value * * This helper routine does exactly what is promise "swap extents". All other * stuff such as page-cache locking consistency, bh mapping consistency or * extent's data copying must be performed by caller. 
* Locking: * i_rwsem is held for both inodes * i_data_sem is locked for write for both inodes * Assumptions: * All pages from requested range are locked for both inodes */ int ext4_swap_extents(handle_t *handle, struct inode *inode1, struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, ext4_lblk_t count, int unwritten, int *erp) { struct ext4_ext_path *path1 = NULL; struct ext4_ext_path *path2 = NULL; int replaced_count = 0; BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); BUG_ON(!inode_is_locked(inode1)); BUG_ON(!inode_is_locked(inode2)); ext4_es_remove_extent(inode1, lblk1, count); ext4_es_remove_extent(inode2, lblk2, count); while (count) { struct ext4_extent *ex1, *ex2, tmp_ex; ext4_lblk_t e1_blk, e2_blk; int e1_len, e2_len, len; int split = 0; path1 = ext4_find_extent(inode1, lblk1, path1, EXT4_EX_NOCACHE); if (IS_ERR(path1)) { *erp = PTR_ERR(path1); goto errout; } path2 = ext4_find_extent(inode2, lblk2, path2, EXT4_EX_NOCACHE); if (IS_ERR(path2)) { *erp = PTR_ERR(path2); goto errout; } ex1 = path1[path1->p_depth].p_ext; ex2 = path2[path2->p_depth].p_ext; /* Do we have something to swap ? */ if (unlikely(!ex2 || !ex1)) goto errout; e1_blk = le32_to_cpu(ex1->ee_block); e2_blk = le32_to_cpu(ex2->ee_block); e1_len = ext4_ext_get_actual_len(ex1); e2_len = ext4_ext_get_actual_len(ex2); /* Hole handling */ if (!in_range(lblk1, e1_blk, e1_len) || !in_range(lblk2, e2_blk, e2_len)) { ext4_lblk_t next1, next2; /* if hole after extent, then go to next extent */ next1 = ext4_ext_next_allocated_block(path1); next2 = ext4_ext_next_allocated_block(path2); /* If hole before extent, then shift to that extent */ if (e1_blk > lblk1) next1 = e1_blk; if (e2_blk > lblk2) next2 = e2_blk; /* Do we have something to swap */ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) goto errout; /* Move to the rightest boundary */ len = next1 - lblk1; if (len < next2 - lblk2) len = next2 - lblk2; if (len > count) len = count; lblk1 += len; lblk2 += len; count -= len; continue; } /* Prepare left boundary */ if (e1_blk < lblk1) { split = 1; path1 = ext4_force_split_extent_at(handle, inode1, path1, lblk1, 0); if (IS_ERR(path1)) { *erp = PTR_ERR(path1); goto errout; } } if (e2_blk < lblk2) { split = 1; path2 = ext4_force_split_extent_at(handle, inode2, path2, lblk2, 0); if (IS_ERR(path2)) { *erp = PTR_ERR(path2); goto errout; } } /* ext4_split_extent_at() may result in leaf extent split, * path must to be revalidated. */ if (split) continue; /* Prepare right boundary */ len = count; if (len > e1_blk + e1_len - lblk1) len = e1_blk + e1_len - lblk1; if (len > e2_blk + e2_len - lblk2) len = e2_blk + e2_len - lblk2; if (len != e1_len) { split = 1; path1 = ext4_force_split_extent_at(handle, inode1, path1, lblk1 + len, 0); if (IS_ERR(path1)) { *erp = PTR_ERR(path1); goto errout; } } if (len != e2_len) { split = 1; path2 = ext4_force_split_extent_at(handle, inode2, path2, lblk2 + len, 0); if (IS_ERR(path2)) { *erp = PTR_ERR(path2); goto errout; } } /* ext4_split_extent_at() may result in leaf extent split, * path must to be revalidated. */ if (split) continue; BUG_ON(e2_len != e1_len); *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); if (unlikely(*erp)) goto errout; *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); if (unlikely(*erp)) goto errout; /* Both extents are fully inside boundaries. 
Swap it now */ tmp_ex = *ex1; ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2)); ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex)); ex1->ee_len = cpu_to_le16(e2_len); ex2->ee_len = cpu_to_le16(e1_len); if (unwritten) ext4_ext_mark_unwritten(ex2); if (ext4_ext_is_unwritten(&tmp_ex)) ext4_ext_mark_unwritten(ex1); ext4_ext_try_to_merge(handle, inode2, path2, ex2); ext4_ext_try_to_merge(handle, inode1, path1, ex1); *erp = ext4_ext_dirty(handle, inode2, path2 + path2->p_depth); if (unlikely(*erp)) goto errout; *erp = ext4_ext_dirty(handle, inode1, path1 + path1->p_depth); /* * Looks scarry ah..? second inode already points to new blocks, * and it was successfully dirtied. But luckily error may happen * only due to journal error, so full transaction will be * aborted anyway. */ if (unlikely(*erp)) goto errout; lblk1 += len; lblk2 += len; replaced_count += len; count -= len; } errout: ext4_free_ext_path(path1); ext4_free_ext_path(path2); return replaced_count; } /* * ext4_clu_mapped - determine whether any block in a logical cluster has * been mapped to a physical cluster * * @inode - file containing the logical cluster * @lclu - logical cluster of interest * * Returns 1 if any block in the logical cluster is mapped, signifying * that a physical cluster has been allocated for it. Otherwise, * returns 0. Can also return negative error codes. Derived from * ext4_ext_map_blocks(). */ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_ext_path *path; int depth, mapped = 0, err = 0; struct ext4_extent *extent; ext4_lblk_t first_lblk, first_lclu, last_lclu; /* * if data can be stored inline, the logical cluster isn't * mapped - no physical clusters have been allocated, and the * file has no extents */ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) || ext4_has_inline_data(inode)) return 0; /* search for the extent closest to the first block in the cluster */ path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = ext_depth(inode); /* * A consistent leaf must not be empty. This situation is possible, * though, _during_ tree modification, and it's why an assert can't * be put in ext4_find_extent(). */ if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address - lblock: %lu, depth: %d, pblock: %lld", (unsigned long) EXT4_C2B(sbi, lclu), depth, path[depth].p_block); err = -EFSCORRUPTED; goto out; } extent = path[depth].p_ext; /* can't be mapped if the extent tree is empty */ if (extent == NULL) goto out; first_lblk = le32_to_cpu(extent->ee_block); first_lclu = EXT4_B2C(sbi, first_lblk); /* * Three possible outcomes at this point - found extent spanning * the target cluster, to the left of the target cluster, or to the * right of the target cluster. The first two cases are handled here. * The last case indicates the target cluster is not mapped. */ if (lclu >= first_lclu) { last_lclu = EXT4_B2C(sbi, first_lblk + ext4_ext_get_actual_len(extent) - 1); if (lclu <= last_lclu) { mapped = 1; } else { first_lblk = ext4_ext_next_allocated_block(path); first_lclu = EXT4_B2C(sbi, first_lblk); if (lclu == first_lclu) mapped = 1; } } out: ext4_free_ext_path(path); return err ? err : mapped; } /* * Updates physical block address and unwritten status of extent * starting at lblk start and of len. If such an extent doesn't exist, * this function splits the extent tree appropriately to create an * extent like this. 
This function is called in the fast commit * replay path. Returns 0 on success and error on failure. */ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int len, int unwritten, ext4_fsblk_t pblk) { struct ext4_ext_path *path; struct ext4_extent *ex; int ret; path = ext4_find_extent(inode, start, NULL, 0); if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { ret = -EFSCORRUPTED; goto out; } if (le32_to_cpu(ex->ee_block) != start || ext4_ext_get_actual_len(ex) != len) { /* We need to split this extent to match our extent first */ down_write(&EXT4_I(inode)->i_data_sem); path = ext4_force_split_extent_at(NULL, inode, path, start, 1); up_write(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { ret = PTR_ERR(path); goto out; } path = ext4_find_extent(inode, start, path, 0); if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; WARN_ON(le32_to_cpu(ex->ee_block) != start); if (ext4_ext_get_actual_len(ex) != len) { down_write(&EXT4_I(inode)->i_data_sem); path = ext4_force_split_extent_at(NULL, inode, path, start + len, 1); up_write(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { ret = PTR_ERR(path); goto out; } path = ext4_find_extent(inode, start, path, 0); if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; } } if (unwritten) ext4_ext_mark_unwritten(ex); else ext4_ext_mark_initialized(ex); ext4_ext_store_pblock(ex, pblk); down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]); up_write(&EXT4_I(inode)->i_data_sem); out: ext4_free_ext_path(path); ext4_mark_inode_dirty(NULL, inode); return ret; } /* Try to shrink the extent tree */ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end) { struct ext4_ext_path *path = NULL; struct ext4_extent *ex; ext4_lblk_t old_cur, cur = 0; while (cur < end) { path = ext4_find_extent(inode, cur, NULL, 0); if (IS_ERR(path)) return; ex = path[path->p_depth].p_ext; if (!ex) { ext4_free_ext_path(path); ext4_mark_inode_dirty(NULL, inode); return; } old_cur = cur; cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); if (cur <= old_cur) cur = old_cur + 1; ext4_ext_try_to_merge(NULL, inode, path, ex); down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_dirty(NULL, inode, &path[path->p_depth]); up_write(&EXT4_I(inode)->i_data_sem); ext4_mark_inode_dirty(NULL, inode); ext4_free_ext_path(path); } } /* Check if *cur is a hole and if it is, skip it */ static int skip_hole(struct inode *inode, ext4_lblk_t *cur) { int ret; struct ext4_map_blocks map; map.m_lblk = *cur; map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur; ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; if (ret != 0) return 0; *cur = *cur + map.m_len; return 0; } /* Count number of blocks used by this inode and update i_blocks */ int ext4_ext_replay_set_iblocks(struct inode *inode) { struct ext4_ext_path *path = NULL, *path2 = NULL; struct ext4_extent *ex; ext4_lblk_t cur = 0, end; int numblks = 0, i, ret = 0; ext4_fsblk_t cmp1, cmp2; struct ext4_map_blocks map; /* Determin the size of the file first */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) goto out; end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); /* Count the number of data blocks */ cur = 0; while (cur < end) { map.m_lblk = cur; map.m_len = end - cur; ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) break; if (ret > 0) numblks += ret; 
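/* ext4_map_blocks() trims map.m_len to the length of the extent or hole it actually found, so advancing cur by map.m_len below steps over exactly the region that was just examined. */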
cur = cur + map.m_len; } /* * Count the number of extent tree blocks. We do it by looking up * two successive extents and determining the difference between * their paths. When path is different for 2 successive extents * we compare the blocks in the path at each level and increment * iblocks by total number of differences found. */ cur = 0; ret = skip_hole(inode, &cur); if (ret < 0) goto out; path = ext4_find_extent(inode, cur, path, 0); if (IS_ERR(path)) goto out; numblks += path->p_depth; while (cur < end) { path = ext4_find_extent(inode, cur, path, 0); if (IS_ERR(path)) break; ex = path[path->p_depth].p_ext; if (!ex) goto cleanup; cur = max(cur + 1, le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)); ret = skip_hole(inode, &cur); if (ret < 0) break; path2 = ext4_find_extent(inode, cur, path2, 0); if (IS_ERR(path2)) break; for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { cmp1 = cmp2 = 0; if (i <= path->p_depth) cmp1 = path[i].p_bh ? path[i].p_bh->b_blocknr : 0; if (i <= path2->p_depth) cmp2 = path2[i].p_bh ? path2[i].p_bh->b_blocknr : 0; if (cmp1 != cmp2 && cmp2 != 0) numblks++; } } out: inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9); ext4_mark_inode_dirty(NULL, inode); cleanup: ext4_free_ext_path(path); ext4_free_ext_path(path2); return 0; } int ext4_ext_clear_bb(struct inode *inode) { struct ext4_ext_path *path = NULL; struct ext4_extent *ex; ext4_lblk_t cur = 0, end; int j, ret = 0; struct ext4_map_blocks map; if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) return 0; /* Determin the size of the file first */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) goto out; end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); cur = 0; while (cur < end) { map.m_lblk = cur; map.m_len = end - cur; ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) break; if (ret > 0) { path = ext4_find_extent(inode, map.m_lblk, path, 0); if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) { ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, false); ext4_fc_record_regions(inode->i_sb, inode->i_ino, 0, path[j].p_block, 1, 1); } } else { path = NULL; } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); ext4_fc_record_regions(inode->i_sb, inode->i_ino, map.m_lblk, map.m_pblk, map.m_len, 1); } cur = cur + map.m_len; } out: ext4_free_ext_path(path); return 0; } |
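/*
 * Editor's note: the ext4 extent code above implements the fallocate(2)
 * modes handled by ext4_fallocate(), ext4_zero_range(), ext4_collapse_range()
 * and ext4_insert_range(). The userspace sketch below is an illustrative
 * example only and is not part of the kernel sources in this file; the file
 * path, the 4 KiB block/cluster alignment, and a glibc new enough to expose
 * the FALLOC_FL_* flags through <fcntl.h> are assumptions.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/fallocate-demo", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Preallocate 1 MiB of unwritten extents without changing i_size. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
		perror("FALLOC_FL_KEEP_SIZE");

	/*
	 * Zero the first 256 KiB and extend i_size: ext4 converts the range
	 * to unwritten extents instead of writing zeroes.
	 */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, 64 * 4096))
		perror("FALLOC_FL_ZERO_RANGE");

	/* Drop one block-aligned 4 KiB chunk; following data shifts left. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096))
		perror("FALLOC_FL_COLLAPSE_RANGE");

	/* Insert a 4 KiB hole at the same offset; data shifts right again. */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096))
		perror("FALLOC_FL_INSERT_RANGE");

	close(fd);
	return 0;
}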
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2000-2001 Vojtech Pavlik * Copyright (c) 2006-2010 Jiri Kosina * * HID to Linux Input mapping */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #include <linux/module.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/hid.h> #include <linux/hid-debug.h> #include "hid-ids.h" #define unk KEY_UNKNOWN static const unsigned char hid_keyboard[256] = { 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 1, 14, 15, 57, 12, 13, 26, 27, 43, 43, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106, 105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71, 72, 73, 82, 83, 86,127,116,117,183,184,185,186,187,188,189,190, 191,192,193,194,134,138,130,132,128,129,131,137,133,135,136,113, 115,114,unk,unk,unk,121,unk, 89, 93,124, 92, 94, 95,unk,unk,unk, 122,123, 90, 91, 85,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,179,180,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk,unk,unk,unk,unk, 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113, 150,158,159,128,136,177,178,176,142,152,173,140,unk,unk,unk,unk }; static const struct { __s32 x; __s32 y; } hid_hat_to_axis[] = {{ 0, 0}, { 0,-1}, { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, {-1, 1}, {-1, 0}, {-1,-1}}; struct usage_priority { __u32 usage; /* the HID usage associated */ bool global; /* we assume all usages to be slotted, * unless global */ unsigned int slot_overwrite; /* for globals: allows to set the usage * before or after the slots */ }; /* * hid-input will convert this list into priorities: * the first element will have the highest priority * (the length of the following array) and the last * element the lowest (1). * * hid-input will then shift the priority by 8 bits to leave some space * in case drivers want to interleave other fields. * * To accommodate slotted devices, the slot priority is * defined in the next 8 bits (defined by 0xff - slot). * * If drivers want to add fields before those, hid-input will * leave out the first 8 bits of the priority value. * * This still leaves us 65535 individual priority values. */ static const struct usage_priority hidinput_usages_priorities[] = { { /* Eraser (eraser touching) must always come before tipswitch */ .usage = HID_DG_ERASER, }, { /* Invert must always come before In Range */ .usage = HID_DG_INVERT, }, { /* Is the tip of the tool touching?
*/ .usage = HID_DG_TIPSWITCH, }, { /* Tip Pressure might emulate tip switch */ .usage = HID_DG_TIPPRESSURE, }, { /* In Range needs to come after the other tool states */ .usage = HID_DG_INRANGE, }, }; #define map_abs(c) hid_map_usage(hidinput, usage, &bit, &max, EV_ABS, (c)) #define map_rel(c) hid_map_usage(hidinput, usage, &bit, &max, EV_REL, (c)) #define map_key(c) hid_map_usage(hidinput, usage, &bit, &max, EV_KEY, (c)) #define map_led(c) hid_map_usage(hidinput, usage, &bit, &max, EV_LED, (c)) #define map_msc(c) hid_map_usage(hidinput, usage, &bit, &max, EV_MSC, (c)) #define map_abs_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_ABS, (c)) #define map_key_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_KEY, (c)) static bool match_scancode(struct hid_usage *usage, unsigned int cur_idx, unsigned int scancode) { return (usage->hid & (HID_USAGE_PAGE | HID_USAGE)) == scancode; } static bool match_keycode(struct hid_usage *usage, unsigned int cur_idx, unsigned int keycode) { /* * We should exclude unmapped usages when doing lookup by keycode. */ return (usage->type == EV_KEY && usage->code == keycode); } static bool match_index(struct hid_usage *usage, unsigned int cur_idx, unsigned int idx) { return cur_idx == idx; } typedef bool (*hid_usage_cmp_t)(struct hid_usage *usage, unsigned int cur_idx, unsigned int val); static struct hid_usage *hidinput_find_key(struct hid_device *hid, hid_usage_cmp_t match, unsigned int value, unsigned int *usage_idx) { unsigned int i, j, k, cur_idx = 0; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; if (usage->type == EV_KEY || usage->type == 0) { if (match(usage, cur_idx, value)) { if (usage_idx) *usage_idx = cur_idx; return usage; } cur_idx++; } } } } } return NULL; } static struct hid_usage *hidinput_locate_usage(struct hid_device *hid, const struct input_keymap_entry *ke, unsigned int *index) { struct hid_usage *usage; unsigned int scancode; if (ke->flags & INPUT_KEYMAP_BY_INDEX) usage = hidinput_find_key(hid, match_index, ke->index, index); else if (input_scancode_to_scalar(ke, &scancode) == 0) usage = hidinput_find_key(hid, match_scancode, scancode, index); else usage = NULL; return usage; } static int hidinput_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; unsigned int scancode, index; usage = hidinput_locate_usage(hid, ke, &index); if (usage) { ke->keycode = usage->type == EV_KEY ? usage->code : KEY_RESERVED; ke->index = index; scancode = usage->hid & (HID_USAGE_PAGE | HID_USAGE); ke->len = sizeof(scancode); memcpy(ke->scancode, &scancode, sizeof(scancode)); return 0; } return -EINVAL; } static int hidinput_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; usage = hidinput_locate_usage(hid, ke, NULL); if (usage) { *old_keycode = usage->type == EV_KEY ? 
usage->code : KEY_RESERVED; usage->type = EV_KEY; usage->code = ke->keycode; clear_bit(*old_keycode, dev->keybit); set_bit(usage->code, dev->keybit); dbg_hid("Assigned keycode %d to HID usage code %x\n", usage->code, usage->hid); /* * Set the keybit for the old keycode if the old keycode is used * by another key */ if (hidinput_find_key(hid, match_keycode, *old_keycode, NULL)) set_bit(*old_keycode, dev->keybit); return 0; } return -EINVAL; } /** * hidinput_calc_abs_res - calculate an absolute axis resolution * @field: the HID report field to calculate resolution for * @code: axis code * * The formula is: * (logical_maximum - logical_minimum) * resolution = ---------------------------------------------------------- * (physical_maximum - physical_minimum) * 10 ^ unit_exponent * * as seen in the HID specification v1.11 6.2.2.7 Global Items. * * Only exponent 1 length units are processed. Centimeters and inches are * converted to millimeters. Degrees are converted to radians. */ __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code) { __s32 unit_exponent = field->unit_exponent; __s32 logical_extents = field->logical_maximum - field->logical_minimum; __s32 physical_extents = field->physical_maximum - field->physical_minimum; __s32 prev; /* Check if the extents are sane */ if (logical_extents <= 0 || physical_extents <= 0) return 0; /* * Verify and convert units. * See HID specification v1.11 6.2.2.7 Global Items for unit decoding */ switch (code) { case ABS_X: case ABS_Y: case ABS_Z: case ABS_MT_POSITION_X: case ABS_MT_POSITION_Y: case ABS_MT_TOOL_X: case ABS_MT_TOOL_Y: case ABS_MT_TOUCH_MAJOR: case ABS_MT_TOUCH_MINOR: if (field->unit == 0x11) { /* If centimeters */ /* Convert to millimeters */ unit_exponent += 1; } else if (field->unit == 0x13) { /* If inches */ /* Convert to millimeters */ prev = physical_extents; physical_extents *= 254; if (physical_extents < prev) return 0; unit_exponent -= 1; } else { return 0; } break; case ABS_RX: case ABS_RY: case ABS_RZ: case ABS_WHEEL: case ABS_TILT_X: case ABS_TILT_Y: if (field->unit == 0x14) { /* If degrees */ /* Convert to radians */ prev = logical_extents; logical_extents *= 573; if (logical_extents < prev) return 0; unit_exponent += 1; } else if (field->unit != 0x12) { /* If not radians */ return 0; } break; default: return 0; } /* Apply negative unit exponent */ for (; unit_exponent < 0; unit_exponent++) { prev = logical_extents; logical_extents *= 10; if (logical_extents < prev) return 0; } /* Apply positive unit exponent */ for (; unit_exponent > 0; unit_exponent--) { prev = physical_extents; physical_extents *= 10; if (physical_extents < prev) return 0; } /* Calculate resolution */ return DIV_ROUND_CLOSEST(logical_extents, physical_extents); } EXPORT_SYMBOL_GPL(hidinput_calc_abs_res); #ifdef CONFIG_HID_BATTERY_STRENGTH static enum power_supply_property hidinput_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, }; #define HID_BATTERY_QUIRK_PERCENT (1 << 0) /* always reports percent */ #define HID_BATTERY_QUIRK_FEATURE (1 << 1) /* ask for feature report */ #define HID_BATTERY_QUIRK_IGNORE (1 << 2) /* completely ignore the battery */ #define HID_BATTERY_QUIRK_AVOID_QUERY (1 << 3) /* do not query the battery */ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), HID_BATTERY_QUIRK_PERCENT | 
HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_3), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_CHROMEBOOK_TROGDOR_POMPOM), HID_BATTERY_QUIRK_AVOID_QUERY }, /* * Elan I2C-HID touchscreens seem to all report a non present battery, * set HID_BATTERY_QUIRK_IGNORE for all Elan I2C-HID devices. 
*/ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, {} }; static unsigned find_battery_quirk(struct hid_device *hdev) { unsigned quirks = 0; const struct hid_device_id *match; match = hid_match_id(hdev, hid_battery_quirks); if (match != NULL) quirks = match->driver_data; return quirks; } static int hidinput_scale_battery_capacity(struct hid_device *dev, int value) { if (dev->battery_min < dev->battery_max && value >= dev->battery_min && value <= dev->battery_max) value = ((value - dev->battery_min) * 100) / (dev->battery_max - dev->battery_min); return value; } static int hidinput_query_battery_capacity(struct hid_device *dev) { u8 *buf; int ret; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 4, dev->battery_report_type, HID_REQ_GET_REPORT); if (ret < 2) { kfree(buf); return -ENODATA; } ret = hidinput_scale_battery_capacity(dev, buf[1]); kfree(buf); return ret; } static int hidinput_get_battery_property(struct power_supply *psy, enum power_supply_property prop, union power_supply_propval *val) { struct hid_device *dev = power_supply_get_drvdata(psy); int value; int ret = 0; switch (prop) { case POWER_SUPPLY_PROP_PRESENT: case POWER_SUPPLY_PROP_ONLINE: val->intval = 1; break; case POWER_SUPPLY_PROP_CAPACITY: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; } else { value = dev->battery_capacity; } val->intval = value; break; case POWER_SUPPLY_PROP_MODEL_NAME: val->strval = dev->name; break; case POWER_SUPPLY_PROP_STATUS: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; dev->battery_capacity = value; dev->battery_status = HID_BATTERY_QUERIED; } if (dev->battery_status == HID_BATTERY_UNKNOWN) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; else val->intval = dev->battery_charge_status; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; default: ret = -EINVAL; break; } return ret; } static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { struct power_supply_desc *psy_desc; struct power_supply_config psy_cfg = { .drv_data = dev, }; unsigned quirks; s32 min, max; int error; if (dev->battery) return 0; /* already initialized? */ quirks = find_battery_quirk(dev); hid_dbg(dev, "device %x:%x:%x %d quirks %d\n", dev->bus, dev->vendor, dev->product, dev->version, quirks); if (quirks & HID_BATTERY_QUIRK_IGNORE) return 0; psy_desc = kzalloc(sizeof(*psy_desc), GFP_KERNEL); if (!psy_desc) return -ENOMEM; psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", strlen(dev->uniq) ? 
dev->uniq : dev_name(&dev->dev)); if (!psy_desc->name) { error = -ENOMEM; goto err_free_mem; } psy_desc->type = POWER_SUPPLY_TYPE_BATTERY; psy_desc->properties = hidinput_battery_props; psy_desc->num_properties = ARRAY_SIZE(hidinput_battery_props); psy_desc->use_for_apm = 0; psy_desc->get_property = hidinput_get_battery_property; min = field->logical_minimum; max = field->logical_maximum; if (is_percentage || (quirks & HID_BATTERY_QUIRK_PERCENT)) { min = 0; max = 100; } if (quirks & HID_BATTERY_QUIRK_FEATURE) report_type = HID_FEATURE_REPORT; dev->battery_min = min; dev->battery_max = max; dev->battery_report_type = report_type; dev->battery_report_id = field->report->id; dev->battery_charge_status = POWER_SUPPLY_STATUS_DISCHARGING; /* * Stylus is normally not connected to the device and thus we * can't query the device and get meaningful battery strength. * We have to wait for the device to report it on its own. */ dev->battery_avoid_query = report_type == HID_INPUT_REPORT && field->physical == HID_DG_STYLUS; if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY) dev->battery_avoid_query = true; dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); hid_warn(dev, "can't register power supply: %d\n", error); goto err_free_name; } power_supply_powers(dev->battery, &dev->dev); return 0; err_free_name: kfree(psy_desc->name); err_free_mem: kfree(psy_desc); dev->battery = NULL; return error; } static void hidinput_cleanup_battery(struct hid_device *dev) { const struct power_supply_desc *psy_desc; if (!dev->battery) return; psy_desc = dev->battery->desc; power_supply_unregister(dev->battery); kfree(psy_desc->name); kfree(psy_desc); dev->battery = NULL; } static void hidinput_update_battery(struct hid_device *dev, int value) { int capacity; if (!dev->battery) return; if (value == 0 || value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); if (dev->battery_status != HID_BATTERY_REPORTED || capacity != dev->battery_capacity || ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) { dev->battery_capacity = capacity; dev->battery_status = HID_BATTERY_REPORTED; dev->battery_ratelimit_time = ktime_add_ms(ktime_get_coarse(), 30 * 1000); power_supply_changed(dev->battery); } } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { switch (usage) { case HID_BAT_CHARGING: dev->battery_charge_status = value ? 
POWER_SUPPLY_STATUS_CHARGING : POWER_SUPPLY_STATUS_DISCHARGING; return true; } return false; } #else /* !CONFIG_HID_BATTERY_STRENGTH */ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { return 0; } static void hidinput_cleanup_battery(struct hid_device *dev) { } static void hidinput_update_battery(struct hid_device *dev, int value) { } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { return false; } #endif /* CONFIG_HID_BATTERY_STRENGTH */ static bool hidinput_field_in_collection(struct hid_device *device, struct hid_field *field, unsigned int type, unsigned int usage) { struct hid_collection *collection; collection = &device->collection[field->usage->collection_index]; return collection->type == type && collection->usage == usage; } static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned int usage_index) { struct input_dev *input = hidinput->input; struct hid_device *device = input_get_drvdata(input); const struct usage_priority *usage_priority = NULL; int max = 0, code; unsigned int i = 0; unsigned long *bit = NULL; field->hidinput = hidinput; if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; /* Ignore if report count is out of bounds. */ if (field->report_count < 1) goto ignore; /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { goto ignore; } /* assign a priority based on the static list declared here */ for (i = 0; i < ARRAY_SIZE(hidinput_usages_priorities); i++) { if (usage->hid == hidinput_usages_priorities[i].usage) { usage_priority = &hidinput_usages_priorities[i]; field->usages_priorities[usage_index] = (ARRAY_SIZE(hidinput_usages_priorities) - i) << 8; break; } } /* * For slotted devices, we need to also add the slot index * in the priority. 
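* For example (illustrative values only): a usage matched at priority-table index i = 0 above gets a base priority of ARRAY_SIZE(hidinput_usages_priorities) << 8; if that usage sits in slot 2 of a slotted report, (0xff - 2) << 16 is ORed in below, so usages from lower-numbered slots carry a larger value in the upper bits.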
*/ if (usage_priority && usage_priority->global) field->usages_priorities[usage_index] |= usage_priority->slot_overwrite; else field->usages_priorities[usage_index] |= (0xff - field->slot_idx) << 16; if (device->driver->input_mapping) { int ret = device->driver->input_mapping(device, hidinput, field, usage, &bit, &max); if (ret > 0) goto mapped; if (ret < 0) goto ignore; } switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_UNDEFINED: goto ignore; case HID_UP_KEYBOARD: set_bit(EV_REP, input->evbit); if ((usage->hid & HID_USAGE) < 256) { if (!hid_keyboard[usage->hid & HID_USAGE]) goto ignore; map_key_clear(hid_keyboard[usage->hid & HID_USAGE]); } else map_key(KEY_UNKNOWN); break; case HID_UP_BUTTON: code = ((usage->hid - 1) & HID_USAGE); switch (field->application) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: if (code <= 0xf) code += BTN_JOYSTICK; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_GD_GAMEPAD: if (code <= 0xf) code += BTN_GAMEPAD; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_CP_CONSUMER_CONTROL: if (hidinput_field_in_collection(device, field, HID_COLLECTION_NAMED_ARRAY, HID_CP_PROGRAMMABLEBUTTONS)) { if (code <= 0x1d) code += KEY_MACRO1; else code += BTN_TRIGGER_HAPPY - 0x1e; break; } fallthrough; default: switch (field->physical) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: code += BTN_JOYSTICK; break; case HID_GD_GAMEPAD: code += BTN_GAMEPAD; break; default: code += BTN_MISC; } } map_key(code); break; case HID_UP_SIMULATION: switch (usage->hid & 0xffff) { case 0xba: map_abs(ABS_RUDDER); break; case 0xbb: map_abs(ABS_THROTTLE); break; case 0xc4: map_abs(ABS_GAS); break; case 0xc5: map_abs(ABS_BRAKE); break; case 0xc8: map_abs(ABS_WHEEL); break; default: goto ignore; } break; case HID_UP_GENDESK: if ((usage->hid & 0xf0) == 0x80) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x1: map_key_clear(KEY_POWER); break; case 0x2: map_key_clear(KEY_SLEEP); break; case 0x3: map_key_clear(KEY_WAKEUP); break; case 0x4: map_key_clear(KEY_CONTEXT_MENU); break; case 0x5: map_key_clear(KEY_MENU); break; case 0x6: map_key_clear(KEY_PROG1); break; case 0x7: map_key_clear(KEY_HELP); break; case 0x8: map_key_clear(KEY_EXIT); break; case 0x9: map_key_clear(KEY_SELECT); break; case 0xa: map_key_clear(KEY_RIGHT); break; case 0xb: map_key_clear(KEY_LEFT); break; case 0xc: map_key_clear(KEY_UP); break; case 0xd: map_key_clear(KEY_DOWN); break; case 0xe: map_key_clear(KEY_POWER2); break; case 0xf: map_key_clear(KEY_RESTART); break; default: goto unknown; } break; } if ((usage->hid & 0xf0) == 0x90) { /* SystemControl*/ switch (usage->hid & 0xf) { case 0xb: map_key_clear(KEY_DO_NOT_DISTURB); break; default: goto ignore; } break; } if ((usage->hid & 0xf0) == 0xa0) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x9: map_key_clear(KEY_MICMUTE); break; case 0xa: map_key_clear(KEY_ACCESSIBILITY); break; default: goto ignore; } break; } if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ switch (usage->hid & 0xf) { case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; default: goto ignore; } break; } /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others. * It wouldn't be a problem if joydev doesn't consider the * device as a joystick then. 
*/ if (field->application == HID_GD_SYSTEM_CONTROL) goto ignore; if ((usage->hid & 0xf0) == 0x90) { /* D-pad */ switch (usage->hid) { case HID_GD_UP: usage->hat_dir = 1; break; case HID_GD_DOWN: usage->hat_dir = 5; break; case HID_GD_RIGHT: usage->hat_dir = 3; break; case HID_GD_LEFT: usage->hat_dir = 7; break; default: goto unknown; } if (field->dpad) { map_abs(field->dpad); goto ignore; } map_abs(ABS_HAT0X); break; } switch (usage->hid) { /* These usage IDs map directly to the usage codes. */ case HID_GD_X: case HID_GD_Y: case HID_GD_Z: case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs_clear(usage->hid & 0xf); break; case HID_GD_WHEEL: if (field->flags & HID_MAIN_ITEM_RELATIVE) { set_bit(REL_WHEEL, input->relbit); map_rel(REL_WHEEL_HI_RES); } else { map_abs(usage->hid & 0xf); } break; case HID_GD_SLIDER: case HID_GD_DIAL: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs(usage->hid & 0xf); break; case HID_GD_HATSWITCH: usage->hat_min = field->logical_minimum; usage->hat_max = field->logical_maximum; map_abs(ABS_HAT0X); break; case HID_GD_START: map_key_clear(BTN_START); break; case HID_GD_SELECT: map_key_clear(BTN_SELECT); break; case HID_GD_RFKILL_BTN: /* MS wireless radio ctl extension, also check CA */ if (field->application == HID_GD_WIRELESS_RADIO_CTLS) { map_key_clear(KEY_RFKILL); /* We need to simulate the btn release */ field->flags |= HID_MAIN_ITEM_RELATIVE; break; } goto unknown; default: goto unknown; } break; case HID_UP_LED: switch (usage->hid & 0xffff) { /* HID-Value: */ case 0x01: map_led (LED_NUML); break; /* "Num Lock" */ case 0x02: map_led (LED_CAPSL); break; /* "Caps Lock" */ case 0x03: map_led (LED_SCROLLL); break; /* "Scroll Lock" */ case 0x04: map_led (LED_COMPOSE); break; /* "Compose" */ case 0x05: map_led (LED_KANA); break; /* "Kana" */ case 0x27: map_led (LED_SLEEP); break; /* "Stand-By" */ case 0x4c: map_led (LED_SUSPEND); break; /* "System Suspend" */ case 0x09: map_led (LED_MUTE); break; /* "Mute" */ case 0x4b: map_led (LED_MISC); break; /* "Generic Indicator" */ case 0x19: map_led (LED_MAIL); break; /* "Message Waiting" */ case 0x4d: map_led (LED_CHARGING); break; /* "External Power Connected" */ default: goto ignore; } break; case HID_UP_DIGITIZER: if ((field->application & 0xff) == 0x01) /* Digitizer */ __set_bit(INPUT_PROP_POINTER, input->propbit); else if ((field->application & 0xff) == 0x02) /* Pen */ __set_bit(INPUT_PROP_DIRECT, input->propbit); switch (usage->hid & 0xff) { case 0x00: /* Undefined */ goto ignore; case 0x30: /* TipPressure */ if (!test_bit(BTN_TOUCH, input->keybit)) { device->quirks |= HID_QUIRK_NOTOUCH; set_bit(EV_KEY, input->evbit); set_bit(BTN_TOUCH, input->keybit); } map_abs_clear(ABS_PRESSURE); break; case 0x32: /* InRange */ switch (field->physical) { case HID_DG_PUCK: map_key(BTN_TOOL_MOUSE); break; case HID_DG_FINGER: map_key(BTN_TOOL_FINGER); break; default: /* * If the physical is not given, * rely on the application. 
*/ if (!field->physical) { switch (field->application) { case HID_DG_TOUCHSCREEN: case HID_DG_TOUCHPAD: map_key_clear(BTN_TOOL_FINGER); break; default: map_key_clear(BTN_TOOL_PEN); } } else { map_key(BTN_TOOL_PEN); } break; } break; case 0x3b: /* Battery Strength */ hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; case 0x3c: /* Invert */ device->quirks &= ~HID_QUIRK_NOINVERT; map_key_clear(BTN_TOOL_RUBBER); break; case 0x3d: /* X Tilt */ map_abs_clear(ABS_TILT_X); break; case 0x3e: /* Y Tilt */ map_abs_clear(ABS_TILT_Y); break; case 0x33: /* Touch */ case 0x42: /* TipSwitch */ case 0x43: /* TipSwitch2 */ device->quirks &= ~HID_QUIRK_NOTOUCH; map_key_clear(BTN_TOUCH); break; case 0x44: /* BarrelSwitch */ map_key_clear(BTN_STYLUS); break; case 0x45: /* ERASER */ /* * This event is reported when eraser tip touches the surface. * Actual eraser (BTN_TOOL_RUBBER) is set and released either * by Invert if tool reports proximity or by Eraser directly. */ if (!test_bit(BTN_TOOL_RUBBER, input->keybit)) { device->quirks |= HID_QUIRK_NOINVERT; set_bit(BTN_TOOL_RUBBER, input->keybit); } map_key_clear(BTN_TOUCH); break; case 0x46: /* TabletPick */ case 0x5a: /* SecondaryBarrelSwitch */ map_key_clear(BTN_STYLUS2); break; case 0x5b: /* TransducerSerialNumber */ case 0x6e: /* TransducerSerialNumber2 */ map_msc(MSC_SERIAL); break; default: goto unknown; } break; case HID_UP_TELEPHONY: switch (usage->hid & HID_USAGE) { case 0x2f: map_key_clear(KEY_MICMUTE); break; case 0xb0: map_key_clear(KEY_NUMERIC_0); break; case 0xb1: map_key_clear(KEY_NUMERIC_1); break; case 0xb2: map_key_clear(KEY_NUMERIC_2); break; case 0xb3: map_key_clear(KEY_NUMERIC_3); break; case 0xb4: map_key_clear(KEY_NUMERIC_4); break; case 0xb5: map_key_clear(KEY_NUMERIC_5); break; case 0xb6: map_key_clear(KEY_NUMERIC_6); break; case 0xb7: map_key_clear(KEY_NUMERIC_7); break; case 0xb8: map_key_clear(KEY_NUMERIC_8); break; case 0xb9: map_key_clear(KEY_NUMERIC_9); break; case 0xba: map_key_clear(KEY_NUMERIC_STAR); break; case 0xbb: map_key_clear(KEY_NUMERIC_POUND); break; case 0xbc: map_key_clear(KEY_NUMERIC_A); break; case 0xbd: map_key_clear(KEY_NUMERIC_B); break; case 0xbe: map_key_clear(KEY_NUMERIC_C); break; case 0xbf: map_key_clear(KEY_NUMERIC_D); break; default: goto ignore; } break; case HID_UP_CONSUMER: /* USB HUT v1.12, pages 75-84 */ switch (usage->hid & HID_USAGE) { case 0x000: goto ignore; case 0x030: map_key_clear(KEY_POWER); break; case 0x031: map_key_clear(KEY_RESTART); break; case 0x032: map_key_clear(KEY_SLEEP); break; case 0x034: map_key_clear(KEY_SLEEP); break; case 0x035: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x036: map_key_clear(BTN_MISC); break; case 0x040: map_key_clear(KEY_MENU); break; /* Menu */ case 0x041: map_key_clear(KEY_SELECT); break; /* Menu Pick */ case 0x042: map_key_clear(KEY_UP); break; /* Menu Up */ case 0x043: map_key_clear(KEY_DOWN); break; /* Menu Down */ case 0x044: map_key_clear(KEY_LEFT); break; /* Menu Left */ case 0x045: map_key_clear(KEY_RIGHT); break; /* Menu Right */ case 0x046: map_key_clear(KEY_ESC); break; /* Menu Escape */ case 0x047: map_key_clear(KEY_KPPLUS); break; /* Menu Value Increase */ case 0x048: map_key_clear(KEY_KPMINUS); break; /* Menu Value Decrease */ case 0x060: map_key_clear(KEY_INFO); break; /* Data On Screen */ case 0x061: map_key_clear(KEY_SUBTITLE); break; /* Closed Caption */ case 0x063: map_key_clear(KEY_VCR); break; /* VCR/TV */ case 0x065: map_key_clear(KEY_CAMERA); break; /* Snapshot */ case 0x069: 
map_key_clear(KEY_RED); break; case 0x06a: map_key_clear(KEY_GREEN); break; case 0x06b: map_key_clear(KEY_BLUE); break; case 0x06c: map_key_clear(KEY_YELLOW); break; case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break; case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x072: map_key_clear(KEY_BRIGHTNESS_TOGGLE); break; case 0x073: map_key_clear(KEY_BRIGHTNESS_MIN); break; case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; case 0x076: map_key_clear(KEY_CAMERA_ACCESS_ENABLE); break; case 0x077: map_key_clear(KEY_CAMERA_ACCESS_DISABLE); break; case 0x078: map_key_clear(KEY_CAMERA_ACCESS_TOGGLE); break; case 0x079: map_key_clear(KEY_KBDILLUMUP); break; case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; case 0x088: map_key_clear(KEY_PC); break; case 0x089: map_key_clear(KEY_TV); break; case 0x08a: map_key_clear(KEY_WWW); break; case 0x08b: map_key_clear(KEY_DVD); break; case 0x08c: map_key_clear(KEY_PHONE); break; case 0x08d: map_key_clear(KEY_PROGRAM); break; case 0x08e: map_key_clear(KEY_VIDEOPHONE); break; case 0x08f: map_key_clear(KEY_GAMES); break; case 0x090: map_key_clear(KEY_MEMO); break; case 0x091: map_key_clear(KEY_CD); break; case 0x092: map_key_clear(KEY_VCR); break; case 0x093: map_key_clear(KEY_TUNER); break; case 0x094: map_key_clear(KEY_EXIT); break; case 0x095: map_key_clear(KEY_HELP); break; case 0x096: map_key_clear(KEY_TAPE); break; case 0x097: map_key_clear(KEY_TV2); break; case 0x098: map_key_clear(KEY_SAT); break; case 0x09a: map_key_clear(KEY_PVR); break; case 0x09c: map_key_clear(KEY_CHANNELUP); break; case 0x09d: map_key_clear(KEY_CHANNELDOWN); break; case 0x0a0: map_key_clear(KEY_VCR2); break; case 0x0b0: map_key_clear(KEY_PLAY); break; case 0x0b1: map_key_clear(KEY_PAUSE); break; case 0x0b2: map_key_clear(KEY_RECORD); break; case 0x0b3: map_key_clear(KEY_FASTFORWARD); break; case 0x0b4: map_key_clear(KEY_REWIND); break; case 0x0b5: map_key_clear(KEY_NEXTSONG); break; case 0x0b6: map_key_clear(KEY_PREVIOUSSONG); break; case 0x0b7: map_key_clear(KEY_STOPCD); break; case 0x0b8: map_key_clear(KEY_EJECTCD); break; case 0x0bc: map_key_clear(KEY_MEDIA_REPEAT); break; case 0x0b9: map_key_clear(KEY_SHUFFLE); break; case 0x0bf: map_key_clear(KEY_SLOW); break; case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; case 0x0d8: map_key_clear(KEY_DICTATE); break; case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; case 0x0e2: map_key_clear(KEY_MUTE); break; case 0x0e5: map_key_clear(KEY_BASSBOOST); break; case 0x0e9: map_key_clear(KEY_VOLUMEUP); break; case 0x0ea: map_key_clear(KEY_VOLUMEDOWN); break; case 0x0f5: map_key_clear(KEY_SLOW); break; case 0x181: map_key_clear(KEY_BUTTONCONFIG); break; case 0x182: map_key_clear(KEY_BOOKMARKS); break; case 0x183: map_key_clear(KEY_CONFIG); break; case 0x184: map_key_clear(KEY_WORDPROCESSOR); break; case 0x185: map_key_clear(KEY_EDITOR); break; case 0x186: map_key_clear(KEY_SPREADSHEET); break; case 0x187: map_key_clear(KEY_GRAPHICSEDITOR); break; case 0x188: map_key_clear(KEY_PRESENTATION); break; case 0x189: map_key_clear(KEY_DATABASE); break; case 0x18a: map_key_clear(KEY_MAIL); break; case 0x18b: map_key_clear(KEY_NEWS); break; case 
0x18c: map_key_clear(KEY_VOICEMAIL); break; case 0x18d: map_key_clear(KEY_ADDRESSBOOK); break; case 0x18e: map_key_clear(KEY_CALENDAR); break; case 0x18f: map_key_clear(KEY_TASKMANAGER); break; case 0x190: map_key_clear(KEY_JOURNAL); break; case 0x191: map_key_clear(KEY_FINANCE); break; case 0x192: map_key_clear(KEY_CALC); break; case 0x193: map_key_clear(KEY_PLAYER); break; case 0x194: map_key_clear(KEY_FILE); break; case 0x196: map_key_clear(KEY_WWW); break; case 0x199: map_key_clear(KEY_CHAT); break; case 0x19c: map_key_clear(KEY_LOGOFF); break; case 0x19e: map_key_clear(KEY_COFFEE); break; case 0x19f: map_key_clear(KEY_CONTROLPANEL); break; case 0x1a2: map_key_clear(KEY_APPSELECT); break; case 0x1a3: map_key_clear(KEY_NEXT); break; case 0x1a4: map_key_clear(KEY_PREVIOUS); break; case 0x1a6: map_key_clear(KEY_HELP); break; case 0x1a7: map_key_clear(KEY_DOCUMENTS); break; case 0x1ab: map_key_clear(KEY_SPELLCHECK); break; case 0x1ae: map_key_clear(KEY_KEYBOARD); break; case 0x1b1: map_key_clear(KEY_SCREENSAVER); break; case 0x1b4: map_key_clear(KEY_FILE); break; case 0x1b6: map_key_clear(KEY_IMAGES); break; case 0x1b7: map_key_clear(KEY_AUDIO); break; case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; case 0x204: map_key_clear(KEY_EXIT); break; case 0x207: map_key_clear(KEY_SAVE); break; case 0x208: map_key_clear(KEY_PRINT); break; case 0x209: map_key_clear(KEY_PROPS); break; case 0x21a: map_key_clear(KEY_UNDO); break; case 0x21b: map_key_clear(KEY_COPY); break; case 0x21c: map_key_clear(KEY_CUT); break; case 0x21d: map_key_clear(KEY_PASTE); break; case 0x21f: map_key_clear(KEY_FIND); break; case 0x221: map_key_clear(KEY_SEARCH); break; case 0x222: map_key_clear(KEY_GOTO); break; case 0x223: map_key_clear(KEY_HOMEPAGE); break; case 0x224: map_key_clear(KEY_BACK); break; case 0x225: map_key_clear(KEY_FORWARD); break; case 0x226: map_key_clear(KEY_STOP); break; case 0x227: map_key_clear(KEY_REFRESH); break; case 0x22a: map_key_clear(KEY_BOOKMARKS); break; case 0x22d: map_key_clear(KEY_ZOOMIN); break; case 0x22e: map_key_clear(KEY_ZOOMOUT); break; case 0x22f: map_key_clear(KEY_ZOOMRESET); break; case 0x232: map_key_clear(KEY_FULL_SCREEN); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; case 0x238: /* AC Pan */ set_bit(REL_HWHEEL, input->relbit); map_rel(REL_HWHEEL_HI_RES); break; case 0x23d: map_key_clear(KEY_EDIT); break; case 0x25f: map_key_clear(KEY_CANCEL); break; case 0x269: map_key_clear(KEY_INSERT); break; case 0x26a: map_key_clear(KEY_DELETE); break; case 0x279: map_key_clear(KEY_REDO); break; case 0x289: map_key_clear(KEY_REPLY); break; case 0x28b: map_key_clear(KEY_FORWARDMAIL); break; case 0x28c: map_key_clear(KEY_SEND); break; case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break; case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break; case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break; case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break; case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break; case 0x2ca: map_key_clear(KEY_KBDINPUTASSIST_NEXTGROUP); break; case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; case 0x29f: map_key_clear(KEY_SCALE); break; default: 
map_key_clear(KEY_UNKNOWN); } break; case HID_UP_GENDEVCTRLS: switch (usage->hid) { case HID_DC_BATTERYSTRENGTH: hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; } goto unknown; case HID_UP_BATTERY: switch (usage->hid) { case HID_BAT_ABSOLUTESTATEOFCHARGE: hidinput_setup_battery(device, HID_INPUT_REPORT, field, true); usage->type = EV_PWR; return; case HID_BAT_CHARGING: usage->type = EV_PWR; return; } goto unknown; case HID_UP_CAMERA: switch (usage->hid & HID_USAGE) { case 0x020: map_key_clear(KEY_CAMERA_FOCUS); break; case 0x021: map_key_clear(KEY_CAMERA); break; default: goto ignore; } break; case HID_UP_HPVENDOR: /* Reported on a Dutch layout HP5308 */ set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x021: map_key_clear(KEY_PRINT); break; case 0x070: map_key_clear(KEY_HP); break; case 0x071: map_key_clear(KEY_CAMERA); break; case 0x072: map_key_clear(KEY_SOUND); break; case 0x073: map_key_clear(KEY_QUESTION); break; case 0x080: map_key_clear(KEY_EMAIL); break; case 0x081: map_key_clear(KEY_CHAT); break; case 0x082: map_key_clear(KEY_SEARCH); break; case 0x083: map_key_clear(KEY_CONNECT); break; case 0x084: map_key_clear(KEY_FINANCE); break; case 0x085: map_key_clear(KEY_SPORT); break; case 0x086: map_key_clear(KEY_SHOP); break; default: goto ignore; } break; case HID_UP_HPVENDOR2: set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x001: map_key_clear(KEY_MICMUTE); break; case 0x003: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x004: map_key_clear(KEY_BRIGHTNESSUP); break; default: goto ignore; } break; case HID_UP_MSVENDOR: goto ignore; case HID_UP_CUSTOM: /* Reported on Logitech and Apple USB keyboards */ set_bit(EV_REP, input->evbit); goto ignore; case HID_UP_LOGIVENDOR: /* intentional fallback */ case HID_UP_LOGIVENDOR2: /* intentional fallback */ case HID_UP_LOGIVENDOR3: goto ignore; case HID_UP_PID: switch (usage->hid & HID_USAGE) { case 0xa4: map_key_clear(BTN_DEAD); break; default: goto ignore; } break; default: unknown: if (field->report_size == 1) { if (field->report->type == HID_OUTPUT_REPORT) { map_led(LED_MISC); break; } map_key(BTN_MISC); break; } if (field->flags & HID_MAIN_ITEM_RELATIVE) { map_rel(REL_MISC); break; } map_abs(ABS_MISC); break; } mapped: /* Mapping failed, bail out */ if (!bit) return; if (device->driver->input_mapped && device->driver->input_mapped(device, hidinput, field, usage, &bit, &max) < 0) { /* * The driver indicated that no further generic handling * of the usage is desired. */ return; } set_bit(usage->type, input->evbit); /* * This part is *really* controversial: * - HID aims at being generic so we should do our best to export * all incoming events * - HID describes what events are, so there is no reason for ABS_X * to be mapped to ABS_Y * - HID is using *_MISC+N as a default value, but nothing prevents * *_MISC+N to overwrite a legitimate even, which confuses userspace * (for instance ABS_MISC + 7 is ABS_MT_SLOT, which has a different * processing) * * If devices still want to use this (at their own risk), they will * have to use the quirk HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, but * the default should be a reliable mapping. 
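* For instance, if two usages in a report both resolve to ABS_MISC, the second test_and_set_bit() below finds the code already taken: with HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE the code is bumped to the next free bit via find_next_zero_bit(), otherwise HID_STAT_DUP_DETECTED is recorded and the duplicate usage is ignored.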
*/ while (usage->code <= max && test_and_set_bit(usage->code, bit)) { if (device->quirks & HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE) { usage->code = find_next_zero_bit(bit, max + 1, usage->code); } else { device->status |= HID_STAT_DUP_DETECTED; goto ignore; } } if (usage->code > max) goto ignore; if (usage->type == EV_ABS) { int a = field->logical_minimum; int b = field->logical_maximum; if ((device->quirks & HID_QUIRK_BADPAD) && (usage->code == ABS_X || usage->code == ABS_Y)) { a = field->logical_minimum = 0; b = field->logical_maximum = 255; } if (field->application == HID_GD_GAMEPAD || field->application == HID_GD_JOYSTICK) input_set_abs_params(input, usage->code, a, b, (b - a) >> 8, (b - a) >> 4); else input_set_abs_params(input, usage->code, a, b, 0, 0); input_abs_set_res(input, usage->code, hidinput_calc_abs_res(field, usage->code)); /* use a larger default input buffer for MT devices */ if (usage->code == ABS_MT_POSITION_X && input->hint_events_per_packet == 0) input_set_events_per_packet(input, 60); } if (usage->type == EV_ABS && (usage->hat_min < usage->hat_max || usage->hat_dir)) { int i; for (i = usage->code; i < usage->code + 2 && i <= max; i++) { input_set_abs_params(input, i, -1, 1, 0, 0); set_bit(i, input->absbit); } if (usage->hat_dir && !field->dpad) field->dpad = usage->code; } /* for those devices which produce Consumer volume usage as relative, * we emulate pressing volumeup/volumedown appropriate number of times * in hidinput_hid_event() */ if ((usage->type == EV_ABS) && (field->flags & HID_MAIN_ITEM_RELATIVE) && (usage->code == ABS_VOLUME)) { set_bit(KEY_VOLUMEUP, input->keybit); set_bit(KEY_VOLUMEDOWN, input->keybit); } if (usage->type == EV_KEY) { set_bit(EV_MSC, input->evbit); set_bit(MSC_SCAN, input->mscbit); } return; ignore: usage->type = 0; usage->code = 0; } static void hidinput_handle_scroll(struct hid_usage *usage, struct input_dev *input, __s32 value) { int code; int hi_res, lo_res; if (value == 0) return; if (usage->code == REL_WHEEL_HI_RES) code = REL_WHEEL; else code = REL_HWHEEL; /* * Windows reports one wheel click as value 120. Where a high-res * scroll wheel is present, a fraction of 120 is reported instead. * Our REL_WHEEL_HI_RES axis does the same because all HW must * adhere to the 120 expectation. 
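* Worked example (hypothetical multiplier of 8): each report of value 1 contributes 1 * 120 / 8 = 15 to wheel_accumulated; after eight such reports the accumulator reaches 120, a single legacy REL_WHEEL/REL_HWHEEL click is emitted and 120 is subtracted, while the high-resolution event itself is sent on every report.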
*/ hi_res = value * 120/usage->resolution_multiplier; usage->wheel_accumulated += hi_res; lo_res = usage->wheel_accumulated/120; if (lo_res) usage->wheel_accumulated -= lo_res * 120; input_event(input, EV_REL, code, lo_res); input_event(input, EV_REL, usage->code, hi_res); } static void hid_report_release_tool(struct hid_report *report, struct input_dev *input, unsigned int tool) { /* if the given tool is not currently reported, ignore */ if (!test_bit(tool, input->key)) return; /* * if the given tool was previously set, release it, * release any TOUCH and send an EV_SYN */ input_event(input, EV_KEY, BTN_TOUCH, 0); input_event(input, EV_KEY, tool, 0); input_event(input, EV_SYN, SYN_REPORT, 0); report->tool = 0; } static void hid_report_set_tool(struct hid_report *report, struct input_dev *input, unsigned int new_tool) { if (report->tool != new_tool) hid_report_release_tool(report, input, report->tool); input_event(input, EV_KEY, new_tool, 1); report->tool = new_tool; } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct input_dev *input; struct hid_report *report = field->report; unsigned *quirks = &hid->quirks; if (!usage->type) return; if (usage->type == EV_PWR) { bool handled = hidinput_set_battery_charge_status(hid, usage->hid, value); if (!handled) hidinput_update_battery(hid, value); return; } if (!field->hidinput) return; input = field->hidinput->input; if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) hat_dir = (value - usage->hat_min) * 8 / (usage->hat_max - usage->hat_min + 1) + 1; if (hat_dir < 0 || hat_dir > 8) hat_dir = 0; input_event(input, usage->type, usage->code , hid_hat_to_axis[hat_dir].x); input_event(input, usage->type, usage->code + 1, hid_hat_to_axis[hat_dir].y); return; } /* * Ignore out-of-range values as per HID specification, * section 5.10 and 6.2.25, when NULL state bit is present. * When it's not, clamp the value to match Microsoft's input * driver as mentioned in "Required HID usages for digitizers": * https://msdn.microsoft.com/en-us/library/windows/hardware/dn672278(v=vs.85).asp * * The logical_minimum < logical_maximum check is done so that we * don't unintentionally discard values sent by devices which * don't specify logical min and max. */ if ((field->flags & HID_MAIN_ITEM_VARIABLE) && field->logical_minimum < field->logical_maximum) { if (field->flags & HID_MAIN_ITEM_NULL_STATE && (value < field->logical_minimum || value > field->logical_maximum)) { dbg_hid("Ignoring out-of-range value %x\n", value); return; } value = clamp(value, field->logical_minimum, field->logical_maximum); } switch (usage->hid) { case HID_DG_ERASER: report->tool_active |= !!value; /* * if eraser is set, we must enforce BTN_TOOL_RUBBER * to accommodate for devices not following the spec. */ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (report->tool != BTN_TOOL_RUBBER) /* value is off, tool is not rubber, ignore */ return; else if (*quirks & HID_QUIRK_NOINVERT && !test_bit(BTN_TOUCH, input->key)) { /* * There is no invert to release the tool, let hid_input * send BTN_TOUCH with scancode and release the tool after. */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); return; } /* let hid-input set BTN_TOUCH */ break; case HID_DG_INVERT: report->tool_active |= !!value; /* * If invert is set, we store BTN_TOOL_RUBBER. 
*/ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (!report->tool_active) /* tool_active not set means Invert and Eraser are not set */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); /* no further processing */ return; case HID_DG_INRANGE: report->tool_active |= !!value; if (report->tool_active) { /* * if tool is not set but is marked as active, * assume ours */ if (!report->tool) report->tool = usage->code; /* drivers may have changed the value behind our back, resend it */ hid_report_set_tool(report, input, report->tool); } else { hid_report_release_tool(report, input, usage->code); } /* reset tool_active for the next event */ report->tool_active = false; /* no further processing */ return; case HID_DG_TIPSWITCH: report->tool_active |= !!value; /* if tool is set to RUBBER we should ignore the current value */ if (report->tool == BTN_TOOL_RUBBER) return; break; case HID_DG_TIPPRESSURE: if (*quirks & HID_QUIRK_NOTOUCH) { int a = field->logical_minimum; int b = field->logical_maximum; if (value > a + ((b - a) >> 3)) { input_event(input, EV_KEY, BTN_TOUCH, 1); report->tool_active = true; } } break; case HID_UP_PID | 0x83UL: /* Simultaneous Effects Max */ dbg_hid("Maximum Effects - %d\n",value); return; case HID_UP_PID | 0x7fUL: dbg_hid("PID Pool Report\n"); return; } switch (usage->type) { case EV_KEY: if (usage->code == 0) /* Key 0 is "unassigned", not KEY_UNKNOWN */ return; break; case EV_REL: if (usage->code == REL_WHEEL_HI_RES || usage->code == REL_HWHEEL_HI_RES) { hidinput_handle_scroll(usage, input, value); return; } break; case EV_ABS: if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->code == ABS_VOLUME) { int count = abs(value); int direction = value > 0 ? KEY_VOLUMEUP : KEY_VOLUMEDOWN; int i; for (i = 0; i < count; i++) { input_event(input, EV_KEY, direction, 1); input_sync(input); input_event(input, EV_KEY, direction, 0); input_sync(input); } return; } else if (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) || ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y)) value = field->logical_maximum - value; break; } /* * Ignore reports for absolute data if the data didn't change. This is * not only an optimization but also fixes 'dead' key reports. Some * RollOver implementations for localized keys (like BACKSLASH/PIPE; HID * 0x31 and 0x32) report multiple keys, even though a localized keyboard * can only have one of them physically available. The 'dead' keys * report constant 0. As all map to the same keycode, they'd confuse * the input layer. If we filter the 'dead' keys on the HID level, we * skip the keycode translation and only forward real events. 
*/ if (!(field->flags & (HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_BUFFERED_BYTE)) && (field->flags & HID_MAIN_ITEM_VARIABLE) && usage->usage_index < field->maxusage && value == field->value[usage->usage_index]) return; /* report the usage code as scancode if the key status has changed */ if (usage->type == EV_KEY && (!test_bit(usage->code, input->key)) == value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); input_event(input, usage->type, usage->code, value); if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->type == EV_KEY && value) { input_sync(input); input_event(input, usage->type, usage->code, 0); } } void hidinput_report_event(struct hid_device *hid, struct hid_report *report) { struct hid_input *hidinput; if (hid->quirks & HID_QUIRK_NO_INPUT_SYNC) return; list_for_each_entry(hidinput, &hid->inputs, list) input_sync(hidinput->input); } EXPORT_SYMBOL_GPL(hidinput_report_event); static int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field) { struct hid_report *report; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { *field = report->field[i]; for (j = 0; j < (*field)->maxusage; j++) if ((*field)->usage[j].type == type && (*field)->usage[j].code == code) return j; } } return -1; } struct hid_field *hidinput_get_led_field(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED) return field; } } return NULL; } EXPORT_SYMBOL_GPL(hidinput_get_led_field); unsigned int hidinput_count_leds(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; unsigned int count = 0; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED && field->value[j]) count += 1; } } return count; } EXPORT_SYMBOL_GPL(hidinput_count_leds); static void hidinput_led_worker(struct work_struct *work) { struct hid_device *hid = container_of(work, struct hid_device, led_work); struct hid_field *field; struct hid_report *report; int ret; u32 len; __u8 *buf; field = hidinput_get_led_field(hid); if (!field) return; /* * field->report is accessed unlocked regarding HID core. So there might * be another incoming SET-LED request from user-space, which changes * the LED state while we assemble our outgoing buffer. However, this * doesn't matter as hid_output_report() correctly converts it into a * boolean value no matter what information is currently set on the LED * field (even garbage). So the remote device will always get a valid * request. * And in case we send a wrong value, a next led worker is spawned * for every SET-LED request so the following worker will send the * correct value, guaranteed! 
*/ report = field->report; /* use custom SET_REPORT request if possible (asynchronous) */ if (hid->ll_driver->request) return hid->ll_driver->request(hid, report, HID_REQ_SET_REPORT); /* fall back to generic raw-output-report */ len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return; hid_output_report(report, buf); /* synchronous output report */ ret = hid_hw_output_report(hid, buf, len); if (ret == -ENOSYS) hid_hw_raw_request(hid, report->id, buf, len, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(buf); } static int hidinput_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct hid_device *hid = input_get_drvdata(dev); struct hid_field *field; int offset; if (type == EV_FF) return input_ff_event(dev, type, code, value); if (type != EV_LED) return -1; if ((offset = hidinput_find_field(hid, type, code, &field)) == -1) { hid_warn(dev, "event field not found\n"); return -1; } hid_set_field(field, offset, value); schedule_work(&hid->led_work); return 0; } static int hidinput_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); return hid_hw_open(hid); } static void hidinput_close(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); hid_hw_close(hid); } static bool __hidinput_change_resolution_multipliers(struct hid_device *hid, struct hid_report *report, bool use_logical_max) { struct hid_usage *usage; bool update_needed = false; bool get_report_completed = false; int i, j; if (report->maxfield == 0) return false; for (i = 0; i < report->maxfield; i++) { __s32 value = use_logical_max ? report->field[i]->logical_maximum : report->field[i]->logical_minimum; /* There is no good reason for a Resolution * Multiplier to have a count other than 1. * Ignore that case. */ if (report->field[i]->report_count != 1) continue; for (j = 0; j < report->field[i]->maxusage; j++) { usage = &report->field[i]->usage[j]; if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) continue; /* * If we have more than one feature within this * report we need to fill in the bits from the * others before we can overwrite the ones for the * Resolution Multiplier. * * But if we're not allowed to read from the device, * we just bail. Such a device should not exist * anyway. */ if (!get_report_completed && report->maxfield > 1) { if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS) return update_needed; hid_hw_request(hid, report, HID_REQ_GET_REPORT); hid_hw_wait(hid); get_report_completed = true; } report->field[i]->value[j] = value; update_needed = true; } } return update_needed; } static void hidinput_change_resolution_multipliers(struct hid_device *hid) { struct hid_report_enum *rep_enum; struct hid_report *rep; int ret; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { bool update_needed = __hidinput_change_resolution_multipliers(hid, rep, true); if (update_needed) { ret = __hid_request(hid, rep, HID_REQ_SET_REPORT); if (ret) { __hidinput_change_resolution_multipliers(hid, rep, false); return; } } } /* refresh our structs */ hid_setup_resolution_multiplier(hid); } static void report_features(struct hid_device *hid) { struct hid_driver *drv = hid->driver; struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; /* Verify if Battery Strength feature is available */ if (usage->hid == HID_DC_BATTERYSTRENGTH) hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i], false); if (drv->feature_mapping) drv->feature_mapping(hid, rep->field[i], usage); } } } static struct hid_input *hidinput_allocate(struct hid_device *hid, unsigned int application) { struct hid_input *hidinput = kzalloc(sizeof(*hidinput), GFP_KERNEL); struct input_dev *input_dev = input_allocate_device(); const char *suffix = NULL; size_t suffix_len, name_len; if (!hidinput || !input_dev) goto fail; if ((hid->quirks & HID_QUIRK_INPUT_PER_APP) && hid->maxapplication > 1) { switch (application) { case HID_GD_KEYBOARD: suffix = "Keyboard"; break; case HID_GD_KEYPAD: suffix = "Keypad"; break; case HID_GD_MOUSE: suffix = "Mouse"; break; case HID_DG_PEN: /* * yes, there is an issue here: * DG_PEN -> "Stylus" * DG_STYLUS -> "Pen" * But changing this now means users with config snippets * will have to change it and the test suite will not be happy. */ suffix = "Stylus"; break; case HID_DG_STYLUS: suffix = "Pen"; break; case HID_DG_TOUCHSCREEN: suffix = "Touchscreen"; break; case HID_DG_TOUCHPAD: suffix = "Touchpad"; break; case HID_GD_SYSTEM_CONTROL: suffix = "System Control"; break; case HID_CP_CONSUMER_CONTROL: suffix = "Consumer Control"; break; case HID_GD_WIRELESS_RADIO_CTLS: suffix = "Wireless Radio Control"; break; case HID_GD_SYSTEM_MULTIAXIS: suffix = "System Multi Axis"; break; default: break; } } if (suffix) { name_len = strlen(hid->name); suffix_len = strlen(suffix); if ((name_len < suffix_len) || strcmp(hid->name + name_len - suffix_len, suffix)) { hidinput->name = kasprintf(GFP_KERNEL, "%s %s", hid->name, suffix); if (!hidinput->name) goto fail; } } input_set_drvdata(input_dev, hid); input_dev->event = hidinput_input_event; input_dev->open = hidinput_open; input_dev->close = hidinput_close; input_dev->setkeycode = hidinput_setkeycode; input_dev->getkeycode = hidinput_getkeycode; input_dev->name = hidinput->name ? 
hidinput->name : hid->name; input_dev->phys = hid->phys; input_dev->uniq = hid->uniq; input_dev->id.bustype = hid->bus; input_dev->id.vendor = hid->vendor; input_dev->id.product = hid->product; input_dev->id.version = hid->version; input_dev->dev.parent = &hid->dev; hidinput->input = input_dev; hidinput->application = application; list_add_tail(&hidinput->list, &hid->inputs); INIT_LIST_HEAD(&hidinput->reports); return hidinput; fail: kfree(hidinput); input_free_device(input_dev); hid_err(hid, "Out of memory during hid input probe\n"); return NULL; } static bool hidinput_has_been_populated(struct hid_input *hidinput) { int i; unsigned long r = 0; for (i = 0; i < BITS_TO_LONGS(EV_CNT); i++) r |= hidinput->input->evbit[i]; for (i = 0; i < BITS_TO_LONGS(KEY_CNT); i++) r |= hidinput->input->keybit[i]; for (i = 0; i < BITS_TO_LONGS(REL_CNT); i++) r |= hidinput->input->relbit[i]; for (i = 0; i < BITS_TO_LONGS(ABS_CNT); i++) r |= hidinput->input->absbit[i]; for (i = 0; i < BITS_TO_LONGS(MSC_CNT); i++) r |= hidinput->input->mscbit[i]; for (i = 0; i < BITS_TO_LONGS(LED_CNT); i++) r |= hidinput->input->ledbit[i]; for (i = 0; i < BITS_TO_LONGS(SND_CNT); i++) r |= hidinput->input->sndbit[i]; for (i = 0; i < BITS_TO_LONGS(FF_CNT); i++) r |= hidinput->input->ffbit[i]; for (i = 0; i < BITS_TO_LONGS(SW_CNT); i++) r |= hidinput->input->swbit[i]; return !!r; } static void hidinput_cleanup_hidinput(struct hid_device *hid, struct hid_input *hidinput) { struct hid_report *report; int i, k; list_del(&hidinput->list); input_free_device(hidinput->input); kfree(hidinput->name); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) if (report->field[i]->hidinput == hidinput) report->field[i]->hidinput = NULL; } } kfree(hidinput); } static struct hid_input *hidinput_match(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->report && hidinput->report->id == report->id) return hidinput; } return NULL; } static struct hid_input *hidinput_match_application(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->application == report->application) return hidinput; /* * Keep SystemControl and ConsumerControl applications together * with the main keyboard, if present. 
*/ if ((report->application == HID_GD_SYSTEM_CONTROL || report->application == HID_CP_CONSUMER_CONTROL) && hidinput->application == HID_GD_KEYBOARD) { return hidinput; } } return NULL; } static inline void hidinput_configure_usages(struct hid_input *hidinput, struct hid_report *report) { int i, j, k; int first_field_index = 0; int slot_collection_index = -1; int prev_collection_index = -1; unsigned int slot_idx = 0; struct hid_field *field; /* * First tag all the fields that are part of a slot, * a slot needs to have one Contact ID in the collection */ for (i = 0; i < report->maxfield; i++) { field = report->field[i]; /* ignore fields without usage */ if (field->maxusage < 1) continue; /* * janitoring when collection_index changes */ if (prev_collection_index != field->usage->collection_index) { prev_collection_index = field->usage->collection_index; first_field_index = i; } /* * if we already found a Contact ID in the collection, * tag and continue to the next. */ if (slot_collection_index == field->usage->collection_index) { field->slot_idx = slot_idx; continue; } /* check if the current field has Contact ID */ for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == HID_DG_CONTACTID) { slot_collection_index = field->usage->collection_index; slot_idx++; /* * mark all previous fields and this one in the * current collection to be slotted. */ for (k = first_field_index; k <= i; k++) report->field[k]->slot_idx = slot_idx; break; } } } for (i = 0; i < report->maxfield; i++) for (j = 0; j < report->field[i]->maxusage; j++) hidinput_configure_usage(hidinput, report->field[i], report->field[i]->usage + j, j); } /* * Register the input device; print a message. * Configure the input layer interface * Read all reports and initialize the absolute field values. */ int hidinput_connect(struct hid_device *hid, unsigned int force) { struct hid_driver *drv = hid->driver; struct hid_report *report; struct hid_input *next, *hidinput = NULL; unsigned int application; int i, k; INIT_LIST_HEAD(&hid->inputs); INIT_WORK(&hid->led_work, hidinput_led_worker); hid->status &= ~HID_STAT_DUP_DETECTED; if (!force) { for (i = 0; i < hid->maxcollection; i++) { struct hid_collection *col = &hid->collection[i]; if (col->type == HID_COLLECTION_APPLICATION || col->type == HID_COLLECTION_PHYSICAL) if (IS_INPUT_APPLICATION(col->usage)) break; } if (i == hid->maxcollection) return -1; } report_features(hid); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { if (!report->maxfield) continue; application = report->application; /* * Find the previous hidinput report attached * to this report id. 
*/ if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput = hidinput_match(report); else if (hid->maxapplication > 1 && (hid->quirks & HID_QUIRK_INPUT_PER_APP)) hidinput = hidinput_match_application(report); if (!hidinput) { hidinput = hidinput_allocate(hid, application); if (!hidinput) goto out_unwind; } hidinput_configure_usages(hidinput, report); if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput->report = report; list_add_tail(&report->hidinput_list, &hidinput->reports); } } hidinput_change_resolution_multipliers(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { if (drv->input_configured && drv->input_configured(hid, hidinput)) goto out_unwind; if (!hidinput_has_been_populated(hidinput)) { /* no need to register an input device not populated */ hidinput_cleanup_hidinput(hid, hidinput); continue; } if (input_register_device(hidinput->input)) goto out_unwind; hidinput->registered = true; } if (list_empty(&hid->inputs)) { hid_err(hid, "No inputs registered, leaving\n"); goto out_unwind; } if (hid->status & HID_STAT_DUP_DETECTED) hid_dbg(hid, "Some usages could not be mapped, please use HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE if this is legitimate.\n"); return 0; out_unwind: /* unwind the ones we already registered */ hidinput_disconnect(hid); return -1; } EXPORT_SYMBOL_GPL(hidinput_connect); void hidinput_disconnect(struct hid_device *hid) { struct hid_input *hidinput, *next; hidinput_cleanup_battery(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { list_del(&hidinput->list); if (hidinput->registered) input_unregister_device(hidinput->input); else input_free_device(hidinput->input); kfree(hidinput->name); kfree(hidinput); } /* led_work is spawned by input_dev callbacks, but doesn't access the * parent input_dev at all. Once all input devices are removed, we * know that led_work will never get restarted, so we can cancel it * synchronously and are safe. */ cancel_work_sync(&hid->led_work); } EXPORT_SYMBOL_GPL(hidinput_disconnect); #ifdef CONFIG_HID_KUNIT_TEST #include "hid-input-test.c" #endif |
5 4 11 11 4 4 3 4 16 2 11 9 4 11 6 9 8 9 11 11 9 3 8 9 9 9 5 9 2 9 1 3 11 8 13 12 10 26 26 28 28 26 1 19 8 1 26 22 11 22 25 25 19 2 3 20 4 19 19 19 19 42 12 42 36 36 36 15 15 36 36 15 36 24 36 42 42 17 40 40 40 40 20 24 5 4 4 1 3 6 2 30 17 30 7 30 15 44 25 43 28 43 43 18 43 26 5 2 2 2 15 13 2 15 5 7 2 3 1 2 1 2 22 45 23 22 45 5 31 18 3 22 8 23 8 1 8 2 1 16 7 14 3 2 14 1 1 9 6 6 7 5 5 11 9 7 65 65 2 63 37 15 17 2 1 1 2 3 1 3 1 60 51 2 51 43 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 
/*
 * usbmidi.c - ALSA USB MIDI driver
 *
 * Copyright (c) 2002-2009 Clemens Ladisch
 * All rights reserved.
 *
 * Based on the OSS usb-midi driver by