Total coverage: 439182 of 1766742 lines (25%)
// SPDX-License-Identifier: GPL-2.0-or-later /* * User-space I/O driver support for HID subsystem * Copyright (c) 2012 David Herrmann */ /* */ #include <linux/atomic.h> #include <linux/compat.h> #include <linux/cred.h> #include
<linux/device.h> #include <linux/fs.h> #include <linux/hid.h> #include <linux/input.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/uhid.h> #include <linux/wait.h> #define UHID_NAME "uhid" #define UHID_BUFSIZE 32 struct uhid_device { struct mutex devlock; /* This flag tracks whether the HID device is usable for commands from * userspace. The flag is already set before hid_add_device(), which * runs in workqueue context, to allow hid_add_device() to communicate * with userspace. * However, if hid_add_device() fails, the flag is cleared without * holding devlock. * We guarantee that if @running changes from true to false while you're * holding @devlock, it's still fine to access @hid. */ bool running; __u8 *rd_data; uint rd_size; /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */ struct hid_device *hid; struct uhid_event input_buf; wait_queue_head_t waitq; spinlock_t qlock; __u8 head; __u8 tail; struct uhid_event *outq[UHID_BUFSIZE]; /* blocking GET_REPORT support; state changes protected by qlock */ struct mutex report_lock; wait_queue_head_t report_wait; bool report_running; u32 report_id; u32 report_type; struct uhid_event report_buf; struct work_struct worker; }; static struct miscdevice uhid_misc; static void uhid_device_add_worker(struct work_struct *work) { struct uhid_device *uhid = container_of(work, struct uhid_device, worker); int ret; ret = hid_add_device(uhid->hid); if (ret) { hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret); /* We used to call hid_destroy_device() here, but that's really * messy to get right because we have to coordinate with * concurrent writes from userspace that might be in the middle * of using uhid->hid. * Just leave uhid->hid as-is for now, and clean it up when * userspace tries to close or reinitialize the uhid instance. * * However, we do have to clear the ->running flag and do a * wakeup to make sure userspace knows that the device is gone. 
*/ WRITE_ONCE(uhid->running, false); wake_up_interruptible(&uhid->report_wait); } } static void uhid_queue(struct uhid_device *uhid, struct uhid_event *ev) { __u8 newhead; newhead = (uhid->head + 1) % UHID_BUFSIZE; if (newhead != uhid->tail) { uhid->outq[uhid->head] = ev; uhid->head = newhead; wake_up_interruptible(&uhid->waitq); } else { hid_warn(uhid->hid, "Output queue is full\n"); kfree(ev); } } static int uhid_queue_event(struct uhid_device *uhid, __u32 event) { unsigned long flags; struct uhid_event *ev; ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) return -ENOMEM; ev->type = event; spin_lock_irqsave(&uhid->qlock, flags); uhid_queue(uhid, ev); spin_unlock_irqrestore(&uhid->qlock, flags); return 0; } static int uhid_hid_start(struct hid_device *hid) { struct uhid_device *uhid = hid->driver_data; struct uhid_event *ev; unsigned long flags; ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) return -ENOMEM; ev->type = UHID_START; if (hid->report_enum[HID_FEATURE_REPORT].numbered) ev->u.start.dev_flags |= UHID_DEV_NUMBERED_FEATURE_REPORTS; if (hid->report_enum[HID_OUTPUT_REPORT].numbered) ev->u.start.dev_flags |= UHID_DEV_NUMBERED_OUTPUT_REPORTS; if (hid->report_enum[HID_INPUT_REPORT].numbered) ev->u.start.dev_flags |= UHID_DEV_NUMBERED_INPUT_REPORTS; spin_lock_irqsave(&uhid->qlock, flags); uhid_queue(uhid, ev); spin_unlock_irqrestore(&uhid->qlock, flags); return 0; } static void uhid_hid_stop(struct hid_device *hid) { struct uhid_device *uhid = hid->driver_data; hid->claimed = 0; uhid_queue_event(uhid, UHID_STOP); } static int uhid_hid_open(struct hid_device *hid) { struct uhid_device *uhid = hid->driver_data; return uhid_queue_event(uhid, UHID_OPEN); } static void uhid_hid_close(struct hid_device *hid) { struct uhid_device *uhid = hid->driver_data; uhid_queue_event(uhid, UHID_CLOSE); } static int uhid_hid_parse(struct hid_device *hid) { struct uhid_device *uhid = hid->driver_data; return hid_parse_report(hid, uhid->rd_data, uhid->rd_size); } /* must be called with report_lock held */ static int __uhid_report_queue_and_wait(struct uhid_device *uhid, struct uhid_event *ev, __u32 *report_id) { unsigned long flags; int ret; spin_lock_irqsave(&uhid->qlock, flags); *report_id = ++uhid->report_id; uhid->report_type = ev->type + 1; uhid->report_running = true; uhid_queue(uhid, ev); spin_unlock_irqrestore(&uhid->qlock, flags); ret = wait_event_interruptible_timeout(uhid->report_wait, !uhid->report_running || !READ_ONCE(uhid->running), 5 * HZ); if (!ret || !READ_ONCE(uhid->running) || uhid->report_running) ret = -EIO; else if (ret < 0) ret = -ERESTARTSYS; else ret = 0; uhid->report_running = false; return ret; } static void uhid_report_wake_up(struct uhid_device *uhid, u32 id, const struct uhid_event *ev) { unsigned long flags; spin_lock_irqsave(&uhid->qlock, flags); /* id for old report; drop it silently */ if (uhid->report_type != ev->type || uhid->report_id != id) goto unlock; if (!uhid->report_running) goto unlock; memcpy(&uhid->report_buf, ev, sizeof(*ev)); uhid->report_running = false; wake_up_interruptible(&uhid->report_wait); unlock: spin_unlock_irqrestore(&uhid->qlock, flags); } static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum, u8 *buf, size_t count, u8 rtype) { struct uhid_device *uhid = hid->driver_data; struct uhid_get_report_reply_req *req; struct uhid_event *ev; int ret; if (!READ_ONCE(uhid->running)) return -EIO; ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) return -ENOMEM; ev->type = UHID_GET_REPORT; ev->u.get_report.rnum = rnum; ev->u.get_report.rtype = 
rtype; ret = mutex_lock_interruptible(&uhid->report_lock); if (ret) { kfree(ev); return ret; } /* this _always_ takes ownership of @ev */ ret = __uhid_report_queue_and_wait(uhid, ev, &ev->u.get_report.id); if (ret) goto unlock; req = &uhid->report_buf.u.get_report_reply; if (req->err) { ret = -EIO; } else { ret = min3(count, (size_t)req->size, (size_t)UHID_DATA_MAX); memcpy(buf, req->data, ret); } unlock: mutex_unlock(&uhid->report_lock); return ret; } static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum, const u8 *buf, size_t count, u8 rtype) { struct uhid_device *uhid = hid->driver_data; struct uhid_event *ev; int ret; if (!READ_ONCE(uhid->running) || count > UHID_DATA_MAX) return -EIO; ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) return -ENOMEM; ev->type = UHID_SET_REPORT; ev->u.set_report.rnum = rnum; ev->u.set_report.rtype = rtype; ev->u.set_report.size = count; memcpy(ev->u.set_report.data, buf, count); ret = mutex_lock_interruptible(&uhid->report_lock); if (ret) { kfree(ev); return ret; } /* this _always_ takes ownership of @ev */ ret = __uhid_report_queue_and_wait(uhid, ev, &ev->u.set_report.id); if (ret) goto unlock; if (uhid->report_buf.u.set_report_reply.err) ret = -EIO; else ret = count; unlock: mutex_unlock(&uhid->report_lock); return ret; } static int uhid_hid_raw_request(struct hid_device *hid, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { u8 u_rtype; switch (rtype) { case HID_FEATURE_REPORT: u_rtype = UHID_FEATURE_REPORT; break; case HID_OUTPUT_REPORT: u_rtype = UHID_OUTPUT_REPORT; break; case HID_INPUT_REPORT: u_rtype = UHID_INPUT_REPORT; break; default: return -EINVAL; } switch (reqtype) { case HID_REQ_GET_REPORT: return uhid_hid_get_report(hid, reportnum, buf, len, u_rtype); case HID_REQ_SET_REPORT: return uhid_hid_set_report(hid, reportnum, buf, len, u_rtype); default: return -EIO; } } static int uhid_hid_output_raw(struct hid_device *hid, __u8 *buf, size_t count, unsigned char report_type) { struct uhid_device *uhid = hid->driver_data; __u8 rtype; unsigned long flags; struct uhid_event *ev; switch (report_type) { case HID_FEATURE_REPORT: rtype = UHID_FEATURE_REPORT; break; case HID_OUTPUT_REPORT: rtype = UHID_OUTPUT_REPORT; break; default: return -EINVAL; } if (count < 1 || count > UHID_DATA_MAX) return -EINVAL; ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) return -ENOMEM; ev->type = UHID_OUTPUT; ev->u.output.size = count; ev->u.output.rtype = rtype; memcpy(ev->u.output.data, buf, count); spin_lock_irqsave(&uhid->qlock, flags); uhid_queue(uhid, ev); spin_unlock_irqrestore(&uhid->qlock, flags); return count; } static int uhid_hid_output_report(struct hid_device *hid, __u8 *buf, size_t count) { return uhid_hid_output_raw(hid, buf, count, HID_OUTPUT_REPORT); } static const struct hid_ll_driver uhid_hid_driver = { .start = uhid_hid_start, .stop = uhid_hid_stop, .open = uhid_hid_open, .close = uhid_hid_close, .parse = uhid_hid_parse, .raw_request = uhid_hid_raw_request, .output_report = uhid_hid_output_report, .max_buffer_size = UHID_DATA_MAX, }; #ifdef CONFIG_COMPAT /* Apparently we haven't stepped on these rakes enough times yet. 
*/ struct uhid_create_req_compat { __u8 name[128]; __u8 phys[64]; __u8 uniq[64]; compat_uptr_t rd_data; __u16 rd_size; __u16 bus; __u32 vendor; __u32 product; __u32 version; __u32 country; } __attribute__((__packed__)); static int uhid_event_from_user(const char __user *buffer, size_t len, struct uhid_event *event) { if (in_compat_syscall()) { u32 type; if (get_user(type, buffer)) return -EFAULT; if (type == UHID_CREATE) { /* * This is our messed up request with compat pointer. * It is largish (more than 256 bytes) so we better * allocate it from the heap. */ struct uhid_create_req_compat *compat; compat = kzalloc(sizeof(*compat), GFP_KERNEL); if (!compat) return -ENOMEM; buffer += sizeof(type); len -= sizeof(type); if (copy_from_user(compat, buffer, min(len, sizeof(*compat)))) { kfree(compat); return -EFAULT; } /* Shuffle the data over to proper structure */ event->type = type; memcpy(event->u.create.name, compat->name, sizeof(compat->name)); memcpy(event->u.create.phys, compat->phys, sizeof(compat->phys)); memcpy(event->u.create.uniq, compat->uniq, sizeof(compat->uniq)); event->u.create.rd_data = compat_ptr(compat->rd_data); event->u.create.rd_size = compat->rd_size; event->u.create.bus = compat->bus; event->u.create.vendor = compat->vendor; event->u.create.product = compat->product; event->u.create.version = compat->version; event->u.create.country = compat->country; kfree(compat); return 0; } /* All others can be copied directly */ } if (copy_from_user(event, buffer, min(len, sizeof(*event)))) return -EFAULT; return 0; } #else static int uhid_event_from_user(const char __user *buffer, size_t len, struct uhid_event *event) { if (copy_from_user(event, buffer, min(len, sizeof(*event)))) return -EFAULT; return 0; } #endif static int uhid_dev_create2(struct uhid_device *uhid, const struct uhid_event *ev) { struct hid_device *hid; size_t rd_size; void *rd_data; int ret; if (uhid->hid) return -EALREADY; rd_size = ev->u.create2.rd_size; if (rd_size <= 0 || rd_size > HID_MAX_DESCRIPTOR_SIZE) return -EINVAL; rd_data = kmemdup(ev->u.create2.rd_data, rd_size, GFP_KERNEL); if (!rd_data) return -ENOMEM; uhid->rd_size = rd_size; uhid->rd_data = rd_data; hid = hid_allocate_device(); if (IS_ERR(hid)) { ret = PTR_ERR(hid); goto err_free; } BUILD_BUG_ON(sizeof(hid->name) != sizeof(ev->u.create2.name)); strscpy(hid->name, ev->u.create2.name, sizeof(hid->name)); BUILD_BUG_ON(sizeof(hid->phys) != sizeof(ev->u.create2.phys)); strscpy(hid->phys, ev->u.create2.phys, sizeof(hid->phys)); BUILD_BUG_ON(sizeof(hid->uniq) != sizeof(ev->u.create2.uniq)); strscpy(hid->uniq, ev->u.create2.uniq, sizeof(hid->uniq)); hid->ll_driver = &uhid_hid_driver; hid->bus = ev->u.create2.bus; hid->vendor = ev->u.create2.vendor; hid->product = ev->u.create2.product; hid->version = ev->u.create2.version; hid->country = ev->u.create2.country; hid->driver_data = uhid; hid->dev.parent = uhid_misc.this_device; uhid->hid = hid; uhid->running = true; /* Adding of a HID device is done through a worker, to allow HID drivers * which use feature requests during .probe to work, without they would * be blocked on devlock, which is held by uhid_char_write. 
*/ schedule_work(&uhid->worker); return 0; err_free: kfree(uhid->rd_data); uhid->rd_data = NULL; uhid->rd_size = 0; return ret; } static int uhid_dev_create(struct uhid_device *uhid, struct uhid_event *ev) { struct uhid_create_req orig; orig = ev->u.create; if (orig.rd_size <= 0 || orig.rd_size > HID_MAX_DESCRIPTOR_SIZE) return -EINVAL; if (copy_from_user(&ev->u.create2.rd_data, orig.rd_data, orig.rd_size)) return -EFAULT; memcpy(ev->u.create2.name, orig.name, sizeof(orig.name)); memcpy(ev->u.create2.phys, orig.phys, sizeof(orig.phys)); memcpy(ev->u.create2.uniq, orig.uniq, sizeof(orig.uniq)); ev->u.create2.rd_size = orig.rd_size; ev->u.create2.bus = orig.bus; ev->u.create2.vendor = orig.vendor; ev->u.create2.product = orig.product; ev->u.create2.version = orig.version; ev->u.create2.country = orig.country; return uhid_dev_create2(uhid, ev); } static int uhid_dev_destroy(struct uhid_device *uhid) { if (!uhid->hid) return -EINVAL; WRITE_ONCE(uhid->running, false); wake_up_interruptible(&uhid->report_wait); cancel_work_sync(&uhid->worker); hid_destroy_device(uhid->hid); uhid->hid = NULL; kfree(uhid->rd_data); return 0; } static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev) { if (!READ_ONCE(uhid->running)) return -EINVAL; hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input.data, min_t(size_t, ev->u.input.size, UHID_DATA_MAX), 0); return 0; } static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev) { if (!READ_ONCE(uhid->running)) return -EINVAL; hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data, min_t(size_t, ev->u.input2.size, UHID_DATA_MAX), 0); return 0; } static int uhid_dev_get_report_reply(struct uhid_device *uhid, struct uhid_event *ev) { if (!READ_ONCE(uhid->running)) return -EINVAL; uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev); return 0; } static int uhid_dev_set_report_reply(struct uhid_device *uhid, struct uhid_event *ev) { if (!READ_ONCE(uhid->running)) return -EINVAL; uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev); return 0; } static int uhid_char_open(struct inode *inode, struct file *file) { struct uhid_device *uhid; uhid = kzalloc(sizeof(*uhid), GFP_KERNEL); if (!uhid) return -ENOMEM; mutex_init(&uhid->devlock); mutex_init(&uhid->report_lock); spin_lock_init(&uhid->qlock); init_waitqueue_head(&uhid->waitq); init_waitqueue_head(&uhid->report_wait); uhid->running = false; INIT_WORK(&uhid->worker, uhid_device_add_worker); file->private_data = uhid; stream_open(inode, file); return 0; } static int uhid_char_release(struct inode *inode, struct file *file) { struct uhid_device *uhid = file->private_data; unsigned int i; uhid_dev_destroy(uhid); for (i = 0; i < UHID_BUFSIZE; ++i) kfree(uhid->outq[i]); kfree(uhid); return 0; } static ssize_t uhid_char_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct uhid_device *uhid = file->private_data; int ret; unsigned long flags; size_t len; /* they need at least the "type" member of uhid_event */ if (count < sizeof(__u32)) return -EINVAL; try_again: if (file->f_flags & O_NONBLOCK) { if (uhid->head == uhid->tail) return -EAGAIN; } else { ret = wait_event_interruptible(uhid->waitq, uhid->head != uhid->tail); if (ret) return ret; } ret = mutex_lock_interruptible(&uhid->devlock); if (ret) return ret; if (uhid->head == uhid->tail) { mutex_unlock(&uhid->devlock); goto try_again; } else { len = min(count, sizeof(**uhid->outq)); if (copy_to_user(buffer, uhid->outq[uhid->tail], len)) { ret = -EFAULT; } else { kfree(uhid->outq[uhid->tail]); 
uhid->outq[uhid->tail] = NULL; spin_lock_irqsave(&uhid->qlock, flags); uhid->tail = (uhid->tail + 1) % UHID_BUFSIZE; spin_unlock_irqrestore(&uhid->qlock, flags); } } mutex_unlock(&uhid->devlock); return ret ? ret : len; } static ssize_t uhid_char_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct uhid_device *uhid = file->private_data; int ret; size_t len; /* we need at least the "type" member of uhid_event */ if (count < sizeof(__u32)) return -EINVAL; ret = mutex_lock_interruptible(&uhid->devlock); if (ret) return ret; memset(&uhid->input_buf, 0, sizeof(uhid->input_buf)); len = min(count, sizeof(uhid->input_buf)); ret = uhid_event_from_user(buffer, len, &uhid->input_buf); if (ret) goto unlock; switch (uhid->input_buf.type) { case UHID_CREATE: /* * 'struct uhid_create_req' contains a __user pointer which is * copied from, so it's unsafe to allow this with elevated * privileges (e.g. from a setuid binary) or via kernel_write(). */ if (file->f_cred != current_cred()) { pr_err_once("UHID_CREATE from different security context by process %d (%s), this is not allowed.\n", task_tgid_vnr(current), current->comm); ret = -EACCES; goto unlock; } ret = uhid_dev_create(uhid, &uhid->input_buf); break; case UHID_CREATE2: ret = uhid_dev_create2(uhid, &uhid->input_buf); break; case UHID_DESTROY: ret = uhid_dev_destroy(uhid); break; case UHID_INPUT: ret = uhid_dev_input(uhid, &uhid->input_buf); break; case UHID_INPUT2: ret = uhid_dev_input2(uhid, &uhid->input_buf); break; case UHID_GET_REPORT_REPLY: ret = uhid_dev_get_report_reply(uhid, &uhid->input_buf); break; case UHID_SET_REPORT_REPLY: ret = uhid_dev_set_report_reply(uhid, &uhid->input_buf); break; default: ret = -EOPNOTSUPP; } unlock: mutex_unlock(&uhid->devlock); /* return "count" not "len" to not confuse the caller */ return ret ? ret : count; } static __poll_t uhid_char_poll(struct file *file, poll_table *wait) { struct uhid_device *uhid = file->private_data; __poll_t mask = EPOLLOUT | EPOLLWRNORM; /* uhid is always writable */ poll_wait(file, &uhid->waitq, wait); if (uhid->head != uhid->tail) mask |= EPOLLIN | EPOLLRDNORM; return mask; } static const struct file_operations uhid_fops = { .owner = THIS_MODULE, .open = uhid_char_open, .release = uhid_char_release, .read = uhid_char_read, .write = uhid_char_write, .poll = uhid_char_poll, }; static struct miscdevice uhid_misc = { .fops = &uhid_fops, .minor = UHID_MINOR, .name = UHID_NAME, }; module_misc_device(uhid_misc); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>"); MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem"); MODULE_ALIAS_MISCDEV(UHID_MINOR); MODULE_ALIAS("devname:" UHID_NAME);
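Example (not part of uhid.c): a minimal userspace sketch of the protocol the driver above implements. It assumes <linux/uhid.h> and <linux/input.h> are available, that rdesc[] is filled with a real HID report descriptor, and uses placeholder vendor/product IDs and device name; each write() carries exactly one struct uhid_event, which is what uhid_char_write() above expects.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <linux/input.h>   /* BUS_USB */
#include <linux/uhid.h>

static int uhid_write(int fd, const struct uhid_event *ev)
{
	ssize_t ret = write(fd, ev, sizeof(*ev));

	return ret == (ssize_t)sizeof(*ev) ? 0 : -1;
}

int main(void)
{
	/* Assumption: replace with a valid HID report descriptor. */
	static const unsigned char rdesc[] = { 0x00 };
	struct uhid_event ev;
	int fd;

	fd = open("/dev/uhid", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return 1;

	/* UHID_CREATE2 carries the descriptor inline, see uhid_dev_create2(). */
	memset(&ev, 0, sizeof(ev));
	ev.type = UHID_CREATE2;
	strcpy((char *)ev.u.create2.name, "example-uhid-device");
	ev.u.create2.rd_size = sizeof(rdesc);   /* must not exceed HID_MAX_DESCRIPTOR_SIZE */
	ev.u.create2.bus = BUS_USB;
	ev.u.create2.vendor = 0x1234;           /* placeholder IDs */
	ev.u.create2.product = 0x5678;
	memcpy(ev.u.create2.rd_data, rdesc, sizeof(rdesc));

	if (uhid_write(fd, &ev))
		return 1;

	/* ... inject reports with UHID_INPUT2, read UHID_START/UHID_OPEN from
	 * the fd, answer UHID_GET_REPORT/UHID_SET_REPORT, and so on ... */

	memset(&ev, 0, sizeof(ev));
	ev.type = UHID_DESTROY;
	uhid_write(fd, &ev);
	close(fd);
	return 0;
}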
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM kmem #if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include <linux/types.h> #include <linux/tracepoint.h> #include <trace/events/mmflags.h> TRACE_EVENT(kmem_cache_alloc, TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, s, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) __field( int, node ) __field( bool, accounted ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = s->object_size; __entry->bytes_alloc = s->size; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; __entry->accounted = IS_ENABLED(CONFIG_MEMCG) ? ((gfp_flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)) : false; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node, __entry->accounted ?
"true" : "false") ); TRACE_EVENT(kmalloc, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) __field( int, node ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = bytes_req; __entry->bytes_alloc = bytes_alloc; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node, (IS_ENABLED(CONFIG_MEMCG) && (__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false") ); TRACE_EVENT(kfree, TP_PROTO(unsigned long call_site, const void *ptr), TP_ARGS(call_site, ptr), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; ), TP_printk("call_site=%pS ptr=%p", (void *)__entry->call_site, __entry->ptr) ); TRACE_EVENT(kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr, const struct kmem_cache *s), TP_ARGS(call_site, ptr, s), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __string( name, s->name ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __assign_str(name); ), TP_printk("call_site=%pS ptr=%p name=%s", (void *)__entry->call_site, __entry->ptr, __get_str(name)) ); TRACE_EVENT(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->order = order; ), TP_printk("page=%p pfn=0x%lx order=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order) ); TRACE_EVENT(mm_page_free_batched, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field( unsigned long, pfn ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); ), TP_printk("page=%p pfn=0x%lx order=0", pfn_to_page(__entry->pfn), __entry->pfn) ); TRACE_EVENT(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype), TP_ARGS(page, order, gfp_flags, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( unsigned long, gfp_flags ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype, int percpu_refill), TP_ARGS(page, order, migratetype, percpu_refill), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) __field( int, percpu_refill ) ), TP_fast_assign( __entry->pfn = page ? 
page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; __entry->percpu_refill = percpu_refill; ), TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, __entry->percpu_refill) ); DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_PROTO(struct page *page, unsigned int order, int migratetype, int percpu_refill), TP_ARGS(page, order, migratetype, percpu_refill) ); TRACE_EVENT(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( int, alloc_order ) __field( int, fallback_order ) __field( int, alloc_migratetype ) __field( int, fallback_migratetype ) __field( int, change_ownership ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->alloc_order = alloc_order; __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; __entry->change_ownership = (alloc_migratetype == get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=0x%lx alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->alloc_order, __entry->fallback_order, pageblock_order, __entry->alloc_migratetype, __entry->fallback_migratetype, __entry->fallback_order < pageblock_order, __entry->change_ownership) ); TRACE_EVENT(mm_alloc_contig_migrate_range_info, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_migrated, unsigned long nr_reclaimed, unsigned long nr_mapped, int migratetype), TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype), TP_STRUCT__entry( __field(unsigned long, start) __field(unsigned long, end) __field(unsigned long, nr_migrated) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_mapped) __field(int, migratetype) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->nr_migrated = nr_migrated; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_mapped = nr_mapped; __entry->migratetype = migratetype; ), TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", __entry->start, __entry->end, __entry->migratetype, __entry->nr_migrated, __entry->nr_reclaimed, __entry->nr_mapped) ); /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. 
*/ #ifndef __PTR_TO_HASHVAL static unsigned int __maybe_unused mm_ptr_to_hash(const void *ptr) { int ret; unsigned long hashval; ret = ptr_to_hashval(ptr, &hashval); if (ret) return 0; /* The hashed value is only 32-bit */ return (unsigned int)hashval; } #define __PTR_TO_HASHVAL #endif #define TRACE_MM_PAGES \ EM(MM_FILEPAGES) \ EM(MM_ANONPAGES) \ EM(MM_SWAPENTS) \ EMe(MM_SHMEMPAGES) #undef EM #undef EMe #define EM(a) TRACE_DEFINE_ENUM(a); #define EMe(a) TRACE_DEFINE_ENUM(a); TRACE_MM_PAGES #undef EM #undef EMe #define EM(a) { a, #a }, #define EMe(a) { a, #a } TRACE_EVENT(rss_stat, TP_PROTO(struct mm_struct *mm, int member), TP_ARGS(mm, member), TP_STRUCT__entry( __field(unsigned int, mm_id) __field(unsigned int, curr) __field(int, member) __field(long, size) ), TP_fast_assign( __entry->mm_id = mm_ptr_to_hash(mm); __entry->curr = !!(current->mm == mm); __entry->member = member; __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member]) << PAGE_SHIFT); ), TP_printk("mm_id=%u curr=%d type=%s size=%ldB", __entry->mm_id, __entry->curr, __print_symbolic(__entry->member, TRACE_MM_PAGES), __entry->size) ); #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
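Example (not part of the trace header): a minimal userspace sketch for consuming these events, assuming tracefs is mounted at /sys/kernel/tracing and the kmem events are built in. It enables kmem:kmalloc and streams trace_pipe, whose lines follow the TP_printk() formats defined above.

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Turn the kmem:kmalloc event on. */
	fd = open("/sys/kernel/tracing/events/kmem/kmalloc/enable", O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1)
		return 1;
	close(fd);

	/* trace_pipe blocks until events arrive; each line follows the
	 * TP_printk() format, e.g. "kmalloc: call_site=... bytes_req=...". */
	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0)
		return 1;

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		if (write(STDOUT_FILENO, buf, n) != n)
			break;

	close(fd);
	return 0;
}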
// SPDX-License-Identifier: GPL-2.0-or-later /* * Authenc: Simple AEAD wrapper for IPsec * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/aead.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/authenc.h> #include <crypto/null.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> struct authenc_instance_ctx { struct crypto_ahash_spawn auth; struct crypto_skcipher_spawn enc; unsigned int reqoff; }; struct crypto_authenc_ctx { struct crypto_ahash *auth; struct crypto_skcipher *enc; struct crypto_sync_skcipher *null; }; struct authenc_request_ctx { struct scatterlist src[2]; struct scatterlist dst[2]; char tail[]; }; static void authenc_request_complete(struct aead_request *req, int err) { if (err != -EINPROGRESS) aead_request_complete(req, err); } int crypto_authenc_extractkeys(struct crypto_authenc_keys *keys, const u8 *key, unsigned int keylen) { struct rtattr *rta = (struct rtattr *)key; struct crypto_authenc_key_param *param; if (!RTA_OK(rta, keylen)) return -EINVAL; if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) return -EINVAL; /* * RTA_OK() didn't align the rtattr's payload when validating that it * fits in the buffer. Yet, the keys should start on the next 4-byte * aligned boundary. To avoid confusion, require that the rtattr * payload be exactly the param struct, which has a 4-byte aligned size.
*/ if (RTA_PAYLOAD(rta) != sizeof(*param)) return -EINVAL; BUILD_BUG_ON(sizeof(*param) % RTA_ALIGNTO); param = RTA_DATA(rta); keys->enckeylen = be32_to_cpu(param->enckeylen); key += rta->rta_len; keylen -= rta->rta_len; if (keylen < keys->enckeylen) return -EINVAL; keys->authkeylen = keylen - keys->enckeylen; keys->authkey = key; keys->enckey = key + keys->authkeylen; return 0; } EXPORT_SYMBOL_GPL(crypto_authenc_extractkeys); static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, unsigned int keylen) { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct crypto_ahash *auth = ctx->auth; struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto out; crypto_ahash_clear_flags(auth, CRYPTO_TFM_REQ_MASK); crypto_ahash_set_flags(auth, crypto_aead_get_flags(authenc) & CRYPTO_TFM_REQ_MASK); err = crypto_ahash_setkey(auth, keys.authkey, keys.authkeylen); if (err) goto out; crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); out: memzero_explicit(&keys, sizeof(keys)); return err; } static void authenc_geniv_ahash_done(void *data, int err) { struct aead_request *req = data; struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); if (err) goto out; scatterwalk_map_and_copy(ahreq->result, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(authenc), 1); out: aead_request_complete(req, err); } static int crypto_authenc_genicv(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ahash *auth = ctx->auth; struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); u8 *hash = areq_ctx->tail; int err; ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, req->dst, hash, req->assoclen + req->cryptlen); ahash_request_set_callback(ahreq, flags, authenc_geniv_ahash_done, req); err = crypto_ahash_digest(ahreq); if (err) return err; scatterwalk_map_and_copy(hash, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(authenc), 1); return 0; } static void crypto_authenc_encrypt_done(void *data, int err) { struct aead_request *areq = data; if (err) goto out; err = crypto_authenc_genicv(areq, 0); out: authenc_request_complete(areq, err); } static int crypto_authenc_copy_assoc(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); skcipher_request_set_sync_tfm(skreq, ctx->null); skcipher_request_set_callback(skreq, aead_request_flags(req), NULL, NULL); skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, NULL); return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_encrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = 
aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_skcipher *enc = ctx->enc; unsigned int cryptlen = req->cryptlen; struct skcipher_request *skreq = (void *)(areq_ctx->tail + ictx->reqoff); struct scatterlist *src, *dst; int err; src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; if (req->src != req->dst) { err = crypto_authenc_copy_assoc(req); if (err) return err; dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); } skcipher_request_set_tfm(skreq, enc); skcipher_request_set_callback(skreq, aead_request_flags(req), crypto_authenc_encrypt_done, req); skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); err = crypto_skcipher_encrypt(skreq); if (err) return err; return crypto_authenc_genicv(req, aead_request_flags(req)); } static int crypto_authenc_decrypt_tail(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); struct skcipher_request *skreq = (void *)(areq_ctx->tail + ictx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc); u8 *ihash = ahreq->result + authsize; struct scatterlist *src, *dst; scatterwalk_map_and_copy(ihash, req->src, ahreq->nbytes, authsize, 0); if (crypto_memneq(ihash, ahreq->result, authsize)) return -EBADMSG; src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; if (req->src != req->dst) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, req->iv); return crypto_skcipher_decrypt(skreq); } static void authenc_verify_ahash_done(void *data, int err) { struct aead_request *req = data; if (err) goto out; err = crypto_authenc_decrypt_tail(req, 0); out: authenc_request_complete(req, err); } static int crypto_authenc_decrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(authenc); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ahash *auth = ctx->auth; struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); u8 *hash = areq_ctx->tail; int err; ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, req->src, hash, req->assoclen + req->cryptlen - authsize); ahash_request_set_callback(ahreq, aead_request_flags(req), authenc_verify_ahash_done, req); err = crypto_ahash_digest(ahreq); if (err) return err; return crypto_authenc_decrypt_tail(req, aead_request_flags(req)); } static int crypto_authenc_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; struct crypto_skcipher *enc; struct 
crypto_sync_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; ctx->auth = auth; ctx->enc = enc; ctx->null = null; crypto_aead_set_reqsize( tfm, sizeof(struct authenc_request_ctx) + ictx->reqoff + max_t(unsigned int, crypto_ahash_reqsize(auth) + sizeof(struct ahash_request), sizeof(struct skcipher_request) + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; } static void crypto_authenc_exit_tfm(struct crypto_aead *tfm) { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); crypto_free_skcipher(ctx->enc); crypto_put_default_null_skcipher(); } static void crypto_authenc_free(struct aead_instance *inst) { struct authenc_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_skcipher(&ctx->enc); crypto_drop_ahash(&ctx->auth); kfree(inst); } static int crypto_authenc_create(struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct aead_instance *inst; struct authenc_instance_ctx *ctx; struct skcipher_alg_common *enc; struct hash_alg_common *auth; struct crypto_alg *auth_base; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); err = crypto_grab_ahash(&ctx->auth, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; auth = crypto_spawn_ahash_alg(&ctx->auth); auth_base = &auth->base; err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) goto err_free_inst; enc = crypto_spawn_skcipher_alg_common(&ctx->enc); ctx->reqoff = 2 * auth->digestsize; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_name, enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_driver_name, enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_ctx); inst->alg.ivsize = enc->ivsize; inst->alg.chunksize = enc->chunksize; inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_init_tfm; inst->alg.exit = crypto_authenc_exit_tfm; inst->alg.setkey = crypto_authenc_setkey; inst->alg.encrypt = crypto_authenc_encrypt; inst->alg.decrypt = crypto_authenc_decrypt; inst->free = crypto_authenc_free; err = aead_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_authenc_free(inst); } return err; } static struct crypto_template crypto_authenc_tmpl = { .name = "authenc", .create = crypto_authenc_create, .module = THIS_MODULE, }; static int __init crypto_authenc_module_init(void) { return crypto_register_template(&crypto_authenc_tmpl); } static void __exit crypto_authenc_module_exit(void) { crypto_unregister_template(&crypto_authenc_tmpl); } subsys_initcall(crypto_authenc_module_init); module_exit(crypto_authenc_module_exit); 
MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Simple AEAD wrapper for IPsec"); MODULE_ALIAS_CRYPTO("authenc");
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Core registration and callback routines for MTD
 * drivers and users.
 *
 * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org>
 * Copyright © 2006 Red Hat UK Limited
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/ioctl.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/proc_fs.h>
#include <linux/idr.h>
#include <linux/backing-dev.h>
#include <linux/gfp.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/leds.h>
#include <linux/debugfs.h>
#include <linux/nvmem-provider.h>
#include <linux/root_dev.h>
#include <linux/error-injection.h>

#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>

#include "mtdcore.h"

struct backing_dev_info *mtd_bdi;

#ifdef CONFIG_PM_SLEEP

static int mtd_cls_suspend(struct device *dev)
{
	struct mtd_info *mtd = dev_get_drvdata(dev);

	return mtd ? mtd_suspend(mtd) : 0;
}

static int mtd_cls_resume(struct device *dev)
{
	struct mtd_info *mtd = dev_get_drvdata(dev);

	if (mtd)
		mtd_resume(mtd);
	return 0;
}

static SIMPLE_DEV_PM_OPS(mtd_cls_pm_ops, mtd_cls_suspend, mtd_cls_resume);
#define MTD_CLS_PM_OPS (&mtd_cls_pm_ops)
#else
#define MTD_CLS_PM_OPS NULL
#endif

static struct class mtd_class = {
	.name = "mtd",
	.pm = MTD_CLS_PM_OPS,
};

static DEFINE_IDR(mtd_idr);

/* These are exported solely for the purpose of mtd_blkdevs.c. You
   should not use them for _anything_ else */
DEFINE_MUTEX(mtd_table_mutex);
EXPORT_SYMBOL_GPL(mtd_table_mutex);

struct mtd_info *__mtd_next_device(int i)
{
	return idr_get_next(&mtd_idr, &i);
}
EXPORT_SYMBOL_GPL(__mtd_next_device);

static LIST_HEAD(mtd_notifiers);

#define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2)

/* REVISIT once MTD uses the driver model better, whoever allocates
 * the mtd_info will probably want to use the release() hook...
*/ static void mtd_release(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); dev_t index = MTD_DEVT(mtd->index); idr_remove(&mtd_idr, mtd->index); of_node_put(mtd_get_of_node(mtd)); if (mtd_is_partition(mtd)) release_mtd_partition(mtd); /* remove /dev/mtdXro node */ device_destroy(&mtd_class, index + 1); } static void mtd_device_release(struct kref *kref) { struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt); bool is_partition = mtd_is_partition(mtd); debugfs_remove_recursive(mtd->dbg.dfs_dir); /* Try to remove the NVMEM provider */ nvmem_unregister(mtd->nvmem); device_unregister(&mtd->dev); /* * Clear dev so mtd can be safely re-registered later if desired. * Should not be done for partition, * as it was already destroyed in device_unregister(). */ if (!is_partition) memset(&mtd->dev, 0, sizeof(mtd->dev)); module_put(THIS_MODULE); } #define MTD_DEVICE_ATTR_RO(name) \ static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL) #define MTD_DEVICE_ATTR_RW(name) \ static DEVICE_ATTR(name, 0644, mtd_##name##_show, mtd_##name##_store) static ssize_t mtd_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); char *type; switch (mtd->type) { case MTD_ABSENT: type = "absent"; break; case MTD_RAM: type = "ram"; break; case MTD_ROM: type = "rom"; break; case MTD_NORFLASH: type = "nor"; break; case MTD_NANDFLASH: type = "nand"; break; case MTD_DATAFLASH: type = "dataflash"; break; case MTD_UBIVOLUME: type = "ubi"; break; case MTD_MLCNANDFLASH: type = "mlc-nand"; break; default: type = "unknown"; } return sysfs_emit(buf, "%s\n", type); } MTD_DEVICE_ATTR_RO(type); static ssize_t mtd_flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%lx\n", (unsigned long)mtd->flags); } MTD_DEVICE_ATTR_RO(flags); static ssize_t mtd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%llu\n", (unsigned long long)mtd->size); } MTD_DEVICE_ATTR_RO(size); static ssize_t mtd_erasesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->erasesize); } MTD_DEVICE_ATTR_RO(erasesize); static ssize_t mtd_writesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->writesize); } MTD_DEVICE_ATTR_RO(writesize); static ssize_t mtd_subpagesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft; return sysfs_emit(buf, "%u\n", subpagesize); } MTD_DEVICE_ATTR_RO(subpagesize); static ssize_t mtd_oobsize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->oobsize); } MTD_DEVICE_ATTR_RO(oobsize); static ssize_t mtd_oobavail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->oobavail); } MTD_DEVICE_ATTR_RO(oobavail); static ssize_t mtd_numeraseregions_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", 
mtd->numeraseregions); } MTD_DEVICE_ATTR_RO(numeraseregions); static ssize_t mtd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", mtd->name); } MTD_DEVICE_ATTR_RO(name); static ssize_t mtd_ecc_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->ecc_strength); } MTD_DEVICE_ATTR_RO(ecc_strength); static ssize_t mtd_bitflip_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->bitflip_threshold); } static ssize_t mtd_bitflip_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int bitflip_threshold; int retval; retval = kstrtouint(buf, 0, &bitflip_threshold); if (retval) return retval; mtd->bitflip_threshold = bitflip_threshold; return count; } MTD_DEVICE_ATTR_RW(bitflip_threshold); static ssize_t mtd_ecc_step_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->ecc_step_size); } MTD_DEVICE_ATTR_RO(ecc_step_size); static ssize_t mtd_corrected_bits_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->corrected); } MTD_DEVICE_ATTR_RO(corrected_bits); /* ecc stats corrected */ static ssize_t mtd_ecc_failures_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->failed); } MTD_DEVICE_ATTR_RO(ecc_failures); /* ecc stats errors */ static ssize_t mtd_bad_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->badblocks); } MTD_DEVICE_ATTR_RO(bad_blocks); static ssize_t mtd_bbt_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->bbtblocks); } MTD_DEVICE_ATTR_RO(bbt_blocks); static struct attribute *mtd_attrs[] = { &dev_attr_type.attr, &dev_attr_flags.attr, &dev_attr_size.attr, &dev_attr_erasesize.attr, &dev_attr_writesize.attr, &dev_attr_subpagesize.attr, &dev_attr_oobsize.attr, &dev_attr_oobavail.attr, &dev_attr_numeraseregions.attr, &dev_attr_name.attr, &dev_attr_ecc_strength.attr, &dev_attr_ecc_step_size.attr, &dev_attr_corrected_bits.attr, &dev_attr_ecc_failures.attr, &dev_attr_bad_blocks.attr, &dev_attr_bbt_blocks.attr, &dev_attr_bitflip_threshold.attr, NULL, }; ATTRIBUTE_GROUPS(mtd); static const struct device_type mtd_devtype = { .name = "mtd", .groups = mtd_groups, .release = mtd_release, }; static bool mtd_expert_analysis_mode; #ifdef CONFIG_DEBUG_FS bool mtd_check_expert_analysis_mode(void) { const char *mtd_expert_analysis_warning = "Bad block checks have been entirely disabled.\n" "This is only reserved for post-mortem forensics and debug purposes.\n" "Never enable this mode if you do not know what you are doing!\n"; return WARN_ONCE(mtd_expert_analysis_mode, 
mtd_expert_analysis_warning); } EXPORT_SYMBOL_GPL(mtd_check_expert_analysis_mode); #endif static struct dentry *dfs_dir_mtd; static void mtd_debugfs_populate(struct mtd_info *mtd) { struct device *dev = &mtd->dev; if (IS_ERR_OR_NULL(dfs_dir_mtd)) return; mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(dev), dfs_dir_mtd); } #ifndef CONFIG_MMU unsigned mtd_mmap_capabilities(struct mtd_info *mtd) { switch (mtd->type) { case MTD_RAM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ | NOMMU_MAP_WRITE; case MTD_ROM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ; default: return NOMMU_MAP_COPY; } } EXPORT_SYMBOL_GPL(mtd_mmap_capabilities); #endif static int mtd_reboot_notifier(struct notifier_block *n, unsigned long state, void *cmd) { struct mtd_info *mtd; mtd = container_of(n, struct mtd_info, reboot_notifier); mtd->_reboot(mtd); return NOTIFY_DONE; } /** * mtd_wunit_to_pairing_info - get pairing information of a wunit * @mtd: pointer to new MTD device info structure * @wunit: write unit we are interested in * @info: returned pairing information * * Retrieve pairing information associated to the wunit. * This is mainly useful when dealing with MLC/TLC NANDs where pages can be * paired together, and where programming a page may influence the page it is * paired with. * The notion of page is replaced by the term wunit (write-unit) to stay * consistent with the ->writesize field. * * The @wunit argument can be extracted from an absolute offset using * mtd_offset_to_wunit(). @info is filled with the pairing information attached * to @wunit. * * From the pairing info the MTD user can find all the wunits paired with * @wunit using the following loop: * * for (i = 0; i < mtd_pairing_groups(mtd); i++) { * info.pair = i; * mtd_pairing_info_to_wunit(mtd, &info); * ... * } */ int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, struct mtd_pairing_info *info) { struct mtd_info *master = mtd_get_master(mtd); int npairs = mtd_wunit_per_eb(master) / mtd_pairing_groups(master); if (wunit < 0 || wunit >= npairs) return -EINVAL; if (master->pairing && master->pairing->get_info) return master->pairing->get_info(master, wunit, info); info->group = 0; info->pair = wunit; return 0; } EXPORT_SYMBOL_GPL(mtd_wunit_to_pairing_info); /** * mtd_pairing_info_to_wunit - get wunit from pairing information * @mtd: pointer to new MTD device info structure * @info: pairing information struct * * Returns a positive number representing the wunit associated to the info * struct, or a negative error code. * * This is the reverse of mtd_wunit_to_pairing_info(), and can help one to * iterate over all wunits of a given pair (see mtd_wunit_to_pairing_info() * doc). * * It can also be used to only program the first page of each pair (i.e. 
* page attached to group 0), which allows one to use an MLC NAND in * software-emulated SLC mode: * * info.group = 0; * npairs = mtd_wunit_per_eb(mtd) / mtd_pairing_groups(mtd); * for (info.pair = 0; info.pair < npairs; info.pair++) { * wunit = mtd_pairing_info_to_wunit(mtd, &info); * mtd_write(mtd, mtd_wunit_to_offset(mtd, blkoffs, wunit), * mtd->writesize, &retlen, buf + (i * mtd->writesize)); * } */ int mtd_pairing_info_to_wunit(struct mtd_info *mtd, const struct mtd_pairing_info *info) { struct mtd_info *master = mtd_get_master(mtd); int ngroups = mtd_pairing_groups(master); int npairs = mtd_wunit_per_eb(master) / ngroups; if (!info || info->pair < 0 || info->pair >= npairs || info->group < 0 || info->group >= ngroups) return -EINVAL; if (master->pairing && master->pairing->get_wunit) return mtd->pairing->get_wunit(master, info); return info->pair; } EXPORT_SYMBOL_GPL(mtd_pairing_info_to_wunit); /** * mtd_pairing_groups - get the number of pairing groups * @mtd: pointer to new MTD device info structure * * Returns the number of pairing groups. * * This number is usually equal to the number of bits exposed by a single * cell, and can be used in conjunction with mtd_pairing_info_to_wunit() * to iterate over all pages of a given pair. */ int mtd_pairing_groups(struct mtd_info *mtd) { struct mtd_info *master = mtd_get_master(mtd); if (!master->pairing || !master->pairing->ngroups) return 1; return master->pairing->ngroups; } EXPORT_SYMBOL_GPL(mtd_pairing_groups); static int mtd_nvmem_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int err; err = mtd_read(mtd, offset, bytes, &retlen, val); if (err && err != -EUCLEAN) return err; return retlen == bytes ? 0 : -EIO; } static int mtd_nvmem_add(struct mtd_info *mtd) { struct device_node *node = mtd_get_of_node(mtd); struct nvmem_config config = {}; config.id = NVMEM_DEVID_NONE; config.dev = &mtd->dev; config.name = dev_name(&mtd->dev); config.owner = THIS_MODULE; config.add_legacy_fixed_of_cells = of_device_is_compatible(node, "nvmem-cells"); config.reg_read = mtd_nvmem_reg_read; config.size = mtd->size; config.word_size = 1; config.stride = 1; config.read_only = true; config.root_only = true; config.ignore_wp = true; config.priv = mtd; mtd->nvmem = nvmem_register(&config); if (IS_ERR(mtd->nvmem)) { /* Just ignore if there is no NVMEM support in the kernel */ if (PTR_ERR(mtd->nvmem) == -EOPNOTSUPP) mtd->nvmem = NULL; else return dev_err_probe(&mtd->dev, PTR_ERR(mtd->nvmem), "Failed to register NVMEM device\n"); } return 0; } static void mtd_check_of_node(struct mtd_info *mtd) { struct device_node *partitions, *parent_dn, *mtd_dn = NULL; const char *pname, *prefix = "partition-"; int plen, mtd_name_len, offset, prefix_len; /* Check if MTD already has a device node */ if (mtd_get_of_node(mtd)) return; if (!mtd_is_partition(mtd)) return; parent_dn = of_node_get(mtd_get_of_node(mtd->parent)); if (!parent_dn) return; if (mtd_is_partition(mtd->parent)) partitions = of_node_get(parent_dn); else partitions = of_get_child_by_name(parent_dn, "partitions"); if (!partitions) goto exit_parent; prefix_len = strlen(prefix); mtd_name_len = strlen(mtd->name); /* Search if a partition is defined with the same name */ for_each_child_of_node(partitions, mtd_dn) { /* Skip partition with no/wrong prefix */ if (!of_node_name_prefix(mtd_dn, prefix)) continue; /* Label have priority. 
Check that first */ if (!of_property_read_string(mtd_dn, "label", &pname)) { offset = 0; } else { pname = mtd_dn->name; offset = prefix_len; } plen = strlen(pname) - offset; if (plen == mtd_name_len && !strncmp(mtd->name, pname + offset, plen)) { mtd_set_of_node(mtd, mtd_dn); of_node_put(mtd_dn); break; } } of_node_put(partitions); exit_parent: of_node_put(parent_dn); } /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of its arrival. Returns * zero on success or non-zero on failure. */ int add_mtd_device(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); struct mtd_info *master = mtd_get_master(mtd); struct mtd_notifier *not; int i, error, ofidx; /* * May occur, for instance, on buggy drivers which call * mtd_device_parse_register() multiple times on the same master MTD, * especially with CONFIG_MTD_PARTITIONED_MASTER=y. */ if (WARN_ONCE(mtd->dev.type, "MTD already registered\n")) return -EEXIST; BUG_ON(mtd->writesize == 0); /* * MTD drivers should implement ->_{write,read}() or * ->_{write,read}_oob(), but not both. */ if (WARN_ON((mtd->_write && mtd->_write_oob) || (mtd->_read && mtd->_read_oob))) return -EINVAL; if (WARN_ON((!mtd->erasesize || !master->_erase) && !(mtd->flags & MTD_NO_ERASE))) return -EINVAL; /* * MTD_SLC_ON_MLC_EMULATION can only be set on partitions, when the * master is an MLC NAND and has a proper pairing scheme defined. * We also reject masters that implement ->_writev() for now, because * NAND controller drivers don't implement this hook, and adding the * SLC -> MLC address/length conversion to this path is useless if we * don't have a user. */ if (mtd->flags & MTD_SLC_ON_MLC_EMULATION && (!mtd_is_partition(mtd) || master->type != MTD_MLCNANDFLASH || !master->pairing || master->_writev)) return -EINVAL; mutex_lock(&mtd_table_mutex); ofidx = -1; if (np) ofidx = of_alias_get_id(np, "mtd"); if (ofidx >= 0) i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); else i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; } mtd->index = i; kref_init(&mtd->refcnt); /* default value if not set by driver */ if (mtd->bitflip_threshold == 0) mtd->bitflip_threshold = mtd->ecc_strength; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { int ngroups = mtd_pairing_groups(master); mtd->erasesize /= ngroups; mtd->size = (u64)mtd_div_by_eb(mtd->size, master) * mtd->erasesize; } if (is_power_of_2(mtd->erasesize)) mtd->erasesize_shift = ffs(mtd->erasesize) - 1; else mtd->erasesize_shift = 0; if (is_power_of_2(mtd->writesize)) mtd->writesize_shift = ffs(mtd->writesize) - 1; else mtd->writesize_shift = 0; mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1; mtd->writesize_mask = (1 << mtd->writesize_shift) - 1; /* Some chips always power up locked. Unlock them now */ if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) { error = mtd_unlock(mtd, 0, mtd->size); if (error && error != -EOPNOTSUPP) printk(KERN_WARNING "%s: unlock failed, writes may not work\n", mtd->name); /* Ignore unlock failures? */ error = 0; } /* Caller should have set dev.parent to match the * physical device, if appropriate. 
*/ mtd->dev.type = &mtd_devtype; mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); dev_set_name(&mtd->dev, "mtd%d", i); dev_set_drvdata(&mtd->dev, mtd); mtd_check_of_node(mtd); of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) { put_device(&mtd->dev); goto fail_added; } /* Add the nvmem provider */ error = mtd_nvmem_add(mtd); if (error) goto fail_nvmem_add; mtd_debugfs_populate(mtd); device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, "mtd%dro", i); pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->add(mtd); mutex_unlock(&mtd_table_mutex); if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { if (IS_BUILTIN(CONFIG_MTD)) { pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); } else { pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", mtd->index, mtd->name); } } /* We _know_ we aren't being removed, because our caller is still holding us here. So none of this try_ nonsense, and no bitching about it either. :) */ __module_get(THIS_MODULE); return 0; fail_nvmem_add: device_unregister(&mtd->dev); fail_added: of_node_put(mtd_get_of_node(mtd)); idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; } /** * del_mtd_device - unregister an MTD device * @mtd: pointer to MTD device info structure * * Remove a device from the list of MTD devices present in the system, * and notify each currently active MTD 'user' of its departure. * Returns zero on success or 1 on failure, which currently will happen * if the requested device does not appear to be present in the list. */ int del_mtd_device(struct mtd_info *mtd) { int ret; struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->remove(mtd); kref_put(&mtd->refcnt, mtd_device_release); ret = 0; out_error: mutex_unlock(&mtd_table_mutex); return ret; } /* * Set a few defaults based on the parent devices, if not provided by the * driver */ static void mtd_set_dev_defaults(struct mtd_info *mtd) { if (mtd->dev.parent) { if (!mtd->owner && mtd->dev.parent->driver) mtd->owner = mtd->dev.parent->driver->owner; if (!mtd->name) mtd->name = dev_name(mtd->dev.parent); } else { pr_debug("mtd device won't show a device symlink in sysfs\n"); } INIT_LIST_HEAD(&mtd->partitions); mutex_init(&mtd->master.partitions_lock); mutex_init(&mtd->master.chrdev_lock); } static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user) { struct otp_info *info; ssize_t size = 0; unsigned int i; size_t retlen; int ret; info = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!info) return -ENOMEM; if (is_user) ret = mtd_get_user_prot_info(mtd, PAGE_SIZE, &retlen, info); else ret = mtd_get_fact_prot_info(mtd, PAGE_SIZE, &retlen, info); if (ret) goto err; for (i = 0; i < retlen / sizeof(*info); i++) size += info[i].length; kfree(info); return size; err: kfree(info); /* ENODATA means there is no OTP region. */ return ret == -ENODATA ? 
0 : ret; } static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, const char *compatible, int size, nvmem_reg_read_t reg_read) { struct nvmem_device *nvmem = NULL; struct nvmem_config config = {}; struct device_node *np; /* DT binding is optional */ np = of_get_compatible_child(mtd->dev.of_node, compatible); /* OTP nvmem will be registered on the physical device */ config.dev = mtd->dev.parent; config.name = compatible; config.id = NVMEM_DEVID_AUTO; config.owner = THIS_MODULE; config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd); config.type = NVMEM_TYPE_OTP; config.root_only = true; config.ignore_wp = true; config.reg_read = reg_read; config.size = size; config.of_node = np; config.priv = mtd; nvmem = nvmem_register(&config); /* Just ignore if there is no NVMEM support in the kernel */ if (IS_ERR(nvmem) && PTR_ERR(nvmem) == -EOPNOTSUPP) nvmem = NULL; of_node_put(np); return nvmem; } static int mtd_nvmem_user_otp_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int ret; ret = mtd_read_user_prot_reg(mtd, offset, bytes, &retlen, val); if (ret) return ret; return retlen == bytes ? 0 : -EIO; } static int mtd_nvmem_fact_otp_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int ret; ret = mtd_read_fact_prot_reg(mtd, offset, bytes, &retlen, val); if (ret) return ret; return retlen == bytes ? 0 : -EIO; } static int mtd_otp_nvmem_add(struct mtd_info *mtd) { struct device *dev = mtd->dev.parent; struct nvmem_device *nvmem; ssize_t size; int err; if (mtd->_get_user_prot_info && mtd->_read_user_prot_reg) { size = mtd_otp_size(mtd, true); if (size < 0) { err = size; goto err; } if (size > 0) { nvmem = mtd_otp_nvmem_register(mtd, "user-otp", size, mtd_nvmem_user_otp_reg_read); if (IS_ERR(nvmem)) { err = PTR_ERR(nvmem); goto err; } mtd->otp_user_nvmem = nvmem; } } if (mtd->_get_fact_prot_info && mtd->_read_fact_prot_reg) { size = mtd_otp_size(mtd, false); if (size < 0) { err = size; goto err; } if (size > 0) { /* * The factory OTP contains thing such as a unique serial * number and is small, so let's read it out and put it * into the entropy pool. */ void *otp; otp = kmalloc(size, GFP_KERNEL); if (!otp) { err = -ENOMEM; goto err; } err = mtd_nvmem_fact_otp_reg_read(mtd, 0, otp, size); if (err < 0) { kfree(otp); goto err; } add_device_randomness(otp, err); kfree(otp); nvmem = mtd_otp_nvmem_register(mtd, "factory-otp", size, mtd_nvmem_fact_otp_reg_read); if (IS_ERR(nvmem)) { err = PTR_ERR(nvmem); goto err; } mtd->otp_factory_nvmem = nvmem; } } return 0; err: nvmem_unregister(mtd->otp_user_nvmem); /* Don't report error if OTP is not supported. */ if (err == -EOPNOTSUPP) return 0; return dev_err_probe(dev, err, "Failed to register OTP NVMEM device\n"); } /** * mtd_device_parse_register - parse partitions and register an MTD device. * * @mtd: the MTD device to register * @types: the list of MTD partition probes to try, see * 'parse_mtd_partitions()' for more information * @parser_data: MTD partition parser-specific data * @parts: fallback partition information to register, if parsing fails; * only valid if %nr_parts > %0 * @nr_parts: the number of partitions in parts, if zero then the full * MTD device is registered if no partition info is found * * This function aggregates MTD partitions parsing (done by * 'parse_mtd_partitions()') and MTD device and partitions registering. 
It
 * basically follows the most common pattern found in many MTD drivers:
 *
 * * If the MTD_PARTITIONED_MASTER option is set, then the device as a whole is
 *   registered first.
 * * Then it tries to probe partitions on MTD device @mtd using parsers
 *   specified in @types (if @types is %NULL, then the default list of parsers
 *   is used, see 'parse_mtd_partitions()' for more information). If none are
 *   found this function tries to fall back to information specified in
 *   @parts/@nr_parts.
 * * If no partitions were found this function just registers the MTD device
 *   @mtd and exits.
 *
 * Returns zero in case of success and a negative error code in case of failure.
 */
int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
			      struct mtd_part_parser_data *parser_data,
			      const struct mtd_partition *parts,
			      int nr_parts)
{
	int ret;

	mtd_set_dev_defaults(mtd);

	ret = mtd_otp_nvmem_add(mtd);
	if (ret)
		goto out;

	if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
		ret = add_mtd_device(mtd);
		if (ret)
			goto out;
	}

	/* Prefer parsed partitions over driver-provided fallback */
	ret = parse_mtd_partitions(mtd, types, parser_data);
	if (ret == -EPROBE_DEFER)
		goto out;

	if (ret > 0)
		ret = 0;
	else if (nr_parts)
		ret = add_mtd_partitions(mtd, parts, nr_parts);
	else if (!device_is_registered(&mtd->dev))
		ret = add_mtd_device(mtd);
	else
		ret = 0;

	if (ret)
		goto out;

	/*
	 * FIXME: some drivers unfortunately call this function more than once.
	 * So we have to check if we've already assigned the reboot notifier.
	 *
	 * Generally, we can make multiple calls work for most cases, but it
	 * does cause problems with parse_mtd_partitions() above (e.g.,
	 * cmdlineparts will register partitions more than once).
	 */
	WARN_ONCE(mtd->_reboot && mtd->reboot_notifier.notifier_call,
		  "MTD already registered\n");
	if (mtd->_reboot && !mtd->reboot_notifier.notifier_call) {
		mtd->reboot_notifier.notifier_call = mtd_reboot_notifier;
		register_reboot_notifier(&mtd->reboot_notifier);
	}

out:
	if (ret) {
		nvmem_unregister(mtd->otp_user_nvmem);
		nvmem_unregister(mtd->otp_factory_nvmem);
	}

	if (ret && device_is_registered(&mtd->dev))
		del_mtd_device(mtd);

	return ret;
}
EXPORT_SYMBOL_GPL(mtd_device_parse_register);

/**
 * mtd_device_unregister - unregister an existing MTD device.
 *
 * @master: the MTD device to unregister. This will unregister both the master
 *          and any partitions if registered.
 */
int mtd_device_unregister(struct mtd_info *master)
{
	int err;

	if (master->_reboot) {
		unregister_reboot_notifier(&master->reboot_notifier);
		memset(&master->reboot_notifier, 0,
		       sizeof(master->reboot_notifier));
	}

	nvmem_unregister(master->otp_user_nvmem);
	nvmem_unregister(master->otp_factory_nvmem);

	err = del_mtd_partitions(master);
	if (err)
		return err;

	if (!device_is_registered(&master->dev))
		return 0;

	return del_mtd_device(master);
}
EXPORT_SYMBOL_GPL(mtd_device_unregister);

/**
 * register_mtd_user - register a 'user' of MTD devices.
 * @new: pointer to notifier info structure
 *
 * Registers a pair of callback functions to be called upon addition
 * or removal of MTD devices. Causes the 'add' callback to be immediately
 * invoked for each MTD device currently present in the system.
 */
void register_mtd_user (struct mtd_notifier *new)
{
	struct mtd_info *mtd;

	mutex_lock(&mtd_table_mutex);

	list_add(&new->list, &mtd_notifiers);

	__module_get(THIS_MODULE);

	mtd_for_each_device(mtd)
		new->add(mtd);

	mutex_unlock(&mtd_table_mutex);
}
EXPORT_SYMBOL_GPL(register_mtd_user);

/**
 * unregister_mtd_user - unregister a 'user' of MTD devices.
* @old: pointer to notifier info structure * * Removes a callback function pair from the list of 'users' to be * notified upon addition or removal of MTD devices. Causes the * 'remove' callback to be immediately invoked for each MTD device * currently present in the system. */ int unregister_mtd_user (struct mtd_notifier *old) { struct mtd_info *mtd; mutex_lock(&mtd_table_mutex); module_put(THIS_MODULE); mtd_for_each_device(mtd) old->remove(mtd); list_del(&old->list); mutex_unlock(&mtd_table_mutex); return 0; } EXPORT_SYMBOL_GPL(unregister_mtd_user); /** * get_mtd_device - obtain a validated handle for an MTD device * @mtd: last known address of the required MTD device * @num: internal device number of the required MTD device * * Given a number and NULL address, return the num'th entry in the device * table, if any. Given an address and num == -1, search the device table * for a device with that address and return if it's still present. Given * both, return the num'th driver only if its address matches. Return * error code if not. */ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) { struct mtd_info *ret = NULL, *other; int err = -ENODEV; mutex_lock(&mtd_table_mutex); if (num == -1) { mtd_for_each_device(other) { if (other == mtd) { ret = mtd; break; } } } else if (num >= 0) { ret = idr_find(&mtd_idr, num); if (mtd && mtd != ret) ret = NULL; } if (!ret) { ret = ERR_PTR(err); goto out; } err = __get_mtd_device(ret); if (err) ret = ERR_PTR(err); out: mutex_unlock(&mtd_table_mutex); return ret; } EXPORT_SYMBOL_GPL(get_mtd_device); int __get_mtd_device(struct mtd_info *mtd) { struct mtd_info *master = mtd_get_master(mtd); int err; if (master->_get_device) { err = master->_get_device(mtd); if (err) return err; } if (!try_module_get(master->owner)) { if (master->_put_device) master->_put_device(master); return -ENODEV; } while (mtd) { if (mtd != master) kref_get(&mtd->refcnt); mtd = mtd->parent; } if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) kref_get(&master->refcnt); return 0; } EXPORT_SYMBOL_GPL(__get_mtd_device); /** * of_get_mtd_device_by_node - obtain an MTD device associated with a given node * * @np: device tree node */ struct mtd_info *of_get_mtd_device_by_node(struct device_node *np) { struct mtd_info *mtd = NULL; struct mtd_info *tmp; int err; mutex_lock(&mtd_table_mutex); err = -EPROBE_DEFER; mtd_for_each_device(tmp) { if (mtd_get_of_node(tmp) == np) { mtd = tmp; err = __get_mtd_device(mtd); break; } } mutex_unlock(&mtd_table_mutex); return err ? ERR_PTR(err) : mtd; } EXPORT_SYMBOL_GPL(of_get_mtd_device_by_node); /** * get_mtd_device_nm - obtain a validated handle for an MTD device by * device name * @name: MTD device name to open * * This function returns MTD device description structure in case of * success and an error code in case of failure. 
 */
struct mtd_info *get_mtd_device_nm(const char *name)
{
	int err = -ENODEV;
	struct mtd_info *mtd = NULL, *other;

	mutex_lock(&mtd_table_mutex);

	mtd_for_each_device(other) {
		if (!strcmp(name, other->name)) {
			mtd = other;
			break;
		}
	}

	if (!mtd)
		goto out_unlock;

	err = __get_mtd_device(mtd);
	if (err)
		goto out_unlock;

	mutex_unlock(&mtd_table_mutex);
	return mtd;

out_unlock:
	mutex_unlock(&mtd_table_mutex);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(get_mtd_device_nm);

void put_mtd_device(struct mtd_info *mtd)
{
	mutex_lock(&mtd_table_mutex);
	__put_mtd_device(mtd);
	mutex_unlock(&mtd_table_mutex);
}
EXPORT_SYMBOL_GPL(put_mtd_device);

void __put_mtd_device(struct mtd_info *mtd)
{
	struct mtd_info *master = mtd_get_master(mtd);

	while (mtd) {
		/* kref_put() can release mtd, so keep a reference on mtd->parent */
		struct mtd_info *parent = mtd->parent;

		if (mtd != master)
			kref_put(&mtd->refcnt, mtd_device_release);
		mtd = parent;
	}

	if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
		kref_put(&master->refcnt, mtd_device_release);

	module_put(master->owner);

	/* must be the last as master can be freed in the _put_device */
	if (master->_put_device)
		master->_put_device(master);
}
EXPORT_SYMBOL_GPL(__put_mtd_device);

/*
 * Erase is a synchronous operation. Device drivers are expected to return a
 * negative error code if the operation failed and update instr->fail_addr
 * to point to the portion that was not properly erased.
 */
int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
{
	struct mtd_info *master = mtd_get_master(mtd);
	u64 mst_ofs = mtd_get_master_ofs(mtd, 0);
	struct erase_info adjinstr;
	int ret;

	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
	adjinstr = *instr;

	if (!mtd->erasesize || !master->_erase)
		return -ENOTSUPP;

	if (instr->addr >= mtd->size || instr->len > mtd->size - instr->addr)
		return -EINVAL;
	if (!(mtd->flags & MTD_WRITEABLE))
		return -EROFS;
	if (!instr->len)
		return 0;

	ledtrig_mtd_activity();

	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) {
		adjinstr.addr = (loff_t)mtd_div_by_eb(instr->addr, mtd) *
				master->erasesize;
		adjinstr.len = ((u64)mtd_div_by_eb(instr->addr + instr->len, mtd) *
				master->erasesize) -
			       adjinstr.addr;
	}

	adjinstr.addr += mst_ofs;

	ret = master->_erase(master, &adjinstr);

	if (adjinstr.fail_addr != MTD_FAIL_ADDR_UNKNOWN) {
		instr->fail_addr = adjinstr.fail_addr - mst_ofs;
		if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) {
			instr->fail_addr = mtd_div_by_eb(instr->fail_addr,
							 master);
			instr->fail_addr *= mtd->erasesize;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mtd_erase);
ALLOW_ERROR_INJECTION(mtd_erase, ERRNO);

/*
 * This stuff for eXecute-In-Place. phys is optional and may be set to NULL.
*/ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; *virt = NULL; if (phys) *phys = 0; if (!master->_point) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; from = mtd_get_master_ofs(mtd, from); return master->_point(master, from, len, retlen, virt, phys); } EXPORT_SYMBOL_GPL(mtd_point); /* We probably shouldn't allow XIP if the unpoint isn't a NULL */ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_unpoint) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; return master->_unpoint(master, mtd_get_master_ofs(mtd, from), len); } EXPORT_SYMBOL_GPL(mtd_unpoint); /* * Allow NOMMU mmap() to directly map the device (if not NULL) * - return the address to which the offset maps * - return -ENOSYS to indicate refusal to do the mapping */ unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, unsigned long offset, unsigned long flags) { size_t retlen; void *virt; int ret; ret = mtd_point(mtd, offset, len, &retlen, &virt, NULL); if (ret) return ret; if (retlen != len) { mtd_unpoint(mtd, offset, retlen); return -ENOSYS; } return (unsigned long)virt; } EXPORT_SYMBOL_GPL(mtd_get_unmapped_area); static void mtd_update_ecc_stats(struct mtd_info *mtd, struct mtd_info *master, const struct mtd_ecc_stats *old_stats) { struct mtd_ecc_stats diff; if (master == mtd) return; diff = master->ecc_stats; diff.failed -= old_stats->failed; diff.corrected -= old_stats->corrected; while (mtd->parent) { mtd->ecc_stats.failed += diff.failed; mtd->ecc_stats.corrected += diff.corrected; mtd = mtd->parent; } } int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_oob_ops ops = { .len = len, .datbuf = buf, }; int ret; ret = mtd_read_oob(mtd, from, &ops); *retlen = ops.retlen; WARN_ON_ONCE(*retlen != len && mtd_is_bitflip_or_eccerr(ret)); return ret; } EXPORT_SYMBOL_GPL(mtd_read); ALLOW_ERROR_INJECTION(mtd_read, ERRNO); int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_oob_ops ops = { .len = len, .datbuf = (u8 *)buf, }; int ret; ret = mtd_write_oob(mtd, to, &ops); *retlen = ops.retlen; return ret; } EXPORT_SYMBOL_GPL(mtd_write); ALLOW_ERROR_INJECTION(mtd_write, ERRNO); /* * In blackbox flight recorder like scenarios we want to make successful writes * in interrupt context. panic_write() is only intended to be called when its * known the kernel is about to panic and we need the write to succeed. Since * the kernel is not going to be running for much longer, this function can * break locks and delay to ensure the write succeeds (but not sleep). 
*/ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_panic_write) return -EOPNOTSUPP; if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!len) return 0; if (!master->oops_panic_write) master->oops_panic_write = true; return master->_panic_write(master, mtd_get_master_ofs(mtd, to), len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_panic_write); static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, struct mtd_oob_ops *ops) { /* * Some users are setting ->datbuf or ->oobbuf to NULL, but are leaving * ->len or ->ooblen uninitialized. Force ->len and ->ooblen to 0 in * this case. */ if (!ops->datbuf) ops->len = 0; if (!ops->oobbuf) ops->ooblen = 0; if (offs < 0 || offs + ops->len > mtd->size) return -EINVAL; if (ops->ooblen) { size_t maxooblen; if (ops->ooboffs >= mtd_oobavail(mtd, ops)) return -EINVAL; maxooblen = ((size_t)(mtd_div_by_ws(mtd->size, mtd) - mtd_div_by_ws(offs, mtd)) * mtd_oobavail(mtd, ops)) - ops->ooboffs; if (ops->ooblen > maxooblen) return -EINVAL; } return 0; } static int mtd_read_oob_std(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; from = mtd_get_master_ofs(mtd, from); if (master->_read_oob) ret = master->_read_oob(master, from, ops); else ret = master->_read(master, from, ops->len, &ops->retlen, ops->datbuf); return ret; } static int mtd_write_oob_std(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; to = mtd_get_master_ofs(mtd, to); if (master->_write_oob) ret = master->_write_oob(master, to, ops); else ret = master->_write(master, to, ops->len, &ops->retlen, ops->datbuf); return ret; } static int mtd_io_emulated_slc(struct mtd_info *mtd, loff_t start, bool read, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ngroups = mtd_pairing_groups(master); int npairs = mtd_wunit_per_eb(master) / ngroups; struct mtd_oob_ops adjops = *ops; unsigned int wunit, oobavail; struct mtd_pairing_info info; int max_bitflips = 0; u32 ebofs, pageofs; loff_t base, pos; ebofs = mtd_mod_by_eb(start, mtd); base = (loff_t)mtd_div_by_eb(start, mtd) * master->erasesize; info.group = 0; info.pair = mtd_div_by_ws(ebofs, mtd); pageofs = mtd_mod_by_ws(ebofs, mtd); oobavail = mtd_oobavail(mtd, ops); while (ops->retlen < ops->len || ops->oobretlen < ops->ooblen) { int ret; if (info.pair >= npairs) { info.pair = 0; base += master->erasesize; } wunit = mtd_pairing_info_to_wunit(master, &info); pos = mtd_wunit_to_offset(mtd, base, wunit); adjops.len = ops->len - ops->retlen; if (adjops.len > mtd->writesize - pageofs) adjops.len = mtd->writesize - pageofs; adjops.ooblen = ops->ooblen - ops->oobretlen; if (adjops.ooblen > oobavail - adjops.ooboffs) adjops.ooblen = oobavail - adjops.ooboffs; if (read) { ret = mtd_read_oob_std(mtd, pos + pageofs, &adjops); if (ret > 0) max_bitflips = max(max_bitflips, ret); } else { ret = mtd_write_oob_std(mtd, pos + pageofs, &adjops); } if (ret < 0) return ret; max_bitflips = max(max_bitflips, ret); ops->retlen += adjops.retlen; ops->oobretlen += adjops.oobretlen; adjops.datbuf += adjops.retlen; adjops.oobbuf += adjops.oobretlen; adjops.ooboffs = 0; pageofs = 0; info.pair++; } return max_bitflips; } int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_info *master = 
mtd_get_master(mtd); struct mtd_ecc_stats old_stats = master->ecc_stats; int ret_code; ops->retlen = ops->oobretlen = 0; ret_code = mtd_check_oob_ops(mtd, from, ops); if (ret_code) return ret_code; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_read */ if (!master->_read_oob && (!master->_read || ops->oobbuf)) return -EOPNOTSUPP; if (ops->stats) memset(ops->stats, 0, sizeof(*ops->stats)); if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ret_code = mtd_io_emulated_slc(mtd, from, true, ops); else ret_code = mtd_read_oob_std(mtd, from, ops); mtd_update_ecc_stats(mtd, master, &old_stats); /* * In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics * similar to mtd->_read(), returning a non-negative integer * representing max bitflips. In other cases, mtd->_read_oob() may * return -EUCLEAN. In all cases, perform similar logic to mtd_read(). */ if (unlikely(ret_code < 0)) return ret_code; if (mtd->ecc_strength == 0) return 0; /* device lacks ecc */ if (ops->stats) ops->stats->max_bitflips = ret_code; return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } EXPORT_SYMBOL_GPL(mtd_read_oob); int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; ops->retlen = ops->oobretlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; ret = mtd_check_oob_ops(mtd, to, ops); if (ret) return ret; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_write */ if (!master->_write_oob && (!master->_write || ops->oobbuf)) return -EOPNOTSUPP; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) return mtd_io_emulated_slc(mtd, to, false, ops); return mtd_write_oob_std(mtd, to, ops); } EXPORT_SYMBOL_GPL(mtd_write_oob); /** * mtd_ooblayout_ecc - Get the OOB region definition of a specific ECC section * @mtd: MTD device structure * @section: ECC section. Depending on the layout you may have all the ECC * bytes stored in a single contiguous section, or one section * per ECC chunk (and sometime several sections for a single ECC * ECC chunk) * @oobecc: OOB region struct filled with the appropriate ECC position * information * * This function returns ECC section information in the OOB area. If you want * to get all the ECC bytes information, then you should call * mtd_ooblayout_ecc(mtd, section++, oobecc) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobecc) { struct mtd_info *master = mtd_get_master(mtd); memset(oobecc, 0, sizeof(*oobecc)); if (!master || section < 0) return -EINVAL; if (!master->ooblayout || !master->ooblayout->ecc) return -ENOTSUPP; return master->ooblayout->ecc(master, section, oobecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_ecc); /** * mtd_ooblayout_free - Get the OOB region definition of a specific free * section * @mtd: MTD device structure * @section: Free section you are interested in. Depending on the layout * you may have all the free bytes stored in a single contiguous * section, or one section per ECC chunk plus an extra section * for the remaining bytes (or other funky layout). * @oobfree: OOB region struct filled with the appropriate free position * information * * This function returns free bytes position in the OOB area. If you want * to get all the free bytes information, then you should call * mtd_ooblayout_free(mtd, section++, oobfree) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. 
*/ int mtd_ooblayout_free(struct mtd_info *mtd, int section, struct mtd_oob_region *oobfree) { struct mtd_info *master = mtd_get_master(mtd); memset(oobfree, 0, sizeof(*oobfree)); if (!master || section < 0) return -EINVAL; if (!master->ooblayout || !master->ooblayout->free) return -ENOTSUPP; return master->ooblayout->free(master, section, oobfree); } EXPORT_SYMBOL_GPL(mtd_ooblayout_free); /** * mtd_ooblayout_find_region - Find the region attached to a specific byte * @mtd: mtd info structure * @byte: the byte we are searching for * @sectionp: pointer where the section id will be stored * @oobregion: used to retrieve the ECC position * @iter: iterator function. Should be either mtd_ooblayout_free or * mtd_ooblayout_ecc depending on the region type you're searching for * * This function returns the section id and oobregion information of a * specific byte. For example, say you want to know where the 4th ECC byte is * stored, you'll use: * * mtd_ooblayout_find_region(mtd, 3, &section, &oobregion, mtd_ooblayout_ecc); * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_find_region(struct mtd_info *mtd, int byte, int *sectionp, struct mtd_oob_region *oobregion, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { int pos = 0, ret, section = 0; memset(oobregion, 0, sizeof(*oobregion)); while (1) { ret = iter(mtd, section, oobregion); if (ret) return ret; if (pos + oobregion->length > byte) break; pos += oobregion->length; section++; } /* * Adjust region info to make it start at the beginning at the * 'start' ECC byte. */ oobregion->offset += byte - pos; oobregion->length -= byte - pos; *sectionp = section; return 0; } /** * mtd_ooblayout_find_eccregion - Find the ECC region attached to a specific * ECC byte * @mtd: mtd info structure * @eccbyte: the byte we are searching for * @section: pointer where the section id will be stored * @oobregion: OOB region information * * Works like mtd_ooblayout_find_region() except it searches for a specific ECC * byte. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, int *section, struct mtd_oob_region *oobregion) { return mtd_ooblayout_find_region(mtd, eccbyte, section, oobregion, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_find_eccregion); /** * mtd_ooblayout_get_bytes - Extract OOB bytes from the oob buffer * @mtd: mtd info structure * @buf: destination buffer to store OOB bytes * @oobbuf: OOB buffer * @start: first byte to retrieve * @nbytes: number of bytes to retrieve * @iter: section iterator * * Extract bytes attached to a specific category (ECC or free) * from the OOB buffer and copy them into buf. * * Returns zero on success, a negative error code otherwise. 
*/ static int mtd_ooblayout_get_bytes(struct mtd_info *mtd, u8 *buf, const u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(buf, oobbuf + oobregion.offset, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_set_bytes - put OOB bytes into the oob buffer * @mtd: mtd info structure * @buf: source buffer to get OOB bytes from * @oobbuf: OOB buffer * @start: first OOB byte to set * @nbytes: number of OOB bytes to set * @iter: section iterator * * Fill the OOB buffer with data provided in buf. The category (ECC or free) * is selected by passing the appropriate iterator. * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_set_bytes(struct mtd_info *mtd, const u8 *buf, u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(oobbuf + oobregion.offset, buf, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_count_bytes - count the number of bytes in a OOB category * @mtd: mtd info structure * @iter: category iterator * * Count the number of bytes in a given category. * * Returns a positive value on success, a negative error code otherwise. */ static int mtd_ooblayout_count_bytes(struct mtd_info *mtd, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section = 0, ret, nbytes = 0; while (1) { ret = iter(mtd, section++, &oobregion); if (ret) { if (ret == -ERANGE) ret = nbytes; break; } nbytes += oobregion.length; } return ret; } /** * mtd_ooblayout_get_eccbytes - extract ECC bytes from the oob buffer * @mtd: mtd info structure * @eccbuf: destination buffer to store ECC bytes * @oobbuf: OOB buffer * @start: first ECC byte to retrieve * @nbytes: number of ECC bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_eccbytes); /** * mtd_ooblayout_set_eccbytes - set ECC bytes into the oob buffer * @mtd: mtd info structure * @eccbuf: source buffer to get ECC bytes from * @oobbuf: OOB buffer * @start: first ECC byte to set * @nbytes: number of ECC bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. 
*/ int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_eccbytes); /** * mtd_ooblayout_get_databytes - extract data bytes from the oob buffer * @mtd: mtd info structure * @databuf: destination buffer to store ECC bytes * @oobbuf: OOB buffer * @start: first ECC byte to retrieve * @nbytes: number of ECC bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_databytes); /** * mtd_ooblayout_set_databytes - set data bytes into the oob buffer * @mtd: mtd info structure * @databuf: source buffer to get data bytes from * @oobbuf: OOB buffer * @start: first ECC byte to set * @nbytes: number of ECC bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_databytes); /** * mtd_ooblayout_count_freebytes - count the number of free bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it count free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_count_freebytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_freebytes); /** * mtd_ooblayout_count_eccbytes - count the number of ECC bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it count ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_eccbytes); /* * Method to access the protection register area, present in some flash * devices. The user data is one time programmable but the factory data is read * only. 
*/ int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_get_fact_prot_info) return -EOPNOTSUPP; if (!len) return 0; return master->_get_fact_prot_info(master, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_read_fact_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_read_fact_prot_reg(master, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg); int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_get_user_prot_info) return -EOPNOTSUPP; if (!len) return 0; return master->_get_user_prot_info(master, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_user_prot_info); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_read_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_read_user_prot_reg(master, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); int ret; *retlen = 0; if (!master->_write_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; ret = master->_write_user_prot_reg(master, to, len, retlen, buf); if (ret) return ret; /* * If no data could be written at all, we are out of memory and * must return -ENOSPC. */ return (*retlen) ? 
0 : -ENOSPC; } EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_lock_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_lock_user_prot_reg(master, from, len); } EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg); int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_erase_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_erase_user_prot_reg(master, from, len); } EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg); /* Chip-supported device locking */ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_lock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_lock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_lock); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_unlock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_unlock); int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_is_locked) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!master->_block_isreserved) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; return master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isreserved); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!master->_block_isbad) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; return master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isbad); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); int ret; if (!master->_block_markbad) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; ret = master->_block_markbad(master, mtd_get_master_ofs(mtd, ofs)); if (ret) return ret; while (mtd->parent) { mtd->ecc_stats.badblocks++; mtd = 
mtd->parent; } return 0; } EXPORT_SYMBOL_GPL(mtd_block_markbad); ALLOW_ERROR_INJECTION(mtd_block_markbad, ERRNO); /* * default_mtd_writev - the default writev method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { unsigned long i; size_t totlen = 0, thislen; int ret = 0; for (i = 0; i < count; i++) { if (!vecs[i].iov_len) continue; ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); totlen += thislen; if (ret || thislen != vecs[i].iov_len) break; to += vecs[i].iov_len; } *retlen = totlen; return ret; } /* * mtd_writev - the vector-based MTD write method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!master->_writev) return default_mtd_writev(mtd, vecs, count, to, retlen); return master->_writev(master, vecs, count, mtd_get_master_ofs(mtd, to), retlen); } EXPORT_SYMBOL_GPL(mtd_writev); /** * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size * @mtd: mtd device description object pointer * @size: a pointer to the ideal or maximum size of the allocation, points * to the actual allocation size on success. * * This routine attempts to allocate a contiguous kernel buffer up to * the specified size, backing off the size of the request exponentially * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we * ask the memory allocator to avoid re-trying, swapping, writing back * or performing I/O. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. * * This is called, for example by mtd_{read,write} and jffs2_scan_medium, * to handle smaller (i.e. degraded) buffer allocations under low- or * fragmented-memory situations where such reduced allocations, from a * requested ideal, are allowed. * * Returns a pointer to the allocated buffer on success; otherwise, NULL. */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; *size = min_t(size_t, *size, KMALLOC_MAX_SIZE); while (*size > min_alloc) { kbuf = kmalloc(*size, flags); if (kbuf) return kbuf; *size >>= 1; *size = ALIGN(*size, mtd->writesize); } /* * For the last resort allocation allow 'kmalloc()' to do all sorts of * things (write-back, dropping caches, etc) by using GFP_KERNEL. 
*/ return kmalloc(*size, GFP_KERNEL); } EXPORT_SYMBOL_GPL(mtd_kmalloc_up_to); #ifdef CONFIG_PROC_FS /*====================================================================*/ /* Support for /proc/mtd */ static int mtd_proc_show(struct seq_file *m, void *v) { struct mtd_info *mtd; seq_puts(m, "dev: size erasesize name\n"); mutex_lock(&mtd_table_mutex); mtd_for_each_device(mtd) { seq_printf(m, "mtd%d: %8.8llx %8.8x \"%s\"\n", mtd->index, (unsigned long long)mtd->size, mtd->erasesize, mtd->name); } mutex_unlock(&mtd_table_mutex); return 0; } #endif /* CONFIG_PROC_FS */ /*====================================================================*/ /* Init code */ static struct backing_dev_info * __init mtd_bdi_init(const char *name) { struct backing_dev_info *bdi; int ret; bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return ERR_PTR(-ENOMEM); bdi->ra_pages = 0; bdi->io_pages = 0; /* * We put '-0' suffix to the name to get the same name format as we * used to get. Since this is called only once, we get a unique name. */ ret = bdi_register(bdi, "%.28s-0", name); if (ret) bdi_put(bdi); return ret ? ERR_PTR(ret) : bdi; } static struct proc_dir_entry *proc_mtd; static int __init init_mtd(void) { int ret; ret = class_register(&mtd_class); if (ret) goto err_reg; mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); goto err_bdi; } proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show); ret = init_mtdchar(); if (ret) goto out_procfs; dfs_dir_mtd = debugfs_create_dir("mtd", NULL); debugfs_create_bool("expert_analysis_mode", 0600, dfs_dir_mtd, &mtd_expert_analysis_mode); return 0; out_procfs: if (proc_mtd) remove_proc_entry("mtd", NULL); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); err_bdi: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); return ret; } static void __exit cleanup_mtd(void) { debugfs_remove_recursive(dfs_dir_mtd); cleanup_mtdchar(); if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); } module_init(init_mtd); module_exit(cleanup_mtd); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); MODULE_DESCRIPTION("Core MTD registration and access routines");
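/*
 * Illustrative sketch, not kernel code: the same "ask big, halve on failure"
 * back-off that mtd_kmalloc_up_to() implements, redone in user space with
 * plain malloc() so it can be compiled and run. PAGE_SIZE_HINT and
 * WRITESIZE_HINT are made-up stand-ins for PAGE_SIZE and mtd->writesize; they
 * are assumptions, not values taken from the code above.
 */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE_HINT 4096UL /* assumed page size */
#define WRITESIZE_HINT 2048UL /* assumed minimum I/O unit (mtd->writesize) */

static void *buf_alloc_up_to(size_t *size)
{
	size_t min_alloc = PAGE_SIZE_HINT > WRITESIZE_HINT ?
			   PAGE_SIZE_HINT : WRITESIZE_HINT;
	void *buf;

	while (*size > min_alloc) {
		buf = malloc(*size);
		if (buf)
			return buf;
		/* back off: halve the request, then round up to the I/O unit */
		*size >>= 1;
		*size = (*size + WRITESIZE_HINT - 1) /
			WRITESIZE_HINT * WRITESIZE_HINT;
	}
	/* last resort: the smallest request we are willing to make */
	return malloc(*size);
}

int main(void)
{
	size_t want = 1UL << 20; /* ideal size; accept anything down to a page */
	void *buf = buf_alloc_up_to(&want);

	if (!buf)
		return 1;
	printf("allocated %zu bytes\n", want);
	free(buf);
	return 0;
}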
/* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
#ifndef RDMA_CORE_H #define RDMA_CORE_H #include <linux/idr.h> #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/ib_verbs.h> #include <linux/mutex.h>
struct ib_uverbs_device; void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason); int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
/* * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable.
* This function could create (access == NEW), destroy (access == DESTROY) * or unlock (access == READ || access == WRITE) objects if required.
* The action will be finalized only when uverbs_finalize_object or * uverbs_finalize_objects are called.
*/ struct ib_uobject * uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); void uverbs_finalize_object(struct ib_uobject *uobj, enum uverbs_obj_access access, bool hw_obj_valid, bool commit, struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs); /* * This is the runtime description of the uverbs API, used by the syscall * machinery to validate and dispatch calls. */ /* * Depending on ID the slot pointer in the radix tree points at one of these * structs. */ struct uverbs_api_ioctl_method { int(__rcu *handler)(struct uverbs_attr_bundle *attrs); DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); u16 bundle_size; u8 use_stack:1; u8 driver_method:1; u8 disabled:1; u8 has_udata:1; u8 key_bitmap_len; u8 destroy_bkey; }; struct uverbs_api_write_method { int (*handler)(struct uverbs_attr_bundle *attrs); u8 disabled:1; u8 is_ex:1; u8 has_udata:1; u8 has_resp:1; u8 req_size; u8 resp_size; }; struct uverbs_api_attr { struct uverbs_attr_spec spec; }; struct uverbs_api { /* radix tree contains struct uverbs_api_* pointers */ struct radix_tree_root radix; enum rdma_driver_id driver_id; unsigned int num_write; unsigned int num_write_ex; struct uverbs_api_write_method notsupp_method; const struct uverbs_api_write_method **write_methods; const struct uverbs_api_write_method **write_ex_methods; }; /* * Get an uverbs_api_object that corresponds to the given object_id. * Note: * -ENOMSG means that any object is allowed to match during lookup. 
*/ static inline const struct uverbs_api_object * uapi_get_object(struct uverbs_api *uapi, u16 object_id) { const struct uverbs_api_object *res; if (object_id == UVERBS_IDR_ANY_OBJECT) return ERR_PTR(-ENOMSG); res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); if (!res) return ERR_PTR(-ENOENT); return res; } char *uapi_key_format(char *S, unsigned int key); struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev); void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); extern const struct uapi_definition uverbs_def_obj_async_fd[]; extern const struct uapi_definition uverbs_def_obj_counters[]; extern const struct uapi_definition uverbs_def_obj_cq[]; extern const struct uapi_definition uverbs_def_obj_device[]; extern const struct uapi_definition uverbs_def_obj_dm[]; extern const struct uapi_definition uverbs_def_obj_flow_action[]; extern const struct uapi_definition uverbs_def_obj_intf[]; extern const struct uapi_definition uverbs_def_obj_mr[]; extern const struct uapi_definition uverbs_def_obj_qp[]; extern const struct uapi_definition uverbs_def_obj_srq[]; extern const struct uapi_definition uverbs_def_obj_wq[]; extern const struct uapi_definition uverbs_def_write_intf[]; static inline const struct uverbs_api_write_method * uapi_get_method(const struct uverbs_api *uapi, u32 command) { u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK; if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | IB_USER_VERBS_CMD_COMMAND_MASK)) return ERR_PTR(-EINVAL); if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) { if (cmd_idx >= uapi->num_write_ex) return ERR_PTR(-EOPNOTSUPP); return uapi->write_ex_methods[cmd_idx]; } if (cmd_idx >= uapi->num_write) return ERR_PTR(-EOPNOTSUPP); return uapi->write_methods[cmd_idx]; } void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata, unsigned int attr_in, unsigned int attr_out); #endif /* RDMA_CORE_H */
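/*
 * Illustrative sketch, not kernel code: the command decoding that
 * uapi_get_method() above performs, i.e. splitting a command word into an
 * "extended" flag plus an index and dispatching through one of two method
 * tables. CMD_COMMAND_MASK, CMD_FLAG_EXTENDED and the two sample handlers are
 * invented for this sketch; only the decode/dispatch shape mirrors the header
 * above.
 */
#include <stdint.h>
#include <stdio.h>

#define CMD_COMMAND_MASK  0xffu
#define CMD_FLAG_EXTENDED 0x80000000u

typedef int (*method_fn)(void);

static int do_basic(void) { return 1; }
static int do_extended(void) { return 2; }

static method_fn write_methods[]    = { do_basic };
static method_fn write_ex_methods[] = { do_extended };

static method_fn get_method(uint32_t command)
{
	uint32_t idx = command & CMD_COMMAND_MASK;

	/* reject unknown flag bits, as uapi_get_method() does */
	if (command & ~(CMD_FLAG_EXTENDED | CMD_COMMAND_MASK))
		return NULL;

	if (command & CMD_FLAG_EXTENDED) {
		if (idx >= sizeof(write_ex_methods) / sizeof(write_ex_methods[0]))
			return NULL;
		return write_ex_methods[idx];
	}
	if (idx >= sizeof(write_methods) / sizeof(write_methods[0]))
		return NULL;
	return write_methods[idx];
}

int main(void)
{
	method_fn m = get_method(CMD_FLAG_EXTENDED | 0);

	printf("handler returned %d\n", m ? m() : -1);
	return 0;
}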
/* * Cryptographic API. * * Glue code for the SHA512 Secure Hash Algorithm assembler * implementation using supplemental SSE3 / AVX / AVX2 instructions. *
* This file is based on sha512_generic.c * * Copyright (C) 2013 Intel Corporation * Author: Tim Chen <tim.c.chen@linux.intel.com> *
* This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option) * any later version. *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h>
#include <linux/string.h> #include <linux/types.h> #include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <asm/cpu_device_id.h> #include <asm/simd.h>
asmlinkage void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, int blocks);
static int sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha512_block_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc);
if (!crypto_simd_usable() || (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len);
/* * Make sure struct sha512_state begins directly with the SHA512 * 512-bit internal state, as this is what the asm functions expect.
*/ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); kernel_fpu_begin(); sha512_base_do_update(desc, data, len, sha512_xform); kernel_fpu_end(); return 0; } static int sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha512_block_fn *sha512_xform) { if (!crypto_simd_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); if (len) sha512_base_do_update(desc, data, len, sha512_xform); sha512_base_do_finalize(desc, sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); } static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha512_update(desc, data, len, sha512_transform_ssse3); } static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha512_finup(desc, data, len, out, sha512_transform_ssse3); } /* Add padding and return the message digest. */ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) { return sha512_ssse3_finup(desc, NULL, 0, out); } static struct shash_alg sha512_ssse3_algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_ssse3_update, .final = sha512_ssse3_final, .finup = sha512_ssse3_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_driver_name = "sha512-ssse3", .cra_priority = 150, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_base_init, .update = sha512_ssse3_update, .final = sha512_ssse3_final, .finup = sha512_ssse3_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_driver_name = "sha384-ssse3", .cra_priority = 150, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int register_sha512_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) return crypto_register_shashes(sha512_ssse3_algs, ARRAY_SIZE(sha512_ssse3_algs)); return 0; } static void unregister_sha512_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) crypto_unregister_shashes(sha512_ssse3_algs, ARRAY_SIZE(sha512_ssse3_algs)); } asmlinkage void sha512_transform_avx(struct sha512_state *state, const u8 *data, int blocks); static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } return true; } static int sha512_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha512_update(desc, data, len, sha512_transform_avx); } static int sha512_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha512_finup(desc, data, len, out, sha512_transform_avx); } /* Add padding and return the message digest. 
*/ static int sha512_avx_final(struct shash_desc *desc, u8 *out) { return sha512_avx_finup(desc, NULL, 0, out); } static struct shash_alg sha512_avx_algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_avx_update, .final = sha512_avx_final, .finup = sha512_avx_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_driver_name = "sha512-avx", .cra_priority = 160, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_base_init, .update = sha512_avx_update, .final = sha512_avx_final, .finup = sha512_avx_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_driver_name = "sha384-avx", .cra_priority = 160, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int register_sha512_avx(void) { if (avx_usable()) return crypto_register_shashes(sha512_avx_algs, ARRAY_SIZE(sha512_avx_algs)); return 0; } static void unregister_sha512_avx(void) { if (avx_usable()) crypto_unregister_shashes(sha512_avx_algs, ARRAY_SIZE(sha512_avx_algs)); } asmlinkage void sha512_transform_rorx(struct sha512_state *state, const u8 *data, int blocks); static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha512_update(desc, data, len, sha512_transform_rorx); } static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha512_finup(desc, data, len, out, sha512_transform_rorx); } /* Add padding and return the message digest. */ static int sha512_avx2_final(struct shash_desc *desc, u8 *out) { return sha512_avx2_finup(desc, NULL, 0, out); } static struct shash_alg sha512_avx2_algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_avx2_update, .final = sha512_avx2_final, .finup = sha512_avx2_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_driver_name = "sha512-avx2", .cra_priority = 170, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_base_init, .update = sha512_avx2_update, .final = sha512_avx2_final, .finup = sha512_avx2_finup, .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_driver_name = "sha384-avx2", .cra_priority = 170, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static bool avx2_usable(void) { if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) return true; return false; } static int register_sha512_avx2(void) { if (avx2_usable()) return crypto_register_shashes(sha512_avx2_algs, ARRAY_SIZE(sha512_avx2_algs)); return 0; } static const struct x86_cpu_id module_cpu_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); static void unregister_sha512_avx2(void) { if (avx2_usable()) crypto_unregister_shashes(sha512_avx2_algs, ARRAY_SIZE(sha512_avx2_algs)); } static int __init sha512_ssse3_mod_init(void) { if (!x86_match_cpu(module_cpu_ids)) return -ENODEV; if (register_sha512_ssse3()) goto fail; if (register_sha512_avx()) { unregister_sha512_ssse3(); goto fail; } if (register_sha512_avx2()) { unregister_sha512_avx(); unregister_sha512_ssse3(); goto fail; } return 0; fail: return -ENODEV; } static void __exit sha512_ssse3_mod_fini(void) { unregister_sha512_avx2(); unregister_sha512_avx(); 
unregister_sha512_ssse3(); } module_init(sha512_ssse3_mod_init); module_exit(sha512_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha512"); MODULE_ALIAS_CRYPTO("sha512-ssse3"); MODULE_ALIAS_CRYPTO("sha512-avx"); MODULE_ALIAS_CRYPTO("sha512-avx2"); MODULE_ALIAS_CRYPTO("sha384"); MODULE_ALIAS_CRYPTO("sha384-ssse3"); MODULE_ALIAS_CRYPTO("sha384-avx"); MODULE_ALIAS_CRYPTO("sha384-avx2");
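/*
 * Illustrative sketch, not part of the glue code above: hashing a short
 * message with whatever "sha512" implementation the kernel selected (generic,
 * -ssse3, -avx or -avx2, chosen by cra_priority) through the AF_ALG
 * user-space interface. Assumes a Linux system with
 * CONFIG_CRYPTO_USER_API_HASH enabled; error handling is kept minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha512",
	};
	unsigned char digest[64];
	int tfm, op, i;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (tfm < 0 || bind(tfm, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;
	op = accept(tfm, NULL, 0);
	if (op < 0)
		return 1;

	if (write(op, "abc", 3) != 3)                /* message to hash */
		return 1;
	if (read(op, digest, sizeof(digest)) != sizeof(digest))
		return 1;

	for (i = 0; i < (int)sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");

	close(op);
	close(tfm);
	return 0;
}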
// SPDX-License-Identifier: GPL-2.0-only /* * Crypto user configuration API.
* * Copyright (C) 2011 secunet Security Networks AG * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/module.h> #include <linux/crypto.h> #include <linux/cryptouser.h> #include <linux/sched.h> #include <linux/security.h> #include <net/netlink.h> #include <net/net_namespace.h> #include <net/sock.h> #include <crypto/internal/skcipher.h> #include <crypto/internal/rng.h> #include <crypto/akcipher.h> #include <crypto/kpp.h> #include "internal.h" #define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x)) static DEFINE_MUTEX(crypto_cfg_mutex); struct crypto_dump_info { struct sk_buff *in_skb; struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; }; static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) { struct crypto_alg *q, *alg = NULL; down_read(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { int match = 0; if (crypto_is_larval(q)) continue; if ((q->cra_flags ^ p->cru_type) & p->cru_mask) continue; if (strlen(p->cru_driver_name)) match = !strcmp(q->cra_driver_name, p->cru_driver_name); else if (!exact) match = !strcmp(q->cra_name, p->cru_name); if (!match) continue; if (unlikely(!crypto_mod_get(q))) continue; alg = q; break; } up_read(&crypto_alg_sem); return alg; } static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; memset(&rcipher, 0, sizeof(rcipher)); strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; rcipher.max_keysize = alg->cra_cipher.cia_max_keysize; return nla_put(skb, CRYPTOCFGA_REPORT_CIPHER, sizeof(rcipher), &rcipher); } static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; memset(&rcomp, 0, sizeof(rcomp)); strscpy(rcomp.type, "compression", sizeof(rcomp.type)); return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(rcomp), &rcomp); } static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { memset(ualg, 0, sizeof(*ualg)); strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); strscpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); strscpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; ualg->cru_mask = 0; ualg->cru_flags = alg->cra_flags; ualg->cru_refcnt = refcount_read(&alg->cra_refcnt); if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority)) goto nla_put_failure; if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; memset(&rl, 0, sizeof(rl)); strscpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(rl), &rl)) goto nla_put_failure; goto out; } if (alg->cra_type && alg->cra_type->report) { if (alg->cra_type->report(skb, alg)) goto nla_put_failure; goto out; } switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) { case CRYPTO_ALG_TYPE_CIPHER: if (crypto_report_cipher(skb, alg)) goto nla_put_failure; break; case CRYPTO_ALG_TYPE_COMPRESS: if (crypto_report_comp(skb, alg)) goto nla_put_failure; break; } out: return 0; nla_put_failure: return -EMSGSIZE; } static int crypto_report_alg(struct crypto_alg *alg, struct crypto_dump_info *info) { struct sk_buff *in_skb = info->in_skb; struct sk_buff *skb = info->out_skb; struct nlmsghdr *nlh; struct crypto_user_alg *ualg; int err = 0; nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, 
CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags); if (!nlh) { err = -EMSGSIZE; goto out; } ualg = nlmsg_data(nlh); err = crypto_report_one(alg, ualg, skb); if (err) { nlmsg_cancel(skb, nlh); goto out; } nlmsg_end(skb, nlh); out: return err; } static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs) { struct net *net = sock_net(in_skb->sk); struct crypto_user_alg *p = nlmsg_data(in_nlh); struct crypto_alg *alg; struct sk_buff *skb; struct crypto_dump_info info; int err; if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) return -EINVAL; alg = crypto_alg_match(p, 0); if (!alg) return -ENOENT; err = -ENOMEM; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) goto drop_alg; info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = in_nlh->nlmsg_seq; info.nlmsg_flags = 0; err = crypto_report_alg(alg, &info); drop_alg: crypto_mod_put(alg); if (err) { kfree_skb(skb); return err; } return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) { const size_t start_pos = cb->args[0]; size_t pos = 0; struct crypto_dump_info info; struct crypto_alg *alg; int res; info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) { if (pos >= start_pos) { res = crypto_report_alg(alg, &info); if (res == -EMSGSIZE) break; if (res) goto out; } pos++; } cb->args[0] = pos; res = skb->len; out: up_read(&crypto_alg_sem); return res; } static int crypto_dump_report_done(struct netlink_callback *cb) { return 0; } static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { struct crypto_alg *alg; struct crypto_user_alg *p = nlmsg_data(nlh); struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; LIST_HEAD(list); if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) return -EINVAL; if (priority && !strlen(p->cru_driver_name)) return -EINVAL; alg = crypto_alg_match(p, 1); if (!alg) return -ENOENT; down_write(&crypto_alg_sem); crypto_remove_spawns(alg, &list, NULL); if (priority) alg->cra_priority = nla_get_u32(priority); up_write(&crypto_alg_sem); crypto_mod_put(alg); crypto_remove_final(&list); return 0; } static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { struct crypto_alg *alg; struct crypto_user_alg *p = nlmsg_data(nlh); int err; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) return -EINVAL; alg = crypto_alg_match(p, 1); if (!alg) return -ENOENT; /* We can not unregister core algorithms such as aes-generic. * We would loose the reference in the crypto_alg_list to this algorithm * if we try to unregister. Unregistering such an algorithm without * removing the module is not possible, so we restrict to crypto * instances that are build from templates. 
*/ err = -EINVAL; if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) goto drop_alg; err = -EBUSY; if (refcount_read(&alg->cra_refcnt) > 2) goto drop_alg; crypto_unregister_instance((struct crypto_instance *)alg); err = 0; drop_alg: crypto_mod_put(alg); return err; } static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { int exact = 0; const char *name; struct crypto_alg *alg; struct crypto_user_alg *p = nlmsg_data(nlh); struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) return -EINVAL; if (strlen(p->cru_driver_name)) exact = 1; if (priority && !exact) return -EINVAL; alg = crypto_alg_match(p, exact); if (alg) { crypto_mod_put(alg); return -EEXIST; } if (strlen(p->cru_driver_name)) name = p->cru_driver_name; else name = p->cru_name; alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); if (IS_ERR(alg)) return PTR_ERR(alg); down_write(&crypto_alg_sem); if (priority) alg->cra_priority = nla_get_u32(priority); up_write(&crypto_alg_sem); crypto_mod_put(alg); return 0; } static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; return crypto_del_default_rng(); } static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs) { /* No longer supported */ return -ENOTSUPP; } #define MSGSIZE(type) sizeof(struct type) static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), }; static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = { [CRYPTOCFGA_PRIORITY_VAL] = { .type = NLA_U32}, }; #undef MSGSIZE static const struct crypto_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); } crypto_dispatch[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = { .doit = crypto_add_alg}, [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = { .doit = crypto_del_alg}, [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = { .doit = crypto_update_alg}, [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = { .doit = crypto_report, .dump = crypto_dump_report, .done = crypto_dump_report_done}, [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng }, [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat}, }; static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[CRYPTOCFGA_MAX+1]; const struct crypto_link *link; int type, err; type = nlh->nlmsg_type; if (type > CRYPTO_MSG_MAX) return -EINVAL; type -= CRYPTO_MSG_BASE; link = &crypto_dispatch[type]; if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && (nlh->nlmsg_flags & NLM_F_DUMP))) { struct crypto_alg *alg; unsigned long dump_alloc = 0; if (link->dump == NULL) return -EINVAL; down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; up_read(&crypto_alg_sem); { struct netlink_dump_control c = { .dump = 
link->dump, .done = link->done, .min_dump_alloc = min(dump_alloc, 65535UL), }; err = netlink_dump_start(net->crypto_nlsk, skb, nlh, &c); } return err; } err = nlmsg_parse_deprecated(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, crypto_policy, extack); if (err < 0) return err; if (link->doit == NULL) return -EINVAL; return link->doit(skb, nlh, attrs); } static void crypto_netlink_rcv(struct sk_buff *skb) { mutex_lock(&crypto_cfg_mutex); netlink_rcv_skb(skb, &crypto_user_rcv_msg); mutex_unlock(&crypto_cfg_mutex); } static int __net_init crypto_netlink_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = crypto_netlink_rcv, }; net->crypto_nlsk = netlink_kernel_create(net, NETLINK_CRYPTO, &cfg); return net->crypto_nlsk == NULL ? -ENOMEM : 0; } static void __net_exit crypto_netlink_exit(struct net *net) { netlink_kernel_release(net->crypto_nlsk); net->crypto_nlsk = NULL; } static struct pernet_operations crypto_netlink_net_ops = { .init = crypto_netlink_init, .exit = crypto_netlink_exit, }; static int __init crypto_user_init(void) { return register_pernet_subsys(&crypto_netlink_net_ops); } static void __exit crypto_user_exit(void) { unregister_pernet_subsys(&crypto_netlink_net_ops); } module_init(crypto_user_init); module_exit(crypto_user_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_DESCRIPTION("Crypto userspace configuration API"); MODULE_ALIAS("net-pf-16-proto-21");
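/*
 * Illustrative sketch, not part of crypto_user above: sending one
 * CRYPTO_MSG_GETALG request over a NETLINK_CRYPTO socket, which
 * crypto_report() answers with a description of the matching algorithm. The
 * driver name "sha512-generic" is only an example value; the message layout
 * relies on struct crypto_user_alg from <linux/cryptouser.h>. Reply parsing
 * and error handling are deliberately minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/cryptouser.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct crypto_user_alg alg;
	} req;
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	char reply[4096];
	ssize_t len;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.alg));
	req.nlh.nlmsg_type  = CRYPTO_MSG_GETALG;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	strncpy(req.alg.cru_driver_name, "sha512-generic",
		sizeof(req.alg.cru_driver_name) - 1);

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	len = recv(fd, reply, sizeof(reply), 0);
	printf("received %zd bytes of CRYPTO_MSG_GETALG reply\n", len);

	close(fd);
	return 0;
}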
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/mutex.h> #include <net/sock.h> #include "nf_internals.h"
/* Sockopts only registered and called from user context, so net locking would be overkill. Also, [gs]etsockopt calls may sleep. */
static DEFINE_MUTEX(nf_sockopt_mutex); static LIST_HEAD(nf_sockopts);
/* Do exclusive ranges overlap? */ static inline int overlap(int min1, int max1, int min2, int max2) { return max1 > min2 && min1 < max2; }
/* Functions to register sockopt ranges (exclusive). */ int nf_register_sockopt(struct nf_sockopt_ops *reg) { struct nf_sockopt_ops *ops; int ret = 0;
mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, reg->set_optmin, reg->set_optmax) || overlap(ops->get_optmin, ops->get_optmax, reg->get_optmin, reg->get_optmax))) {
pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", ops->set_optmin, ops->set_optmax, ops->get_optmin, ops->get_optmax, reg->set_optmin, reg->set_optmax, reg->get_optmin, reg->get_optmax);
ret = -EBUSY; goto out; } } list_add(&reg->list, &nf_sockopts); out: mutex_unlock(&nf_sockopt_mutex); return ret; } EXPORT_SYMBOL(nf_register_sockopt);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { mutex_lock(&nf_sockopt_mutex); list_del(&reg->list); mutex_unlock(&nf_sockopt_mutex); } EXPORT_SYMBOL(nf_unregister_sockopt);
static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, int val, int get) { struct nf_sockopt_ops *ops;
mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup;
if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) goto out; } else { if (val >= ops->set_optmin && val < ops->set_optmax) goto out; } module_put(ops->owner); } }
out_nosup: ops = ERR_PTR(-ENOPROTOOPT); out: mutex_unlock(&nf_sockopt_mutex); return ops; }
int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt, unsigned int len) { struct nf_sockopt_ops *ops; int ret;
ops = nf_sockopt_find(sk, pf, val, 0); if (IS_ERR(ops)) return PTR_ERR(ops); ret = ops->set(sk, val, opt, len); module_put(ops->owner); return ret; } EXPORT_SYMBOL(nf_setsockopt);
int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len) { struct nf_sockopt_ops *ops; int ret;
ops = nf_sockopt_find(sk, pf, val, 1); if (IS_ERR(ops)) return PTR_ERR(ops); ret = ops->get(sk, val, opt, len); module_put(ops->owner); return ret; } EXPORT_SYMBOL(nf_getsockopt);
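/*
 * Illustrative sketch: the exclusive-range test that nf_register_sockopt()
 * above applies when it checks a new [set_optmin, set_optmax) /
 * [get_optmin, get_optmax) registration against the existing ones. The
 * sample ranges below are made up.
 */
#include <stdio.h>

static int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}

int main(void)
{
	/* [64, 67) and [67, 70) only touch at 67, so they do not overlap */
	printf("adjacent: %d\n", overlap(64, 67, 67, 70));
	/* [64, 68) and [67, 70) share option 67, so they do overlap */
	printf("sharing:  %d\n", overlap(64, 68, 67, 70));
	return 0;
}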
// SPDX-License-Identifier: GPL-2.0-only
/*
 * refcounttree.c
 *
 * Copyright (C) 2009 Oracle. All rights reserved.
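 *
 * Refcount trees track how many owners share each cluster range so that
 * reflinked files can share data clusters and fall back to copy-on-write
 * (see struct ocfs2_cow_context below) when a shared range is written.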
*/ #include <linux/sort.h> #include <cluster/masklog.h> #include "ocfs2.h" #include "inode.h" #include "alloc.h" #include "suballoc.h" #include "journal.h" #include "uptodate.h" #include "super.h" #include "buffer_head_io.h" #include "blockcheck.h" #include "refcounttree.h" #include "sysfile.h" #include "dlmglue.h" #include "extent_map.h" #include "aops.h" #include "xattr.h" #include "namei.h" #include "ocfs2_trace.h" #include "file.h" #include "symlink.h" #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/swap.h> #include <linux/security.h> #include <linux/fsnotify.h> #include <linux/quotaops.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/posix_acl.h> struct ocfs2_cow_context { struct inode *inode; u32 cow_start; u32 cow_len; struct ocfs2_extent_tree data_et; struct ocfs2_refcount_tree *ref_tree; struct buffer_head *ref_root_bh; struct ocfs2_alloc_context *meta_ac; struct ocfs2_alloc_context *data_ac; struct ocfs2_cached_dealloc_ctxt dealloc; void *cow_object; struct ocfs2_post_refcount *post_refcount; int extra_credits; int (*get_clusters)(struct ocfs2_cow_context *context, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, unsigned int *extent_flags); int (*cow_duplicate_clusters)(handle_t *handle, struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len); }; static inline struct ocfs2_refcount_tree * cache_info_to_refcount(struct ocfs2_caching_info *ci) { return container_of(ci, struct ocfs2_refcount_tree, rf_ci); } static int ocfs2_validate_refcount_block(struct super_block *sb, struct buffer_head *bh) { int rc; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)bh->b_data; trace_ocfs2_validate_refcount_block((unsigned long long)bh->b_blocknr); BUG_ON(!buffer_uptodate(bh)); /* * If the ecc fails, we return the error but otherwise * leave the filesystem running. We know any error is * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); if (rc) { mlog(ML_ERROR, "Checksum failed for refcount block %llu\n", (unsigned long long)bh->b_blocknr); return rc; } if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) { rc = ocfs2_error(sb, "Refcount block #%llu has bad signature %.*s\n", (unsigned long long)bh->b_blocknr, 7, rb->rf_signature); goto out; } if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) { rc = ocfs2_error(sb, "Refcount block #%llu has an invalid rf_blkno of %llu\n", (unsigned long long)bh->b_blocknr, (unsigned long long)le64_to_cpu(rb->rf_blkno)); goto out; } if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) { rc = ocfs2_error(sb, "Refcount block #%llu has an invalid rf_fs_generation of #%u\n", (unsigned long long)bh->b_blocknr, le32_to_cpu(rb->rf_fs_generation)); goto out; } out: return rc; } static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci, u64 rb_blkno, struct buffer_head **bh) { int rc; struct buffer_head *tmp = *bh; rc = ocfs2_read_block(ci, rb_blkno, &tmp, ocfs2_validate_refcount_block); /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ if (!rc && !*bh) *bh = tmp; return rc; } static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); return rf->rf_blkno; } static struct super_block * ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); return rf->rf_sb; } static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) __acquires(&rf->rf_lock) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); spin_lock(&rf->rf_lock); } static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) __releases(&rf->rf_lock) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); spin_unlock(&rf->rf_lock); } static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); mutex_lock(&rf->rf_io_mutex); } static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); mutex_unlock(&rf->rf_io_mutex); } static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = { .co_owner = ocfs2_refcount_cache_owner, .co_get_super = ocfs2_refcount_cache_get_super, .co_cache_lock = ocfs2_refcount_cache_lock, .co_cache_unlock = ocfs2_refcount_cache_unlock, .co_io_lock = ocfs2_refcount_cache_io_lock, .co_io_unlock = ocfs2_refcount_cache_io_unlock, }; static struct ocfs2_refcount_tree * ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) { struct rb_node *n = osb->osb_rf_lock_tree.rb_node; struct ocfs2_refcount_tree *tree = NULL; while (n) { tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); if (blkno < tree->rf_blkno) n = n->rb_left; else if (blkno > tree->rf_blkno) n = n->rb_right; else return tree; } return NULL; } /* osb_lock is already locked. */ static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, struct ocfs2_refcount_tree *new) { u64 rf_blkno = new->rf_blkno; struct rb_node *parent = NULL; struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; struct ocfs2_refcount_tree *tmp; while (*p) { parent = *p; tmp = rb_entry(parent, struct ocfs2_refcount_tree, rf_node); if (rf_blkno < tmp->rf_blkno) p = &(*p)->rb_left; else if (rf_blkno > tmp->rf_blkno) p = &(*p)->rb_right; else { /* This should never happen! 
*/ mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", (unsigned long long)rf_blkno); BUG(); } } rb_link_node(&new->rf_node, parent, p); rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); } static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree) { ocfs2_metadata_cache_exit(&tree->rf_ci); ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres); ocfs2_lock_res_free(&tree->rf_lockres); kfree(tree); } static inline void ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, struct ocfs2_refcount_tree *tree) { rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) osb->osb_ref_tree_lru = NULL; } static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, struct ocfs2_refcount_tree *tree) { spin_lock(&osb->osb_lock); ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); spin_unlock(&osb->osb_lock); } static void ocfs2_kref_remove_refcount_tree(struct kref *kref) { struct ocfs2_refcount_tree *tree = container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); ocfs2_free_refcount_tree(tree); } static inline void ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree) { kref_get(&tree->rf_getcnt); } static inline void ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree) { kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree); } static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, struct super_block *sb) { ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); mutex_init(&new->rf_io_mutex); new->rf_sb = sb; spin_lock_init(&new->rf_lock); } static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb, struct ocfs2_refcount_tree *new, u64 rf_blkno, u32 generation) { init_rwsem(&new->rf_sem); ocfs2_refcount_lock_res_init(&new->rf_lockres, osb, rf_blkno, generation); } static struct ocfs2_refcount_tree* ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno) { struct ocfs2_refcount_tree *new; new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); if (!new) return NULL; new->rf_blkno = rf_blkno; kref_init(&new->rf_getcnt); ocfs2_init_refcount_tree_ci(new, osb->sb); return new; } static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno, struct ocfs2_refcount_tree **ret_tree) { int ret = 0; struct ocfs2_refcount_tree *tree, *new = NULL; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_block *ref_rb; spin_lock(&osb->osb_lock); if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru->rf_blkno == rf_blkno) tree = osb->osb_ref_tree_lru; else tree = ocfs2_find_refcount_tree(osb, rf_blkno); if (tree) goto out; spin_unlock(&osb->osb_lock); new = ocfs2_allocate_refcount_tree(osb, rf_blkno); if (!new) { ret = -ENOMEM; mlog_errno(ret); return ret; } /* * We need the generation to create the refcount tree lock and since * it isn't changed during the tree modification, we are safe here to * read without protection. * We also have to purge the cache after we create the lock since the * refcount block may have the stale data. It can only be trusted when * we hold the refcount lock. 
*/ ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); if (ret) { mlog_errno(ret); ocfs2_metadata_cache_exit(&new->rf_ci); kfree(new); return ret; } ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; new->rf_generation = le32_to_cpu(ref_rb->rf_generation); ocfs2_init_refcount_tree_lock(osb, new, rf_blkno, new->rf_generation); ocfs2_metadata_cache_purge(&new->rf_ci); spin_lock(&osb->osb_lock); tree = ocfs2_find_refcount_tree(osb, rf_blkno); if (tree) goto out; ocfs2_insert_refcount_tree(osb, new); tree = new; new = NULL; out: *ret_tree = tree; osb->osb_ref_tree_lru = tree; spin_unlock(&osb->osb_lock); if (new) ocfs2_free_refcount_tree(new); brelse(ref_root_bh); return ret; } static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) { int ret; struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; } BUG_ON(!ocfs2_is_refcount_inode(inode)); di = (struct ocfs2_dinode *)di_bh->b_data; *ref_blkno = le64_to_cpu(di->i_refcount_loc); brelse(di_bh); out: return ret; } static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, struct ocfs2_refcount_tree *tree, int rw) { int ret; ret = ocfs2_refcount_lock(tree, rw); if (ret) { mlog_errno(ret); goto out; } if (rw) down_write(&tree->rf_sem); else down_read(&tree->rf_sem); out: return ret; } /* * Lock the refcount tree pointed by ref_blkno and return the tree. * In most case, we lock the tree and read the refcount block. * So read it here if the caller really needs it. * * If the tree has been re-created by other node, it will free the * old one and re-create it. */ int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw, struct ocfs2_refcount_tree **ret_tree, struct buffer_head **ref_bh) { int ret, delete_tree = 0; struct ocfs2_refcount_tree *tree = NULL; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_block *rb; again: ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); if (ret) { mlog_errno(ret); return ret; } ocfs2_refcount_tree_get(tree); ret = __ocfs2_lock_refcount_tree(osb, tree, rw); if (ret) { mlog_errno(ret); ocfs2_refcount_tree_put(tree); goto out; } ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, &ref_root_bh); if (ret) { mlog_errno(ret); ocfs2_unlock_refcount_tree(osb, tree, rw); goto out; } rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; /* * If the refcount block has been freed and re-created, we may need * to recreate the refcount tree also. * * Here we just remove the tree from the rb-tree, and the last * kref holder will unlock and delete this refcount_tree. * Then we goto "again" and ocfs2_get_refcount_tree will create * the new refcount tree for us. */ if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { if (!tree->rf_removed) { ocfs2_erase_refcount_tree_from_list(osb, tree); tree->rf_removed = 1; delete_tree = 1; } ocfs2_unlock_refcount_tree(osb, tree, rw); /* * We get an extra reference when we create the refcount * tree, so another put will destroy it. 
*/ if (delete_tree) ocfs2_refcount_tree_put(tree); brelse(ref_root_bh); ref_root_bh = NULL; goto again; } *ret_tree = tree; if (ref_bh) { *ref_bh = ref_root_bh; ref_root_bh = NULL; } out: brelse(ref_root_bh); return ret; } void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, struct ocfs2_refcount_tree *tree, int rw) { if (rw) up_write(&tree->rf_sem); else up_read(&tree->rf_sem); ocfs2_refcount_unlock(tree, rw); ocfs2_refcount_tree_put(tree); } void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) { struct rb_node *node; struct ocfs2_refcount_tree *tree; struct rb_root *root = &osb->osb_rf_lock_tree; while ((node = rb_last(root)) != NULL) { tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); trace_ocfs2_purge_refcount_trees( (unsigned long long) tree->rf_blkno); rb_erase(&tree->rf_node, root); ocfs2_free_refcount_tree(tree); } } /* * Create a refcount tree for an inode. * We take for granted that the inode is already locked. */ static int ocfs2_create_refcount_tree(struct inode *inode, struct buffer_head *di_bh) { int ret; handle_t *handle = NULL; struct ocfs2_alloc_context *meta_ac = NULL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *new_bh = NULL; struct ocfs2_refcount_block *rb; struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; u16 suballoc_bit_start; u32 num_got; u64 suballoc_loc, first_blkno; BUG_ON(ocfs2_is_refcount_inode(inode)); trace_ocfs2_create_refcount_tree( (unsigned long long)oi->ip_blkno); ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); if (ret) { mlog_errno(ret); goto out; } handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &first_blkno); if (ret) { mlog_errno(ret); goto out_commit; } new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno); if (!new_tree) { ret = -ENOMEM; mlog_errno(ret); goto out_commit; } new_bh = sb_getblk(inode->i_sb, first_blkno); if (!new_bh) { ret = -ENOMEM; mlog_errno(ret); goto out_commit; } ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; } /* Initialize ocfs2_refcount_block. 
*/ rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(rb, 0, inode->i_sb->s_blocksize); strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); rb->rf_blkno = cpu_to_le64(first_blkno); rb->rf_count = cpu_to_le32(1); rb->rf_records.rl_count = cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); spin_lock(&osb->osb_lock); rb->rf_generation = cpu_to_le32(osb->s_next_generation++); spin_unlock(&osb->osb_lock); ocfs2_journal_dirty(handle, new_bh); spin_lock(&oi->ip_lock); oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); di->i_refcount_loc = cpu_to_le64(first_blkno); spin_unlock(&oi->ip_lock); trace_ocfs2_create_refcount_tree_blkno((unsigned long long)first_blkno); ocfs2_journal_dirty(handle, di_bh); /* * We have to init the tree lock here since it will use * the generation number to create it. */ new_tree->rf_generation = le32_to_cpu(rb->rf_generation); ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno, new_tree->rf_generation); spin_lock(&osb->osb_lock); tree = ocfs2_find_refcount_tree(osb, first_blkno); /* * We've just created a new refcount tree in this block. If * we found a refcount tree on the ocfs2_super, it must be * one we just deleted. We free the old tree before * inserting the new tree. */ BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); if (tree) ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); ocfs2_insert_refcount_tree(osb, new_tree); spin_unlock(&osb->osb_lock); new_tree = NULL; if (tree) ocfs2_refcount_tree_put(tree); out_commit: ocfs2_commit_trans(osb, handle); out: if (new_tree) { ocfs2_metadata_cache_exit(&new_tree->rf_ci); kfree(new_tree); } brelse(new_bh); if (meta_ac) ocfs2_free_alloc_context(meta_ac); return ret; } static int ocfs2_set_refcount_tree(struct inode *inode, struct buffer_head *di_bh, u64 refcount_loc) { int ret; handle_t *handle = NULL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_block *rb; struct ocfs2_refcount_tree *ref_tree; BUG_ON(ocfs2_is_refcount_inode(inode)); ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); return ret; } handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; le32_add_cpu(&rb->rf_count, 1); ocfs2_journal_dirty(handle, ref_root_bh); spin_lock(&oi->ip_lock); oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); di->i_refcount_loc = cpu_to_le64(refcount_loc); spin_unlock(&oi->ip_lock); ocfs2_journal_dirty(handle, di_bh); out_commit: ocfs2_commit_trans(osb, handle); out: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); return ret; } int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) { int ret, 
delete_tree = 0; handle_t *handle = NULL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_refcount_block *rb; struct inode *alloc_inode = NULL; struct buffer_head *alloc_bh = NULL; struct buffer_head *blk_bh = NULL; struct ocfs2_refcount_tree *ref_tree; int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS; u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); u16 bit = 0; if (!ocfs2_is_refcount_inode(inode)) return 0; BUG_ON(!ref_blkno); ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh); if (ret) { mlog_errno(ret); return ret; } rb = (struct ocfs2_refcount_block *)blk_bh->b_data; /* * If we are the last user, we need to free the block. * So lock the allocator ahead. */ if (le32_to_cpu(rb->rf_count) == 1) { blk = le64_to_cpu(rb->rf_blkno); bit = le16_to_cpu(rb->rf_suballoc_bit); if (rb->rf_suballoc_loc) bg_blkno = le64_to_cpu(rb->rf_suballoc_loc); else bg_blkno = ocfs2_which_suballoc_group(blk, bit); alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(rb->rf_suballoc_slot)); if (!alloc_inode) { ret = -ENOMEM; mlog_errno(ret); goto out; } inode_lock(alloc_inode); ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1); if (ret) { mlog_errno(ret); goto out_mutex; } credits += OCFS2_SUBALLOC_FREE; } handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out_unlock; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } spin_lock(&oi->ip_lock); oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL; di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); di->i_refcount_loc = 0; spin_unlock(&oi->ip_lock); ocfs2_journal_dirty(handle, di_bh); le32_add_cpu(&rb->rf_count , -1); ocfs2_journal_dirty(handle, blk_bh); if (!rb->rf_count) { delete_tree = 1; ocfs2_erase_refcount_tree_from_list(osb, ref_tree); ret = ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, bit, bg_blkno, 1); if (ret) mlog_errno(ret); } out_commit: ocfs2_commit_trans(osb, handle); out_unlock: if (alloc_inode) { ocfs2_inode_unlock(alloc_inode, 1); brelse(alloc_bh); } out_mutex: if (alloc_inode) { inode_unlock(alloc_inode); iput(alloc_inode); } out: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); if (delete_tree) ocfs2_refcount_tree_put(ref_tree); brelse(blk_bh); return ret; } static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci, struct buffer_head *ref_leaf_bh, u64 cpos, unsigned int len, struct ocfs2_refcount_rec *ret_rec, int *index) { int i = 0; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_rec *rec = NULL; for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) { rec = &rb->rf_records.rl_recs[i]; if (le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters) <= cpos) continue; else if (le64_to_cpu(rec->r_cpos) > cpos) break; /* ok, cpos fail in this rec. Just return. */ if (ret_rec) *ret_rec = *rec; goto out; } if (ret_rec) { /* We meet with a hole here, so fake the rec. 
*/ ret_rec->r_cpos = cpu_to_le64(cpos); ret_rec->r_refcount = 0; if (i < le16_to_cpu(rb->rf_records.rl_used) && le64_to_cpu(rec->r_cpos) < cpos + len) ret_rec->r_clusters = cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos); else ret_rec->r_clusters = cpu_to_le32(len); } out: *index = i; } /* * Try to remove refcount tree. The mechanism is: * 1) Check whether i_clusters == 0, if no, exit. * 2) check whether we have i_xattr_loc in dinode. if yes, exit. * 3) Check whether we have inline xattr stored outside, if yes, exit. * 4) Remove the tree. */ int ocfs2_try_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) { int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; down_write(&oi->ip_xattr_sem); down_write(&oi->ip_alloc_sem); if (oi->ip_clusters) goto out; if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc) goto out; if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL && ocfs2_has_inline_xattr_value_outside(inode, di)) goto out; ret = ocfs2_remove_refcount_tree(inode, di_bh); if (ret) mlog_errno(ret); out: up_write(&oi->ip_alloc_sem); up_write(&oi->ip_xattr_sem); return 0; } /* * Find the end range for a leaf refcount block indicated by * el->l_recs[index].e_blkno. */ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct ocfs2_extent_block *eb, struct ocfs2_extent_list *el, int index, u32 *cpos_end) { int ret, i, subtree_root; u32 cpos; u64 blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_path *left_path = NULL, *right_path = NULL; struct ocfs2_extent_tree et; struct ocfs2_extent_list *tmp_el; if (index < le16_to_cpu(el->l_next_free_rec) - 1) { /* * We have a extent rec after index, so just use the e_cpos * of the next extent rec. */ *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); return 0; } if (!eb || !eb->h_next_leaf_blk) { /* * We are the last extent rec, so any high cpos should * be stored in this leaf refcount block. */ *cpos_end = UINT_MAX; return 0; } /* * If the extent block isn't the last one, we have to find * the subtree root between this extent block and the next * leaf extent block and get the corresponding e_cpos from * the subroot. Otherwise we may corrupt the b-tree. */ ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); left_path = ocfs2_new_path_from_et(&et); if (!left_path) { ret = -ENOMEM; mlog_errno(ret); goto out; } cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos); ret = ocfs2_find_path(ci, left_path, cpos); if (ret) { mlog_errno(ret); goto out; } right_path = ocfs2_new_path_from_path(left_path); if (!right_path) { ret = -ENOMEM; mlog_errno(ret); goto out; } ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_find_path(ci, right_path, cpos); if (ret) { mlog_errno(ret); goto out; } subtree_root = ocfs2_find_subtree_root(&et, left_path, right_path); tmp_el = left_path->p_node[subtree_root].el; blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) { if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); break; } } BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec)); out: ocfs2_free_path(left_path); ocfs2_free_path(right_path); return ret; } /* * Given a cpos and len, try to find the refcount record which contains cpos. * 1. If cpos can be found in one refcount record, return the record. * 2. 
If cpos can't be found, return a fake record which start from cpos * and end at a small value between cpos+len and start of the next record. * This fake record has r_refcount = 0. */ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, u64 cpos, unsigned int len, struct ocfs2_refcount_rec *ret_rec, int *index, struct buffer_head **ret_bh) { int ret = 0, i, found; u32 low_cpos, cpos_end; struct ocfs2_extent_list *el; struct ocfs2_extent_rec *rec = NULL; struct ocfs2_extent_block *eb = NULL; struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) { ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len, ret_rec, index); *ret_bh = ref_root_bh; get_bh(ref_root_bh); return 0; } el = &rb->rf_list; low_cpos = cpos & OCFS2_32BIT_POS_MASK; if (el->l_tree_depth) { ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh); if (ret) { mlog_errno(ret); goto out; } eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; if (el->l_tree_depth) { ret = ocfs2_error(sb, "refcount tree %llu has non zero tree depth in leaf btree tree block %llu\n", (unsigned long long)ocfs2_metadata_cache_owner(ci), (unsigned long long)eb_bh->b_blocknr); goto out; } } found = 0; for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { rec = &el->l_recs[i]; if (le32_to_cpu(rec->e_cpos) <= low_cpos) { found = 1; break; } } if (found) { ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh, eb, el, i, &cpos_end); if (ret) { mlog_errno(ret); goto out; } if (cpos_end < low_cpos + len) len = cpos_end - low_cpos; } ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out; } ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len, ret_rec, index); *ret_bh = ref_leaf_bh; out: brelse(eb_bh); return ret; } enum ocfs2_ref_rec_contig { REF_CONTIG_NONE = 0, REF_CONTIG_LEFT, REF_CONTIG_RIGHT, REF_CONTIG_LEFTRIGHT, }; static enum ocfs2_ref_rec_contig ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb, int index) { if ((rb->rf_records.rl_recs[index].r_refcount == rb->rf_records.rl_recs[index + 1].r_refcount) && (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) + le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) == le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos))) return REF_CONTIG_RIGHT; return REF_CONTIG_NONE; } static enum ocfs2_ref_rec_contig ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb, int index) { enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE; if (index < le16_to_cpu(rb->rf_records.rl_used) - 1) ret = ocfs2_refcount_rec_adjacent(rb, index); if (index > 0) { enum ocfs2_ref_rec_contig tmp; tmp = ocfs2_refcount_rec_adjacent(rb, index - 1); if (tmp == REF_CONTIG_RIGHT) { if (ret == REF_CONTIG_RIGHT) ret = REF_CONTIG_LEFTRIGHT; else ret = REF_CONTIG_LEFT; } } return ret; } static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb, int index) { BUG_ON(rb->rf_records.rl_recs[index].r_refcount != rb->rf_records.rl_recs[index+1].r_refcount); le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters, le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters)); if (index < le16_to_cpu(rb->rf_records.rl_used) - 2) memmove(&rb->rf_records.rl_recs[index + 1], &rb->rf_records.rl_recs[index + 2], sizeof(struct ocfs2_refcount_rec) * (le16_to_cpu(rb->rf_records.rl_used) - index - 2)); 
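	/*
	 * Record "index" has absorbed the cluster count of record
	 * "index + 1" and any records after it have been shifted left by
	 * one slot; clear the now-stale last slot and shrink rl_used.
	 */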
memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1], 0, sizeof(struct ocfs2_refcount_rec)); le16_add_cpu(&rb->rf_records.rl_used, -1); } /* * Merge the refcount rec if we are contiguous with the adjacent recs. */ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, int index) { enum ocfs2_ref_rec_contig contig = ocfs2_refcount_rec_contig(rb, index); if (contig == REF_CONTIG_NONE) return; if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) { BUG_ON(index == 0); index--; } ocfs2_rotate_refcount_rec_left(rb, index); if (contig == REF_CONTIG_LEFTRIGHT) ocfs2_rotate_refcount_rec_left(rb, index); } /* * Change the refcount indexed by "index" in ref_bh. * If refcount reaches 0, remove it. */ static int ocfs2_change_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_leaf_bh, int index, int merge, int change) { int ret; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_list *rl = &rb->rf_records; struct ocfs2_refcount_rec *rec = &rl->rl_recs[index]; ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } trace_ocfs2_change_refcount_rec( (unsigned long long)ocfs2_metadata_cache_owner(ci), index, le32_to_cpu(rec->r_refcount), change); le32_add_cpu(&rec->r_refcount, change); if (!rec->r_refcount) { if (index != le16_to_cpu(rl->rl_used) - 1) { memmove(rec, rec + 1, (le16_to_cpu(rl->rl_used) - index - 1) * sizeof(struct ocfs2_refcount_rec)); memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1], 0, sizeof(struct ocfs2_refcount_rec)); } le16_add_cpu(&rl->rl_used, -1); } else if (merge) ocfs2_refcount_rec_merge(rb, index); ocfs2_journal_dirty(handle, ref_leaf_bh); out: return ret; } static int ocfs2_expand_inline_ref_root(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head **ref_leaf_bh, struct ocfs2_alloc_context *meta_ac) { int ret; u16 suballoc_bit_start; u32 num_got; u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct buffer_head *new_bh = NULL; struct ocfs2_refcount_block *new_rb; struct ocfs2_refcount_block *root_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { mlog_errno(ret); goto out; } new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { ret = -ENOMEM; mlog_errno(ret); goto out; } ocfs2_set_new_buffer_uptodate(ci, new_bh); ret = ocfs2_journal_access_rb(handle, ci, new_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out; } /* * Initialize ocfs2_refcount_block. * It should contain the same information as the old root. * so just memcpy it and change the corresponding field. */ memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_blkno = cpu_to_le64(blkno); new_rb->rf_cpos = cpu_to_le32(0); new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); ocfs2_journal_dirty(handle, new_bh); /* Now change the root. 
*/ memset(&root_rb->rf_list, 0, sb->s_blocksize - offsetof(struct ocfs2_refcount_block, rf_list)); root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb)); root_rb->rf_clusters = cpu_to_le32(1); root_rb->rf_list.l_next_free_rec = cpu_to_le16(1); root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno); root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL); ocfs2_journal_dirty(handle, ref_root_bh); trace_ocfs2_expand_inline_ref_root((unsigned long long)blkno, le16_to_cpu(new_rb->rf_records.rl_used)); *ref_leaf_bh = new_bh; new_bh = NULL; out: brelse(new_bh); return ret; } static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev, struct ocfs2_refcount_rec *next) { if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <= ocfs2_get_ref_rec_low_cpos(next)) return 1; return 0; } static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b) { const struct ocfs2_refcount_rec *l = a, *r = b; u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l); u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r); if (l_cpos > r_cpos) return 1; if (l_cpos < r_cpos) return -1; return 0; } static int cmp_refcount_rec_by_cpos(const void *a, const void *b) { const struct ocfs2_refcount_rec *l = a, *r = b; u64 l_cpos = le64_to_cpu(l->r_cpos); u64 r_cpos = le64_to_cpu(r->r_cpos); if (l_cpos > r_cpos) return 1; if (l_cpos < r_cpos) return -1; return 0; } /* * The refcount cpos are ordered by their 64bit cpos, * But we will use the low 32 bit to be the e_cpos in the b-tree. * So we need to make sure that this pos isn't intersected with others. * * Note: The refcount block is already sorted by their low 32 bit cpos, * So just try the middle pos first, and we will exit when we find * the good position. */ static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl, u32 *split_pos, int *split_index) { int num_used = le16_to_cpu(rl->rl_used); int delta, middle = num_used / 2; for (delta = 0; delta < middle; delta++) { /* Let's check delta earlier than middle */ if (ocfs2_refcount_rec_no_intersect( &rl->rl_recs[middle - delta - 1], &rl->rl_recs[middle - delta])) { *split_index = middle - delta; break; } /* For even counts, don't walk off the end */ if ((middle + delta + 1) == num_used) continue; /* Now try delta past middle */ if (ocfs2_refcount_rec_no_intersect( &rl->rl_recs[middle + delta], &rl->rl_recs[middle + delta + 1])) { *split_index = middle + delta + 1; break; } } if (delta >= middle) return -ENOSPC; *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]); return 0; } static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, struct buffer_head *new_bh, u32 *split_cpos) { int split_index = 0, num_moved, ret; u32 cpos = 0; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_list *rl = &rb->rf_records; struct ocfs2_refcount_block *new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; trace_ocfs2_divide_leaf_refcount_block( (unsigned long long)ref_leaf_bh->b_blocknr, le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used)); /* * XXX: Improvement later. * If we know all the high 32 bit cpos is the same, no need to sort. * * In order to make the whole process safe, we do: * 1. sort the entries by their low 32 bit cpos first so that we can * find the split cpos easily. * 2. call ocfs2_insert_extent to insert the new refcount block. * 3. move the refcount rec to the new block. 
* 4. sort the entries by their 64 bit cpos. * 5. dirty the new_rb and rb. */ sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), cmp_refcount_rec_by_low_cpos, NULL); ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); if (ret) { mlog_errno(ret); return ret; } new_rb->rf_cpos = cpu_to_le32(cpos); /* move refcount records starting from split_index to the new block. */ num_moved = le16_to_cpu(rl->rl_used) - split_index; memcpy(new_rl->rl_recs, &rl->rl_recs[split_index], num_moved * sizeof(struct ocfs2_refcount_rec)); /*ok, remove the entries we just moved over to the other block. */ memset(&rl->rl_recs[split_index], 0, num_moved * sizeof(struct ocfs2_refcount_rec)); /* change old and new rl_used accordingly. */ le16_add_cpu(&rl->rl_used, -num_moved); new_rl->rl_used = cpu_to_le16(num_moved); sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), cmp_refcount_rec_by_cpos, NULL); sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), sizeof(struct ocfs2_refcount_rec), cmp_refcount_rec_by_cpos, NULL); *split_cpos = cpos; return 0; } static int ocfs2_new_leaf_refcount_block(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_alloc_context *meta_ac) { int ret; u16 suballoc_bit_start; u32 num_got, new_cpos; u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *root_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; struct buffer_head *new_bh = NULL; struct ocfs2_refcount_block *new_rb; struct ocfs2_extent_tree ref_et; BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)); ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { mlog_errno(ret); goto out; } new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { ret = -ENOMEM; mlog_errno(ret); goto out; } ocfs2_set_new_buffer_uptodate(ci, new_bh); ret = ocfs2_journal_access_rb(handle, ci, new_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out; } /* Initialize ocfs2_refcount_block. */ new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(new_rb, 0, sb->s_blocksize); strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); new_rb->rf_blkno = cpu_to_le64(blkno); new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); new_rb->rf_records.rl_count = cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); new_rb->rf_generation = root_rb->rf_generation; ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos); if (ret) { mlog_errno(ret); goto out; } ocfs2_journal_dirty(handle, ref_leaf_bh); ocfs2_journal_dirty(handle, new_bh); ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh); trace_ocfs2_new_leaf_refcount_block( (unsigned long long)new_bh->b_blocknr, new_cpos); /* Insert the new leaf block with the specific offset cpos. 
*/ ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr, 1, 0, meta_ac); if (ret) mlog_errno(ret); out: brelse(new_bh); return ret; } static int ocfs2_expand_refcount_tree(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_alloc_context *meta_ac) { int ret; struct buffer_head *expand_bh = NULL; if (ref_root_bh == ref_leaf_bh) { /* * the old root bh hasn't been expanded to a b-tree, * so expand it first. */ ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh, &expand_bh, meta_ac); if (ret) { mlog_errno(ret); goto out; } } else { expand_bh = ref_leaf_bh; get_bh(expand_bh); } /* Now add a new refcount block into the tree.*/ ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh, expand_bh, meta_ac); if (ret) mlog_errno(ret); out: brelse(expand_bh); return ret; } /* * Adjust the extent rec in b-tree representing ref_leaf_bh. * * Only called when we have inserted a new refcount rec at index 0 * which means ocfs2_extent_rec.e_cpos may need some change. */ static int ocfs2_adjust_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *rec) { int ret = 0, i; u32 new_cpos, old_cpos; struct ocfs2_path *path = NULL; struct ocfs2_extent_tree et; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; struct ocfs2_extent_list *el; if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) goto out; rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; old_cpos = le32_to_cpu(rb->rf_cpos); new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK; if (old_cpos <= new_cpos) goto out; ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); path = ocfs2_new_path_from_et(&et); if (!path) { ret = -ENOMEM; mlog_errno(ret); goto out; } ret = ocfs2_find_path(ci, path, old_cpos); if (ret) { mlog_errno(ret); goto out; } /* * 2 more credits, one for the leaf refcount block, one for * the extent block contains the extent rec. */ ret = ocfs2_extend_trans(handle, 2); if (ret < 0) { mlog_errno(ret); goto out; } ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; } ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path), OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; } /* change the leaf extent block first. */ el = path_leaf_el(path); for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos) break; BUG_ON(i == le16_to_cpu(el->l_next_free_rec)); el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); /* change the r_cpos in the leaf block. 
*/ rb->rf_cpos = cpu_to_le32(new_cpos); ocfs2_journal_dirty(handle, path_leaf_bh(path)); ocfs2_journal_dirty(handle, ref_leaf_bh); out: ocfs2_free_path(path); return ret; } static int ocfs2_insert_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *rec, int index, int merge, struct ocfs2_alloc_context *meta_ac) { int ret; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_list *rf_list = &rb->rf_records; struct buffer_head *new_bh = NULL; BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); if (rf_list->rl_used == rf_list->rl_count) { u64 cpos = le64_to_cpu(rec->r_cpos); u32 len = le32_to_cpu(rec->r_clusters); ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, ref_leaf_bh, meta_ac); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, len, NULL, &index, &new_bh); if (ret) { mlog_errno(ret); goto out; } ref_leaf_bh = new_bh; rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; rf_list = &rb->rf_records; } ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } if (index < le16_to_cpu(rf_list->rl_used)) memmove(&rf_list->rl_recs[index + 1], &rf_list->rl_recs[index], (le16_to_cpu(rf_list->rl_used) - index) * sizeof(struct ocfs2_refcount_rec)); trace_ocfs2_insert_refcount_rec( (unsigned long long)ref_leaf_bh->b_blocknr, index, (unsigned long long)le64_to_cpu(rec->r_cpos), le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount)); rf_list->rl_recs[index] = *rec; le16_add_cpu(&rf_list->rl_used, 1); if (merge) ocfs2_refcount_rec_merge(rb, index); ocfs2_journal_dirty(handle, ref_leaf_bh); if (index == 0) { ret = ocfs2_adjust_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, rec); if (ret) mlog_errno(ret); } out: brelse(new_bh); return ret; } /* * Split the refcount_rec indexed by "index" in ref_leaf_bh. * This is much simple than our b-tree code. * split_rec is the new refcount rec we want to insert. * If split_rec->r_refcount > 0, we are changing the refcount(in case we * increase refcount or decrease a refcount to non-zero). * If split_rec->r_refcount == 0, we are punching a hole in current refcount * rec( in case we decrease a refcount to zero). */ static int ocfs2_split_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *split_rec, int index, int merge, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret, recs_need; u32 len; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_list *rf_list = &rb->rf_records; struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; struct ocfs2_refcount_rec *tail_rec = NULL; struct buffer_head *new_bh = NULL; BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); trace_ocfs2_split_refcount_rec(le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), le32_to_cpu(orig_rec->r_refcount), le64_to_cpu(split_rec->r_cpos), le32_to_cpu(split_rec->r_clusters), le32_to_cpu(split_rec->r_refcount)); /* * If we just need to split the header or tail clusters, * no more recs are needed, just split is OK. * Otherwise we at least need one new recs. 
*/ if (!split_rec->r_refcount && (split_rec->r_cpos == orig_rec->r_cpos || le64_to_cpu(split_rec->r_cpos) + le32_to_cpu(split_rec->r_clusters) == le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) recs_need = 0; else recs_need = 1; /* * We need one more rec if we split in the middle and the new rec have * some refcount in it. */ if (split_rec->r_refcount && (split_rec->r_cpos != orig_rec->r_cpos && le64_to_cpu(split_rec->r_cpos) + le32_to_cpu(split_rec->r_clusters) != le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) recs_need++; /* If the leaf block don't have enough record, expand it. */ if (le16_to_cpu(rf_list->rl_used) + recs_need > le16_to_cpu(rf_list->rl_count)) { struct ocfs2_refcount_rec tmp_rec; u64 cpos = le64_to_cpu(orig_rec->r_cpos); len = le32_to_cpu(orig_rec->r_clusters); ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, ref_leaf_bh, meta_ac); if (ret) { mlog_errno(ret); goto out; } /* * We have to re-get it since now cpos may be moved to * another leaf block. */ ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, len, &tmp_rec, &index, &new_bh); if (ret) { mlog_errno(ret); goto out; } ref_leaf_bh = new_bh; rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; rf_list = &rb->rf_records; orig_rec = &rf_list->rl_recs[index]; } ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } /* * We have calculated out how many new records we need and store * in recs_need, so spare enough space first by moving the records * after "index" to the end. */ if (index != le16_to_cpu(rf_list->rl_used) - 1) memmove(&rf_list->rl_recs[index + 1 + recs_need], &rf_list->rl_recs[index + 1], (le16_to_cpu(rf_list->rl_used) - index - 1) * sizeof(struct ocfs2_refcount_rec)); len = (le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)) - (le64_to_cpu(split_rec->r_cpos) + le32_to_cpu(split_rec->r_clusters)); /* * If we have "len", the we will split in the tail and move it * to the end of the space we have just spared. */ if (len) { tail_rec = &rf_list->rl_recs[index + recs_need]; memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); le64_add_cpu(&tail_rec->r_cpos, le32_to_cpu(tail_rec->r_clusters) - len); tail_rec->r_clusters = cpu_to_le32(len); } /* * If the split pos isn't the same as the original one, we need to * split in the head. * * Note: We have the chance that split_rec.r_refcount = 0, * recs_need = 0 and len > 0, which means we just cut the head from * the orig_rec and in that case we have done some modification in * orig_rec above, so the check for r_cpos is faked. 
*/ if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) { len = le64_to_cpu(split_rec->r_cpos) - le64_to_cpu(orig_rec->r_cpos); orig_rec->r_clusters = cpu_to_le32(len); index++; } le16_add_cpu(&rf_list->rl_used, recs_need); if (split_rec->r_refcount) { rf_list->rl_recs[index] = *split_rec; trace_ocfs2_split_refcount_rec_insert( (unsigned long long)ref_leaf_bh->b_blocknr, index, (unsigned long long)le64_to_cpu(split_rec->r_cpos), le32_to_cpu(split_rec->r_clusters), le32_to_cpu(split_rec->r_refcount)); if (merge) ocfs2_refcount_rec_merge(rb, index); } ocfs2_journal_dirty(handle, ref_leaf_bh); out: brelse(new_bh); return ret; } static int __ocfs2_increase_refcount(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, u64 cpos, u32 len, int merge, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret = 0, index; struct buffer_head *ref_leaf_bh = NULL; struct ocfs2_refcount_rec rec; unsigned int set_len = 0; trace_ocfs2_increase_refcount_begin( (unsigned long long)ocfs2_metadata_cache_owner(ci), (unsigned long long)cpos, len); while (len) { ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, len, &rec, &index, &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out; } set_len = le32_to_cpu(rec.r_clusters); /* * Here we may meet with 3 situations: * * 1. If we find an already existing record, and the length * is the same, cool, we just need to increase the r_refcount * and it is OK. * 2. If we find a hole, just insert it with r_refcount = 1. * 3. If we are in the middle of one extent record, split * it. */ if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos && set_len <= len) { trace_ocfs2_increase_refcount_change( (unsigned long long)cpos, set_len, le32_to_cpu(rec.r_refcount)); ret = ocfs2_change_refcount_rec(handle, ci, ref_leaf_bh, index, merge, 1); if (ret) { mlog_errno(ret); goto out; } } else if (!rec.r_refcount) { rec.r_refcount = cpu_to_le32(1); trace_ocfs2_increase_refcount_insert( (unsigned long long)le64_to_cpu(rec.r_cpos), set_len); ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, &rec, index, merge, meta_ac); if (ret) { mlog_errno(ret); goto out; } } else { set_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) + set_len) - cpos; rec.r_cpos = cpu_to_le64(cpos); rec.r_clusters = cpu_to_le32(set_len); le32_add_cpu(&rec.r_refcount, 1); trace_ocfs2_increase_refcount_split( (unsigned long long)le64_to_cpu(rec.r_cpos), set_len, le32_to_cpu(rec.r_refcount)); ret = ocfs2_split_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, &rec, index, merge, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto out; } } cpos += set_len; len -= set_len; brelse(ref_leaf_bh); ref_leaf_bh = NULL; } out: brelse(ref_leaf_bh); return ret; } static int ocfs2_remove_refcount_extent(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_extent_tree et; BUG_ON(rb->rf_records.rl_used); trace_ocfs2_remove_refcount_extent( (unsigned long long)ocfs2_metadata_cache_owner(ci), (unsigned long long)ref_leaf_bh->b_blocknr, le32_to_cpu(rb->rf_cpos)); ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos), 1, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto 
out; } ocfs2_remove_from_cache(ci, ref_leaf_bh); /* * add the freed block to the dealloc so that it will be freed * when we run dealloc. */ ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(rb->rf_suballoc_slot), le64_to_cpu(rb->rf_suballoc_loc), le64_to_cpu(rb->rf_blkno), le16_to_cpu(rb->rf_suballoc_bit)); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; le32_add_cpu(&rb->rf_clusters, -1); /* * check whether we need to restore the root refcount block if * there is no leaf extent block at atll. */ if (!rb->rf_list.l_next_free_rec) { BUG_ON(rb->rf_clusters); trace_ocfs2_restore_refcount_block( (unsigned long long)ref_root_bh->b_blocknr); rb->rf_flags = 0; rb->rf_parent = 0; rb->rf_cpos = 0; memset(&rb->rf_records, 0, sb->s_blocksize - offsetof(struct ocfs2_refcount_block, rf_records)); rb->rf_records.rl_count = cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); } ocfs2_journal_dirty(handle, ref_root_bh); out: return ret; } int ocfs2_increase_refcount(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, u64 cpos, u32 len, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { return __ocfs2_increase_refcount(handle, ci, ref_root_bh, cpos, len, 1, meta_ac, dealloc); } static int ocfs2_decrease_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, int index, u64 cpos, unsigned int len, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret; struct ocfs2_refcount_block *rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index]; BUG_ON(cpos < le64_to_cpu(rec->r_cpos)); BUG_ON(cpos + len > le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters)); trace_ocfs2_decrease_refcount_rec( (unsigned long long)ocfs2_metadata_cache_owner(ci), (unsigned long long)cpos, len); if (cpos == le64_to_cpu(rec->r_cpos) && len == le32_to_cpu(rec->r_clusters)) ret = ocfs2_change_refcount_rec(handle, ci, ref_leaf_bh, index, 1, -1); else { struct ocfs2_refcount_rec split = *rec; split.r_cpos = cpu_to_le64(cpos); split.r_clusters = cpu_to_le32(len); le32_add_cpu(&split.r_refcount, -1); ret = ocfs2_split_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, &split, index, 1, meta_ac, dealloc); } if (ret) { mlog_errno(ret); goto out; } /* Remove the leaf refcount block if it contains no refcount record. 
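 * Only a leaf block that is separate from the root can be removed here; if
 * the records still live in the root block, the emptied list simply stays
 * in place.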
*/ if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) { ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh, ref_leaf_bh, meta_ac, dealloc); if (ret) mlog_errno(ret); } out: return ret; } static int __ocfs2_decrease_refcount(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, u64 cpos, u32 len, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc, int delete) { int ret = 0, index = 0; struct ocfs2_refcount_rec rec; unsigned int r_count = 0, r_len; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct buffer_head *ref_leaf_bh = NULL; trace_ocfs2_decrease_refcount( (unsigned long long)ocfs2_metadata_cache_owner(ci), (unsigned long long)cpos, len, delete); while (len) { ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, len, &rec, &index, &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out; } r_count = le32_to_cpu(rec.r_refcount); BUG_ON(r_count == 0); if (!delete) BUG_ON(r_count > 1); r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) - cpos; ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, index, cpos, r_len, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto out; } if (le32_to_cpu(rec.r_refcount) == 1 && delete) { ret = ocfs2_cache_cluster_dealloc(dealloc, ocfs2_clusters_to_blocks(sb, cpos), r_len); if (ret) { mlog_errno(ret); goto out; } } cpos += r_len; len -= r_len; brelse(ref_leaf_bh); ref_leaf_bh = NULL; } out: brelse(ref_leaf_bh); return ret; } /* Caller must hold refcount tree lock. */ int ocfs2_decrease_refcount(struct inode *inode, handle_t *handle, u32 cpos, u32 len, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc, int delete) { int ret; u64 ref_blkno; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_block(inode, &ref_blkno); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh, cpos, len, meta_ac, dealloc, delete); if (ret) mlog_errno(ret); out: brelse(ref_root_bh); return ret; } /* * Mark the already-existing extent at cpos as refcounted for len clusters. * This adds the refcount extent flag. * * If the existing extent is larger than the request, initiate a * split. An attempt will be made at merging with adjacent extents. * * The caller is responsible for passing down meta_ac if we'll need it. */ static int ocfs2_mark_extent_refcounted(struct inode *inode, struct ocfs2_extent_tree *et, handle_t *handle, u32 cpos, u32 len, u32 phys, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret; trace_ocfs2_mark_extent_refcounted(OCFS2_I(inode)->ip_blkno, cpos, len, phys); if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", inode->i_ino); goto out; } ret = ocfs2_change_extent_flag(handle, et, cpos, len, phys, meta_ac, dealloc, OCFS2_EXT_REFCOUNTED, 0); if (ret) mlog_errno(ret); out: return ret; } /* * Given some contiguous physical clusters, calculate what we need * for modifying their refcount. 
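 * The extra metadata blocks are returned through *meta_add and the journal
 * credits through *credits; both are added to, not overwritten.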
*/ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, u64 start_cpos, u32 clusters, int *meta_add, int *credits) { int ret = 0, index, ref_blocks = 0, recs_add = 0; u64 cpos = start_cpos; struct ocfs2_refcount_block *rb; struct ocfs2_refcount_rec rec; struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; u32 len; while (clusters) { ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, clusters, &rec, &index, &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out; } if (ref_leaf_bh != prev_bh) { /* * Now we encounter a new leaf block, so calculate * whether we need to extend the old leaf. */ if (prev_bh) { rb = (struct ocfs2_refcount_block *) prev_bh->b_data; if (le16_to_cpu(rb->rf_records.rl_used) + recs_add > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; } recs_add = 0; *credits += 1; brelse(prev_bh); prev_bh = ref_leaf_bh; get_bh(prev_bh); } trace_ocfs2_calc_refcount_meta_credits_iterate( recs_add, (unsigned long long)cpos, clusters, (unsigned long long)le64_to_cpu(rec.r_cpos), le32_to_cpu(rec.r_clusters), le32_to_cpu(rec.r_refcount), index); len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) - cpos; /* * We record all the records which will be inserted to the * same refcount block, so that we can tell exactly whether * we need a new refcount block or not. * * If we will insert a new one, this is easy and only happens * during adding refcounted flag to the extent, so we don't * have a chance of spliting. We just need one record. * * If the refcount rec already exists, that would be a little * complicated. we may have to: * 1) split at the beginning if the start pos isn't aligned. * we need 1 more record in this case. * 2) split int the end if the end pos isn't aligned. * we need 1 more record in this case. * 3) split in the middle because of file system fragmentation. * we need 2 more records in this case(we can't detect this * beforehand, so always think of the worst case). */ if (rec.r_refcount) { recs_add += 2; /* Check whether we need a split at the beginning. */ if (cpos == start_cpos && cpos != le64_to_cpu(rec.r_cpos)) recs_add++; /* Check whether we need a split in the end. */ if (cpos + clusters < le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) recs_add++; } else recs_add++; brelse(ref_leaf_bh); ref_leaf_bh = NULL; clusters -= len; cpos += len; } if (prev_bh) { rb = (struct ocfs2_refcount_block *)prev_bh->b_data; if (le16_to_cpu(rb->rf_records.rl_used) + recs_add > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; *credits += 1; } if (!ref_blocks) goto out; *meta_add += ref_blocks; *credits += ref_blocks; /* * So we may need ref_blocks to insert into the tree. * That also means we need to change the b-tree and add that number * of records since we never merge them. * We need one more block for expansion since the new created leaf * block is also full and needs split. 
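 * If the root block already holds an extent list (OCFS2_REFCOUNT_TREE_FL)
 * we charge the normal b-tree extend credits; otherwise the root must first
 * be expanded into a tree, which costs OCFS2_EXPAND_REFCOUNT_TREE_CREDITS
 * plus one extra metadata block.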
*/ rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) { struct ocfs2_extent_tree et; ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); *meta_add += ocfs2_extend_meta_needed(et.et_root_el); *credits += ocfs2_calc_extend_credits(sb, et.et_root_el); } else { *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; *meta_add += 1; } out: trace_ocfs2_calc_refcount_meta_credits( (unsigned long long)start_cpos, clusters, *meta_add, *credits); brelse(ref_leaf_bh); brelse(prev_bh); return ret; } /* * For refcount tree, we will decrease some contiguous clusters * refcount count, so just go through it to see how many blocks * we gonna touch and whether we need to create new blocks. * * Normally the refcount blocks store these refcount should be * contiguous also, so that we can get the number easily. * We will at most add split 2 refcount records and 2 more * refcount blocks, so just check it in a rough way. * * Caller must hold refcount tree lock. */ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, u64 refcount_loc, u64 phys_blkno, u32 clusters, int *credits, int *ref_blocks) { int ret; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", inode->i_ino); goto out; } BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), refcount_loc, &tree); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, &tree->rf_ci, ref_root_bh, start_cpos, clusters, ref_blocks, credits); if (ret) { mlog_errno(ret); goto out; } trace_ocfs2_prepare_refcount_change_for_del(*ref_blocks, *credits); out: brelse(ref_root_bh); return ret; } #define MAX_CONTIG_BYTES 1048576 static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb) { return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES); } static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb) { return ~(ocfs2_cow_contig_clusters(sb) - 1); } /* * Given an extent that starts at 'start' and an I/O that starts at 'cpos', * find an offset (start + (n * contig_clusters)) that is closest to cpos * while still being less than or equal to it. * * The goal is to break the extent at a multiple of contig_clusters. */ static inline unsigned int ocfs2_cow_align_start(struct super_block *sb, unsigned int start, unsigned int cpos) { BUG_ON(start > cpos); return start + ((cpos - start) & ocfs2_cow_contig_mask(sb)); } /* * Given a cluster count of len, pad it out so that it is a multiple * of contig_clusters. */ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, unsigned int len) { unsigned int padded = (len + (ocfs2_cow_contig_clusters(sb) - 1)) & ocfs2_cow_contig_mask(sb); /* Did we wrap? */ if (padded < len) padded = UINT_MAX; return padded; } /* * Calculate out the start and number of virtual clusters we need to CoW. * * cpos is vitual start cluster position we want to do CoW in a * file and write_len is the cluster length. * max_cpos is the place where we want to stop CoW intentionally. * * Normal we will start CoW from the beginning of extent record cotaining cpos. 
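 * The chosen region is returned through *cow_start and *cow_len; the scan
 * may stop short of the request if it hits max_cpos, a hole or a
 * non-refcounted extent.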
* We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we * get good I/O from the resulting extent tree. */ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, struct ocfs2_extent_list *el, u32 cpos, u32 write_len, u32 max_cpos, u32 *cow_start, u32 *cow_len) { int ret = 0; int tree_height = le16_to_cpu(el->l_tree_depth), i; struct buffer_head *eb_bh = NULL; struct ocfs2_extent_block *eb = NULL; struct ocfs2_extent_rec *rec; unsigned int want_clusters, rec_end = 0; int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb); int leaf_clusters; BUG_ON(cpos + write_len > max_cpos); if (tree_height > 0) { ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh); if (ret) { mlog_errno(ret); goto out; } eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; if (el->l_tree_depth) { ret = ocfs2_error(inode->i_sb, "Inode %lu has non zero tree depth in leaf block %llu\n", inode->i_ino, (unsigned long long)eb_bh->b_blocknr); goto out; } } *cow_len = 0; for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { rec = &el->l_recs[i]; if (ocfs2_is_empty_extent(rec)) { mlog_bug_on_msg(i != 0, "Inode %lu has empty record in " "index %d\n", inode->i_ino, i); continue; } if (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters) <= cpos) continue; if (*cow_len == 0) { /* * We should find a refcounted record in the * first pass. */ BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)); *cow_start = le32_to_cpu(rec->e_cpos); } /* * If we encounter a hole, a non-refcounted record or * pass the max_cpos, stop the search. */ if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) || (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) || (max_cpos <= le32_to_cpu(rec->e_cpos))) break; leaf_clusters = le16_to_cpu(rec->e_leaf_clusters); rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters; if (rec_end > max_cpos) { rec_end = max_cpos; leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos); } /* * How many clusters do we actually need from * this extent? First we see how many we actually * need to complete the write. If that's smaller * than contig_clusters, we try for contig_clusters. */ if (!*cow_len) want_clusters = write_len; else want_clusters = (cpos + write_len) - (*cow_start + *cow_len); if (want_clusters < contig_clusters) want_clusters = contig_clusters; /* * If the write does not cover the whole extent, we * need to calculate how we're going to split the extent. * We try to do it on contig_clusters boundaries. * * Any extent smaller than contig_clusters will be * CoWed in its entirety. */ if (leaf_clusters <= contig_clusters) *cow_len += leaf_clusters; else if (*cow_len || (*cow_start == cpos)) { /* * This extent needs to be CoW'd from its * beginning, so all we have to do is compute * how many clusters to grab. We align * want_clusters to the edge of contig_clusters * to get better I/O. */ want_clusters = ocfs2_cow_align_length(inode->i_sb, want_clusters); if (leaf_clusters < want_clusters) *cow_len += leaf_clusters; else *cow_len += want_clusters; } else if ((*cow_start + contig_clusters) >= (cpos + write_len)) { /* * Breaking off contig_clusters at the front * of the extent will cover our write. That's * easy. */ *cow_len = contig_clusters; } else if ((rec_end - cpos) <= contig_clusters) { /* * Breaking off contig_clusters at the tail of * this extent will cover cpos. 
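 * (Since rec_end - cpos <= contig_clusters here, the last contig_clusters
 * of the extent necessarily reach back to cpos.)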
*/ *cow_start = rec_end - contig_clusters; *cow_len = contig_clusters; } else if ((rec_end - cpos) <= want_clusters) { /* * While we can't fit the entire write in this * extent, we know that the write goes from cpos * to the end of the extent. Break that off. * We try to break it at some multiple of * contig_clusters from the front of the extent. * Failing that (ie, cpos is within * contig_clusters of the front), we'll CoW the * entire extent. */ *cow_start = ocfs2_cow_align_start(inode->i_sb, *cow_start, cpos); *cow_len = rec_end - *cow_start; } else { /* * Ok, the entire write lives in the middle of * this extent. Let's try to slice the extent up * nicely. Optimally, our CoW region starts at * m*contig_clusters from the beginning of the * extent and goes for n*contig_clusters, * covering the entire write. */ *cow_start = ocfs2_cow_align_start(inode->i_sb, *cow_start, cpos); want_clusters = (cpos + write_len) - *cow_start; want_clusters = ocfs2_cow_align_length(inode->i_sb, want_clusters); if (*cow_start + want_clusters <= rec_end) *cow_len = want_clusters; else *cow_len = rec_end - *cow_start; } /* Have we covered our entire write yet? */ if ((*cow_start + *cow_len) >= (cpos + write_len)) break; /* * If we reach the end of the extent block and don't get enough * clusters, continue with the next extent block if possible. */ if (i + 1 == le16_to_cpu(el->l_next_free_rec) && eb && eb->h_next_leaf_blk) { brelse(eb_bh); eb_bh = NULL; ret = ocfs2_read_extent_block(INODE_CACHE(inode), le64_to_cpu(eb->h_next_leaf_blk), &eb_bh); if (ret) { mlog_errno(ret); goto out; } eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; i = -1; } } out: brelse(eb_bh); return ret; } /* * Prepare meta_ac, data_ac and calculate credits when we want to add some * num_clusters in data_tree "et" and change the refcount for the old * clusters(starting form p_cluster) in the refcount tree. * * Note: * 1. since we may split the old tree, so we at most will need num_clusters + 2 * more new leaf records. * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so * just give data_ac = NULL. 
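 * 3. If any step fails, the metadata allocation context reserved here is
 * freed again before returning.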
*/ static int ocfs2_lock_refcount_allocators(struct super_block *sb, u32 p_cluster, u32 num_clusters, struct ocfs2_extent_tree *et, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_alloc_context **meta_ac, struct ocfs2_alloc_context **data_ac, int *credits) { int ret = 0, meta_add = 0; int num_free_extents = ocfs2_num_free_extents(et); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); goto out; } if (num_free_extents < num_clusters + 2) meta_add = ocfs2_extend_meta_needed(et->et_root_el); *credits += ocfs2_calc_extend_credits(sb, et->et_root_el); ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh, p_cluster, num_clusters, &meta_add, credits); if (ret) { mlog_errno(ret); goto out; } trace_ocfs2_lock_refcount_allocators(meta_add, *credits); ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add, meta_ac); if (ret) { mlog_errno(ret); goto out; } if (data_ac) { ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters, data_ac); if (ret) mlog_errno(ret); } out: if (ret) { if (*meta_ac) { ocfs2_free_alloc_context(*meta_ac); *meta_ac = NULL; } } return ret; } static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) { BUG_ON(buffer_dirty(bh)); clear_buffer_mapped(bh); return 0; } int ocfs2_duplicate_clusters_by_page(handle_t *handle, struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len) { int ret = 0, partial; struct super_block *sb = inode->i_sb; u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; pgoff_t page_index; unsigned int from, to; loff_t offset, end, map_end; struct address_space *mapping = inode->i_mapping; trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, new_cluster, new_len); offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); /* * We only duplicate pages until we reach the page contains i_size - 1. * So trim 'end' to i_size. */ if (end > i_size_read(inode)) end = i_size_read(inode); while (offset < end) { page_index = offset >> PAGE_SHIFT; map_end = ((loff_t)page_index + 1) << PAGE_SHIFT; if (map_end > end) map_end = end; /* from, to is the offset within the page. */ from = offset & (PAGE_SIZE - 1); to = PAGE_SIZE; if (map_end & (PAGE_SIZE - 1)) to = map_end & (PAGE_SIZE - 1); retry: page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { ret = -ENOMEM; mlog_errno(ret); break; } /* * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty * page, so write it back. 
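 * (If a dirty page is found anyway, it is unlocked and released, the range
 * is flushed via filemap_write_and_wait_range() and the lookup is retried.)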
*/ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { if (PageDirty(page)) { unlock_page(page); put_page(page); ret = filemap_write_and_wait_range(mapping, offset, map_end - 1); goto retry; } } if (!PageUptodate(page)) { struct folio *folio = page_folio(page); ret = block_read_full_folio(folio, ocfs2_get_block); if (ret) { mlog_errno(ret); goto unlock; } folio_lock(folio); } if (page_has_buffers(page)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, &partial, ocfs2_clear_cow_buffer); if (ret) { mlog_errno(ret); goto unlock; } } ocfs2_map_and_dirty_page(inode, handle, from, to, page, 0, &new_block); mark_page_accessed(page); unlock: unlock_page(page); put_page(page); page = NULL; offset = map_end; if (ret) break; } return ret; } int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len) { int ret = 0; struct super_block *sb = inode->i_sb; struct ocfs2_caching_info *ci = INODE_CACHE(inode); int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *old_bh = NULL; struct buffer_head *new_bh = NULL; trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, new_cluster, new_len); for (i = 0; i < blocks; i++, old_block++, new_block++) { new_bh = sb_getblk(osb->sb, new_block); if (new_bh == NULL) { ret = -ENOMEM; mlog_errno(ret); break; } ocfs2_set_new_buffer_uptodate(ci, new_bh); ret = ocfs2_read_block(ci, old_block, &old_bh, NULL); if (ret) { mlog_errno(ret); break; } ret = ocfs2_journal_access(handle, ci, new_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); break; } memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); ocfs2_journal_dirty(handle, new_bh); brelse(new_bh); brelse(old_bh); new_bh = NULL; old_bh = NULL; } brelse(new_bh); brelse(old_bh); return ret; } static int ocfs2_clear_ext_refcount(handle_t *handle, struct ocfs2_extent_tree *et, u32 cpos, u32 p_cluster, u32 len, unsigned int ext_flags, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret, index; struct ocfs2_extent_rec replace_rec; struct ocfs2_path *path = NULL; struct ocfs2_extent_list *el; struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); u64 ino = ocfs2_metadata_cache_owner(et->et_ci); trace_ocfs2_clear_ext_refcount((unsigned long long)ino, cpos, len, p_cluster, ext_flags); memset(&replace_rec, 0, sizeof(replace_rec)); replace_rec.e_cpos = cpu_to_le32(cpos); replace_rec.e_leaf_clusters = cpu_to_le16(len); replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb, p_cluster)); replace_rec.e_flags = ext_flags; replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED; path = ocfs2_new_path_from_et(et); if (!path) { ret = -ENOMEM; mlog_errno(ret); goto out; } ret = ocfs2_find_path(et->et_ci, path, cpos); if (ret) { mlog_errno(ret); goto out; } el = path_leaf_el(path); index = ocfs2_search_extent_list(el, cpos); if (index == -1) { ret = ocfs2_error(sb, "Inode %llu has an extent at cpos %u which can no longer be found\n", (unsigned long long)ino, cpos); goto out; } ret = ocfs2_split_extent(handle, et, path, index, &replace_rec, meta_ac, dealloc); if (ret) mlog_errno(ret); out: ocfs2_free_path(path); return ret; } static int ocfs2_replace_clusters(handle_t *handle, struct ocfs2_cow_context *context, u32 cpos, u32 old, u32 new, u32 len, unsigned int ext_flags) { int ret; struct ocfs2_caching_info *ci = 
context->data_et.et_ci; u64 ino = ocfs2_metadata_cache_owner(ci); trace_ocfs2_replace_clusters((unsigned long long)ino, cpos, old, new, len, ext_flags); /*If the old clusters is unwritten, no need to duplicate. */ if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { ret = context->cow_duplicate_clusters(handle, context->inode, cpos, old, new, len); if (ret) { mlog_errno(ret); goto out; } } ret = ocfs2_clear_ext_refcount(handle, &context->data_et, cpos, new, len, ext_flags, context->meta_ac, &context->dealloc); if (ret) mlog_errno(ret); out: return ret; } int ocfs2_cow_sync_writeback(struct super_block *sb, struct inode *inode, u32 cpos, u32 num_clusters) { int ret; loff_t start, end; if (ocfs2_should_order_data(inode)) return 0; start = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = start + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits) - 1; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) mlog_errno(ret); return ret; } static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, unsigned int *extent_flags) { return ocfs2_get_clusters(context->inode, v_cluster, p_cluster, num_clusters, extent_flags); } static int ocfs2_make_clusters_writable(struct super_block *sb, struct ocfs2_cow_context *context, u32 cpos, u32 p_cluster, u32 num_clusters, unsigned int e_flags) { int ret, delete, index, credits = 0; u32 new_bit, new_len, orig_num_clusters; unsigned int set_len; struct ocfs2_super *osb = OCFS2_SB(sb); handle_t *handle; struct buffer_head *ref_leaf_bh = NULL; struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci; struct ocfs2_refcount_rec rec; trace_ocfs2_make_clusters_writable(cpos, p_cluster, num_clusters, e_flags); ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, &context->data_et, ref_ci, context->ref_root_bh, &context->meta_ac, &context->data_ac, &credits); if (ret) { mlog_errno(ret); return ret; } if (context->post_refcount) credits += context->post_refcount->credits; credits += context->extra_credits; handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } orig_num_clusters = num_clusters; while (num_clusters) { ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, p_cluster, num_clusters, &rec, &index, &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out_commit; } BUG_ON(!rec.r_refcount); set_len = min((u64)p_cluster + num_clusters, le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) - p_cluster; /* * There are many different situation here. * 1. If refcount == 1, remove the flag and don't COW. * 2. If refcount > 1, allocate clusters. * Here we may not allocate r_len once at a time, so continue * until we reach num_clusters. 
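 * In case 1 the existing clusters are reused in place (no copy); in case 2
 * new clusters are claimed, the data is duplicated into them and the
 * refcount on the old clusters is dropped afterwards.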
*/ if (le32_to_cpu(rec.r_refcount) == 1) { delete = 0; ret = ocfs2_clear_ext_refcount(handle, &context->data_et, cpos, p_cluster, set_len, e_flags, context->meta_ac, &context->dealloc); if (ret) { mlog_errno(ret); goto out_commit; } } else { delete = 1; ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, set_len, &new_bit, &new_len); if (ret) { mlog_errno(ret); goto out_commit; } ret = ocfs2_replace_clusters(handle, context, cpos, p_cluster, new_bit, new_len, e_flags); if (ret) { mlog_errno(ret); goto out_commit; } set_len = new_len; } ret = __ocfs2_decrease_refcount(handle, ref_ci, context->ref_root_bh, p_cluster, set_len, context->meta_ac, &context->dealloc, delete); if (ret) { mlog_errno(ret); goto out_commit; } cpos += set_len; p_cluster += set_len; num_clusters -= set_len; brelse(ref_leaf_bh); ref_leaf_bh = NULL; } /* handle any post_cow action. */ if (context->post_refcount && context->post_refcount->func) { ret = context->post_refcount->func(context->inode, handle, context->post_refcount->para); if (ret) { mlog_errno(ret); goto out_commit; } } /* * Here we should write the new page out first if we are * in write-back mode. */ if (context->get_clusters == ocfs2_di_get_clusters) { ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos, orig_num_clusters); if (ret) mlog_errno(ret); } out_commit: ocfs2_commit_trans(osb, handle); out: if (context->data_ac) { ocfs2_free_alloc_context(context->data_ac); context->data_ac = NULL; } if (context->meta_ac) { ocfs2_free_alloc_context(context->meta_ac); context->meta_ac = NULL; } brelse(ref_leaf_bh); return ret; } static int ocfs2_replace_cow(struct ocfs2_cow_context *context) { int ret = 0; struct inode *inode = context->inode; u32 cow_start = context->cow_start, cow_len = context->cow_len; u32 p_cluster, num_clusters; unsigned int ext_flags; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (!ocfs2_refcount_tree(osb)) { return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", inode->i_ino); } ocfs2_init_dealloc_ctxt(&context->dealloc); while (cow_len) { ret = context->get_clusters(context, cow_start, &p_cluster, &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); break; } BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED)); if (cow_len < num_clusters) num_clusters = cow_len; ret = ocfs2_make_clusters_writable(inode->i_sb, context, cow_start, p_cluster, num_clusters, ext_flags); if (ret) { mlog_errno(ret); break; } cow_len -= num_clusters; cow_start += num_clusters; } if (ocfs2_dealloc_has_cluster(&context->dealloc)) { ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &context->dealloc); } return ret; } /* * Starting at cpos, try to CoW write_len clusters. Don't CoW * past max_cpos. This will stop when it runs into a hole or an * unrefcounted extent. 
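 * The refcount tree lock is taken and released inside this function.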
*/ static int ocfs2_refcount_cow_hunk(struct inode *inode, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { int ret; u32 cow_start = 0, cow_len = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; struct ocfs2_cow_context *context = NULL; BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list, cpos, write_len, max_cpos, &cow_start, &cow_len); if (ret) { mlog_errno(ret); goto out; } trace_ocfs2_refcount_cow_hunk(OCFS2_I(inode)->ip_blkno, cpos, write_len, max_cpos, cow_start, cow_len); BUG_ON(cow_len == 0); context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); if (!context) { ret = -ENOMEM; mlog_errno(ret); goto out; } ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } context->inode = inode; context->cow_start = cow_start; context->cow_len = cow_len; context->ref_tree = ref_tree; context->ref_root_bh = ref_root_bh; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; context->get_clusters = ocfs2_di_get_clusters; ocfs2_init_dinode_extent_tree(&context->data_et, INODE_CACHE(inode), di_bh); ret = ocfs2_replace_cow(context); if (ret) mlog_errno(ret); /* * truncate the extent map here since no matter whether we meet with * any error during the action, we shouldn't trust cached extent map * any more. */ ocfs2_extent_map_trunc(inode, cow_start); ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); out: kfree(context); return ret; } /* * CoW any and all clusters between cpos and cpos+write_len. * Don't CoW past max_cpos. If this returns successfully, all * clusters between cpos and cpos+write_len are safe to modify. */ int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { int ret = 0; u32 p_cluster, num_clusters; unsigned int ext_flags; while (write_len) { ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); break; } if (write_len < num_clusters) num_clusters = write_len; if (ext_flags & OCFS2_EXT_REFCOUNTED) { ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, num_clusters, max_cpos); if (ret) { mlog_errno(ret); break; } } write_len -= num_clusters; cpos += num_clusters; } return ret; } static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, unsigned int *extent_flags) { struct inode *inode = context->inode; struct ocfs2_xattr_value_root *xv = context->cow_object; return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster, num_clusters, &xv->xr_list, extent_flags); } /* * Given a xattr value root, calculate the most meta/credits we need for * refcount tree change if we truncate it to 0. 
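 * This is a deliberate worst-case estimate: every cluster of the value tree
 * is assumed to land in the same refcount block and to need a record split.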
*/ int ocfs2_refcounted_xattr_delete_need(struct inode *inode, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_xattr_value_root *xv, int *meta_add, int *credits) { int ret = 0, index, ref_blocks = 0; u32 p_cluster, num_clusters; u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters); struct ocfs2_refcount_block *rb; struct ocfs2_refcount_rec rec; struct buffer_head *ref_leaf_bh = NULL; while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, &xv->xr_list, NULL); if (ret) { mlog_errno(ret); goto out; } cpos += num_clusters; while (num_clusters) { ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh, p_cluster, num_clusters, &rec, &index, &ref_leaf_bh); if (ret) { mlog_errno(ret); goto out; } BUG_ON(!rec.r_refcount); rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; /* * We really don't know whether the other clusters is in * this refcount block or not, so just take the worst * case that all the clusters are in this block and each * one will split a refcount rec, so totally we need * clusters * 2 new refcount rec. */ if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; *credits += 1; brelse(ref_leaf_bh); ref_leaf_bh = NULL; if (num_clusters <= le32_to_cpu(rec.r_clusters)) break; else num_clusters -= le32_to_cpu(rec.r_clusters); p_cluster += num_clusters; } } *meta_add += ref_blocks; if (!ref_blocks) goto out; rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; else { struct ocfs2_extent_tree et; ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); *credits += ocfs2_calc_extend_credits(inode->i_sb, et.et_root_el); } out: brelse(ref_leaf_bh); return ret; } /* * Do CoW for xattr. */ int ocfs2_refcount_cow_xattr(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_xattr_value_buf *vb, struct ocfs2_refcount_tree *ref_tree, struct buffer_head *ref_root_bh, u32 cpos, u32 write_len, struct ocfs2_post_refcount *post) { int ret; struct ocfs2_xattr_value_root *xv = vb->vb_xv; struct ocfs2_cow_context *context = NULL; u32 cow_start, cow_len; BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, cpos, write_len, UINT_MAX, &cow_start, &cow_len); if (ret) { mlog_errno(ret); goto out; } BUG_ON(cow_len == 0); context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); if (!context) { ret = -ENOMEM; mlog_errno(ret); goto out; } context->inode = inode; context->cow_start = cow_start; context->cow_len = cow_len; context->ref_tree = ref_tree; context->ref_root_bh = ref_root_bh; context->cow_object = xv; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; /* We need the extra credits for duplicate_clusters by jbd. */ context->extra_credits = ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len; context->get_clusters = ocfs2_xattr_value_get_clusters; context->post_refcount = post; ocfs2_init_xattr_value_extent_tree(&context->data_et, INODE_CACHE(inode), vb); ret = ocfs2_replace_cow(context); if (ret) mlog_errno(ret); out: kfree(context); return ret; } /* * Insert a new extent into refcount tree and mark a extent rec * as refcounted in the dinode tree. 
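 * If the caller supplies a post hook, it runs inside the same transaction
 * and its credits are added to the reservation.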
*/ int ocfs2_add_refcount_flag(struct inode *inode, struct ocfs2_extent_tree *data_et, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, u32 cpos, u32 p_cluster, u32 num_clusters, struct ocfs2_cached_dealloc_ctxt *dealloc, struct ocfs2_post_refcount *post) { int ret; handle_t *handle; int credits = 1, ref_blocks = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_alloc_context *meta_ac = NULL; /* We need to be able to handle at least an extent tree split. */ ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el); ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, ref_ci, ref_root_bh, p_cluster, num_clusters, &ref_blocks, &credits); if (ret) { mlog_errno(ret); goto out; } trace_ocfs2_add_refcount_flag(ref_blocks, credits); if (ref_blocks) { ret = ocfs2_reserve_new_metadata_blocks(osb, ref_blocks, &meta_ac); if (ret) { mlog_errno(ret); goto out; } } if (post) credits += post->credits; handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_mark_extent_refcounted(inode, data_et, handle, cpos, num_clusters, p_cluster, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto out_commit; } ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, p_cluster, num_clusters, 0, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto out_commit; } if (post && post->func) { ret = post->func(inode, handle, post->para); if (ret) mlog_errno(ret); } out_commit: ocfs2_commit_trans(osb, handle); out: if (meta_ac) ocfs2_free_alloc_context(meta_ac); return ret; } static int ocfs2_change_ctime(struct inode *inode, struct buffer_head *di_bh) { int ret; handle_t *handle; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } inode_set_ctime_current(inode); di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ocfs2_journal_dirty(handle, di_bh); out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } static int ocfs2_attach_refcount_tree(struct inode *inode, struct buffer_head *di_bh) { int ret, data_changed = 0; struct buffer_head *ref_root_bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_refcount_tree *ref_tree; unsigned int ext_flags; loff_t size; u32 cpos, num_clusters, clusters, p_cluster; struct ocfs2_cached_dealloc_ctxt dealloc; struct ocfs2_extent_tree di_et; ocfs2_init_dealloc_ctxt(&dealloc); if (!ocfs2_is_refcount_inode(inode)) { ret = ocfs2_create_refcount_tree(inode, di_bh); if (ret) { mlog_errno(ret); goto out; } } BUG_ON(!di->i_refcount_loc); ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) goto attach_xattr; ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh); size = i_size_read(inode); clusters = ocfs2_clusters_for_bytes(inode->i_sb, size); cpos = 0; while (cpos < clusters) { ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); goto unlock; } if (p_cluster && !(ext_flags & 
OCFS2_EXT_REFCOUNTED)) { ret = ocfs2_add_refcount_flag(inode, &di_et, &ref_tree->rf_ci, ref_root_bh, cpos, p_cluster, num_clusters, &dealloc, NULL); if (ret) { mlog_errno(ret); goto unlock; } data_changed = 1; } cpos += num_clusters; } attach_xattr: if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh, &ref_tree->rf_ci, ref_root_bh, &dealloc); if (ret) { mlog_errno(ret); goto unlock; } } if (data_changed) { ret = ocfs2_change_ctime(inode, di_bh); if (ret) mlog_errno(ret); } unlock: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) { ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); } out: /* * Empty the extent map so that we may get the right extent * record from the disk. */ ocfs2_extent_map_trunc(inode, 0); return ret; } static int ocfs2_add_refcounted_extent(struct inode *inode, struct ocfs2_extent_tree *et, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, u32 cpos, u32 p_cluster, u32 num_clusters, unsigned int ext_flags, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret; handle_t *handle; int credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_alloc_context *meta_ac = NULL; ret = ocfs2_lock_refcount_allocators(inode->i_sb, p_cluster, num_clusters, et, ref_ci, ref_root_bh, &meta_ac, NULL, &credits); if (ret) { mlog_errno(ret); goto out; } handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_insert_extent(handle, et, cpos, ocfs2_clusters_to_blocks(inode->i_sb, p_cluster), num_clusters, ext_flags, meta_ac); if (ret) { mlog_errno(ret); goto out_commit; } ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, p_cluster, num_clusters, meta_ac, dealloc); if (ret) { mlog_errno(ret); goto out_commit; } ret = dquot_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, num_clusters)); if (ret) mlog_errno(ret); out_commit: ocfs2_commit_trans(osb, handle); out: if (meta_ac) ocfs2_free_alloc_context(meta_ac); return ret; } static int ocfs2_duplicate_inline_data(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, struct buffer_head *t_bh) { int ret; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; struct ocfs2_dinode *t_di = (struct ocfs2_dinode *)t_bh->b_data; BUG_ON(!(OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } t_di->id2.i_data.id_count = s_di->id2.i_data.id_count; memcpy(t_di->id2.i_data.id_data, s_di->id2.i_data.id_data, le16_to_cpu(s_di->id2.i_data.id_count)); spin_lock(&OCFS2_I(t_inode)->ip_lock); OCFS2_I(t_inode)->ip_dyn_features |= OCFS2_INLINE_DATA_FL; t_di->i_dyn_features = cpu_to_le16(OCFS2_I(t_inode)->ip_dyn_features); spin_unlock(&OCFS2_I(t_inode)->ip_lock); ocfs2_journal_dirty(handle, t_bh); out_commit: ocfs2_commit_trans(osb, handle); out: return ret; } static int ocfs2_duplicate_extent_list(struct inode *s_inode, struct inode *t_inode, struct buffer_head *t_bh, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret = 0; u32 p_cluster, num_clusters, clusters, cpos; 
loff_t size; unsigned int ext_flags; struct ocfs2_extent_tree et; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh); size = i_size_read(s_inode); clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size); cpos = 0; while (cpos < clusters) { ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); goto out; } if (p_cluster) { ret = ocfs2_add_refcounted_extent(t_inode, &et, ref_ci, ref_root_bh, cpos, p_cluster, num_clusters, ext_flags, dealloc); if (ret) { mlog_errno(ret); goto out; } } cpos += num_clusters; } out: return ret; } /* * change the new file's attributes to the src. * * reflink creates a snapshot of a file, that means the attributes * must be identical except for three exceptions - nlink, ino, and ctime. */ static int ocfs2_complete_reflink(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, struct buffer_head *t_bh, bool preserve) { int ret; handle_t *handle; struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data; loff_t size = i_size_read(s_inode); handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb), OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); return ret; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } spin_lock(&OCFS2_I(t_inode)->ip_lock); OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters; OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr; OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; spin_unlock(&OCFS2_I(t_inode)->ip_lock); i_size_write(t_inode, size); t_inode->i_blocks = s_inode->i_blocks; di->i_xattr_inline_size = s_di->i_xattr_inline_size; di->i_clusters = s_di->i_clusters; di->i_size = s_di->i_size; di->i_dyn_features = s_di->i_dyn_features; di->i_attr = s_di->i_attr; if (preserve) { t_inode->i_uid = s_inode->i_uid; t_inode->i_gid = s_inode->i_gid; t_inode->i_mode = s_inode->i_mode; di->i_uid = s_di->i_uid; di->i_gid = s_di->i_gid; di->i_mode = s_di->i_mode; /* * update time. * we want mtime to appear identical to the source and * update ctime. 
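 * (mtime/mtime_nsec are copied verbatim from the source dinode, while ctime
 * is set to the current time.)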
*/ inode_set_ctime_current(t_inode); di->i_ctime = cpu_to_le64(inode_get_ctime_sec(t_inode)); di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(t_inode)); inode_set_mtime_to_ts(t_inode, inode_get_mtime(s_inode)); di->i_mtime = s_di->i_mtime; di->i_mtime_nsec = s_di->i_mtime_nsec; } ocfs2_journal_dirty(handle, t_bh); out_commit: ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle); return ret; } static int ocfs2_create_reflink_node(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, struct buffer_head *t_bh, bool preserve) { int ret; struct buffer_head *ref_root_bh = NULL; struct ocfs2_cached_dealloc_ctxt dealloc; struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data; struct ocfs2_refcount_tree *ref_tree; ocfs2_init_dealloc_ctxt(&dealloc); ret = ocfs2_set_refcount_tree(t_inode, t_bh, le64_to_cpu(di->i_refcount_loc)); if (ret) { mlog_errno(ret); goto out; } if (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh); if (ret) mlog_errno(ret); goto out; } ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh, &ref_tree->rf_ci, ref_root_bh, &dealloc); if (ret) { mlog_errno(ret); goto out_unlock_refcount; } out_unlock_refcount: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); out: if (ocfs2_dealloc_has_cluster(&dealloc)) { ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); } return ret; } static int __ocfs2_reflink(struct dentry *old_dentry, struct buffer_head *old_bh, struct inode *new_inode, bool preserve) { int ret; struct inode *inode = d_inode(old_dentry); struct buffer_head *new_bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) { ret = -EINVAL; mlog_errno(ret); goto out; } ret = filemap_fdatawrite(inode->i_mapping); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_attach_refcount_tree(inode, old_bh); if (ret) { mlog_errno(ret); goto out; } inode_lock_nested(new_inode, I_MUTEX_CHILD); ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, OI_LS_REFLINK_TARGET); if (ret) { mlog_errno(ret); goto out_unlock; } if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { /* * Adjust extent record count to reserve space for extended attribute. * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
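 * (l_count is reduced by the number of extent records that the source's
 * inline xattr area occupies, so the target reserves the same in-inode
 * space for xattrs.)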
*/ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode); if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && !(ocfs2_inode_is_fast_symlink(new_inode))) { struct ocfs2_dinode *new_di = (struct ocfs2_dinode *)new_bh->b_data; struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data; struct ocfs2_extent_list *el = &new_di->id2.i_list; int inline_size = le16_to_cpu(old_di->i_xattr_inline_size); le16_add_cpu(&el->l_count, -(inline_size / sizeof(struct ocfs2_extent_rec))); } } ret = ocfs2_create_reflink_node(inode, old_bh, new_inode, new_bh, preserve); if (ret) { mlog_errno(ret); goto inode_unlock; } if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { ret = ocfs2_reflink_xattrs(inode, old_bh, new_inode, new_bh, preserve); if (ret) { mlog_errno(ret); goto inode_unlock; } } ret = ocfs2_complete_reflink(inode, old_bh, new_inode, new_bh, preserve); if (ret) mlog_errno(ret); inode_unlock: ocfs2_inode_unlock(new_inode, 1); brelse(new_bh); out_unlock: inode_unlock(new_inode); out: if (!ret) { ret = filemap_fdatawait(inode->i_mapping); if (ret) mlog_errno(ret); } return ret; } static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, bool preserve) { int error, had_lock; struct inode *inode = d_inode(old_dentry); struct buffer_head *old_bh = NULL; struct inode *new_orphan_inode = NULL; struct ocfs2_lock_holder oh; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, &new_orphan_inode); if (error) { mlog_errno(error); goto out; } error = ocfs2_rw_lock(inode, 1); if (error) { mlog_errno(error); goto out; } error = ocfs2_inode_lock(inode, &old_bh, 1); if (error) { mlog_errno(error); ocfs2_rw_unlock(inode, 1); goto out; } down_write(&OCFS2_I(inode)->ip_xattr_sem); down_write(&OCFS2_I(inode)->ip_alloc_sem); error = __ocfs2_reflink(old_dentry, old_bh, new_orphan_inode, preserve); up_write(&OCFS2_I(inode)->ip_alloc_sem); up_write(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 1); ocfs2_rw_unlock(inode, 1); brelse(old_bh); if (error) { mlog_errno(error); goto out; } had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, &oh); if (had_lock < 0) { error = had_lock; mlog_errno(error); goto out; } /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { error = ocfs2_init_security_and_acl(dir, new_orphan_inode, &new_dentry->d_name); if (error) mlog_errno(error); } if (!error) { error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, new_dentry); if (error) mlog_errno(error); } ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); out: if (new_orphan_inode) { /* * We need to open_unlock the inode no matter whether we * succeed or not, so that other nodes can delete it later. */ ocfs2_open_unlock(new_orphan_inode); if (error) iput(new_orphan_inode); } return error; } /* * Below here are the bits used by OCFS2_IOC_REFLINK() to fake * sys_reflink(). This will go away when vfs_reflink() exists in * fs/namei.c. */ /* copied from may_create in VFS. 
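 * The target dentry must be negative and the directory must not be dead;
 * beyond that this is an ordinary MAY_WRITE | MAY_EXEC permission check on
 * the directory.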
*/ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child) { if (d_really_is_positive(child)) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; return inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC); } /** * ocfs2_vfs_reflink - Create a reference-counted link * * @old_dentry: source dentry + inode * @dir: directory to create the target * @new_dentry: target dentry * @preserve: if true, preserve all file attributes */ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, bool preserve) { struct inode *inode = d_inode(old_dentry); int error; if (!inode) return -ENOENT; error = ocfs2_may_create(dir, new_dentry); if (error) return error; if (dir->i_sb != inode->i_sb) return -EXDEV; /* * A reflink to an append-only or immutable file cannot be created. */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; /* Only regular files can be reflinked. */ if (!S_ISREG(inode->i_mode)) return -EPERM; /* * If the caller wants to preserve ownership, they require the * rights to do so. */ if (preserve) { if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN)) return -EPERM; if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) return -EPERM; } /* * If the caller is modifying any aspect of the attributes, they * are not creating a snapshot. They need read permission on the * file. */ if (!preserve) { error = inode_permission(&nop_mnt_idmap, inode, MAY_READ); if (error) return error; } inode_lock(inode); error = dquot_initialize(dir); if (!error) error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); inode_unlock(inode); if (!error) fsnotify_create(dir, new_dentry); return error; } /* * Most codes are copied from sys_linkat. */ int ocfs2_reflink_ioctl(struct inode *inode, const char __user *oldname, const char __user *newname, bool preserve) { struct dentry *new_dentry; struct path old_path, new_path; int error; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; error = user_path_at(AT_FDCWD, oldname, 0, &old_path); if (error) { mlog_errno(error); return error; } new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) { mlog_errno(error); goto out; } error = -EXDEV; if (old_path.mnt != new_path.mnt) { mlog_errno(error); goto out_dput; } error = ocfs2_vfs_reflink(old_path.dentry, d_inode(new_path.dentry), new_dentry, preserve); out_dput: done_path_create(&new_path, new_dentry); out: path_put(&old_path); return error; } /* Update destination inode size, if necessary. */ int ocfs2_reflink_update_dest(struct inode *dest, struct buffer_head *d_bh, loff_t newlen) { handle_t *handle; int ret; dest->i_blocks = ocfs2_inode_sector_count(dest); if (newlen <= i_size_read(dest)) return 0; handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb), OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); return ret; } /* Extend i_size if needed. */ spin_lock(&OCFS2_I(dest)->ip_lock); if (newlen > i_size_read(dest)) i_size_write(dest, newlen); spin_unlock(&OCFS2_I(dest)->ip_lock); inode_set_mtime_to_ts(dest, inode_set_ctime_current(dest)); ret = ocfs2_mark_inode_dirty(handle, dest, d_bh); if (ret) { mlog_errno(ret); goto out_commit; } out_commit: ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle); return ret; } /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. 
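 * Returns the number of bytes actually remapped (the loop can be cut short
 * by a fatal signal), or a negative error code if nothing was remapped.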
*/ static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode, struct buffer_head *s_bh, loff_t pos_in, struct inode *t_inode, struct buffer_head *t_bh, loff_t pos_out, loff_t len, struct ocfs2_cached_dealloc_ctxt *dealloc) { struct ocfs2_extent_tree s_et; struct ocfs2_extent_tree t_et; struct ocfs2_dinode *dis; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; struct ocfs2_super *osb; loff_t remapped_bytes = 0; loff_t pstart, plen; u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0; unsigned int ext_flags; int ret = 0; osb = OCFS2_SB(s_inode->i_sb); dis = (struct ocfs2_dinode *)s_bh->b_data; ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh); ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh); spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in); tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out); slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len); while (spos < slast) { if (fatal_signal_pending(current)) { ret = -EINTR; goto out; } /* Look up the extent. */ ret = ocfs2_get_clusters(s_inode, spos, &p_cluster, &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); goto out; } num_clusters = min_t(u32, num_clusters, slast - spos); /* Punch out the dest range. */ pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos); plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters); ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen); if (ret) { mlog_errno(ret); goto out; } if (p_cluster == 0) goto next_loop; /* Lock the refcount btree... */ ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(dis->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } /* Mark s_inode's extent as refcounted. */ if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) { ret = ocfs2_add_refcount_flag(s_inode, &s_et, &ref_tree->rf_ci, ref_root_bh, spos, p_cluster, num_clusters, dealloc, NULL); if (ret) { mlog_errno(ret); goto out_unlock_refcount; } } /* Map in the new extent. */ ext_flags |= OCFS2_EXT_REFCOUNTED; ret = ocfs2_add_refcounted_extent(t_inode, &t_et, &ref_tree->rf_ci, ref_root_bh, tpos, p_cluster, num_clusters, ext_flags, dealloc); if (ret) { mlog_errno(ret); goto out_unlock_refcount; } ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); next_loop: spos += num_clusters; tpos += num_clusters; remapped_clus += num_clusters; } goto out; out_unlock_refcount: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); out: remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus); remapped_bytes = min_t(loff_t, len, remapped_bytes); return remapped_bytes > 0 ? remapped_bytes : ret; } /* Set up refcount tree and remap s_inode to t_inode. */ loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode, struct buffer_head *s_bh, loff_t pos_in, struct inode *t_inode, struct buffer_head *t_bh, loff_t pos_out, loff_t len) { struct ocfs2_cached_dealloc_ctxt dealloc; struct ocfs2_super *osb; struct ocfs2_dinode *dis; struct ocfs2_dinode *dit; loff_t ret; osb = OCFS2_SB(s_inode->i_sb); dis = (struct ocfs2_dinode *)s_bh->b_data; dit = (struct ocfs2_dinode *)t_bh->b_data; ocfs2_init_dealloc_ctxt(&dealloc); /* * If we're reflinking the entire file and the source is inline * data, just copy the contents. 
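 * No refcount tree needs to be set up in that case; the inline data is
 * simply duplicated into the destination dinode.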
*/ if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) && i_size_read(t_inode) <= len && (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) { ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh); if (ret) mlog_errno(ret); goto out; } /* * If both inodes belong to two different refcount groups then * forget it because we don't know how (or want) to go merging * refcount trees. */ ret = -EOPNOTSUPP; if (ocfs2_is_refcount_inode(s_inode) && ocfs2_is_refcount_inode(t_inode) && le64_to_cpu(dis->i_refcount_loc) != le64_to_cpu(dit->i_refcount_loc)) goto out; /* Neither inode has a refcount tree. Add one to s_inode. */ if (!ocfs2_is_refcount_inode(s_inode) && !ocfs2_is_refcount_inode(t_inode)) { ret = ocfs2_create_refcount_tree(s_inode, s_bh); if (ret) { mlog_errno(ret); goto out; } } /* Ensure that both inodes end up with the same refcount tree. */ if (!ocfs2_is_refcount_inode(s_inode)) { ret = ocfs2_set_refcount_tree(s_inode, s_bh, le64_to_cpu(dit->i_refcount_loc)); if (ret) { mlog_errno(ret); goto out; } } if (!ocfs2_is_refcount_inode(t_inode)) { ret = ocfs2_set_refcount_tree(t_inode, t_bh, le64_to_cpu(dis->i_refcount_loc)); if (ret) { mlog_errno(ret); goto out; } } /* Turn off inline data in the dest file. */ if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh); if (ret) { mlog_errno(ret); goto out; } } /* Actually remap extents now. */ ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, pos_out, len, &dealloc); if (ret < 0) { mlog_errno(ret); goto out; } out: if (ocfs2_dealloc_has_cluster(&dealloc)) { ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); } return ret; } /* Lock an inode and grab a bh pointing to the inode. */ int ocfs2_reflink_inodes_lock(struct inode *s_inode, struct buffer_head **bh_s, struct inode *t_inode, struct buffer_head **bh_t) { struct inode *inode1 = s_inode; struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; struct buffer_head *bh1 = NULL; struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); if (status) { mlog_errno(status); goto out_i1; } if (!same_inode) { status = ocfs2_rw_lock(inode2, 1); if (status) { mlog_errno(status); goto out_i2; } } /* Now go for the cluster locks */ oi1 = OCFS2_I(inode1); oi2 = OCFS2_I(inode2); trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ status = ocfs2_inode_lock_nested(inode1, &bh1, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_rw2; } /* lock id2 */ if (!same_inode) { status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } } else { bh2 = bh1; } /* * If we swapped inode order above, we have to swap the buffer heads * before passing them back to the caller. 
*/ if (need_swap) swap(bh1, bh2); *bh_s = bh1; *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); return 0; out_cl1: ocfs2_inode_unlock(inode1, 1); brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: ocfs2_rw_unlock(inode1, 1); out_i1: unlock_two_nondirectories(s_inode, t_inode); return status; } /* Unlock both inodes and release buffers. */ void ocfs2_reflink_inodes_unlock(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, struct buffer_head *t_bh) { ocfs2_inode_unlock(s_inode, 1); ocfs2_rw_unlock(s_inode, 1); brelse(s_bh); if (s_inode != t_inode) { ocfs2_inode_unlock(t_inode, 1); ocfs2_rw_unlock(t_inode, 1); brelse(t_bh); } unlock_two_nondirectories(s_inode, t_inode); }
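For context, ocfs2_reflink_ioctl() above is the kernel side of the OCFS2 reflink ioctl. Below is a minimal user-space sketch of a caller; it is not part of this file, and the struct reflink_arguments layout and the OCFS2_IOC_REFLINK number are assumptions mirroring fs/ocfs2/ocfs2_ioctl.h — in real code include that header instead of redefining them.

/*
 * Hypothetical caller: clone "src" to "dst" on an OCFS2 mount via the
 * reflink ioctl serviced by ocfs2_reflink_ioctl() above.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

struct reflink_arguments {			/* assumed layout, see ocfs2_ioctl.h */
	uint64_t old_path;			/* pointer to source path string */
	uint64_t new_path;			/* pointer to target path string */
	uint64_t preserve;			/* non-zero: snapshot, keep attributes */
};
#define OCFS2_IOC_REFLINK	_IOW('o', 4, struct reflink_arguments)	/* assumed */

int main(void)
{
	const char *src = "/mnt/ocfs2/src", *dst = "/mnt/ocfs2/dst";
	struct reflink_arguments args = {
		.old_path = (uintptr_t)src,
		.new_path = (uintptr_t)dst,
		.preserve = 1,
	};
	/* Any fd on the same OCFS2 filesystem can receive the ioctl. */
	int fd = open(src, O_RDONLY);

	if (fd < 0 || ioctl(fd, OCFS2_IOC_REFLINK, &args) < 0) {
		perror("reflink");
		return 1;
	}
	close(fd);
	return 0;
}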
// SPDX-License-Identifier: GPL-2.0-only /* * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. * * Copyright (C) 2015, Intel Corp. * Author: Mika Westerberg <mika.westerberg@linux.intel.com> * Author: Rafael J.
Wysocki <rafael.j.wysocki@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <linux/acpi.h> #include <linux/device.h> #include <linux/export.h> #include <linux/nls.h> #include "internal.h" static ssize_t acpi_object_path(acpi_handle handle, char *buf) { struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; int result; result = acpi_get_name(handle, ACPI_FULL_PATHNAME, &path); if (result) return result; result = sprintf(buf, "%s\n", (char *)path.pointer); kfree(path.pointer); return result; } struct acpi_data_node_attr { struct attribute attr; ssize_t (*show)(struct acpi_data_node *, char *); ssize_t (*store)(struct acpi_data_node *, const char *, size_t count); }; #define DATA_NODE_ATTR(_name) \ static struct acpi_data_node_attr data_node_##_name = \ __ATTR(_name, 0444, data_node_show_##_name, NULL) static ssize_t data_node_show_path(struct acpi_data_node *dn, char *buf) { return dn->handle ? acpi_object_path(dn->handle, buf) : 0; } DATA_NODE_ATTR(path); static struct attribute *acpi_data_node_default_attrs[] = { &data_node_path.attr, NULL }; ATTRIBUTE_GROUPS(acpi_data_node_default); #define to_data_node(k) container_of(k, struct acpi_data_node, kobj) #define to_attr(a) container_of(a, struct acpi_data_node_attr, attr) static ssize_t acpi_data_node_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct acpi_data_node *dn = to_data_node(kobj); struct acpi_data_node_attr *dn_attr = to_attr(attr); return dn_attr->show ? dn_attr->show(dn, buf) : -ENXIO; } static const struct sysfs_ops acpi_data_node_sysfs_ops = { .show = acpi_data_node_attr_show, }; static void acpi_data_node_release(struct kobject *kobj) { struct acpi_data_node *dn = to_data_node(kobj); complete(&dn->kobj_done); } static const struct kobj_type acpi_data_node_ktype = { .sysfs_ops = &acpi_data_node_sysfs_ops, .default_groups = acpi_data_node_default_groups, .release = acpi_data_node_release, }; static void acpi_expose_nondev_subnodes(struct kobject *kobj, struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry(dn, list, sibling) { int ret; init_completion(&dn->kobj_done); ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype, kobj, "%s", dn->name); if (!ret) acpi_expose_nondev_subnodes(&dn->kobj, &dn->data); else if (dn->handle) acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret); } } static void acpi_hide_nondev_subnodes(struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry_reverse(dn, list, sibling) { acpi_hide_nondev_subnodes(&dn->data); kobject_put(&dn->kobj); } } /** * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Creates hid/cid(s) string needed for modalias and uevent * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: * char *modalias: "acpi:IBM0001:ACPI0001" * Return: 0: no _HID and no _CID * -EINVAL: output error * -ENOMEM: output is truncated */ static int create_pnp_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { int len; int count; struct acpi_hardware_id *id; /* Avoid unnecessarily loading modules for non present devices. 
*/ if (!acpi_device_is_present(acpi_dev)) return 0; /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the * device's list. */ count = 0; list_for_each_entry(id, &acpi_dev->pnp.ids, list) if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) count++; if (!count) return 0; len = snprintf(modalias, size, "acpi:"); if (len >= size) return -ENOMEM; size -= len; list_for_each_entry(id, &acpi_dev->pnp.ids, list) { if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) continue; count = snprintf(&modalias[len], size, "%s:", id->id); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } /** * create_of_modalias - Creates DT compatible string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Expose DT compatible modalias as of:NnameTCcompatible. This function should * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of * ACPI/PNP IDs. */ static int create_of_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; acpi_status status; int len, count; int i, nval; char *c; status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); if (ACPI_FAILURE(status)) return -ENODEV; /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); ACPI_FREE(buf.pointer); if (len >= size) return -ENOMEM; size -= len; of_compatible = acpi_dev->data.of_compatible; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } for (i = 0; i < nval; i++, obj++) { count = snprintf(&modalias[len], size, "C%s", obj->string.pointer); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } int __acpi_device_uevent_modalias(const struct acpi_device *adev, struct kobj_uevent_env *env) { int len; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; if (adev->data.of_compatible) len = create_of_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); else len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); if (len < 0) return len; env->buflen += len; return 0; } /** * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @env: Environment variables of the kobject uevent. * * Create the uevent modalias field for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". 
*/ int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); } EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); static int __acpi_device_modalias(const struct acpi_device *adev, char *buf, int size) { int len, count; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; len = create_pnp_modalias(adev, buf, size - 1); if (len < 0) { return len; } else if (len > 0) { buf[len++] = '\n'; size -= len; } if (!adev->data.of_compatible) return len; count = create_of_modalias(adev, buf + len, size - 1); if (count < 0) { return count; } else if (count > 0) { len += count; buf[len++] = '\n'; } return len; } /** * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @buf: The buffer to save pnp_modalias and of_modalias. * @size: Size of buffer. * * Create the modalias sysfs attribute for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". */ int acpi_device_modalias(struct device *dev, char *buf, int size) { return __acpi_device_modalias(acpi_companion_match(dev), buf, size); } EXPORT_SYMBOL_GPL(acpi_device_modalias); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); } static DEVICE_ATTR_RO(modalias); static ssize_t real_power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); int state; int ret; ret = acpi_device_get_power(adev, &state); if (ret) return ret; return sprintf(buf, "%s\n", acpi_power_state_string(state)); } static DEVICE_ATTR_RO(real_power_state); static ssize_t power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); } static DEVICE_ATTR_RO(power_state); static ssize_t eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct acpi_device *acpi_device = to_acpi_device(d); acpi_object_type not_used; acpi_status status; if (!count || buf[0] != '1') return -EINVAL; if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) && !d->driver) return -ENODEV; status = acpi_get_type(acpi_device->handle, &not_used); if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) return -ENODEV; acpi_dev_get(acpi_device); status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); if (ACPI_SUCCESS(status)) return count; acpi_dev_put(acpi_device); acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); return status == AE_NO_MEMORY ? 
-ENOMEM : -EAGAIN; } static DEVICE_ATTR_WO(eject); static ssize_t hid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); } static DEVICE_ATTR_RO(hid); static ssize_t uid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_uid(acpi_dev)); } static DEVICE_ATTR_RO(uid); static ssize_t adr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); if (acpi_dev->pnp.bus_address > U32_MAX) return sprintf(buf, "0x%016llx\n", acpi_dev->pnp.bus_address); else return sprintf(buf, "0x%08llx\n", acpi_dev->pnp.bus_address); } static DEVICE_ATTR_RO(adr); static ssize_t path_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return acpi_object_path(acpi_dev->handle, buf); } static DEVICE_ATTR_RO(path); /* sysfs file that shows description text from the ACPI _STR method */ static ssize_t description_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *str_obj; acpi_status status; int result; status = acpi_evaluate_object_typed(acpi_dev->handle, "_STR", NULL, &buffer, ACPI_TYPE_BUFFER); if (ACPI_FAILURE(status)) return -EIO; str_obj = buffer.pointer; /* * The _STR object contains a Unicode identifier for a device. * We need to convert to utf-8 so it can be displayed. */ result = utf16s_to_utf8s( (wchar_t *)str_obj->buffer.pointer, str_obj->buffer.length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); buf[result++] = '\n'; kfree(str_obj); return result; } static DEVICE_ATTR_RO(description); static ssize_t sun_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sun; status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sun); } static DEVICE_ATTR_RO(sun); static ssize_t hrv_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long hrv; status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", hrv); } static DEVICE_ATTR_RO(hrv); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sta; status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sta); } static DEVICE_ATTR_RO(status); static struct attribute *acpi_attrs[] = { &dev_attr_path.attr, &dev_attr_hid.attr, &dev_attr_modalias.attr, &dev_attr_description.attr, &dev_attr_adr.attr, &dev_attr_uid.attr, &dev_attr_sun.attr, &dev_attr_hrv.attr, &dev_attr_status.attr, &dev_attr_eject.attr, &dev_attr_power_state.attr, &dev_attr_real_power_state.attr, NULL }; static bool acpi_show_attr(struct acpi_device *dev, const struct device_attribute *attr) { /* * Devices gotten from FADT don't have a "path" attribute */ if (attr == &dev_attr_path) return dev->handle; if (attr == &dev_attr_hid || attr == 
&dev_attr_modalias) return !list_empty(&dev->pnp.ids); if (attr == &dev_attr_description) return acpi_has_method(dev->handle, "_STR"); if (attr == &dev_attr_adr) return dev->pnp.type.bus_address; if (attr == &dev_attr_uid) return acpi_device_uid(dev); if (attr == &dev_attr_sun) return acpi_has_method(dev->handle, "_SUN"); if (attr == &dev_attr_hrv) return acpi_has_method(dev->handle, "_HRV"); if (attr == &dev_attr_status) return acpi_has_method(dev->handle, "_STA"); /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. */ if (attr == &dev_attr_eject) return acpi_has_method(dev->handle, "_EJ0"); if (attr == &dev_attr_power_state) return dev->flags.power_manageable; if (attr == &dev_attr_real_power_state) return dev->flags.power_manageable && dev->power.flags.power_resources; dev_warn_once(&dev->dev, "Unexpected attribute: %s\n", attr->attr.name); return false; } static umode_t acpi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int attrno) { struct acpi_device *dev = to_acpi_device(kobj_to_dev(kobj)); if (acpi_show_attr(dev, container_of(attr, struct device_attribute, attr))) return attr->mode; else return 0; } static const struct attribute_group acpi_group = { .attrs = acpi_attrs, .is_visible = acpi_attr_is_visible, }; const struct attribute_group *acpi_groups[] = { &acpi_group, NULL }; /** * acpi_device_setup_files - Create sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_setup_files(struct acpi_device *dev) { acpi_expose_nondev_subnodes(&dev->dev.kobj, &dev->data); } /** * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_remove_files(struct acpi_device *dev) { acpi_hide_nondev_subnodes(&dev->data); }
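The snprintf() length checks in create_pnp_modalias() and create_of_modalias() above follow a standard append-with-truncation-check pattern. A standalone sketch of that pattern is below; the hardware IDs and buffer size are illustrative only and not taken from this file.

/* Hedged sketch of the truncation-checked append loop used when composing
 * a modalias string such as "acpi:IBM0001:ACPI0001:". */
#include <stdio.h>

static int build_modalias(char *buf, int size, const char *const *ids, int n)
{
	int len, count, i;

	len = snprintf(buf, size, "acpi:");
	if (len >= size)
		return -1;		/* output truncated */
	size -= len;

	for (i = 0; i < n; i++) {
		/* Append each ID followed by ':', checking remaining space. */
		count = snprintf(&buf[len], size, "%s:", ids[i]);
		if (count >= size)
			return -1;	/* output truncated */
		len += count;
		size -= count;
	}
	return len;
}

int main(void)
{
	static const char *const ids[] = { "IBM0001", "ACPI0001" };
	char buf[64];

	if (build_modalias(buf, sizeof(buf), ids, 2) > 0)
		printf("%s\n", buf);	/* prints "acpi:IBM0001:ACPI0001:" */
	return 0;
}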
// SPDX-License-Identifier: GPL-2.0-only #include <linux/if_bridge.h> #include <linux/in.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rhashtable.h> #include <linux/rhashtable-types.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/netlink.h> #include <net/vxlan.h> #include "vxlan_private.h" struct vxlan_mdb_entry_key { union vxlan_addr src; union vxlan_addr dst; __be32 vni; }; struct vxlan_mdb_entry { struct rhash_head rhnode; struct list_head remotes; struct vxlan_mdb_entry_key key; struct hlist_node mdb_node; struct rcu_head rcu; }; #define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0) struct vxlan_mdb_remote { struct list_head list; struct vxlan_rdst __rcu *rd; u8 flags; u8 filter_mode; u8 rt_protocol; struct hlist_head src_list; struct rcu_head rcu; }; #define VXLAN_SGRP_F_DELETE BIT(0) struct vxlan_mdb_src_entry { struct hlist_node node; union vxlan_addr addr; u8 flags; }; struct vxlan_mdb_dump_ctx { long reserved; long entry_idx; long remote_idx; }; struct vxlan_mdb_config_src_entry { union vxlan_addr addr; struct list_head node; }; struct vxlan_mdb_config { struct vxlan_dev *vxlan; struct vxlan_mdb_entry_key group; struct list_head src_list; union vxlan_addr remote_ip; u32 remote_ifindex; __be32 remote_vni; __be16 remote_port; u16 nlflags; u8 flags; u8 filter_mode; u8 rt_protocol; }; struct vxlan_mdb_flush_desc { union vxlan_addr remote_ip; __be32 src_vni; __be32 remote_vni; __be16 remote_port; u8 rt_protocol; }; static const struct rhashtable_params vxlan_mdb_rht_params = { .head_offset = offsetof(struct vxlan_mdb_entry, rhnode), .key_offset = offsetof(struct vxlan_mdb_entry, key), .key_len = sizeof(struct vxlan_mdb_entry_key), .automatic_shrinking = true, }; static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack); static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack); static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, struct br_mdb_entry *e) { const union vxlan_addr *dst = &mdb_entry->key.dst; memset(e, 0, sizeof(*e)); e->ifindex = vxlan->dev->ifindex; e->state = MDB_PERMANENT; if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED) e->flags |= MDB_FLAGS_BLOCKED; switch (dst->sa.sa_family) { case AF_INET: e->addr.u.ip4 = dst->sin.sin_addr.s_addr; e->addr.proto = htons(ETH_P_IP); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: e->addr.u.ip6 = dst->sin6.sin6_addr; e->addr.proto = htons(ETH_P_IPV6); break; #endif } } static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb, const struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; struct nlattr *nest; if (hlist_empty(&remote->src_list)) return 0; nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); if (!nest) return -EMSGSIZE; hlist_for_each_entry(ent, &remote->src_list, node) { struct nlattr *nest_ent; nest_ent = nla_nest_start(skb,
MDBA_MDB_SRCLIST_ENTRY); if (!nest_ent) goto out_cancel_err; if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, &ent->addr) || nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0)) goto out_cancel_err; nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest); return 0; out_cancel_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); struct br_mdb_entry e; struct nlattr *nest; nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); if (!nest) return -EMSGSIZE; vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e); if (nla_put_nohdr(skb, sizeof(e), &e) || nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0)) goto nest_err; if (!vxlan_addr_any(&mdb_entry->key.src) && vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src)) goto nest_err; if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) || nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) || vxlan_mdb_entry_info_fill_srcs(skb, remote) || vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip)) goto nest_err; if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port && nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT, be16_to_cpu(rd->remote_port))) goto nest_err; if (rd->remote_vni != vxlan->default_dst.remote_vni && nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni))) goto nest_err; if (rd->remote_ifindex && nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex)) goto nest_err; if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI, be32_to_cpu(mdb_entry->key.vni))) goto nest_err; nla_nest_end(skb, nest); return 0; nest_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, struct vxlan_mdb_dump_ctx *ctx, const struct vxlan_mdb_entry *mdb_entry) { int remote_idx = 0, s_remote_idx = ctx->remote_idx; struct vxlan_mdb_remote *remote; struct nlattr *nest; int err = 0; nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!nest) return -EMSGSIZE; list_for_each_entry(remote, &mdb_entry->remotes, list) { if (remote_idx < s_remote_idx) goto skip; err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); if (err) break; skip: remote_idx++; } ctx->remote_idx = err ? remote_idx : 0; nla_nest_end(skb, nest); return err; } static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, struct vxlan_mdb_dump_ctx *ctx) { int entry_idx = 0, s_entry_idx = ctx->entry_idx; struct vxlan_mdb_entry *mdb_entry; struct nlattr *nest; int err = 0; nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!nest) return -EMSGSIZE; hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) { if (entry_idx < s_entry_idx) goto skip; err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry); if (err) break; skip: entry_idx++; } ctx->entry_idx = err ? 
entry_idx : 0; nla_nest_end(skb, nest); return err; } int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; ASSERT_RTNL(); NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; err = vxlan_mdb_fill(vxlan, skb, ctx); nlmsg_end(skb, nlh); cb->seq = vxlan->mdb_seq; nl_dump_check_consistent(cb, nlh); return err; } static const struct nla_policy vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = { [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), }; static const struct nla_policy vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol), }; static const struct netlink_range_validation vni_range = { .max = VXLAN_N_VID - 1, }; static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE, MCAST_INCLUDE), [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol), [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), }; static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { switch (proto) { case htons(ETH_P_IP): if (nla_len(attr) != sizeof(struct in_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length"); return false; } if (ipv4_is_multicast(nla_get_in_addr(attr))) { NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed"); return false; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct in6_addr src; if (nla_len(attr) != sizeof(struct in6_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length"); return false; } src = nla_get_in6_addr(attr); if (ipv6_addr_is_multicast(&src)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed"); return false; } break; } #endif default: NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address"); return false; } return true; } static void vxlan_mdb_group_set(struct vxlan_mdb_entry_key *group, const struct br_mdb_entry *entry, const struct nlattr *source_attr) { switch (entry->addr.proto) { case htons(ETH_P_IP): group->dst.sa.sa_family = AF_INET; group->dst.sin.sin_addr.s_addr = entry->addr.u.ip4; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): group->dst.sa.sa_family = AF_INET6; group->dst.sin6.sin6_addr = entry->addr.u.ip6; break; #endif } if (source_attr) vxlan_nla_get_addr(&group->src, source_attr); } static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group) { return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src); } static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group) { return 
!vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src); } static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg, __be16 proto, const struct nlattr *src_entry, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBE_SRCATTR_MAX + 1]; struct vxlan_mdb_config_src_entry *src; int err; err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry, vxlan_mdbe_src_list_entry_pol, extack); if (err) return err; if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS)) return -EINVAL; if (!vxlan_mdb_is_valid_source(tb[MDBE_SRCATTR_ADDRESS], proto, extack)) return -EINVAL; src = kzalloc(sizeof(*src), GFP_KERNEL); if (!src) return -ENOMEM; err = vxlan_nla_get_addr(&src->addr, tb[MDBE_SRCATTR_ADDRESS]); if (err) goto err_free_src; list_add_tail(&src->node, &cfg->src_list); return 0; err_free_src: kfree(src); return err; } static void vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src) { list_del(&src->node); kfree(src); } static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg, __be16 proto, const struct nlattr *src_list, struct netlink_ext_ack *extack) { struct vxlan_mdb_config_src_entry *src, *tmp; struct nlattr *src_entry; int rem, err; nla_for_each_nested(src_entry, src_list, rem) { err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry, extack); if (err) goto err_src_entry_init; } return 0; err_src_entry_init: list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) vxlan_mdb_config_src_entry_fini(src); return err; } static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg) { struct vxlan_mdb_config_src_entry *src, *tmp; list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) vxlan_mdb_config_src_entry_fini(src); } static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg, const struct br_mdb_entry *entry, const struct nlattr *set_attrs, struct netlink_ext_ack *extack) { struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; int err; err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs, vxlan_mdbe_attrs_pol, extack); if (err) return err; if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) { NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address"); return -EINVAL; } if (mdbe_attrs[MDBE_ATTR_SOURCE] && !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], entry->addr.proto, extack)) return -EINVAL; vxlan_mdb_group_set(&cfg->group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); /* rtnetlink code only validates that IPv4 group address is * multicast. 
*/ if (!vxlan_addr_is_multicast(&cfg->group.dst) && !vxlan_addr_any(&cfg->group.dst)) { NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast"); return -EINVAL; } if (vxlan_addr_any(&cfg->group.dst) && mdbe_attrs[MDBE_ATTR_SOURCE]) { NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry"); return -EINVAL; } if (vxlan_mdb_is_sg(&cfg->group)) cfg->filter_mode = MCAST_INCLUDE; if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { if (!vxlan_mdb_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries"); return -EINVAL; } cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]); } if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) { if (!vxlan_mdb_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries"); return -EINVAL; } if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode"); return -EINVAL; } err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto, mdbe_attrs[MDBE_ATTR_SRC_LIST], extack); if (err) return err; } if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) && cfg->filter_mode == MCAST_INCLUDE) { NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list"); return -EINVAL; } if (mdbe_attrs[MDBE_ATTR_RTPROT]) cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); if (err) { NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address"); goto err_src_list_fini; } if (mdbe_attrs[MDBE_ATTR_DST_PORT]) cfg->remote_port = cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); if (mdbe_attrs[MDBE_ATTR_VNI]) cfg->remote_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); if (mdbe_attrs[MDBE_ATTR_IFINDEX]) { cfg->remote_ifindex = nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]); if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) { NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found"); err = -EINVAL; goto err_src_list_fini; } } if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) cfg->group.vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; err_src_list_fini: vxlan_mdb_config_src_list_fini(cfg); return err; } static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg, struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); struct vxlan_dev *vxlan = netdev_priv(dev); memset(cfg, 0, sizeof(*cfg)); cfg->vxlan = vxlan; cfg->group.vni = vxlan->default_dst.remote_vni; INIT_LIST_HEAD(&cfg->src_list); cfg->nlflags = nlmsg_flags; cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; cfg->remote_vni = vxlan->default_dst.remote_vni; cfg->remote_port = vxlan->cfg.dst_port; if (entry->ifindex != dev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device"); return -EINVAL; } /* State is not part of the entry key and can be ignored on deletion * requests. 
*/ if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) && entry->state != MDB_PERMANENT) { NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent"); return -EINVAL; } if (entry->flags) { NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags"); return -EINVAL; } if (entry->vid) { NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); return -EINVAL; } if (entry->addr.proto != htons(ETH_P_IP) && entry->addr.proto != htons(ETH_P_IPV6)) { NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address"); return -EINVAL; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) { NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute"); return -EINVAL; } return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS], extack); } static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg) { vxlan_mdb_config_src_list_fini(cfg); } static struct vxlan_mdb_entry * vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group) { return rhashtable_lookup_fast(&vxlan->mdb_tbl, group, vxlan_mdb_rht_params); } static struct vxlan_mdb_remote * vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry, const union vxlan_addr *addr) { struct vxlan_mdb_remote *remote; list_for_each_entry(remote, &mdb_entry->remotes, list) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); if (vxlan_addr_equal(addr, &rd->remote_ip)) return remote; } return NULL; } static void vxlan_mdb_rdst_free(struct rcu_head *head) { struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); dst_cache_destroy(&rd->dst_cache); kfree(rd); } static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote) { struct vxlan_rdst *rd; int err; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) return -ENOMEM; err = dst_cache_init(&rd->dst_cache, GFP_KERNEL); if (err) goto err_free_rdst; rd->remote_ip = cfg->remote_ip; rd->remote_port = cfg->remote_port; rd->remote_vni = cfg->remote_vni; rd->remote_ifindex = cfg->remote_ifindex; rcu_assign_pointer(remote->rd, rd); return 0; err_free_rdst: kfree(rd); return err; } static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd) { call_rcu(&rd->rcu, vxlan_mdb_rdst_free); } static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote) { int err; err = vxlan_mdb_remote_rdst_init(cfg, remote); if (err) return err; remote->flags = cfg->flags; remote->filter_mode = cfg->filter_mode; remote->rt_protocol = cfg->rt_protocol; INIT_HLIST_HEAD(&remote->src_list); return 0; } static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan, struct vxlan_mdb_remote *remote) { WARN_ON_ONCE(!hlist_empty(&remote->src_list)); vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd)); } static struct vxlan_mdb_src_entry * vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_mdb_src_entry *ent; hlist_for_each_entry(ent, &remote->src_list, node) { if (vxlan_addr_equal(&ent->addr, addr)) return ent; } return NULL; } static struct vxlan_mdb_src_entry * vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_mdb_src_entry *ent; ent = kzalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return NULL; ent->addr = *addr; hlist_add_head(&ent->node, &remote->src_list); return ent; } static void vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent) { hlist_del(&ent->node); kfree(ent); } static int vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg, 
const union vxlan_addr *addr, struct netlink_ext_ack *extack) { struct vxlan_mdb_config sg_cfg; memset(&sg_cfg, 0, sizeof(sg_cfg)); sg_cfg.vxlan = cfg->vxlan; sg_cfg.group.src = *addr; sg_cfg.group.dst = cfg->group.dst; sg_cfg.group.vni = cfg->group.vni; INIT_LIST_HEAD(&sg_cfg.src_list); sg_cfg.remote_ip = cfg->remote_ip; sg_cfg.remote_ifindex = cfg->remote_ifindex; sg_cfg.remote_vni = cfg->remote_vni; sg_cfg.remote_port = cfg->remote_port; sg_cfg.nlflags = cfg->nlflags; sg_cfg.filter_mode = MCAST_INCLUDE; if (cfg->filter_mode == MCAST_EXCLUDE) sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED; sg_cfg.rt_protocol = cfg->rt_protocol; return __vxlan_mdb_add(&sg_cfg, extack); } static void vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); struct vxlan_mdb_config sg_cfg; memset(&sg_cfg, 0, sizeof(sg_cfg)); sg_cfg.vxlan = vxlan; sg_cfg.group.src = *addr; sg_cfg.group.dst = group->dst; sg_cfg.group.vni = group->vni; INIT_LIST_HEAD(&sg_cfg.src_list); sg_cfg.remote_ip = rd->remote_ip; __vxlan_mdb_del(&sg_cfg, NULL); } static int vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote, const struct vxlan_mdb_config_src_entry *src, struct netlink_ext_ack *extack) { struct vxlan_mdb_src_entry *ent; int err; ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr); if (!ent) { ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr); if (!ent) return -ENOMEM; } else if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Source entry already exists"); return -EEXIST; } err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack); if (err) goto err_src_del; /* Clear flags in case source entry was marked for deletion as part of * replace flow. 
*/ ent->flags = 0; return 0; err_src_del: vxlan_mdb_remote_src_entry_del(ent); return err; } static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote, struct vxlan_mdb_src_entry *ent) { vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr); vxlan_mdb_remote_src_entry_del(ent); } static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_mdb_config_src_entry *src; struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; int err; list_for_each_entry(src, &cfg->src_list, node) { err = vxlan_mdb_remote_src_add(cfg, remote, src, extack); if (err) goto err_src_del; } return 0; err_src_del: hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group, remote, ent); return err; } static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) vxlan_mdb_remote_src_del(vxlan, group, remote, ent); } static size_t vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; size_t nlmsg_size; if (hlist_empty(&remote->src_list)) return 0; /* MDBA_MDB_EATTR_SRC_LIST */ nlmsg_size = nla_total_size(0); hlist_for_each_entry(ent, &remote->src_list, node) { /* MDBA_MDB_SRCLIST_ENTRY */ nlmsg_size += nla_total_size(0) + /* MDBA_MDB_SRCATTR_ADDRESS */ nla_total_size(vxlan_addr_size(&group->dst)) + /* MDBA_MDB_SRCATTR_TIMER */ nla_total_size(sizeof(u8)); } return nlmsg_size; } static size_t vxlan_mdb_nlmsg_remote_size(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { const struct vxlan_mdb_entry_key *group = &mdb_entry->key; struct vxlan_rdst *rd = rtnl_dereference(remote->rd); size_t nlmsg_size; /* MDBA_MDB_ENTRY_INFO */ nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) + /* MDBA_MDB_EATTR_TIMER */ nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_SOURCE */ if (vxlan_mdb_is_sg(group)) nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst)); /* MDBA_MDB_EATTR_RTPROT */ nlmsg_size += nla_total_size(sizeof(u8)); /* MDBA_MDB_EATTR_SRC_LIST */ nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote); /* MDBA_MDB_EATTR_GROUP_MODE */ nlmsg_size += nla_total_size(sizeof(u8)); /* MDBA_MDB_EATTR_DST */ nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip)); /* MDBA_MDB_EATTR_DST_PORT */ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port) nlmsg_size += nla_total_size(sizeof(u16)); /* MDBA_MDB_EATTR_VNI */ if (rd->remote_vni != vxlan->default_dst.remote_vni) nlmsg_size += nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_IFINDEX */ if (rd->remote_ifindex) nlmsg_size += nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_SRC_VNI */ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni) nlmsg_size += nla_total_size(sizeof(u32)); return nlmsg_size; } static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0) + /* Remote entry */ vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote); } static int vxlan_mdb_nlmsg_fill(const 
struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, int type) { struct nlattr *mdb_nest, *mdb_entry_nest; struct br_port_msg *bpm; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = vxlan->dev->ifindex; mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!mdb_nest) goto cancel; mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!mdb_entry_nest) goto cancel; if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote)) goto cancel; nla_nest_end(skb, mdb_entry_nest); nla_nest_end(skb, mdb_nest); nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, int type) { struct net *net = dev_net(vxlan->dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote), GFP_KERNEL); if (!skb) goto errout; err = vxlan_mdb_nlmsg_fill(vxlan, skb, mdb_entry, remote, type); if (err) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } static int vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg, const struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; int err; hlist_for_each_entry(ent, &remote->src_list, node) ent->flags |= VXLAN_SGRP_F_DELETE; err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); if (err) goto err_clear_delete; hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) { if (ent->flags & VXLAN_SGRP_F_DELETE) vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote, ent); } return 0; err_clear_delete: hlist_for_each_entry(ent, &remote->src_list, node) ent->flags &= ~VXLAN_SGRP_F_DELETE; return err; } static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg, const struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd); struct vxlan_dev *vxlan = cfg->vxlan; int err; err = vxlan_mdb_remote_rdst_init(cfg, remote); if (err) return err; new_rd = rtnl_dereference(remote->rd); err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack); if (err) goto err_rdst_reset; WRITE_ONCE(remote->flags, cfg->flags); WRITE_ONCE(remote->filter_mode, cfg->filter_mode); remote->rt_protocol = cfg->rt_protocol; vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB); vxlan_mdb_remote_rdst_fini(old_rd); return 0; err_rdst_reset: rcu_assign_pointer(remote->rd, old_rd); vxlan_mdb_remote_rdst_fini(new_rd); return err; } static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_entry *mdb_entry, struct netlink_ext_ack *extack) { struct vxlan_mdb_remote *remote; int err; remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); if (remote) { if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists"); return -EEXIST; } return vxlan_mdb_remote_replace(cfg, mdb_entry, remote, extack); } if (!(cfg->nlflags & NLM_F_CREATE)) { NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not 
exist"); return -ENOENT; } remote = kzalloc(sizeof(*remote), GFP_KERNEL); if (!remote) return -ENOMEM; err = vxlan_mdb_remote_init(cfg, remote); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry"); goto err_free_remote; } err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); if (err) goto err_remote_fini; list_add_rcu(&remote->list, &mdb_entry->remotes); vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB); return 0; err_remote_fini: vxlan_mdb_remote_fini(cfg->vxlan, remote); err_free_remote: kfree(remote); return err; } static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote) { vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB); list_del_rcu(&remote->list); vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote); vxlan_mdb_remote_fini(vxlan, remote); kfree_rcu(remote, rcu); } static struct vxlan_mdb_entry * vxlan_mdb_entry_get(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group) { struct vxlan_mdb_entry *mdb_entry; int err; mdb_entry = vxlan_mdb_entry_lookup(vxlan, group); if (mdb_entry) return mdb_entry; mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL); if (!mdb_entry) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&mdb_entry->remotes); memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key)); hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list); err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, vxlan_mdb_rht_params); if (err) goto err_free_entry; if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) vxlan->cfg.flags |= VXLAN_F_MDB; return mdb_entry; err_free_entry: hlist_del(&mdb_entry->mdb_node); kfree(mdb_entry); return ERR_PTR(err); } static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry) { if (!list_empty(&mdb_entry->remotes)) return; if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) vxlan->cfg.flags &= ~VXLAN_F_MDB; rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, vxlan_mdb_rht_params); hlist_del(&mdb_entry->mdb_node); kfree_rcu(mdb_entry, rcu); } static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_entry *mdb_entry; int err; mdb_entry = vxlan_mdb_entry_get(vxlan, &cfg->group); if (IS_ERR(mdb_entry)) return PTR_ERR(mdb_entry); err = vxlan_mdb_remote_add(cfg, mdb_entry, extack); if (err) goto err_entry_put; vxlan->mdb_seq++; return 0; err_entry_put: vxlan_mdb_entry_put(vxlan, mdb_entry); return err; } static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_remote *remote; mdb_entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group); if (!mdb_entry) { NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry"); return -ENOENT; } remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); if (!remote) { NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry"); return -ENOENT; } vxlan_mdb_remote_del(vxlan, mdb_entry, remote); vxlan_mdb_entry_put(vxlan, mdb_entry); vxlan->mdb_seq++; return 0; } int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct vxlan_mdb_config cfg; int err; ASSERT_RTNL(); err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; err = __vxlan_mdb_add(&cfg, extack); vxlan_mdb_config_fini(&cfg); return err; } int 
vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_mdb_config cfg; int err; ASSERT_RTNL(); err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; err = __vxlan_mdb_del(&cfg, extack); vxlan_mdb_config_fini(&cfg); return err; } static const struct nla_policy vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), }; static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan, struct vxlan_mdb_flush_desc *desc, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; int err; if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Invalid port net device"); return -EINVAL; } if (entry->vid) { NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); return -EINVAL; } if (!tb[MDBA_SET_ENTRY_ATTRS]) return 0; err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, tb[MDBA_SET_ENTRY_ATTRS], vxlan_mdbe_attrs_del_bulk_pol, extack); if (err) return err; if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) { u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) { NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported"); return -EINVAL; } } if (mdbe_attrs[MDBE_ATTR_RTPROT]) desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); if (mdbe_attrs[MDBE_ATTR_DST]) vxlan_nla_get_addr(&desc->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); if (mdbe_attrs[MDBE_ATTR_DST_PORT]) desc->remote_port = cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); if (mdbe_attrs[MDBE_ATTR_VNI]) desc->remote_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) desc->src_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; } static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_flush_desc *desc) { struct vxlan_mdb_remote *remote, *tmp; list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); __be32 remote_vni; if (desc->remote_ip.sa.sa_family && !vxlan_addr_equal(&desc->remote_ip, &rd->remote_ip)) continue; /* Encapsulation is performed with source VNI if remote VNI * is not set. */ remote_vni = rd->remote_vni ? : mdb_entry->key.vni; if (desc->remote_vni && desc->remote_vni != remote_vni) continue; if (desc->remote_port && desc->remote_port != rd->remote_port) continue; if (desc->rt_protocol && desc->rt_protocol != remote->rt_protocol) continue; vxlan_mdb_remote_del(vxlan, mdb_entry, remote); } } static void vxlan_mdb_flush(struct vxlan_dev *vxlan, const struct vxlan_mdb_flush_desc *desc) { struct vxlan_mdb_entry *mdb_entry; struct hlist_node *tmp; /* The removal of an entry cannot trigger the removal of another entry * since entries are always added to the head of the list. 
*/ hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) { if (desc->src_vni && desc->src_vni != mdb_entry->key.vni) continue; vxlan_mdb_remotes_flush(vxlan, mdb_entry, desc); /* Entry will only be removed if its remotes list is empty. */ vxlan_mdb_entry_put(vxlan, mdb_entry); } } int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_mdb_flush_desc desc = {}; int err; ASSERT_RTNL(); err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack); if (err) return err; vxlan_mdb_flush(vxlan, &desc); return 0; } static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), }; static int vxlan_mdb_get_parse(struct net_device *dev, struct nlattr *tb[], struct vxlan_mdb_entry_key *group, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]); struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; struct vxlan_dev *vxlan = netdev_priv(dev); int err; memset(group, 0, sizeof(*group)); group->vni = vxlan->default_dst.remote_vni; if (!tb[MDBA_GET_ENTRY_ATTRS]) { vxlan_mdb_group_set(group, entry, NULL); return 0; } err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, tb[MDBA_GET_ENTRY_ATTRS], vxlan_mdbe_attrs_get_pol, extack); if (err) return err; if (mdbe_attrs[MDBE_ATTR_SOURCE] && !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], entry->addr.proto, extack)) return -EINVAL; vxlan_mdb_group_set(group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) group->vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; } static struct sk_buff * vxlan_mdb_get_reply_alloc(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry) { struct vxlan_mdb_remote *remote; size_t nlmsg_size; nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0); list_for_each_entry(remote, &mdb_entry->remotes, list) nlmsg_size += vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote); return nlmsg_new(nlmsg_size, GFP_KERNEL); } static int vxlan_mdb_get_reply_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, u32 portid, u32 seq) { struct nlattr *mdb_nest, *mdb_entry_nest; struct vxlan_mdb_remote *remote; struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = vxlan->dev->ifindex; mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!mdb_nest) { err = -EMSGSIZE; goto cancel; } mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!mdb_entry_nest) { err = -EMSGSIZE; goto cancel; } list_for_each_entry(remote, &mdb_entry->remotes, list) { err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); if (err) goto cancel; } nla_nest_end(skb, mdb_entry_nest); nla_nest_end(skb, mdb_nest); nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return err; } int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_entry_key group; struct sk_buff *skb; int err; ASSERT_RTNL(); err = 
vxlan_mdb_get_parse(dev, tb, &group, extack); if (err) return err; mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (!mdb_entry) { NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); return -ENOENT; } skb = vxlan_mdb_get_reply_alloc(vxlan, mdb_entry); if (!skb) return -ENOMEM; err = vxlan_mdb_get_reply_fill(vxlan, skb, mdb_entry, portid, seq); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply"); goto free; } return rtnl_unicast(skb, dev_net(dev), portid); free: kfree_skb(skb); return err; } struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, struct sk_buff *skb, __be32 src_vni) { struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_entry_key group; if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) || is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) return NULL; /* When not in collect metadata mode, 'src_vni' is zero, but MDB * entries are stored with the VNI of the VXLAN device. */ if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) src_vni = vxlan->default_dst.remote_vni; memset(&group, 0, sizeof(group)); group.vni = src_vni; switch (skb->protocol) { case htons(ETH_P_IP): if (!pskb_may_pull(skb, sizeof(struct iphdr))) return NULL; group.dst.sa.sa_family = AF_INET; group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr; group.src.sa.sa_family = AF_INET; group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return NULL; group.dst.sa.sa_family = AF_INET6; group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr; group.src.sa.sa_family = AF_INET6; group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr; break; #endif default: return NULL; } mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (mdb_entry) return mdb_entry; memset(&group.src, 0, sizeof(group.src)); mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (mdb_entry) return mdb_entry; /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if * the destination IP address is not link-local multicast since we want * to transmit such traffic together with broadcast and unknown unicast * traffic. 
*/ switch (skb->protocol) { case htons(ETH_P_IP): if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr)) return NULL; group.dst.sin.sin_addr.s_addr = 0; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (ipv6_addr_type(&group.dst.sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) return NULL; memset(&group.dst.sin6.sin6_addr, 0, sizeof(group.dst.sin6.sin6_addr)); break; #endif default: return NULL; } return vxlan_mdb_entry_lookup(vxlan, &group); } netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, struct sk_buff *skb) { struct vxlan_mdb_remote *remote, *fremote = NULL; __be32 src_vni = mdb_entry->key.vni; list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) { struct sk_buff *skb1; if ((vxlan_mdb_is_star_g(&mdb_entry->key) && READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) || (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED)) continue; if (!fremote) { fremote = remote; continue; } skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) vxlan_xmit_one(skb1, vxlan->dev, src_vni, rcu_dereference(remote->rd), false); } if (fremote) vxlan_xmit_one(skb, vxlan->dev, src_vni, rcu_dereference(fremote->rd), false); else kfree_skb(skb); return NETDEV_TX_OK; } static void vxlan_mdb_check_empty(void *ptr, void *arg) { WARN_ON_ONCE(1); } int vxlan_mdb_init(struct vxlan_dev *vxlan) { int err; err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params); if (err) return err; INIT_HLIST_HEAD(&vxlan->mdb_list); return 0; } void vxlan_mdb_fini(struct vxlan_dev *vxlan) { struct vxlan_mdb_flush_desc desc = {}; vxlan_mdb_flush(vxlan, &desc); WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB); rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty, NULL); }
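The packet-to-entry lookup in vxlan_mdb_entry_skb_get() above tries the most specific match first: an exact (S, G) entry, then a (*, G) entry with the source zeroed, and finally the all-zeros catch-all group, which is deliberately skipped for link-local multicast destinations. The following standalone sketch models that three-step fallback; the struct layout, table and helper names here are hypothetical illustrations, not the kernel API.

/*
 * Minimal user-space sketch of the (S, G) -> (*, G) -> catch-all fallback.
 * Hypothetical types; the kernel keeps these entries in an rhashtable.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mdb_key {
	uint32_t vni;
	uint32_t group;		/* destination (group) address, 0 = catch-all */
	uint32_t source;	/* source address, 0 = any source */
};

struct mdb_entry {
	struct mdb_key key;
	const char *name;
};

static const struct mdb_entry table[] = {
	{ { 100, 0xef000001, 0xc0a80001 }, "(S, G) entry" },
	{ { 100, 0xef000001, 0 },          "(*, G) entry" },
	{ { 100, 0, 0 },                   "catch-all entry" },
};

static const struct mdb_entry *lookup(const struct mdb_key *key)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!memcmp(&table[i].key, key, sizeof(*key)))
			return &table[i];
	return NULL;
}

static const struct mdb_entry *entry_for_skb(uint32_t vni, uint32_t dst,
					     uint32_t src, bool dst_is_link_local)
{
	struct mdb_key key = { .vni = vni, .group = dst, .source = src };
	const struct mdb_entry *ent;

	ent = lookup(&key);		/* 1. exact (S, G) */
	if (ent)
		return ent;

	key.source = 0;
	ent = lookup(&key);		/* 2. (*, G), any source */
	if (ent)
		return ent;

	if (dst_is_link_local)		/* 3. catch-all, never for link-local */
		return NULL;
	key.group = 0;
	return lookup(&key);
}

int main(void)
{
	const struct mdb_entry *ent;

	ent = entry_for_skb(100, 0xef000001, 0xc0a80001, false);
	printf("%s\n", ent ? ent->name : "none");	/* -> (S, G) entry */

	ent = entry_for_skb(100, 0xef0000ff, 0, false);
	printf("%s\n", ent ? ent->name : "none");	/* -> catch-all entry */

	return 0;
}

A packet that matches no group-specific entry is thus forwarded like broadcast/unknown-unicast traffic via the catch-all entry, unless its destination is link-local.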
// SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/arp.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/init.h> #include <net/rose.h> #include <linux/seq_file.h> #include <linux/export.h> static unsigned int rose_neigh_no = 1; static struct rose_node *rose_node_list; static DEFINE_SPINLOCK(rose_node_list_lock); static struct rose_neigh *rose_neigh_list; static DEFINE_SPINLOCK(rose_neigh_list_lock); static struct rose_route *rose_route_list; static DEFINE_SPINLOCK(rose_route_list_lock); struct rose_neigh *rose_loopback_neigh; /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new.
*/ static int __must_check rose_add_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; struct rose_neigh *rose_neigh; int i, res = 0; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == rose_route->mask) && (rosecmpm(&rose_route->address, &rose_node->address, rose_route->mask) == 0)) break; rose_node = rose_node->next; } if (rose_node != NULL && rose_node->loopback) { res = -EINVAL; goto out; } rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 && rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { rose_neigh = kmalloc(sizeof(*rose_neigh), GFP_ATOMIC); if (rose_neigh == NULL) { res = -ENOMEM; goto out; } rose_neigh->callsign = rose_route->neighbour; rose_neigh->digipeat = NULL; rose_neigh->ax25 = NULL; rose_neigh->dev = dev; rose_neigh->count = 0; rose_neigh->use = 0; rose_neigh->dce_mode = 0; rose_neigh->loopback = 0; rose_neigh->number = rose_neigh_no++; rose_neigh->restarted = 0; skb_queue_head_init(&rose_neigh->queue); timer_setup(&rose_neigh->ftimer, NULL, 0); timer_setup(&rose_neigh->t0timer, NULL, 0); if (rose_route->ndigis != 0) { rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC); if (rose_neigh->digipeat == NULL) { kfree(rose_neigh); res = -ENOMEM; goto out; } rose_neigh->digipeat->ndigi = rose_route->ndigis; rose_neigh->digipeat->lastrepeat = -1; for (i = 0; i < rose_route->ndigis; i++) { rose_neigh->digipeat->calls[i] = rose_route->digipeaters[i]; rose_neigh->digipeat->repeated[i] = 0; } } rose_neigh->next = rose_neigh_list; rose_neigh_list = rose_neigh; } /* * This is a new node to be inserted into the list. Find where it needs * to be inserted into the list, and insert it. We want to be sure * to order the list in descending order of mask size to ensure that * later when we are searching this list the first match will be the * best match. */ if (rose_node == NULL) { rose_tmpn = rose_node_list; rose_tmpp = NULL; while (rose_tmpn != NULL) { if (rose_tmpn->mask > rose_route->mask) { rose_tmpp = rose_tmpn; rose_tmpn = rose_tmpn->next; } else { break; } } /* create new node */ rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC); if (rose_node == NULL) { res = -ENOMEM; goto out; } rose_node->address = rose_route->address; rose_node->mask = rose_route->mask; rose_node->count = 1; rose_node->loopback = 0; rose_node->neighbour[0] = rose_neigh; if (rose_tmpn == NULL) { if (rose_tmpp == NULL) { /* Empty list */ rose_node_list = rose_node; rose_node->next = NULL; } else { rose_tmpp->next = rose_node; rose_node->next = NULL; } } else { if (rose_tmpp == NULL) { /* 1st node */ rose_node->next = rose_node_list; rose_node_list = rose_node; } else { rose_tmpp->next = rose_node; rose_node->next = rose_tmpn; } } rose_neigh->count++; goto out; } /* We have space, slot it in */ if (rose_node->count < 3) { rose_node->neighbour[rose_node->count] = rose_neigh; rose_node->count++; rose_neigh->count++; } out: spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return res; } /* * Caller is holding rose_node_list_lock. 
*/ static void rose_remove_node(struct rose_node *rose_node) { struct rose_node *s; if ((s = rose_node_list) == rose_node) { rose_node_list = rose_node->next; kfree(rose_node); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_node) { s->next = rose_node->next; kfree(rose_node); return; } s = s->next; } } /* * Caller is holding rose_neigh_list_lock. */ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; del_timer_sync(&rose_neigh->ftimer); del_timer_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; if (rose_neigh->ax25) ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; if (rose_neigh->ax25) ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; } s = s->next; } } /* * Caller is holding rose_route_list_lock. */ static void rose_remove_route(struct rose_route *rose_route) { struct rose_route *s; if (rose_route->neigh1 != NULL) rose_route->neigh1->use--; if (rose_route->neigh2 != NULL) rose_route->neigh2->use--; if ((s = rose_route_list) == rose_route) { rose_route_list = rose_route->next; kfree(rose_route); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_route) { s->next = rose_route->next; kfree(rose_route); return; } s = s->next; } } /* * "Delete" a node. Strictly speaking remove a route to a node. The node * is only deleted if no routes are left to it. */ static int rose_del_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node; struct rose_neigh *rose_neigh; int i, err = 0; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == rose_route->mask) && (rosecmpm(&rose_route->address, &rose_node->address, rose_route->mask) == 0)) break; rose_node = rose_node->next; } if (rose_node == NULL || rose_node->loopback) { err = -EINVAL; goto out; } rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 && rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { err = -EINVAL; goto out; } for (i = 0; i < rose_node->count; i++) { if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; if (rose_neigh->count == 0 && rose_neigh->use == 0) rose_remove_neigh(rose_neigh); rose_node->count--; if (rose_node->count == 0) { rose_remove_node(rose_node); } else { switch (i) { case 0: rose_node->neighbour[0] = rose_node->neighbour[1]; fallthrough; case 1: rose_node->neighbour[1] = rose_node->neighbour[2]; break; case 2: break; } } goto out; } } err = -EINVAL; out: spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return err; } /* * Add the loopback neighbour. 
*/ void rose_add_loopback_neigh(void) { struct rose_neigh *sn; rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_KERNEL); if (!rose_loopback_neigh) return; sn = rose_loopback_neigh; sn->callsign = null_ax25_address; sn->digipeat = NULL; sn->ax25 = NULL; sn->dev = NULL; sn->count = 0; sn->use = 0; sn->dce_mode = 1; sn->loopback = 1; sn->number = rose_neigh_no++; sn->restarted = 1; skb_queue_head_init(&sn->queue); timer_setup(&sn->ftimer, NULL, 0); timer_setup(&sn->t0timer, NULL, 0); spin_lock_bh(&rose_neigh_list_lock); sn->next = rose_neigh_list; rose_neigh_list = sn; spin_unlock_bh(&rose_neigh_list_lock); } /* * Add a loopback node. */ int rose_add_loopback_node(const rose_address *address) { struct rose_node *rose_node; int err = 0; spin_lock_bh(&rose_node_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == 10) && (rosecmpm(address, &rose_node->address, 10) == 0) && rose_node->loopback) break; rose_node = rose_node->next; } if (rose_node != NULL) goto out; if ((rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC)) == NULL) { err = -ENOMEM; goto out; } rose_node->address = *address; rose_node->mask = 10; rose_node->count = 1; rose_node->loopback = 1; rose_node->neighbour[0] = rose_loopback_neigh; /* Insert at the head of list. Address is always mask=10 */ rose_node->next = rose_node_list; rose_node_list = rose_node; rose_loopback_neigh->count++; out: spin_unlock_bh(&rose_node_list_lock); return err; } /* * Delete a loopback node. */ void rose_del_loopback_node(const rose_address *address) { struct rose_node *rose_node; spin_lock_bh(&rose_node_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == 10) && (rosecmpm(address, &rose_node->address, 10) == 0) && rose_node->loopback) break; rose_node = rose_node->next; } if (rose_node == NULL) goto out; rose_remove_node(rose_node); rose_loopback_neigh->count--; out: spin_unlock_bh(&rose_node_list_lock); } /* * A device has been removed. Remove its routes and neighbours. */ void rose_rt_device_down(struct net_device *dev) { struct rose_neigh *s, *rose_neigh; struct rose_node *t, *rose_node; int i; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; if (s->dev != dev) continue; rose_node = rose_node_list; while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; for (i = 0; i < t->count; i++) { if (t->neighbour[i] != s) continue; t->count--; switch (i) { case 0: t->neighbour[0] = t->neighbour[1]; fallthrough; case 1: t->neighbour[1] = t->neighbour[2]; break; case 2: break; } } if (t->count <= 0) rose_remove_node(t); } rose_remove_neigh(s); } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); } #if 0 /* Currently unused */ /* * A device has been removed. Remove its links. */ void rose_route_device_down(struct net_device *dev) { struct rose_route *s, *rose_route; spin_lock_bh(&rose_route_list_lock); rose_route = rose_route_list; while (rose_route != NULL) { s = rose_route; rose_route = rose_route->next; if (s->neigh1->dev == dev || s->neigh2->dev == dev) rose_remove_route(s); } spin_unlock_bh(&rose_route_list_lock); } #endif /* * Clear all nodes and neighbours out, except for neighbours with * active connections going through them. * Do not clear loopback neighbour and nodes. 
*/ static int rose_clear_routes(void) { struct rose_neigh *s, *rose_neigh; struct rose_node *t, *rose_node; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; rose_node = rose_node_list; while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; if (!t->loopback) rose_remove_node(t); } while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; if (s->use == 0 && !s->loopback) { s->count = 0; rose_remove_neigh(s); } } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return 0; } /* * Check that the device given is a valid AX.25 interface that is "up". * called with RTNL */ static struct net_device *rose_ax25_dev_find(char *devname) { struct net_device *dev; if ((dev = __dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) return dev; return NULL; } /* * Find the first active ROSE device, usually "rose0". */ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); rcu_read_unlock(); return first; } /* * Find the ROSE device for the given address. */ struct net_device *rose_dev_get(rose_address *addr) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: rcu_read_unlock(); return dev; } static int rose_dev_exists(rose_address *addr) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; out: rcu_read_unlock(); return dev != NULL; } struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neigh) { struct rose_route *rose_route; for (rose_route = rose_route_list; rose_route != NULL; rose_route = rose_route->next) if ((rose_route->neigh1 == neigh && rose_route->lci1 == lci) || (rose_route->neigh2 == neigh && rose_route->lci2 == lci)) return rose_route; return NULL; } /* * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, unsigned char *diagnostic, int route_frame) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; if (!route_frame) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { if (node->neighbour[i]->restarted) { res = node->neighbour[i]; goto out; } } } } if (!route_frame) { /* connect request */ for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; goto out; } failed = 1; } } } } if (failed) { *cause = ROSE_OUT_OF_ORDER; *diagnostic = 0; } else { *cause = ROSE_NOT_OBTAINABLE; *diagnostic = 0; } out: if (!route_frame) spin_unlock_bh(&rose_node_list_lock); return res; } /* * Handle the ioctls that control the routing functions. 
*/ int rose_rt_ioctl(unsigned int cmd, void __user *arg) { struct rose_route_struct rose_route; struct net_device *dev; int err; switch (cmd) { case SIOCADDRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */ return -EINVAL; if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); return err; case SIOCDELRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; err = rose_del_node(&rose_route, dev); return err; case SIOCRSCLRRT: return rose_clear_routes(); default: return -EINVAL; } return 0; } static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh) { struct rose_route *rose_route, *s; rose_neigh->restarted = 0; rose_stop_t0timer(rose_neigh); rose_start_ftimer(rose_neigh); skb_queue_purge(&rose_neigh->queue); spin_lock_bh(&rose_route_list_lock); rose_route = rose_route_list; while (rose_route != NULL) { if ((rose_route->neigh1 == rose_neigh && rose_route->neigh2 == rose_neigh) || (rose_route->neigh1 == rose_neigh && rose_route->neigh2 == NULL) || (rose_route->neigh2 == rose_neigh && rose_route->neigh1 == NULL)) { s = rose_route->next; rose_remove_route(rose_route); rose_route = s; continue; } if (rose_route->neigh1 == rose_neigh) { rose_route->neigh1->use--; rose_route->neigh1 = NULL; rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0); } if (rose_route->neigh2 == rose_neigh) { rose_route->neigh2->use--; rose_route->neigh2 = NULL; rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0); } rose_route = rose_route->next; } spin_unlock_bh(&rose_route_list_lock); } /* * A level 2 link has timed out, therefore it appears to be a poor link, * then don't use that neighbour until it is reset. Blow away all through * routes and connections using this route. */ void rose_link_failed(ax25_cb *ax25, int reason) { struct rose_neigh *rose_neigh; spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (rose_neigh->ax25 == ax25) break; rose_neigh = rose_neigh->next; } if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); } spin_unlock_bh(&rose_neigh_list_lock); } /* * A device has been "downed" remove its link status. Blow away all * through routes and connections that use this device. */ void rose_link_device_down(struct net_device *dev) { struct rose_neigh *rose_neigh; for (rose_neigh = rose_neigh_list; rose_neigh != NULL; rose_neigh = rose_neigh->next) { if (rose_neigh->dev == dev) { rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); } } } /* * Route a frame to an appropriate AX.25 connection. * A NULL ax25_cb indicates an internally generated frame. 
*/ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { struct rose_neigh *rose_neigh, *new_neigh; struct rose_route *rose_route; struct rose_facilities_struct facilities; rose_address *src_addr, *dest_addr; struct sock *sk; unsigned short frametype; unsigned int lci, new_lci; unsigned char cause, diagnostic; struct net_device *dev; int res = 0; char buf[11]; if (skb->len < ROSE_MIN_LEN) return res; if (!ax25) return rose_loopback_queue(skb, NULL); frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != ROSE_CALL_REQ_ADDR_LEN_VAL)) return res; src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); spin_lock_bh(&rose_neigh_list_lock); spin_lock_bh(&rose_route_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&ax25->dest_addr, &rose_neigh->callsign) == 0 && ax25->ax25_dev->dev == rose_neigh->dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { printk("rose_route : unknown neighbour or device %s\n", ax2asc(buf, &ax25->dest_addr)); goto out; } /* * Obviously the link is working, halt the ftimer. */ rose_stop_ftimer(rose_neigh); /* * LCI of zero is always for us, and its always a restart * frame. */ if (lci == 0) { rose_link_rx_restart(skb, rose_neigh, frametype); goto out; } /* * Find an existing socket. */ if ((sk = rose_find_socket(lci, rose_neigh)) != NULL) { if (frametype == ROSE_CALL_REQUEST) { struct rose_sock *rose = rose_sk(sk); /* Remove an existing unused socket */ rose_clear_queues(sk); rose->cause = ROSE_NETWORK_CONGESTION; rose->diagnostic = 0; rose->neighbour->use--; rose->neighbour = NULL; rose->lci = 0; rose->state = ROSE_STATE_0; sk->sk_state = TCP_CLOSE; sk->sk_err = 0; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } } else { skb_reset_transport_header(skb); res = rose_process_rx_frame(sk, skb); goto out; } } /* * Is is a Call Request and is it for us ? */ if (frametype == ROSE_CALL_REQUEST) if ((dev = rose_dev_get(dest_addr)) != NULL) { res = rose_rx_call_request(skb, dev, rose_neigh, lci); dev_put(dev); goto out; } if (!sysctl_rose_routing_control) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 0); goto out; } /* * Route it to the next in line if we have an entry for it. 
*/ rose_route = rose_route_list; while (rose_route != NULL) { if (rose_route->lci1 == lci && rose_route->neigh1 == rose_neigh) { if (frametype == ROSE_CALL_REQUEST) { /* F6FBB - Remove an existing unused route */ rose_remove_route(rose_route); break; } else if (rose_route->neigh2 != NULL) { skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh2); if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); res = 1; goto out; } else { if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); goto out; } } if (rose_route->lci2 == lci && rose_route->neigh2 == rose_neigh) { if (frametype == ROSE_CALL_REQUEST) { /* F6FBB - Remove an existing unused route */ rose_remove_route(rose_route); break; } else if (rose_route->neigh1 != NULL) { skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci1 >> 8) & 0x0F; skb->data[1] = (rose_route->lci1 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh1); if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); res = 1; goto out; } else { if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); goto out; } } rose_route = rose_route->next; } /* * We know that: * 1. The frame isn't for us, * 2. It isn't "owned" by any existing route. */ if (frametype != ROSE_CALL_REQUEST) { /* XXX */ res = 0; goto out; } memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, skb->len - ROSE_CALL_REQ_FACILITIES_OFF, &facilities)) { rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); goto out; } /* * Check for routing loops. */ rose_route = rose_route_list; while (rose_route != NULL) { if (rose_route->rand == facilities.rand && rosecmp(src_addr, &rose_route->src_addr) == 0 && ax25cmp(&facilities.dest_call, &rose_route->src_call) == 0 && ax25cmp(&facilities.source_call, &rose_route->dest_call) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 120); goto out; } rose_route = rose_route->next; } if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic); goto out; } if ((new_lci = rose_new_lci(new_neigh)) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71); goto out; } if ((rose_route = kmalloc(sizeof(*rose_route), GFP_ATOMIC)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120); goto out; } rose_route->lci1 = lci; rose_route->src_addr = *src_addr; rose_route->dest_addr = *dest_addr; rose_route->src_call = facilities.dest_call; rose_route->dest_call = facilities.source_call; rose_route->rand = facilities.rand; rose_route->neigh1 = rose_neigh; rose_route->lci2 = new_lci; rose_route->neigh2 = new_neigh; rose_route->neigh1->use++; rose_route->neigh2->use++; rose_route->next = rose_route_list; rose_route_list = rose_route; skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh2); res = 1; out: spin_unlock_bh(&rose_route_list_lock); spin_unlock_bh(&rose_neigh_list_lock); return res; } #ifdef CONFIG_PROC_FS static void *rose_node_start(struct seq_file *seq, loff_t *pos) __acquires(rose_node_list_lock) { struct rose_node *rose_node; int i = 1; spin_lock_bh(&rose_node_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_node = rose_node_list; 
rose_node && i < *pos; rose_node = rose_node->next, ++i); return (i == *pos) ? rose_node : NULL; } static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? rose_node_list : ((struct rose_node *)v)->next; } static void rose_node_stop(struct seq_file *seq, void *v) __releases(rose_node_list_lock) { spin_unlock_bh(&rose_node_list_lock); } static int rose_node_show(struct seq_file *seq, void *v) { char rsbuf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "address mask n neigh neigh neigh\n"); else { const struct rose_node *rose_node = v; seq_printf(seq, "%-10s %04d %d", rose2asc(rsbuf, &rose_node->address), rose_node->mask, rose_node->count); for (i = 0; i < rose_node->count; i++) seq_printf(seq, " %05d", rose_node->neighbour[i]->number); seq_puts(seq, "\n"); } return 0; } const struct seq_operations rose_node_seqops = { .start = rose_node_start, .next = rose_node_next, .stop = rose_node_stop, .show = rose_node_show, }; static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) __acquires(rose_neigh_list_lock) { struct rose_neigh *rose_neigh; int i = 1; spin_lock_bh(&rose_neigh_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_neigh = rose_neigh_list; rose_neigh && i < *pos; rose_neigh = rose_neigh->next, ++i); return (i == *pos) ? rose_neigh : NULL; } static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? rose_neigh_list : ((struct rose_neigh *)v)->next; } static void rose_neigh_stop(struct seq_file *seq, void *v) __releases(rose_neigh_list_lock) { spin_unlock_bh(&rose_neigh_list_lock); } static int rose_neigh_show(struct seq_file *seq, void *v) { char buf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "addr callsign dev count use mode restart t0 tf digipeaters\n"); else { struct rose_neigh *rose_neigh = v; /* if (!rose_neigh->loopback) { */ seq_printf(seq, "%05d %-9s %-4s %3d %3d %3s %3s %3lu %3lu", rose_neigh->number, (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, rose_neigh->use, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? "yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, ax25_display_timer(&rose_neigh->ftimer) / HZ); if (rose_neigh->digipeat != NULL) { for (i = 0; i < rose_neigh->digipeat->ndigi; i++) seq_printf(seq, " %s", ax2asc(buf, &rose_neigh->digipeat->calls[i])); } seq_puts(seq, "\n"); } return 0; } const struct seq_operations rose_neigh_seqops = { .start = rose_neigh_start, .next = rose_neigh_next, .stop = rose_neigh_stop, .show = rose_neigh_show, }; static void *rose_route_start(struct seq_file *seq, loff_t *pos) __acquires(rose_route_list_lock) { struct rose_route *rose_route; int i = 1; spin_lock_bh(&rose_route_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_route = rose_route_list; rose_route && i < *pos; rose_route = rose_route->next, ++i); return (i == *pos) ? rose_route : NULL; } static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? 
rose_route_list : ((struct rose_route *)v)->next; } static void rose_route_stop(struct seq_file *seq, void *v) __releases(rose_route_list_lock) { spin_unlock_bh(&rose_route_list_lock); } static int rose_route_show(struct seq_file *seq, void *v) { char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, "lci address callsign neigh <-> lci address callsign neigh\n"); else { struct rose_route *rose_route = v; if (rose_route->neigh1) seq_printf(seq, "%3.3X %-10s %-9s %05d ", rose_route->lci1, rose2asc(rsbuf, &rose_route->src_addr), ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else seq_puts(seq, "000 * * 00000 "); if (rose_route->neigh2) seq_printf(seq, "%3.3X %-10s %-9s %05d\n", rose_route->lci2, rose2asc(rsbuf, &rose_route->dest_addr), ax2asc(buf, &rose_route->dest_call), rose_route->neigh2->number); else seq_puts(seq, "000 * * 00000\n"); } return 0; } struct seq_operations rose_route_seqops = { .start = rose_route_start, .next = rose_route_next, .stop = rose_route_stop, .show = rose_route_show, }; #endif /* CONFIG_PROC_FS */ /* * Release all memory associated with ROSE routing structures. */ void __exit rose_rt_free(void) { struct rose_neigh *s, *rose_neigh = rose_neigh_list; struct rose_node *t, *rose_node = rose_node_list; struct rose_route *u, *rose_route = rose_route_list; while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; rose_remove_neigh(s); } while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; rose_remove_node(t); } while (rose_route != NULL) { u = rose_route; rose_route = rose_route->next; rose_remove_route(u); } }
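As the comment in rose_add_node() above notes, rose_node_list is kept in descending order of mask length so that a linear scan during route resolution hits the most specific node first. A minimal sketch of that ordered-insert/first-match scheme follows; the digit-matching predicate and types are made up for illustration (the real code uses struct rose_node and rosecmpm()).

/*
 * User-space sketch: keep nodes sorted by descending mask, so the first
 * match found while walking the list is the best (most specific) match.
 */
#include <stdio.h>

struct node {
	unsigned long long address;	/* up to 10 digits, 4 bits each; stand-in for a ROSE address */
	unsigned int mask;		/* number of significant leading digits, 0..10 */
	struct node *next;
};

/* Insert keeping the list sorted by descending mask. */
static void insert_node(struct node **head, struct node *n)
{
	struct node **pp = head;

	while (*pp && (*pp)->mask > n->mask)
		pp = &(*pp)->next;
	n->next = *pp;
	*pp = n;
}

/* Hypothetical "address matches node under its mask" predicate. */
static int addr_matches(unsigned long long addr, const struct node *n)
{
	unsigned int shift = (10 - n->mask) * 4;

	return (addr >> shift) == (n->address >> shift);
}

/* Because of the ordering, the first hit in a linear scan is the best match. */
static struct node *find_node(struct node *head, unsigned long long addr)
{
	for (; head; head = head->next)
		if (addr_matches(addr, head))
			return head;
	return NULL;
}

int main(void)
{
	static struct node full = { 0x2080192303ULL, 10, NULL };
	static struct node area = { 0x2080190000ULL,  6, NULL };
	struct node *head = NULL, *hit;

	/* Insertion order does not matter; the list stays sorted by mask. */
	insert_node(&head, &area);
	insert_node(&head, &full);

	hit = find_node(head, 0x2080192303ULL);
	printf("best match mask = %u\n", hit ? hit->mask : 0);	/* 10 */

	hit = find_node(head, 0x2080194455ULL);
	printf("best match mask = %u\n", hit ? hit->mask : 0);	/* 6 */

	return 0;
}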
// SPDX-License-Identifier: GPL-2.0-only /* * IEEE802154.4 socket interface * * Copyright 2007, 2008 Siemens AG * * Written by: * Sergey Lapin <slapin@ossfans.org> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> */ #include <linux/net.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/if_arp.h> #include <linux/if.h> #include <linux/termios.h> /* For TIOCOUTQ/INQ */ #include <linux/list.h> #include <linux/slab.h> #include <linux/socket.h> #include <net/datalink.h> #include <net/psnap.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/route.h> #include <net/af_ieee802154.h> #include <net/ieee802154_netdev.h> /* Utility function for families */ static struct net_device* ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) { struct net_device *dev = NULL; struct net_device *tmp; __le16 pan_id, short_addr; u8 hwaddr[IEEE802154_ADDR_LEN]; switch (addr->mode) { case IEEE802154_ADDR_LONG: ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); rcu_read_lock(); dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); dev_hold(dev); rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) break; rtnl_lock(); for_each_netdev(net, tmp) { if (tmp->type != ARPHRD_IEEE802154) continue; pan_id = tmp->ieee802154_ptr->pan_id; short_addr = tmp->ieee802154_ptr->short_addr; if (pan_id == addr->pan_id && short_addr == addr->short_addr) { dev = tmp; dev_hold(dev); break; } } rtnl_unlock(); break; default: pr_warn("Unsupported ieee802154 address type: %d\n", addr->mode); break; } return dev; } static int ieee802154_sock_release(struct socket *sock) { struct sock *sk = sock->sk; if (sk) { sock->sk = NULL; sk->sk_prot->close(sk, 0); } return 0; } static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; return sk->sk_prot->sendmsg(sk, msg, len); } static int
ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; if (sk->sk_prot->bind) return sk->sk_prot->bind(sk, uaddr, addr_len); return sock_no_bind(sock, uaddr, addr_len); } static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); return sk->sk_prot->connect(sk, uaddr, addr_len); } static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, unsigned int cmd) { struct ifreq ifr; int ret = -ENOIOCTLCMD; struct net_device *dev; if (get_user_ifreq(&ifr, NULL, arg)) return -EFAULT; ifr.ifr_name[IFNAMSIZ-1] = 0; dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); if (!dev) return -ENODEV; if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); if (!ret && put_user_ifreq(&ifr, arg)) ret = -EFAULT; dev_put(dev); return ret; } static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; switch (cmd) { case SIOCGIFADDR: case SIOCSIFADDR: return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, cmd); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; return sk_ioctl(sk, cmd, (void __user *)arg); } } /* RAW Sockets (802.15.4 created in userspace) */ static HLIST_HEAD(raw_head); static DEFINE_RWLOCK(raw_lock); static int raw_hash(struct sock *sk) { write_lock_bh(&raw_lock); sk_add_node(sk, &raw_head); write_unlock_bh(&raw_lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } static void raw_unhash(struct sock *sk) { write_lock_bh(&raw_lock); if (sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&raw_lock); } static void raw_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) { struct ieee802154_addr addr; struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; int err = 0; struct net_device *dev = NULL; err = ieee802154_sockaddr_check_size(uaddr, len); if (err < 0) return err; uaddr = (struct sockaddr_ieee802154 *)_uaddr; if (uaddr->family != AF_IEEE802154) return -EINVAL; lock_sock(sk); ieee802154_addr_from_sa(&addr, &uaddr->addr); dev = ieee802154_get_dev(sock_net(sk), &addr); if (!dev) { err = -ENODEV; goto out; } sk->sk_bound_dev_if = dev->ifindex; sk_dst_reset(sk); dev_put(dev); out: release_sock(sk); return err; } static int raw_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { return -ENOTSUPP; } static int raw_disconnect(struct sock *sk, int flags) { return 0; } static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; struct sk_buff *skb; int hlen, tlen; int err; if (msg->msg_flags & MSG_OOB) { pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); return -EOPNOTSUPP; } lock_sock(sk); if (!sk->sk_bound_dev_if) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); else dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); release_sock(sk); if (!dev) { pr_debug("no dev\n"); err = -ENXIO; goto out; } mtu = IEEE802154_MTU; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { pr_debug("size = %zu, mtu = %u\n", size, mtu); err = -EMSGSIZE; goto out_dev; } if (!size) { err = 0; goto out_dev; } hlen = 
LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; skb_reserve(skb, hlen); skb_reset_mac_header(skb); skb_reset_network_header(skb); err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) goto out_skb; skb->dev = dev; skb->protocol = htons(ETH_P_IEEE802154); err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); dev_put(dev); return err ?: size; out_skb: kfree_skb(skb); out_dev: dev_put(dev); out: return err; } static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto done; sock_recv_cmsgs(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; done: skb_free_datagram(sk, skb); out: if (err) return err; return copied; } static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return NET_RX_DROP; if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } return NET_RX_SUCCESS; } static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk; read_lock(&raw_lock); sk_for_each(sk, &raw_head) { bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { struct sk_buff *clone; clone = skb_clone(skb, GFP_ATOMIC); if (clone) raw_rcv_skb(sk, clone); } bh_unlock_sock(sk); } read_unlock(&raw_lock); } static int raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } static int raw_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { return -EOPNOTSUPP; } static struct proto ieee802154_raw_prot = { .name = "IEEE-802.15.4-RAW", .owner = THIS_MODULE, .obj_size = sizeof(struct sock), .close = raw_close, .bind = raw_bind, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .hash = raw_hash, .unhash = raw_unhash, .connect = raw_connect, .disconnect = raw_disconnect, .getsockopt = raw_getsockopt, .setsockopt = raw_setsockopt, }; static const struct proto_ops ieee802154_raw_ops = { .family = PF_IEEE802154, .owner = THIS_MODULE, .release = ieee802154_sock_release, .bind = ieee802154_sock_bind, .connect = ieee802154_sock_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, }; /* DGRAM Sockets (802.15.4 dataframes) */ static HLIST_HEAD(dgram_head); static DEFINE_RWLOCK(dgram_lock); struct dgram_sock { struct sock sk; struct ieee802154_addr src_addr; struct ieee802154_addr dst_addr; unsigned int bound:1; unsigned int connected:1; unsigned int want_ack:1; unsigned int want_lqi:1; unsigned int secen:1; unsigned int secen_override:1; unsigned int seclevel:3; unsigned int seclevel_override:1; }; static inline struct dgram_sock *dgram_sk(const struct sock *sk) { return container_of(sk, struct dgram_sock, sk); } static int dgram_hash(struct sock *sk) { write_lock_bh(&dgram_lock); sk_add_node(sk, 
&dgram_head); write_unlock_bh(&dgram_lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } static void dgram_unhash(struct sock *sk) { write_lock_bh(&dgram_lock); if (sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&dgram_lock); } static int dgram_init(struct sock *sk) { struct dgram_sock *ro = dgram_sk(sk); ro->want_ack = 1; ro->want_lqi = 0; return 0; } static void dgram_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; struct ieee802154_addr haddr; struct dgram_sock *ro = dgram_sk(sk); int err = -EINVAL; struct net_device *dev; lock_sock(sk); ro->bound = 0; err = ieee802154_sockaddr_check_size(addr, len); if (err < 0) goto out; if (addr->family != AF_IEEE802154) { err = -EINVAL; goto out; } ieee802154_addr_from_sa(&haddr, &addr->addr); dev = ieee802154_get_dev(sock_net(sk), &haddr); if (!dev) { err = -ENODEV; goto out; } if (dev->type != ARPHRD_IEEE802154) { err = -ENODEV; goto out_put; } ro->src_addr = haddr; ro->bound = 1; err = 0; out_put: dev_put(dev); out: release_sock(sk); return err; } static int dgram_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); return 0; } case SIOCINQ: { struct sk_buff *skb; *karg = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) { /* We will only return the amount * of this packet since that is all * that will be read. */ *karg = skb->len - ieee802154_hdr_length(skb); } spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; } } return -ENOIOCTLCMD; } /* FIXME: autobind */ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; struct dgram_sock *ro = dgram_sk(sk); int err = 0; err = ieee802154_sockaddr_check_size(addr, len); if (err < 0) return err; if (addr->family != AF_IEEE802154) return -EINVAL; lock_sock(sk); if (!ro->bound) { err = -ENETUNREACH; goto out; } ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); ro->connected = 1; out: release_sock(sk); return err; } static int dgram_disconnect(struct sock *sk, int flags) { struct dgram_sock *ro = dgram_sk(sk); lock_sock(sk); ro->connected = 0; release_sock(sk); return 0; } static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; struct sk_buff *skb; struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); struct ieee802154_addr dst_addr; DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name); int hlen, tlen; int err; if (msg->msg_flags & MSG_OOB) { pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); return -EOPNOTSUPP; } if (msg->msg_name) { if (ro->connected) return -EISCONN; if (msg->msg_namelen < IEEE802154_MIN_NAMELEN) return -EINVAL; err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen); if (err < 0) return err; ieee802154_addr_from_sa(&dst_addr, &daddr->addr); } else { if (!ro->connected) return -EDESTADDRREQ; dst_addr = ro->dst_addr; } if (!ro->bound) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); else dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); if (!dev) { pr_debug("no dev\n"); err = -ENXIO; goto out; } mtu = IEEE802154_MTU; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { pr_debug("size = %zu, mtu = %u\n", size, mtu); err = -EMSGSIZE; goto out_dev; 
} hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; skb_reserve(skb, hlen); skb_reset_network_header(skb); cb = mac_cb_init(skb); cb->type = IEEE802154_FC_TYPE_DATA; cb->ackreq = ro->want_ack; cb->secen = ro->secen; cb->secen_override = ro->secen_override; cb->seclevel = ro->seclevel; cb->seclevel_override = ro->seclevel_override; err = wpan_dev_hard_header(skb, dev, &dst_addr, ro->bound ? &ro->src_addr : NULL, size); if (err < 0) goto out_skb; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) goto out_skb; skb->dev = dev; skb->protocol = htons(ETH_P_IEEE802154); err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); dev_put(dev); return err ?: size; out_skb: kfree_skb(skb); out_dev: dev_put(dev); out: return err; } static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; struct dgram_sock *ro = dgram_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } /* FIXME: skip headers if necessary ?! */ err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto done; sock_recv_cmsgs(msg, sk, skb); if (saddr) { /* Clear the implicit padding in struct sockaddr_ieee802154 * (16 bits between 'family' and 'addr') and in struct * ieee802154_addr_sa (16 bits at the end of the structure). */ memset(saddr, 0, sizeof(*saddr)); saddr->family = AF_IEEE802154; ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); *addr_len = sizeof(*saddr); } if (ro->want_lqi) { err = put_cmsg(msg, SOL_IEEE802154, WPAN_WANTLQI, sizeof(uint8_t), &(mac_cb(skb)->lqi)); if (err) goto done; } if (flags & MSG_TRUNC) copied = skb->len; done: skb_free_datagram(sk, skb); out: if (err) return err; return copied; } static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) { skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return NET_RX_DROP; if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } return NET_RX_SUCCESS; } static inline bool ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, struct dgram_sock *ro) { if (!ro->bound) return true; if (ro->src_addr.mode == IEEE802154_ADDR_LONG && hw_addr == ro->src_addr.extended_addr) return true; if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && pan_id == ro->src_addr.pan_id && short_addr == ro->src_addr.short_addr) return true; return false; } static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; int ret = NET_RX_SUCCESS; __le16 pan_id, short_addr; __le64 hw_addr; /* Data frame processing */ BUG_ON(dev->type != ARPHRD_IEEE802154); pan_id = dev->ieee802154_ptr->pan_id; short_addr = dev->ieee802154_ptr->short_addr; hw_addr = dev->ieee802154_ptr->extended_addr; read_lock(&dgram_lock); sk_for_each(sk, &dgram_head) { if (ieee802154_match_sock(hw_addr, pan_id, short_addr, dgram_sk(sk))) { if (prev) { struct sk_buff *clone; clone = skb_clone(skb, GFP_ATOMIC); if (clone) dgram_rcv_skb(prev, clone); } prev = sk; } } if (prev) { dgram_rcv_skb(prev, skb); } else { kfree_skb(skb); ret = NET_RX_DROP; } read_unlock(&dgram_lock); return ret; } static int dgram_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct 
dgram_sock *ro = dgram_sk(sk); int val, len; if (level != SOL_IEEE802154) return -EOPNOTSUPP; if (get_user(len, optlen)) return -EFAULT; len = min_t(unsigned int, len, sizeof(int)); switch (optname) { case WPAN_WANTACK: val = ro->want_ack; break; case WPAN_WANTLQI: val = ro->want_lqi; break; case WPAN_SECURITY: if (!ro->secen_override) val = WPAN_SECURITY_DEFAULT; else if (ro->secen) val = WPAN_SECURITY_ON; else val = WPAN_SECURITY_OFF; break; case WPAN_SECURITY_LEVEL: if (!ro->seclevel_override) val = WPAN_SECURITY_LEVEL_DEFAULT; else val = ro->seclevel; break; default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int dgram_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); struct net *net = sock_net(sk); int val; int err = 0; if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; lock_sock(sk); switch (optname) { case WPAN_WANTACK: ro->want_ack = !!val; break; case WPAN_WANTLQI: ro->want_lqi = !!val; break; case WPAN_SECURITY: if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_RAW)) { err = -EPERM; break; } switch (val) { case WPAN_SECURITY_DEFAULT: ro->secen_override = 0; break; case WPAN_SECURITY_ON: ro->secen_override = 1; ro->secen = 1; break; case WPAN_SECURITY_OFF: ro->secen_override = 1; ro->secen = 0; break; default: err = -EINVAL; break; } break; case WPAN_SECURITY_LEVEL: if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_RAW)) { err = -EPERM; break; } if (val < WPAN_SECURITY_LEVEL_DEFAULT || val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { err = -EINVAL; } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { ro->seclevel_override = 0; } else { ro->seclevel_override = 1; ro->seclevel = val; } break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static struct proto ieee802154_dgram_prot = { .name = "IEEE-802.15.4-MAC", .owner = THIS_MODULE, .obj_size = sizeof(struct dgram_sock), .init = dgram_init, .close = dgram_close, .bind = dgram_bind, .sendmsg = dgram_sendmsg, .recvmsg = dgram_recvmsg, .hash = dgram_hash, .unhash = dgram_unhash, .connect = dgram_connect, .disconnect = dgram_disconnect, .ioctl = dgram_ioctl, .getsockopt = dgram_getsockopt, .setsockopt = dgram_setsockopt, }; static const struct proto_ops ieee802154_dgram_ops = { .family = PF_IEEE802154, .owner = THIS_MODULE, .release = ieee802154_sock_release, .bind = ieee802154_sock_bind, .connect = ieee802154_sock_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, }; static void ieee802154_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } /* Create a socket. Initialise the socket, blank the addresses * set the state. 
*/ static int ieee802154_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; int rc; struct proto *proto; const struct proto_ops *ops; if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; switch (sock->type) { case SOCK_RAW: rc = -EPERM; if (!capable(CAP_NET_RAW)) goto out; proto = &ieee802154_raw_prot; ops = &ieee802154_raw_ops; break; case SOCK_DGRAM: proto = &ieee802154_dgram_prot; ops = &ieee802154_dgram_ops; break; default: rc = -ESOCKTNOSUPPORT; goto out; } rc = -ENOMEM; sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern); if (!sk) goto out; rc = 0; sock->ops = ops; sock_init_data(sock, sk); sk->sk_destruct = ieee802154_sock_destruct; sk->sk_family = PF_IEEE802154; /* Checksums on by default */ sock_set_flag(sk, SOCK_ZAPPED); if (sk->sk_prot->hash) { rc = sk->sk_prot->hash(sk); if (rc) { sk_common_release(sk); goto out; } } if (sk->sk_prot->init) { rc = sk->sk_prot->init(sk); if (rc) sk_common_release(sk); } out: return rc; } static const struct net_proto_family ieee802154_family_ops = { .family = PF_IEEE802154, .create = ieee802154_create, .owner = THIS_MODULE, }; static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { if (!netif_running(dev)) goto drop; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); #ifdef DEBUG print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); #endif if (!net_eq(dev_net(dev), &init_net)) goto drop; ieee802154_raw_deliver(dev, skb); if (dev->type != ARPHRD_IEEE802154) goto drop; if (skb->pkt_type != PACKET_OTHERHOST) return ieee802154_dgram_deliver(dev, skb); drop: kfree_skb(skb); return NET_RX_DROP; } static struct packet_type ieee802154_packet_type = { .type = htons(ETH_P_IEEE802154), .func = ieee802154_rcv, }; static int __init af_ieee802154_init(void) { int rc; rc = proto_register(&ieee802154_raw_prot, 1); if (rc) goto out; rc = proto_register(&ieee802154_dgram_prot, 1); if (rc) goto err_dgram; /* Tell SOCKET that we are alive */ rc = sock_register(&ieee802154_family_ops); if (rc) goto err_sock; dev_add_pack(&ieee802154_packet_type); rc = 0; goto out; err_sock: proto_unregister(&ieee802154_dgram_prot); err_dgram: proto_unregister(&ieee802154_raw_prot); out: return rc; } static void __exit af_ieee802154_remove(void) { dev_remove_pack(&ieee802154_packet_type); sock_unregister(PF_IEEE802154); proto_unregister(&ieee802154_dgram_prot); proto_unregister(&ieee802154_raw_prot); } module_init(af_ieee802154_init); module_exit(af_ieee802154_remove); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IEEE 802.15.4 socket interface"); MODULE_ALIAS_NETPROTO(PF_IEEE802154);
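The address family registered above can be exercised from user space. The sketch below is illustrative only: it assumes a kernel built with this AF_IEEE802154 support and at least one ARPHRD_IEEE802154 interface, the fallback AF_IEEE802154 value mirrors linux/socket.h, and CAP_NET_RAW is needed because ieee802154_create() refuses SOCK_RAW without it.

/* sniff_802154.c - hedged user-space example, not part of the kernel sources above */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>

#ifndef AF_IEEE802154
#define AF_IEEE802154 36	/* value taken from linux/socket.h */
#endif

int main(void)
{
	unsigned char frame[128];	/* IEEE802154_MTU is 127 octets */
	ssize_t len;
	int fd;

	/* SOCK_RAW sockets receive every frame the device sees
	 * (see ieee802154_raw_deliver() above); CAP_NET_RAW is required. */
	fd = socket(AF_IEEE802154, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket(AF_IEEE802154, SOCK_RAW)");
		return EXIT_FAILURE;
	}

	for (int i = 0; i < 10; i++) {
		len = recv(fd, frame, sizeof(frame), 0);
		if (len < 0) {
			perror("recv");
			break;
		}
		printf("frame of %zd octets, first bytes %02x %02x\n",
		       len, frame[0], len > 1 ? frame[1] : 0);
	}

	close(fd);
	return 0;
}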
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ #include "rxe.h" #define RXE_POOL_TIMEOUT (200) #define RXE_POOL_ALIGN (16) static const struct rxe_type_info { const char *name; size_t size; size_t elem_offset; void (*cleanup)(struct rxe_pool_elem *elem); u32 min_index; u32 max_index; u32 max_elem; } rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "uc", .size = sizeof(struct rxe_ucontext), .elem_offset = offsetof(struct rxe_ucontext, elem), .min_index = 1, .max_index = RXE_MAX_UCONTEXT, .max_elem = RXE_MAX_UCONTEXT, }, [RXE_TYPE_PD] = { .name = "pd", .size = sizeof(struct rxe_pd), .elem_offset = offsetof(struct rxe_pd, elem), .min_index = 1, .max_index = RXE_MAX_PD, .max_elem = RXE_MAX_PD, }, [RXE_TYPE_AH] = { .name = "ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, elem), .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, .max_elem = RXE_MAX_AH, }, [RXE_TYPE_SRQ] = { .name = "srq", .size = sizeof(struct rxe_srq), .elem_offset = offsetof(struct rxe_srq, elem), .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, .max_elem = RXE_MAX_SRQ, }, [RXE_TYPE_QP] = { .name = "qp", .size = sizeof(struct rxe_qp), .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, .max_elem = RXE_MAX_QP, }, [RXE_TYPE_CQ] = { .name = "cq", .size = sizeof(struct rxe_cq), .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, .min_index = 1, .max_index = RXE_MAX_CQ, .max_elem = RXE_MAX_CQ, }, [RXE_TYPE_MR] = { .name = "mr", .size = sizeof(struct rxe_mr), .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, .max_elem = RXE_MAX_MR, }, [RXE_TYPE_MW] = { .name = "mw", .size = sizeof(struct rxe_mw), .elem_offset = offsetof(struct rxe_mw, elem), .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, .max_elem = RXE_MAX_MW, }, }; void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, enum rxe_elem_type type) { const struct rxe_type_info *info = &rxe_type_info[type]; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; pool->name = info->name; pool->type = type; pool->max_elem = info->max_elem; pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); pool->elem_offset = info->elem_offset; pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); xa_init_flags(&pool->xa,
XA_FLAGS_ALLOC); pool->limit.min = info->min_index; pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { WARN_ON(!xa_empty(&pool->xa)); } int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, bool sleepable) { int err = -EINVAL; gfp_t gfp_flags; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto err_cnt; elem->pool = pool; elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); init_completion(&elem->complete); /* AH objects are unique in that the create_ah verb * can be called in atomic context. If the create_ah * call is not sleepable use GFP_ATOMIC. */ gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC; if (sleepable) might_sleep(); err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, &pool->next, gfp_flags); if (err < 0) goto err_cnt; return 0; err_cnt: atomic_dec(&pool->num_elem); return err; } void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { struct rxe_pool_elem *elem; struct xarray *xa = &pool->xa; void *obj; rcu_read_lock(); elem = xa_load(xa, index); if (elem && kref_get_unless_zero(&elem->ref_cnt)) obj = elem->obj; else obj = NULL; rcu_read_unlock(); return obj; } static void rxe_elem_release(struct kref *kref) { struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); complete(&elem->complete); } int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { struct rxe_pool *pool = elem->pool; struct xarray *xa = &pool->xa; static int timeout = RXE_POOL_TIMEOUT; int ret, err = 0; void *xa_ret; if (sleepable) might_sleep(); /* erase xarray entry to prevent looking up * the pool elem from its index */ xa_ret = xa_erase(xa, elem->index); WARN_ON(xa_err(xa_ret)); /* if this is the last call to rxe_put complete the * object. It is safe to touch obj->elem after this since * it is freed below */ __rxe_put(elem); /* wait until all references to the object have been * dropped before final object specific cleanup and * return to rdma-core */ if (sleepable) { if (!completion_done(&elem->complete) && timeout) { ret = wait_for_completion_timeout(&elem->complete, timeout); /* Shouldn't happen. There are still references to * the object but, rather than deadlock, free the * object or pass back to rdma-core. */ if (WARN_ON(!ret)) err = -EINVAL; } } else { unsigned long until = jiffies + timeout; /* AH objects are unique in that the destroy_ah verb * can be called in atomic context. This delay * replaces the wait_for_completion call above * when the destroy_ah call is not sleepable */ while (!completion_done(&elem->complete) && time_before(jiffies, until)) mdelay(1); if (WARN_ON(!completion_done(&elem->complete))) err = -EINVAL; } if (pool->cleanup) pool->cleanup(elem); atomic_dec(&pool->num_elem); return err; } int __rxe_get(struct rxe_pool_elem *elem) { return kref_get_unless_zero(&elem->ref_cnt); } int __rxe_put(struct rxe_pool_elem *elem) { return kref_put(&elem->ref_cnt, rxe_elem_release); } void __rxe_finalize(struct rxe_pool_elem *elem) { void *xa_ret; xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL); WARN_ON(xa_err(xa_ret)); }
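In the rxe driver the helpers above are normally reached through thin wrappers in rxe_pool.h; the hedged sketch below calls them directly to show one element's lifecycle. The rxe->pd_pool member and the kzalloc()-based allocation are illustrative assumptions (rdma-core normally allocates the object), not something this file guarantees.

/* Hedged sketch, not part of rxe_pool.c: lifecycle of one pool element. */
static int rxe_pool_example(struct rxe_dev *rxe)
{
	struct rxe_pool *pool = &rxe->pd_pool;		/* assumed member name */
	struct rxe_pd *pd, *found;
	int err;

	/* in the real driver rdma-core allocates the object; kzalloc() here
	 * only keeps the sketch self-contained */
	pd = kzalloc(pool->elem_size, GFP_KERNEL);
	if (!pd)
		return -ENOMEM;

	/* reserve an index in the xarray; the entry still points at NULL */
	err = __rxe_add_to_pool(pool, &pd->elem, true);
	if (err) {
		kfree(pd);
		return err;
	}

	/* publish the element so rxe_pool_get_index() can find it */
	__rxe_finalize(&pd->elem);

	/* lookups take a reference that must be dropped with __rxe_put() */
	found = rxe_pool_get_index(pool, pd->elem.index);
	if (found)
		__rxe_put(&found->elem);

	/* drop the initial reference, wait for stragglers, run ->cleanup */
	err = __rxe_cleanup(&pd->elem, true);
	kfree(pd);
	return err;
}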
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MNT_IDMAPPING_H #define _LINUX_MNT_IDMAPPING_H #include <linux/types.h> #include <linux/uidgid.h> struct mnt_idmap; struct user_namespace; extern struct mnt_idmap nop_mnt_idmap; extern struct mnt_idmap invalid_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { uid_t val; } vfsuid_t; typedef struct { gid_t val; } vfsgid_t; static_assert(sizeof(vfsuid_t) == sizeof(kuid_t)); static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { return uid.val; } static inline gid_t __vfsgid_val(vfsgid_t gid) { return gid.val; } #else static inline uid_t __vfsuid_val(vfsuid_t uid) { return 0; } static inline gid_t __vfsgid_val(vfsgid_t gid) { return 0; } #endif static inline bool vfsuid_valid(vfsuid_t uid) { return __vfsuid_val(uid) != (uid_t)-1; } static inline bool vfsgid_valid(vfsgid_t gid) { return __vfsgid_val(gid) != (gid_t)-1; } static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right) { return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right); } static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) { return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right); } /** * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value * @vfsuid: the vfsuid to compare * @kuid: the kuid to compare * * Check whether @vfsuid and @kuid have the same values. * * Return: true if @vfsuid and @kuid have the same value, false if not. * Comparison between two invalid uids returns false. */ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) { return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid); } /** * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value * @vfsgid: the vfsgid to compare * @kgid: the kgid to compare * * Check whether @vfsgid and @kgid have the same values. * * Return: true if @vfsgid and @kgid have the same value, false if not. * Comparison between two invalid gids returns false. */ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) { return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } /* * vfs{g,u}ids are created from k{g,u}ids. * We don't allow them to be created from regular {u,g}id.
*/ #define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) } #define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) } #define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID) #define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID) /* * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare * whether the mapped value is identical to value of a k{g,u}id. */ #define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) } #define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) } int vfsgid_in_group_p(vfsgid_t vfsgid); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); vfsgid_t make_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kgid_t kgid); kuid_t from_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid); kgid_t from_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid); /** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsuid: vfsuid to be mapped * * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this * function to check whether the filesystem idmapping has a mapping for * @vfsuid. * * Return: true if @vfsuid has a mapping in the filesystem, false if not. */ static inline bool vfsuid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid) { return uid_valid(from_vfsuid(idmap, fs_userns, vfsuid)); } static inline bool vfsuid_has_mapping(struct user_namespace *userns, vfsuid_t vfsuid) { return from_kuid(userns, AS_KUIDT(vfsuid)) != (uid_t)-1; } /** * vfsuid_into_kuid - convert vfsuid into kuid * @vfsuid: the vfsuid to convert * * This can be used when a vfsuid is committed as a kuid. * * Return: a kuid with the value of @vfsuid */ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) { return AS_KUIDT(vfsuid); } /** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsgid: vfsgid to be mapped * * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this * function to check whether the filesystem idmapping has a mapping for * @vfsgid. * * Return: true if @vfsgid has a mapping in the filesystem, false if not. */ static inline bool vfsgid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid) { return gid_valid(from_vfsgid(idmap, fs_userns, vfsgid)); } static inline bool vfsgid_has_mapping(struct user_namespace *userns, vfsgid_t vfsgid) { return from_kgid(userns, AS_KGIDT(vfsgid)) != (gid_t)-1; } /** * vfsgid_into_kgid - convert vfsgid into kgid * @vfsgid: the vfsgid to convert * * This can be used when a vfsgid is committed as a kgid. * * Return: a kgid with the value of @vfsgid */ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) { return AS_KGIDT(vfsgid); } /** * mapped_fsuid - return caller's fsuid mapped according to an idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of * a newly allocated inode triggered by a creation event such as mkdir or * O_CREAT. Other examples include the allocation of quotas for a specific * user. * * Return: the caller's current fsuid mapped up according to @idmap. 
*/ static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { return from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(current_fsuid())); } /** * mapped_fsgid - return caller's fsgid mapped according to an idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of * a newly allocated inode triggered by a creation event such as mkdir or * O_CREAT. Other examples include the allocation of quotas for a specific * user. * * Return: the caller's current fsgid mapped up according to @idmap. */ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } #endif /* _LINUX_MNT_IDMAPPING_H */
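A minimal sketch of how a filesystem-facing caller might combine these helpers for an ownership check; the function name is hypothetical and current_fsuid() comes from linux/cred.h, not this header.

/* Hedged sketch, not part of mnt_idmapping.h. */
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>

static inline bool example_caller_owns_inode(struct mnt_idmap *idmap,
					     struct user_namespace *fs_userns,
					     kuid_t i_uid)
{
	/* map the raw filesystem kuid up through the mount's idmapping */
	vfsuid_t vfsuid = make_vfsuid(idmap, fs_userns, i_uid);

	/* an unmapped (invalid) vfsuid never compares equal, so a missing
	 * mapping fails the ownership test instead of granting access */
	return vfsuid_eq_kuid(vfsuid, current_fsuid());
}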
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/svc.c * * This is the central lockd service. * * FIXME: Separate the lockd NFS server functionality from the lockd NFS * client functionality. Oh why didn't Sun create two separate * services in the first place?
* * Authors: Olaf Kirch (okir@monad.swb.de) * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/module.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/moduleparam.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/uio.h> #include <linux/smp.h> #include <linux/mutex.h> #include <linux/freezer.h> #include <linux/inetdevice.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svc_xprt.h> #include <net/ip.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <linux/lockd/lockd.h> #include <linux/nfs.h> #include "netns.h" #include "procfs.h" #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) static struct svc_program nlmsvc_program; const struct nlmsvc_binding *nlmsvc_ops; EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct svc_serv *nlmsvc_serv; static void nlmsvc_request_retry(struct timer_list *tl) { svc_wake_up(nlmsvc_serv); } DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry); unsigned int lockd_net_id; /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 */ static unsigned long nlm_grace_period; unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; static int nlm_udpport, nlm_tcpport; /* RLIM_NOFILE defaults to 1024. That seems like a reasonable default here. */ static unsigned int nlm_max_connections = 1024; /* * Constants needed for the sysctl interface. */ static const unsigned long nlm_grace_period_min = 0; static const unsigned long nlm_grace_period_max = 240; static const unsigned long nlm_timeout_min = 3; static const unsigned long nlm_timeout_max = 20; #ifdef CONFIG_SYSCTL static const int nlm_port_min = 0, nlm_port_max = 65535; static struct ctl_table_header * nlm_sysctl_table; #endif static unsigned long get_lockd_grace_period(void) { /* Note: nlm_timeout should always be nonzero */ if (nlm_grace_period) return roundup(nlm_grace_period, nlm_timeout) * HZ; else return nlm_timeout * 5 * HZ; } static void grace_ender(struct work_struct *grace) { struct delayed_work *dwork = to_delayed_work(grace); struct lockd_net *ln = container_of(dwork, struct lockd_net, grace_period_end); locks_end_grace(&ln->lockd_manager); } static void set_grace_period(struct net *net) { unsigned long grace_period = get_lockd_grace_period(); struct lockd_net *ln = net_generic(net, lockd_net_id); locks_start_grace(net, &ln->lockd_manager); cancel_delayed_work_sync(&ln->grace_period_end); schedule_delayed_work(&ln->grace_period_end, grace_period); } /* * This is the lockd kernel thread */ static int lockd(void *vrqstp) { struct svc_rqst *rqstp = vrqstp; struct net *net = &init_net; struct lockd_net *ln = net_generic(net, lockd_net_id); svc_thread_init_status(rqstp, 0); /* try_to_freeze() is called from svc_recv() */ set_freezable(); dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. 
*/ while (!svc_thread_should_stop(rqstp)) { /* update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nlm_max_connections; nlmsvc_retry_blocked(rqstp); svc_recv(rqstp); } if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); dprintk("lockd_down: service stopped\n"); svc_exit_thread(rqstp); return 0; } static int create_lockd_listener(struct svc_serv *serv, const char *name, struct net *net, const int family, const unsigned short port, const struct cred *cred) { struct svc_xprt *xprt; xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) return svc_xprt_create(serv, name, net, family, port, SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } static int create_lockd_family(struct svc_serv *serv, struct net *net, const int family, const struct cred *cred) { int err; err = create_lockd_listener(serv, "udp", net, family, nlm_udpport, cred); if (err < 0) return err; return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport, cred); } /* * Ensure there are active UDP and TCP listeners for lockd. * * Even if we have only TCP NFS mounts and/or TCP NFSDs, some * local services (such as rpc.statd) still require UDP, and * some NFS servers do not yet support NLM over TCP. * * Returns zero if all listeners are available; otherwise a * negative errno value is returned. */ static int make_socks(struct svc_serv *serv, struct net *net, const struct cred *cred) { static int warned; int err; err = create_lockd_family(serv, net, PF_INET, cred); if (err < 0) goto out_err; err = create_lockd_family(serv, net, PF_INET6, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; warned = 0; return 0; out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); svc_xprt_destroy_all(serv, net); svc_rpcb_cleanup(serv, net); return err; } static int lockd_up_net(struct svc_serv *serv, struct net *net, const struct cred *cred) { struct lockd_net *ln = net_generic(net, lockd_net_id); int error; if (ln->nlmsvc_users++) return 0; error = svc_bind(serv, net); if (error) goto err_bind; error = make_socks(serv, net, cred); if (error < 0) goto err_bind; set_grace_period(net); dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum); return 0; err_bind: ln->nlmsvc_users--; return error; } static void lockd_down_net(struct svc_serv *serv, struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); svc_xprt_destroy_all(serv, net); svc_rpcb_cleanup(serv, net); } } else { pr_err("%s: no users! 
net=%x\n", __func__, net->ns.inum); BUG(); } } static int lockd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; if (event != NETDEV_DOWN) goto out; if (nlmsvc_serv) { dprintk("lockd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin); } out: return NOTIFY_DONE; } static struct notifier_block lockd_inetaddr_notifier = { .notifier_call = lockd_inetaddr_event, }; #if IS_ENABLED(CONFIG_IPV6) static int lockd_inet6addr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; if (event != NETDEV_DOWN) goto out; if (nlmsvc_serv) { dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6); } out: return NOTIFY_DONE; } static struct notifier_block lockd_inet6addr_notifier = { .notifier_call = lockd_inet6addr_event, }; #endif static int lockd_get(void) { struct svc_serv *serv; int error; if (nlmsvc_serv) { nlmsvc_users++; return 0; } /* * Sanity check: if there's no pid, * we should be the first user ... */ if (nlmsvc_users) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return -ENOMEM; } serv->sv_maxconn = nlm_max_connections; error = svc_set_num_threads(serv, NULL, 1); if (error < 0) { svc_destroy(&serv); return error; } nlmsvc_serv = serv; register_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&lockd_inet6addr_notifier); #endif dprintk("lockd_up: service created\n"); nlmsvc_users++; return 0; } static void lockd_put(void) { if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n")) return; if (--nlmsvc_users) return; unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif svc_set_num_threads(nlmsvc_serv, NULL, 0); timer_delete_sync(&nlmsvc_retry); svc_destroy(&nlmsvc_serv); dprintk("lockd_down: service destroyed\n"); } /* * Bring up the lockd process if it's not already up. */ int lockd_up(struct net *net, const struct cred *cred) { int error; mutex_lock(&nlmsvc_mutex); error = lockd_get(); if (error) goto err; error = lockd_up_net(nlmsvc_serv, net, cred); if (error < 0) { lockd_put(); goto err; } err: mutex_unlock(&nlmsvc_mutex); return error; } EXPORT_SYMBOL_GPL(lockd_up); /* * Decrement the user count and bring down lockd if we're the last. */ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); lockd_down_net(nlmsvc_serv, net); lockd_put(); mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL_GPL(lockd_down); #ifdef CONFIG_SYSCTL /* * Sysctl parameters (same as module parameters, different interface). 
*/ static struct ctl_table nlm_sysctls[] = { { .procname = "nlm_grace_period", .data = &nlm_grace_period, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = (unsigned long *) &nlm_grace_period_min, .extra2 = (unsigned long *) &nlm_grace_period_max, }, { .procname = "nlm_timeout", .data = &nlm_timeout, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = (unsigned long *) &nlm_timeout_min, .extra2 = (unsigned long *) &nlm_timeout_max, }, { .procname = "nlm_udpport", .data = &nlm_udpport, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (int *) &nlm_port_min, .extra2 = (int *) &nlm_port_max, }, { .procname = "nlm_tcpport", .data = &nlm_tcpport, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (int *) &nlm_port_min, .extra2 = (int *) &nlm_port_max, }, { .procname = "nsm_use_hostnames", .data = &nsm_use_hostnames, .maxlen = sizeof(bool), .mode = 0644, .proc_handler = proc_dobool, }, { .procname = "nsm_local_state", .data = &nsm_local_state, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; #endif /* CONFIG_SYSCTL */ /* * Module (and sysfs) parameters. */ #define param_set_min_max(name, type, which_strtol, min, max) \ static int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ char *endp; \ __typeof__(type) num = which_strtol(val, &endp, 0); \ if (endp == val || *endp || num < (min) || num > (max)) \ return -EINVAL; \ *((type *) kp->arg) = num; \ return 0; \ } static inline int is_callback(u32 proc) { return proc == NLMPROC_GRANTED || proc == NLMPROC_GRANTED_MSG || proc == NLMPROC_TEST_RES || proc == NLMPROC_LOCK_RES || proc == NLMPROC_CANCEL_RES || proc == NLMPROC_UNLOCK_RES || proc == NLMPROC_NSM_NOTIFY; } static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp) { rqstp->rq_client = NULL; switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: case RPC_AUTH_UNIX: rqstp->rq_auth_stat = rpc_auth_ok; if (rqstp->rq_proc == 0) return SVC_OK; if (is_callback(rqstp->rq_proc)) { /* Leave it to individual procedures to * call nlmsvc_lookup_host(rqstp) */ return SVC_OK; } return svc_set_client(rqstp); } rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } param_set_min_max(port, int, simple_strtol, 0, 65535) param_set_min_max(grace_period, unsigned long, simple_strtoul, nlm_grace_period_min, nlm_grace_period_max) param_set_min_max(timeout, unsigned long, simple_strtoul, nlm_timeout_min, nlm_timeout_max) MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_DESCRIPTION("NFS file locking service version " LOCKD_VERSION "."); MODULE_LICENSE("GPL"); module_param_call(nlm_grace_period, param_set_grace_period, param_get_ulong, &nlm_grace_period, 0644); module_param_call(nlm_timeout, param_set_timeout, param_get_ulong, &nlm_timeout, 0644); module_param_call(nlm_udpport, param_set_port, param_get_int, &nlm_udpport, 0644); module_param_call(nlm_tcpport, param_set_port, param_get_int, &nlm_tcpport, 0644); module_param(nsm_use_hostnames, bool, 0644); module_param(nlm_max_connections, uint, 0644); static int lockd_init_net(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; INIT_LIST_HEAD(&ln->nsm_handles); return 0; } static void lockd_exit_net(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); 
WARN_ONCE(!list_empty(&ln->lockd_manager.list), "net %x %s: lockd_manager.list is not empty\n", net->ns.inum, __func__); WARN_ONCE(!list_empty(&ln->nsm_handles), "net %x %s: nsm_handles list is not empty\n", net->ns.inum, __func__); WARN_ONCE(delayed_work_pending(&ln->grace_period_end), "net %x %s: grace_period_end was not cancelled\n", net->ns.inum, __func__); } static struct pernet_operations lockd_net_ops = { .init = lockd_init_net, .exit = lockd_exit_net, .id = &lockd_net_id, .size = sizeof(struct lockd_net), }; /* * Initialising and terminating the module. */ static int __init init_nlm(void) { int err; #ifdef CONFIG_SYSCTL err = -ENOMEM; nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls); if (nlm_sysctl_table == NULL) goto err_sysctl; #endif err = register_pernet_subsys(&lockd_net_ops); if (err) goto err_pernet; err = lockd_create_procfs(); if (err) goto err_procfs; return 0; err_procfs: unregister_pernet_subsys(&lockd_net_ops); err_pernet: #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); err_sysctl: #endif return err; } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); lockd_remove_procfs(); unregister_pernet_subsys(&lockd_net_ops); #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); #endif } module_init(init_nlm); module_exit(exit_nlm); /** * nlmsvc_dispatch - Process an NLM Request * @rqstp: incoming request * * Return values: * %0: Processing complete; do not send a Reply * %1: Processing complete; send Reply in rqstp->rq_res */ static int nlmsvc_dispatch(struct svc_rqst *rqstp) { const struct svc_procedure *procp = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; *statp = procp->pc_func(rqstp); if (*statp == rpc_drop_reply) return 0; if (*statp != rpc_success) return 1; if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; return 1; out_decode_err: *statp = rpc_garbage_args; return 1; out_encode_err: *statp = rpc_system_err; return 1; } /* * Define NLM program and procedures */ static DEFINE_PER_CPU_ALIGNED(unsigned long, nlmsvc_version1_count[17]); static const struct svc_version nlmsvc_version1 = { .vs_vers = 1, .vs_nproc = 17, .vs_proc = nlmsvc_procedures, .vs_count = nlmsvc_version1_count, .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; static DEFINE_PER_CPU_ALIGNED(unsigned long, nlmsvc_version3_count[ARRAY_SIZE(nlmsvc_procedures)]); static const struct svc_version nlmsvc_version3 = { .vs_vers = 3, .vs_nproc = ARRAY_SIZE(nlmsvc_procedures), .vs_proc = nlmsvc_procedures, .vs_count = nlmsvc_version3_count, .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; #ifdef CONFIG_LOCKD_V4 static DEFINE_PER_CPU_ALIGNED(unsigned long, nlmsvc_version4_count[ARRAY_SIZE(nlmsvc_procedures4)]); static const struct svc_version nlmsvc_version4 = { .vs_vers = 4, .vs_nproc = ARRAY_SIZE(nlmsvc_procedures4), .vs_proc = nlmsvc_procedures4, .vs_count = nlmsvc_version4_count, .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; #endif static const struct svc_version *nlmsvc_version[] = { [1] = &nlmsvc_version1, [3] = &nlmsvc_version3, #ifdef CONFIG_LOCKD_V4 [4] = &nlmsvc_version4, #endif }; #define NLM_NRVERS ARRAY_SIZE(nlmsvc_version) static struct svc_program nlmsvc_program = { .pg_prog = NLM_PROGRAM, /* program number */ .pg_nvers = NLM_NRVERS, /* number of entries in nlmsvc_version */ .pg_vers = nlmsvc_version, /* version table */ .pg_name = "lockd", /* service name */ 
.pg_class = "nfsd", /* share authentication with nfsd */ .pg_authenticate = &lockd_authenticate, /* export authentication */ .pg_init_request = svc_generic_init_request, .pg_rpcbind_set = svc_generic_rpcbind_set, };
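The exported lockd_up()/lockd_down() pair is reference counted per caller and per network namespace; the first lockd_up() starts the kernel thread and the last lockd_down() tears it down. A hedged sketch of the expected pairing, with a hypothetical caller name:

/* Hedged sketch, not part of svc.c: pairing the exported entry points. */
static int example_use_lockd(struct net *net, const struct cred *cred)
{
	int error;

	/* first caller in this netns creates listeners and starts lockd */
	error = lockd_up(net, cred);
	if (error < 0)
		return error;

	/* ... NLM locking traffic can now be served for this namespace ... */

	/* every successful lockd_up() needs exactly one lockd_down() */
	lockd_down(net);
	return 0;
}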
// SPDX-License-Identifier: GPL-2.0+ /* * NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * * Written by Amagai Yoshiji. * Revised by Ryusuke Konishi. * */ #include <linux/types.h> #include <linux/buffer_head.h> #include "nilfs.h" #include "mdt.h" #include "alloc.h" #include "ifile.h" #include "cpfile.h" /** * struct nilfs_ifile_info - on-memory private data of ifile * @mi: on-memory private data of metadata file * @palloc_cache: persistent object allocator cache of ifile */ struct nilfs_ifile_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; }; static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) { return (struct nilfs_ifile_info *)NILFS_MDT(ifile); } /** * nilfs_ifile_create_inode - create a new disk inode * @ifile: ifile inode * @out_ino: pointer to a variable to store inode number * @out_bh: buffer_head contains newly allocated disk inode * * Return Value: On success, 0 is returned and the newly allocated inode * number is stored in the place pointed by @ino, and buffer_head pointer * that contains newly allocated disk inode structure is stored in the * place pointed by @out_bh * On error, one of the following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOSPC - No inode left. */ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct buffer_head **out_bh) { struct nilfs_palloc_req req; int ret; req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); if (ret < 0) nilfs_palloc_abort_alloc_entry(ifile, &req); } if (ret < 0) { brelse(req.pr_entry_bh); return ret; } nilfs_palloc_commit_alloc_entry(ifile, &req); mark_buffer_dirty(req.pr_entry_bh); nilfs_mdt_mark_dirty(ifile); *out_ino = (ino_t)req.pr_entry_nr; *out_bh = req.pr_entry_bh; return 0; } /** * nilfs_ifile_delete_inode - delete a disk inode * @ifile: ifile inode * @ino: inode number * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - The inode number @ino have not been allocated.
*/ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) { struct nilfs_palloc_req req = { .pr_entry_nr = ino, .pr_entry_bh = NULL }; struct nilfs_inode *raw_inode; void *kaddr; int ret; ret = nilfs_palloc_prepare_free_entry(ifile, &req); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0, &req.pr_entry_bh); if (ret < 0) nilfs_palloc_abort_free_entry(ifile, &req); } if (ret < 0) { brelse(req.pr_entry_bh); return ret; } kaddr = kmap_local_page(req.pr_entry_bh->b_page); raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, req.pr_entry_bh, kaddr); raw_inode->i_flags = 0; kunmap_local(kaddr); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); nilfs_palloc_commit_free_entry(ifile, &req); return 0; } int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, struct buffer_head **out_bh) { struct super_block *sb = ifile->i_sb; int err; if (unlikely(!NILFS_VALID_INODE(sb, ino))) { nilfs_error(sb, "bad inode number: %lu", (unsigned long)ino); return -EINVAL; } err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); if (unlikely(err)) nilfs_warn(sb, "error %d reading inode: ino=%lu", err, (unsigned long)ino); return err; } /** * nilfs_ifile_count_free_inodes - calculate free inodes count * @ifile: ifile inode * @nmaxinodes: current maximum of available inodes count [out] * @nfreeinodes: free inodes count [out] */ int nilfs_ifile_count_free_inodes(struct inode *ifile, u64 *nmaxinodes, u64 *nfreeinodes) { u64 nused; int err; *nmaxinodes = 0; *nfreeinodes = 0; nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); if (likely(!err)) *nfreeinodes = *nmaxinodes - nused; return err; } /** * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object * @cno: number of checkpoint entry to read * @inode_size: size of an inode * * Return: 0 on success, or the following negative error code on failure. * * %-EINVAL - Invalid checkpoint. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). */ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, __u64 cno, size_t inode_size) { struct the_nilfs *nilfs; struct inode *ifile; int err; ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); if (unlikely(!ifile)) return -ENOMEM; if (!(ifile->i_state & I_NEW)) goto out; err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, sizeof(struct nilfs_ifile_info)); if (err) goto failed; err = nilfs_palloc_init_blockgroup(ifile, inode_size); if (err) goto failed; nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); nilfs = sb->s_fs_info; err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile); if (err) goto failed; unlock_new_inode(ifile); out: return 0; failed: iget_failed(ifile); return err; }
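The two ifile helpers above form an allocate/release pair around a persistent-object-allocator entry. A hedged sketch of one such cycle, with a hypothetical function name:

/* Hedged sketch, not part of ifile.c: one allocate/release cycle. */
static int example_ifile_cycle(struct inode *ifile)
{
	struct buffer_head *bh;
	ino_t ino;
	int err;

	err = nilfs_ifile_create_inode(ifile, &ino, &bh);
	if (err)
		return err;		/* e.g. -ENOSPC when no inode is left */

	/* the caller fills in the raw inode in the returned buffer and then
	 * drops its reference; the buffer was already marked dirty above */
	brelse(bh);

	/* normally done at unlink/evict time for inode number @ino */
	return nilfs_ifile_delete_inode(ifile, ino);
}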
// SPDX-License-Identifier: GPL-2.0 /* * Copyright 2018 Noralf Trønnes */ #include <linux/dma-buf.h> #include <linux/export.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/vmalloc.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> #endif
#include <drm/drm.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_prime.h> #include <drm/drm_print.h> MODULE_IMPORT_NS(DMA_BUF); /** * DOC: overview * * This library provides helpers for GEM objects backed by shmem buffers * allocated using anonymous pageable memory. * * Functions that operate on the GEM object receive struct &drm_gem_shmem_object. * For GEM callback helpers in struct &drm_gem_object functions, see likewise * named functions with an _object_ infix (e.g., drm_gem_shmem_object_vmap() wraps * drm_gem_shmem_vmap()). These helpers perform the necessary type conversion. */ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { .free = drm_gem_shmem_object_free, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, .get_sg_table = drm_gem_shmem_object_get_sg_table, .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, .vm_ops = &drm_gem_shmem_vm_ops, }; static struct drm_gem_shmem_object * __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) { struct drm_gem_shmem_object *shmem; struct drm_gem_object *obj; int ret = 0; size = PAGE_ALIGN(size); if (dev->driver->gem_create_object) { obj = dev->driver->gem_create_object(dev, size); if (IS_ERR(obj)) return ERR_CAST(obj); shmem = to_drm_gem_shmem_obj(obj); } else { shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); if (!shmem) return ERR_PTR(-ENOMEM); obj = &shmem->base; } if (!obj->funcs) obj->funcs = &drm_gem_shmem_funcs; if (private) { drm_gem_private_object_init(dev, obj, size); shmem->map_wc = false; /* dma-buf mappings use always writecombine */ } else { ret = drm_gem_object_init(dev, obj, size); } if (ret) { drm_gem_private_object_fini(obj); goto err_free; } ret = drm_gem_create_mmap_offset(obj); if (ret) goto err_release; INIT_LIST_HEAD(&shmem->madv_list); if (!private) { /* * Our buffers are kept pinned, so allocating them * from the MOVABLE zone is a really bad idea, and * conflicts with CMA. See comments above new_inode() * why this is required _and_ expected if you're * going to pin these pages. */ mapping_set_gfp_mask(obj->filp->f_mapping, GFP_HIGHUSER | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); } return shmem; err_release: drm_gem_object_release(obj); err_free: kfree(obj); return ERR_PTR(ret); } /** * drm_gem_shmem_create - Allocate an object with the given size * @dev: DRM device * @size: Size of the object to allocate * * This function creates a shmem GEM object. * * Returns: * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative * error code on failure. */ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size) { return __drm_gem_shmem_create(dev, size, false); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); /** * drm_gem_shmem_free - Free resources associated with a shmem GEM object * @shmem: shmem GEM object to free * * This function cleans up the GEM object state and frees the memory used to * store the object itself. 
*/ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; if (obj->import_attach) { drm_prime_gem_destroy(obj, shmem->sgt); } else { dma_resv_lock(shmem->base.resv, NULL); drm_WARN_ON(obj->dev, shmem->vmap_use_count); if (shmem->sgt) { dma_unmap_sgtable(obj->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(shmem->sgt); kfree(shmem->sgt); } if (shmem->pages) drm_gem_shmem_put_pages(shmem); drm_WARN_ON(obj->dev, shmem->pages_use_count); dma_resv_unlock(shmem->base.resv); } drm_gem_object_release(obj); kfree(shmem); } EXPORT_SYMBOL_GPL(drm_gem_shmem_free); static int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct page **pages; dma_resv_assert_held(shmem->base.resv); if (shmem->pages_use_count++ > 0) return 0; pages = drm_gem_get_pages(obj); if (IS_ERR(pages)) { drm_dbg_kms(obj->dev, "Failed to get pages (%ld)\n", PTR_ERR(pages)); shmem->pages_use_count = 0; return PTR_ERR(pages); } /* * TODO: Allocating WC pages which are correctly flushed is only * supported on x86. Ideal solution would be a GFP_WC flag, which also * ttm_pool.c could use. */ #ifdef CONFIG_X86 if (shmem->map_wc) set_pages_array_wc(pages, obj->size >> PAGE_SHIFT); #endif shmem->pages = pages; return 0; } /* * drm_gem_shmem_put_pages - Decrease use count on the backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function decreases the use count and puts the backing pages when use drops to zero. */ void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; dma_resv_assert_held(shmem->base.resv); if (drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) return; if (--shmem->pages_use_count > 0) return; #ifdef CONFIG_X86 if (shmem->map_wc) set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); #endif drm_gem_put_pages(obj, shmem->pages, shmem->pages_mark_dirty_on_put, shmem->pages_mark_accessed_on_put); shmem->pages = NULL; } EXPORT_SYMBOL(drm_gem_shmem_put_pages); int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) { int ret; dma_resv_assert_held(shmem->base.resv); drm_WARN_ON(shmem->base.dev, shmem->base.import_attach); ret = drm_gem_shmem_get_pages(shmem); return ret; } EXPORT_SYMBOL(drm_gem_shmem_pin_locked); void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem) { dma_resv_assert_held(shmem->base.resv); drm_gem_shmem_put_pages(shmem); } EXPORT_SYMBOL(drm_gem_shmem_unpin_locked); /** * drm_gem_shmem_pin - Pin backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function makes sure the backing pages are pinned in memory while the * buffer is exported. * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; int ret; drm_WARN_ON(obj->dev, obj->import_attach); ret = dma_resv_lock_interruptible(shmem->base.resv, NULL); if (ret) return ret; ret = drm_gem_shmem_pin_locked(shmem); dma_resv_unlock(shmem->base.resv); return ret; } EXPORT_SYMBOL(drm_gem_shmem_pin); /** * drm_gem_shmem_unpin - Unpin backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function removes the requirement that the backing pages are pinned in * memory. 
*/ void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; drm_WARN_ON(obj->dev, obj->import_attach); dma_resv_lock(shmem->base.resv, NULL); drm_gem_shmem_unpin_locked(shmem); dma_resv_unlock(shmem->base.resv); } EXPORT_SYMBOL(drm_gem_shmem_unpin); /* * drm_gem_shmem_vmap - Create a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Returns the kernel virtual address of the SHMEM GEM object's backing * store. * * This function makes sure that a contiguous kernel virtual address mapping * exists for the buffer backing the shmem GEM object. It hides the differences * between dma-buf imported and natively allocated objects. * * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap(). * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; int ret = 0; if (obj->import_attach) { ret = dma_buf_vmap(obj->import_attach->dmabuf, map); if (!ret) { if (drm_WARN_ON(obj->dev, map->is_iomem)) { dma_buf_vunmap(obj->import_attach->dmabuf, map); return -EIO; } } } else { pgprot_t prot = PAGE_KERNEL; dma_resv_assert_held(shmem->base.resv); if (shmem->vmap_use_count++ > 0) { iosys_map_set_vaddr(map, shmem->vaddr); return 0; } ret = drm_gem_shmem_get_pages(shmem); if (ret) goto err_zero_use; if (shmem->map_wc) prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, VM_MAP, prot); if (!shmem->vaddr) ret = -ENOMEM; else iosys_map_set_vaddr(map, shmem->vaddr); } if (ret) { drm_dbg_kms(obj->dev, "Failed to vmap pages, error %d\n", ret); goto err_put_pages; } return 0; err_put_pages: if (!obj->import_attach) drm_gem_shmem_put_pages(shmem); err_zero_use: shmem->vmap_use_count = 0; return ret; } EXPORT_SYMBOL(drm_gem_shmem_vmap); /* * drm_gem_shmem_vunmap - Unmap a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Kernel virtual address where the SHMEM GEM object was mapped * * This function cleans up a kernel virtual address mapping acquired by * drm_gem_shmem_vmap(). The mapping is only removed when the use count drops to * zero. * * This function hides the differences between dma-buf imported and natively * allocated objects. */ void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; if (obj->import_attach) { dma_buf_vunmap(obj->import_attach->dmabuf, map); } else { dma_resv_assert_held(shmem->base.resv); if (drm_WARN_ON_ONCE(obj->dev, !shmem->vmap_use_count)) return; if (--shmem->vmap_use_count > 0) return; vunmap(shmem->vaddr); drm_gem_shmem_put_pages(shmem); } shmem->vaddr = NULL; } EXPORT_SYMBOL(drm_gem_shmem_vunmap); static int drm_gem_shmem_create_with_handle(struct drm_file *file_priv, struct drm_device *dev, size_t size, uint32_t *handle) { struct drm_gem_shmem_object *shmem; int ret; shmem = drm_gem_shmem_create(dev, size); if (IS_ERR(shmem)) return PTR_ERR(shmem); /* * Allocate an id of idr table where the obj is registered * and handle has the id what user can see. */ ret = drm_gem_handle_create(file_priv, &shmem->base, handle); /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); return ret; } /* Update madvise status, returns true if not purged, else * false or -errno. 
*/ int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) { dma_resv_assert_held(shmem->base.resv); if (shmem->madv >= 0) shmem->madv = madv; madv = shmem->madv; return (madv >= 0); } EXPORT_SYMBOL(drm_gem_shmem_madvise); void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct drm_device *dev = obj->dev; dma_resv_assert_held(shmem->base.resv); drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(shmem->sgt); kfree(shmem->sgt); shmem->sgt = NULL; drm_gem_shmem_put_pages(shmem); shmem->madv = -1; drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); drm_gem_free_mmap_offset(obj); /* Our goal here is to return as much of the memory as * is possible back to the system as we are called from OOM. * To do this we must instruct the shmfs to drop all of its * backing pages, *now*. */ shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1); invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); } EXPORT_SYMBOL(drm_gem_shmem_purge); /** * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object * @file: DRM file structure to create the dumb buffer for * @dev: DRM device * @args: IOCTL data * * This function computes the pitch of the dumb buffer and rounds it up to an * integer number of bytes per pixel. Drivers for hardware that doesn't have * any additional restrictions on the pitch can directly use this function as * their &drm_driver.dumb_create callback. * * For hardware with additional restrictions, drivers can adjust the fields * set up by userspace before calling into this function. * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { u32 min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); if (!args->pitch || !args->size) { args->pitch = min_pitch; args->size = PAGE_ALIGN(args->pitch * args->height); } else { /* ensure sane minimum values */ if (args->pitch < min_pitch) args->pitch = min_pitch; if (args->size < args->pitch * args->height) args->size = PAGE_ALIGN(args->pitch * args->height); } return drm_gem_shmem_create_with_handle(file, dev, args->size, &args->handle); } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); loff_t num_pages = obj->size >> PAGE_SHIFT; vm_fault_t ret; struct page *page; pgoff_t page_offset; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; dma_resv_lock(shmem->base.resv, NULL); if (page_offset >= num_pages || drm_WARN_ON_ONCE(obj->dev, !shmem->pages) || shmem->madv < 0) { ret = VM_FAULT_SIGBUS; } else { page = shmem->pages[page_offset]; ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); } dma_resv_unlock(shmem->base.resv); return ret; } static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); drm_WARN_ON(obj->dev, obj->import_attach); dma_resv_lock(shmem->base.resv, NULL); /* * We should have already pinned the pages when the buffer was first * mmap'd, vm_open() just grabs an additional reference for the new * mm the vma is getting copied 
into (ie. on fork()). */ if (!drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) shmem->pages_use_count++; dma_resv_unlock(shmem->base.resv); drm_gem_vm_open(vma); } static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); dma_resv_lock(shmem->base.resv, NULL); drm_gem_shmem_put_pages(shmem); dma_resv_unlock(shmem->base.resv); drm_gem_vm_close(vma); } const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, }; EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_ops); /** * drm_gem_shmem_mmap - Memory-map a shmem GEM object * @shmem: shmem GEM object * @vma: VMA for the area to be mapped * * This function implements an augmented version of the GEM DRM file mmap * operation for shmem objects. * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma) { struct drm_gem_object *obj = &shmem->base; int ret; if (obj->import_attach) { /* Reset both vm_ops and vm_private_data, so we don't end up with * vm_ops pointing to our implementation if the dma-buf backend * doesn't set those fields. */ vma->vm_private_data = NULL; vma->vm_ops = NULL; ret = dma_buf_mmap(obj->dma_buf, vma, 0); /* Drop the reference drm_gem_mmap_obj() acquired.*/ if (!ret) drm_gem_object_put(obj); return ret; } if (is_cow_mapping(vma->vm_flags)) return -EINVAL; dma_resv_lock(shmem->base.resv, NULL); ret = drm_gem_shmem_get_pages(shmem); dma_resv_unlock(shmem->base.resv); if (ret) return ret; vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return 0; } EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap); /** * drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs * @shmem: shmem GEM object * @p: DRM printer * @indent: Tab indentation level */ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, struct drm_printer *p, unsigned int indent) { if (shmem->base.import_attach) return; drm_printf_indent(p, indent, "pages_use_count=%u\n", shmem->pages_use_count); drm_printf_indent(p, indent, "vmap_use_count=%u\n", shmem->vmap_use_count); drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); } EXPORT_SYMBOL(drm_gem_shmem_print_info); /** * drm_gem_shmem_get_sg_table - Provide a scatter/gather table of pinned * pages for a shmem GEM object * @shmem: shmem GEM object * * This function exports a scatter/gather table suitable for PRIME usage by * calling the standard DMA mapping API. * * Drivers who need to acquire an scatter/gather table for objects need to call * drm_gem_shmem_get_pages_sgt() instead. * * Returns: * A pointer to the scatter/gather table of pinned pages or error pointer on failure. 
*/ struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; drm_WARN_ON(obj->dev, obj->import_attach); return drm_prime_pages_to_sg(obj->dev, shmem->pages, obj->size >> PAGE_SHIFT); } EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; int ret; struct sg_table *sgt; if (shmem->sgt) return shmem->sgt; drm_WARN_ON(obj->dev, obj->import_attach); ret = drm_gem_shmem_get_pages(shmem); if (ret) return ERR_PTR(ret); sgt = drm_gem_shmem_get_sg_table(shmem); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto err_put_pages; } /* Map the pages for use by the h/w. */ ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); if (ret) goto err_free_sgt; shmem->sgt = sgt; return sgt; err_free_sgt: sg_free_table(sgt); kfree(sgt); err_put_pages: drm_gem_shmem_put_pages(shmem); return ERR_PTR(ret); } /** * drm_gem_shmem_get_pages_sgt - Pin pages, dma map them, and return a * scatter/gather table for a shmem GEM object. * @shmem: shmem GEM object * * This function returns a scatter/gather table suitable for driver usage. If * the sg table doesn't exist, the pages are pinned, dma-mapped, and a sg * table created. * * This is the main function for drivers to get at backing storage, and it hides * and difference between dma-buf imported and natively allocated objects. * drm_gem_shmem_get_sg_table() should not be directly called by drivers. * * Returns: * A pointer to the scatter/gather table of pinned pages or errno on failure. */ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem) { int ret; struct sg_table *sgt; ret = dma_resv_lock_interruptible(shmem->base.resv, NULL); if (ret) return ERR_PTR(ret); sgt = drm_gem_shmem_get_pages_sgt_locked(shmem); dma_resv_unlock(shmem->base.resv); return sgt; } EXPORT_SYMBOL_GPL(drm_gem_shmem_get_pages_sgt); /** * drm_gem_shmem_prime_import_sg_table - Produce a shmem GEM object from * another driver's scatter/gather table of pinned pages * @dev: Device to import into * @attach: DMA-BUF attachment * @sgt: Scatter/gather table of pinned pages * * This function imports a scatter/gather table exported via DMA-BUF by * another driver. Drivers that use the shmem helpers should set this as their * &drm_driver.gem_prime_import_sg_table callback. * * Returns: * A pointer to a newly created GEM object or an ERR_PTR-encoded negative * error code on failure. */ struct drm_gem_object * drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt) { size_t size = PAGE_ALIGN(attach->dmabuf->size); struct drm_gem_shmem_object *shmem; shmem = __drm_gem_shmem_create(dev, size, true); if (IS_ERR(shmem)) return ERR_CAST(shmem); shmem->sgt = sgt; drm_dbg_prime(dev, "size = %zu\n", size); return &shmem->base; } EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table); MODULE_DESCRIPTION("DRM SHMEM memory-management helpers"); MODULE_IMPORT_NS(DMA_BUF); MODULE_LICENSE("GPL v2");
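/*
 * Illustrative sketch (not part of this file): how a driver might allocate a
 * shmem-backed GEM object and access it from the kernel using the helpers
 * above.  "example_fill_bo" and "pattern" are made-up names; everything else
 * comes from <drm/drm_gem_shmem_helper.h> and <linux/iosys-map.h>.
 * drm_gem_shmem_vmap()/vunmap() expect the caller to hold the object's
 * reservation lock, matching the dma_resv_assert_held() checks above.
 */
#include <linux/iosys-map.h>
#include <drm/drm_gem_shmem_helper.h>

static int __maybe_unused example_fill_bo(struct drm_device *dev, size_t size, u8 pattern)
{
	struct drm_gem_shmem_object *shmem;
	struct iosys_map map;
	int ret;

	shmem = drm_gem_shmem_create(dev, size);	/* size is page-aligned internally */
	if (IS_ERR(shmem))
		return PTR_ERR(shmem);

	dma_resv_lock(shmem->base.resv, NULL);
	ret = drm_gem_shmem_vmap(shmem, &map);		/* pins the pages and vmap()s them */
	if (!ret) {
		memset(map.vaddr, pattern, size);	/* shmem mappings live in system memory */
		drm_gem_shmem_vunmap(shmem, &map);
	}
	dma_resv_unlock(shmem->base.resv);

	drm_gem_object_put(&shmem->base);		/* final put ends up in drm_gem_shmem_free() */
	return ret;
}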
// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file contains vfs inode ops for the 9P2000 protocol.
* * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" #include "xattr.h" #include "acl.h" static const struct inode_operations v9fs_dir_inode_operations; static const struct inode_operations v9fs_dir_inode_operations_dotu; static const struct inode_operations v9fs_file_inode_operations; static const struct inode_operations v9fs_symlink_inode_operations; /** * unixmode2p9mode - convert unix mode bits to plan 9 * @v9ses: v9fs session information * @mode: mode to convert * */ static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode) { int res; res = mode & 0777; if (S_ISDIR(mode)) res |= P9_DMDIR; if (v9fs_proto_dotu(v9ses)) { if (v9ses->nodev == 0) { if (S_ISSOCK(mode)) res |= P9_DMSOCKET; if (S_ISFIFO(mode)) res |= P9_DMNAMEDPIPE; if (S_ISBLK(mode)) res |= P9_DMDEVICE; if (S_ISCHR(mode)) res |= P9_DMDEVICE; } if ((mode & S_ISUID) == S_ISUID) res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) res |= P9_DMSETGID; if ((mode & S_ISVTX) == S_ISVTX) res |= P9_DMSETVTX; } return res; } /** * p9mode2perm- convert plan9 mode bits to unix permission bits * @v9ses: v9fs session information * @stat: p9_wstat from which mode need to be derived * */ static int p9mode2perm(struct v9fs_session_info *v9ses, struct p9_wstat *stat) { int res; int mode = stat->mode; res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; if ((mode & P9_DMSETVTX) == P9_DMSETVTX) res |= S_ISVTX; } return res; } /** * p9mode2unixmode- convert plan9 mode bits to unix mode bits * @v9ses: v9fs session information * @stat: p9_wstat from which mode need to be derived * @rdev: major number, minor number in case of device files. 
* */ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, struct p9_wstat *stat, dev_t *rdev) { int res, r; u32 mode = stat->mode; *rdev = 0; res = p9mode2perm(v9ses, stat); if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; else if ((mode & P9_DMSYMLINK) && (v9fs_proto_dotu(v9ses))) res |= S_IFLNK; else if ((mode & P9_DMSOCKET) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) res |= S_IFSOCK; else if ((mode & P9_DMNAMEDPIPE) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) res |= S_IFIFO; else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) { char type = 0; int major = -1, minor = -1; r = sscanf(stat->extension, "%c %i %i", &type, &major, &minor); if (r != 3) { p9_debug(P9_DEBUG_ERROR, "invalid device string, umode will be bogus: %s\n", stat->extension); return res; } switch (type) { case 'c': res |= S_IFCHR; break; case 'b': res |= S_IFBLK; break; default: p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n", type, stat->extension); } *rdev = MKDEV(major, minor); } else res |= S_IFREG; return res; } /** * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits * @uflags: flags to convert * @extended: if .u extensions are active */ int v9fs_uflags2omode(int uflags, int extended) { int ret; switch (uflags&3) { default: case O_RDONLY: ret = P9_OREAD; break; case O_WRONLY: ret = P9_OWRITE; break; case O_RDWR: ret = P9_ORDWR; break; } if (uflags & O_TRUNC) ret |= P9_OTRUNC; if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; if (uflags & O_APPEND) ret |= P9_OAPPEND; } return ret; } /** * v9fs_blank_wstat - helper function to setup a 9P stat structure * @wstat: structure to initialize * */ void v9fs_blank_wstat(struct p9_wstat *wstat) { wstat->type = ~0; wstat->dev = ~0; wstat->qid.type = ~0; wstat->qid.version = ~0; *((long long *)&wstat->qid.path) = ~0; wstat->mode = ~0; wstat->atime = ~0; wstat->mtime = ~0; wstat->length = ~0; wstat->name = NULL; wstat->uid = NULL; wstat->gid = NULL; wstat->muid = NULL; wstat->n_uid = INVALID_UID; wstat->n_gid = INVALID_GID; wstat->n_muid = INVALID_UID; wstat->extension = NULL; } /** * v9fs_alloc_inode - helper function to allocate an inode * @sb: The superblock to allocate the inode from */ struct inode *v9fs_alloc_inode(struct super_block *sb) { struct v9fs_inode *v9inode; v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL); if (!v9inode) return NULL; v9inode->cache_validity = 0; mutex_init(&v9inode->v_mutex); return &v9inode->netfs.inode; } /** * v9fs_free_inode - destroy an inode * @inode: The inode to be freed */ void v9fs_free_inode(struct inode *inode) { kmem_cache_free(v9fs_inode_cache, V9FS_I(inode)); } /* * Set parameters for the netfs library */ void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true); } int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t rdev) { int err = 0; inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_blocks = 0; inode->i_rdev = rdev; simple_inode_init_ts(inode); inode->i_mapping->a_ops = &v9fs_addr_operations; inode->i_private = NULL; switch (mode & S_IFMT) { case S_IFIFO: case S_IFBLK: case S_IFCHR: case S_IFSOCK: if (v9fs_proto_dotl(v9ses)) { inode->i_op = &v9fs_file_inode_operations_dotl; } else if (v9fs_proto_dotu(v9ses)) { inode->i_op = &v9fs_file_inode_operations; } else { p9_debug(P9_DEBUG_ERROR, "special files without extended mode\n"); err = -EINVAL; goto error; } init_special_inode(inode, inode->i_mode, 
inode->i_rdev); break; case S_IFREG: if (v9fs_proto_dotl(v9ses)) { inode->i_op = &v9fs_file_inode_operations_dotl; inode->i_fop = &v9fs_file_operations_dotl; } else { inode->i_op = &v9fs_file_inode_operations; inode->i_fop = &v9fs_file_operations; } break; case S_IFLNK: if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { p9_debug(P9_DEBUG_ERROR, "extended modes used with legacy protocol\n"); err = -EINVAL; goto error; } if (v9fs_proto_dotl(v9ses)) inode->i_op = &v9fs_symlink_inode_operations_dotl; else inode->i_op = &v9fs_symlink_inode_operations; break; case S_IFDIR: inc_nlink(inode); if (v9fs_proto_dotl(v9ses)) inode->i_op = &v9fs_dir_inode_operations_dotl; else if (v9fs_proto_dotu(v9ses)) inode->i_op = &v9fs_dir_inode_operations_dotu; else inode->i_op = &v9fs_dir_inode_operations; if (v9fs_proto_dotl(v9ses)) inode->i_fop = &v9fs_dir_operations_dotl; else inode->i_fop = &v9fs_dir_operations; break; default: p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n", mode, mode & S_IFMT); err = -EINVAL; goto error; } error: return err; } /** * v9fs_evict_inode - Remove an inode from the inode cache * @inode: inode to release * */ void v9fs_evict_inode(struct inode *inode) { struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); __le32 __maybe_unused version; if (!is_bad_inode(inode)) { netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); version = cpu_to_le32(v9inode->qid.version); netfs_clear_inode_writeback(inode, &version); clear_inode(inode); filemap_fdatawrite(&inode->i_data); #ifdef CONFIG_9P_FSCACHE if (v9fs_inode_cookie(v9inode)) fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); #endif } else clear_inode(inode); } static int v9fs_test_inode(struct inode *inode, void *data) { int umode; dev_t rdev; struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_wstat *st = (struct p9_wstat *)data; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); umode = p9mode2unixmode(v9ses, st, &rdev); /* don't match inode of different type */ if (inode_wrong_type(inode, umode)) return 0; /* compare qid details */ if (memcmp(&v9inode->qid.version, &st->qid.version, sizeof(v9inode->qid.version))) return 0; if (v9inode->qid.type != st->qid.type) return 0; if (v9inode->qid.path != st->qid.path) return 0; return 1; } static int v9fs_test_new_inode(struct inode *inode, void *data) { return 0; } static int v9fs_set_inode(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_wstat *st = (struct p9_wstat *)data; memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); return 0; } static struct inode *v9fs_qid_iget(struct super_block *sb, struct p9_qid *qid, struct p9_wstat *st, int new) { dev_t rdev; int retval; umode_t umode; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; int (*test)(struct inode *inode, void *data); if (new) test = v9fs_test_new_inode; else test = v9fs_test_inode; inode = iget5_locked(sb, QID2INO(qid), test, v9fs_set_inode, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; /* * initialize the inode with the stat info * FIXME!! we may need support for stale inodes * later. 
*/ inode->i_ino = QID2INO(qid); umode = p9mode2unixmode(v9ses, st, &rdev); retval = v9fs_init_inode(v9ses, inode, umode, rdev); if (retval) goto error; v9fs_stat2inode(st, inode, sb, 0); v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; error: iget_failed(inode); return ERR_PTR(retval); } struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new) { struct p9_wstat *st; struct inode *inode = NULL; st = p9_client_stat(fid); if (IS_ERR(st)) return ERR_CAST(st); inode = v9fs_qid_iget(sb, &st->qid, st, new); p9stat_free(st); kfree(st); return inode; } /** * v9fs_at_to_dotl_flags- convert Linux specific AT flags to * plan 9 AT flag. * @flags: flags to convert */ static int v9fs_at_to_dotl_flags(int flags) { int rflags = 0; if (flags & AT_REMOVEDIR) rflags |= P9_DOTL_AT_REMOVEDIR; return rflags; } /** * v9fs_dec_count - helper functon to drop i_nlink. * * If a directory had nlink <= 2 (including . and ..), then we should not drop * the link count, which indicates the underlying exported fs doesn't maintain * nlink accurately. e.g. * - overlayfs sets nlink to 1 for merged dir * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more * than EXT4_LINK_MAX (65000) links. * * @inode: inode whose nlink is being dropped */ static void v9fs_dec_count(struct inode *inode) { if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) drop_nlink(inode); } /** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted * @dentry: dentry that is being deleted * @flags: removing a directory * */ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) { struct inode *inode; int retval = -EOPNOTSUPP; struct p9_fid *v9fid, *dfid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", dir, dentry, flags); v9ses = v9fs_inode2v9ses(dir); inode = d_inode(dentry); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { retval = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); return retval; } if (v9fs_proto_dotl(v9ses)) retval = p9_client_unlinkat(dfid, dentry->d_name.name, v9fs_at_to_dotl_flags(flags)); p9_fid_put(dfid); if (retval == -EOPNOTSUPP) { /* Try the one based on path */ v9fid = v9fs_fid_clone(dentry); if (IS_ERR(v9fid)) return PTR_ERR(v9fid); retval = p9_client_remove(v9fid); } if (!retval) { /* * directories on unlink should have zero * link count */ if (flags & AT_REMOVEDIR) { clear_nlink(inode); v9fs_dec_count(dir); } else v9fs_dec_count(inode); v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); /* invalidate all fids associated with dentry */ /* NOTE: This will not include open fids */ dentry->d_op->d_release(dentry); } return retval; } /** * v9fs_create - Create a file * @v9ses: session information * @dir: directory that dentry is being created in * @dentry: dentry that is being created * @extension: 9p2000.u extension string to support devices, etc. 
* @perm: create permissions * @mode: open mode * */ static struct p9_fid * v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct dentry *dentry, char *extension, u32 perm, u8 mode) { int err; const unsigned char *name; struct p9_fid *dfid, *ofid = NULL, *fid = NULL; struct inode *inode; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return ERR_PTR(err); } /* clone a fid to use for creation */ ofid = clone_fid(dfid); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } err = p9_client_fcreate(ofid, name, perm, mode, extension); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); goto error; } if (!(perm & P9_DMLINK)) { /* now walk from the parent so we can get unopened fid */ fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } /* * instantiate inode and assign the unopened fid to the dentry */ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); } p9_fid_put(dfid); return ofid; error: p9_fid_put(dfid); p9_fid_put(ofid); p9_fid_put(fid); return ERR_PTR(err); } /** * v9fs_vfs_create - VFS hook to create a regular file * @idmap: idmap of the mount * @dir: The parent directory * @dentry: The name of file to be created * @mode: The UNIX file mode to set * @excl: True if the file must not yet exist * * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called * for mknod(2). * */ static int v9fs_vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); u32 perm = unixmode2p9mode(v9ses, mode); struct p9_fid *fid; /* P9_OEXCL? */ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); p9_fid_put(fid); return 0; } /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory * @idmap: idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * */ static int v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int err; u32 perm; struct p9_fid *fid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD); if (IS_ERR(fid)) { err = PTR_ERR(fid); fid = NULL; } else { inc_nlink(dir); v9fs_invalidate_inode_attr(dir); } if (fid) p9_fid_put(fid); return err; } /** * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode * @dir: inode that is being walked from * @dentry: dentry that is being walked to? 
* @flags: lookup flags (unused) * */ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct dentry *res; struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; struct inode *inode; const unsigned char *name; p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", dir, dentry, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); v9ses = v9fs_inode2v9ses(dir); /* We can walk d_parent because we hold the dir->i_mutex */ dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return ERR_CAST(dfid); /* * Make sure we don't use a wrong inode due to parallel * unlink. For cached mode create calls request for new * inode. But with cache disabled, lookup should do this. */ name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); p9_fid_put(dfid); if (fid == ERR_PTR(-ENOENT)) inode = NULL; else if (IS_ERR(fid)) inode = ERR_CAST(fid); else if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); else inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with * the new name. ie look up for a/b, while on server somebody * moved b under k and client parallely did a lookup for * k/b. */ res = d_splice_alias(inode, dentry); if (!IS_ERR(fid)) { if (!res) v9fs_fid_add(dentry, &fid); else if (!IS_ERR(res)) v9fs_fid_add(res, &fid); else p9_fid_put(fid); } return res; } static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int flags, umode_t mode) { int err; u32 perm; struct v9fs_inode __maybe_unused *v9inode; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct dentry *res = NULL; struct inode *inode; int p9_omode; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); if (res) dentry = res; } /* Only creates */ if (!(flags & O_CREAT) || d_really_is_positive(dentry)) return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode); p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses)); if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode); if (IS_ERR(fid)) { err = PTR_ERR(fid); goto error; } v9fs_invalidate_inode_attr(dir); inode = d_inode(dentry); v9inode = V9FS_I(inode); err = finish_open(file, dentry, generic_file_open); if (err) goto error; file->private_data = fid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) fscache_use_cookie(v9fs_inode_cookie(v9inode), file->f_mode & FMODE_WRITE); #endif v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags); v9fs_open_fid_add(inode, &fid); file->f_mode |= FMODE_CREATED; out: dput(res); return err; error: p9_fid_put(fid); goto out; } /** * v9fs_vfs_unlink - VFS unlink hook to delete an inode * @i: inode that is being unlinked * @d: dentry that is being unlinked * */ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) { return v9fs_remove(i, d, 0); } /** * v9fs_vfs_rmdir - VFS unlink hook to delete a directory * @i: inode that is being unlinked * @d: dentry that is being unlinked * */ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { return v9fs_remove(i, d, AT_REMOVEDIR); } /** * v9fs_vfs_rename - VFS hook to rename an inode * @idmap: 
The idmap of the mount * @old_dir: old dir inode * @old_dentry: old dentry * @new_dir: new dir inode * @new_dentry: new dentry * @flags: RENAME_* flags * */ int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int retval; struct inode *old_inode; struct inode *new_inode; struct v9fs_session_info *v9ses; struct p9_fid *oldfid = NULL, *dfid = NULL; struct p9_fid *olddirfid = NULL; struct p9_fid *newdirfid = NULL; struct p9_wstat wstat; if (flags) return -EINVAL; p9_debug(P9_DEBUG_VFS, "\n"); old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); v9ses = v9fs_inode2v9ses(old_inode); oldfid = v9fs_fid_lookup(old_dentry); if (IS_ERR(oldfid)) return PTR_ERR(oldfid); dfid = v9fs_parent_fid(old_dentry); olddirfid = clone_fid(dfid); p9_fid_put(dfid); dfid = NULL; if (IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); goto error; } dfid = v9fs_parent_fid(new_dentry); newdirfid = clone_fid(dfid); p9_fid_put(dfid); dfid = NULL; if (IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); goto error; } down_write(&v9ses->rename_sem); if (v9fs_proto_dotl(v9ses)) { retval = p9_client_renameat(olddirfid, old_dentry->d_name.name, newdirfid, new_dentry->d_name.name); if (retval == -EOPNOTSUPP) retval = p9_client_rename(oldfid, newdirfid, new_dentry->d_name.name); if (retval != -EOPNOTSUPP) goto error_locked; } if (old_dentry->d_parent != new_dentry->d_parent) { /* * 9P .u can only handle file rename in the same directory */ p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n"); retval = -EXDEV; goto error_locked; } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->uname; wstat.name = new_dentry->d_name.name; retval = p9_client_wstat(oldfid, &wstat); error_locked: if (!retval) { if (new_inode) { if (S_ISDIR(new_inode->i_mode)) clear_nlink(new_inode); else v9fs_dec_count(new_inode); } if (S_ISDIR(old_inode->i_mode)) { if (!new_inode) inc_nlink(new_dir); v9fs_dec_count(old_dir); } v9fs_invalidate_inode_attr(old_inode); v9fs_invalidate_inode_attr(old_dir); v9fs_invalidate_inode_attr(new_dir); /* successful rename */ d_move(old_dentry, new_dentry); } up_write(&v9ses->rename_sem); error: p9_fid_put(newdirfid); p9_fid_put(olddirfid); p9_fid_put(oldfid); return retval; } /** * v9fs_vfs_getattr - retrieve file metadata * @idmap: idmap of the mount * @path: Object to query * @stat: metadata structure to populate * @request_mask: Mask of STATX_xxx flags indicating the caller's interests * @flags: AT_STATX_xxx setting * */ static int v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache & CACHE_WRITEBACK) { if (S_ISREG(inode->i_mode)) { int retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval); } } fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); st = p9_client_stat(fid); p9_fid_put(fid); if (IS_ERR(st)) return PTR_ERR(st); v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); 
p9stat_free(st); kfree(st); return 0; } /** * v9fs_vfs_setattr - set file metadata * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ static int v9fs_vfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; struct inode *inode = d_inode(dentry); struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL; struct p9_wstat wstat; p9_debug(P9_DEBUG_VFS, "\n"); retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; v9ses = v9fs_dentry2v9ses(dentry); if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; WARN_ON(!fid); } if (!fid) { fid = v9fs_fid_lookup(dentry); use_dentry = 1; } if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_blank_wstat(&wstat); if (iattr->ia_valid & ATTR_MODE) wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode); if (iattr->ia_valid & ATTR_MTIME) wstat.mtime = iattr->ia_mtime.tv_sec; if (iattr->ia_valid & ATTR_ATIME) wstat.atime = iattr->ia_atime.tv_sec; if (iattr->ia_valid & ATTR_SIZE) wstat.length = iattr->ia_size; if (v9fs_proto_dotu(v9ses)) { if (iattr->ia_valid & ATTR_UID) wstat.n_uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) wstat.n_gid = iattr->ia_gid; } /* Write all dirty data */ if (d_is_reg(dentry)) { retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during setattr returned %d\n", retval); } retval = p9_client_wstat(fid, &wstat); if (use_dentry) p9_fid_put(fid); if (retval < 0) return retval; if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { struct v9fs_inode *v9inode = V9FS_I(inode); fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size); } #endif } v9fs_invalidate_inode_attr(inode); setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); return 0; } /** * v9fs_stat2inode - populate an inode structure with mistat info * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct super_block *sb, unsigned int flags) { umode_t mode; struct v9fs_session_info *v9ses = sb->s_fs_info; struct v9fs_inode *v9inode = V9FS_I(inode); inode_set_atime(inode, stat->atime, 0); inode_set_mtime(inode, stat->mtime, 0); inode_set_ctime(inode, stat->mtime, 0); inode->i_uid = v9ses->dfltuid; inode->i_gid = v9ses->dfltgid; if (v9fs_proto_dotu(v9ses)) { inode->i_uid = stat->n_uid; inode->i_gid = stat->n_gid; } if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) { if (v9fs_proto_dotu(v9ses)) { unsigned int i_nlink; /* * Hadlink support got added later to the .u extension. * So there can be a server out there that doesn't * support this even with .u extension. That would * just leave us with stat->extension being an empty * string, though. */ /* HARDLINKCOUNT %u */ if (sscanf(stat->extension, " HARDLINKCOUNT %u", &i_nlink) == 1) set_nlink(inode, i_nlink); } } mode = p9mode2perm(v9ses, stat); mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; v9inode->netfs.remote_i_size = stat->length; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... 
*/ inode->i_blocks = (stat->length + 512 - 1) >> 9; v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } /** * v9fs_vfs_get_link - follow a symlink path * @dentry: dentry for symlink * @inode: inode for symlink * @done: delayed call for when we are done with the return value */ static const char *v9fs_vfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; char *res; if (!dentry) return ERR_PTR(-ECHILD); v9ses = v9fs_dentry2v9ses(dentry); if (!v9fs_proto_dotu(v9ses)) return ERR_PTR(-EBADF); p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); st = p9_client_stat(fid); p9_fid_put(fid); if (IS_ERR(st)) return ERR_CAST(st); if (!(st->mode & P9_DMSYMLINK)) { p9stat_free(st); kfree(st); return ERR_PTR(-EINVAL); } res = st->extension; st->extension = NULL; if (strlen(res) >= PATH_MAX) res[PATH_MAX - 1] = '\0'; p9stat_free(st); kfree(st); set_delayed_call(done, kfree_link, res); return res; } /** * v9fs_vfs_mkspecial - create a special file * @dir: inode to create special file in * @dentry: dentry to create * @perm: mode to create special file * @extension: 9p2000.u format extension string representing special file * */ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, u32 perm, const char *extension) { struct p9_fid *fid; struct v9fs_session_info *v9ses; v9ses = v9fs_inode2v9ses(dir); if (!v9fs_proto_dotu(v9ses)) { p9_debug(P9_DEBUG_ERROR, "not extended\n"); return -EPERM; } fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, P9_OREAD); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); p9_fid_put(fid); return 0; } /** * v9fs_vfs_symlink - helper function to create symlinks * @idmap: idmap of the mount * @dir: directory inode containing symlink * @dentry: dentry for symlink * @symname: symlink data * * See Also: 9P2000.u RFC for more information * */ static int v9fs_vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n", dir->i_ino, dentry, symname); return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); } #define U32_MAX_DIGITS 10 /** * v9fs_vfs_link - create a hardlink * @old_dentry: dentry for file to link to * @dir: inode destination for new link * @dentry: dentry for link * */ static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */ struct p9_fid *oldfid; p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n", dir->i_ino, dentry, old_dentry); oldfid = v9fs_fid_clone(old_dentry); if (IS_ERR(oldfid)) return PTR_ERR(oldfid); sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); if (!retval) { v9fs_refresh_inode(oldfid, d_inode(old_dentry)); v9fs_invalidate_inode_attr(dir); } p9_fid_put(oldfid); return retval; } /** * v9fs_vfs_mknod - create a special file * @idmap: idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation * @rdev: device associated with special file * */ static int v9fs_vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); int retval; char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1]; u32 perm; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", 
dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISCHR(mode)) sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); else *name = 0; perm = unixmode2p9mode(v9ses, mode); retval = v9fs_vfs_mkspecial(dir, dentry, perm, name); return retval; } int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { int umode; dev_t rdev; struct p9_wstat *st; struct v9fs_session_info *v9ses; unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_stat(fid); if (IS_ERR(st)) return PTR_ERR(st); /* * Don't update inode if the file type is different */ umode = p9mode2unixmode(v9ses, st, &rdev); if (inode_wrong_type(inode, umode)) goto out; /* * We don't want to refresh inode->i_size, * because we may have cached data */ flags = (v9ses->cache & CACHE_LOOSE) ? V9FS_STAT2INODE_KEEP_ISIZE : 0; v9fs_stat2inode(st, inode, inode->i_sb, flags); out: p9stat_free(st); kfree(st); return 0; } static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, .atomic_open = v9fs_vfs_atomic_open, .symlink = v9fs_vfs_symlink, .link = v9fs_vfs_link, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_dir_inode_operations = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, .atomic_open = v9fs_vfs_atomic_open, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_file_inode_operations = { .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_symlink_inode_operations = { .get_link = v9fs_vfs_get_link, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, };
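/*
 * Illustrative sketch (not part of this file): the 9p2000.u "extension"
 * string format used above.  v9fs_vfs_mknod() encodes a device node as
 * "b MAJOR MINOR" or "c MAJOR MINOR", and p9mode2unixmode() recovers the
 * type and numbers with sscanf("%c %i %i").  This is a stand-alone,
 * user-space style demonstration of that round trip; all names here are
 * made up for illustration.
 */
#include <stdio.h>

static void example_extension_roundtrip(void)
{
	char ext[32];
	char type;
	int major, minor;

	/* Encode a character device 4:64 the way v9fs_vfs_mknod() does. */
	snprintf(ext, sizeof(ext), "c %u %u", 4u, 64u);

	/* Decode it the way p9mode2unixmode() does. */
	if (sscanf(ext, "%c %i %i", &type, &major, &minor) == 3)
		printf("type=%c major=%d minor=%d\n", type, major, minor);
}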
/*
   BNEP implementation for Linux Bluetooth stack (BlueZ).
   Copyright (C) 2001-2002 Inventel Systemes
   Written 2001-2002 by David Libault <david.libault@inventel.fr>

   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE
   IS DISCLAIMED.
*/ #include <linux/compat.h> #include <linux/export.h> #include <linux/file.h> #include "bnep.h" static struct bt_sock_list bnep_sk_list = { .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock) }; static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; bt_sock_unlink(&bnep_sk_list, sk); sock_orphan(sk); sock_put(sk); return 0; } static int do_bnep_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp) { struct bnep_connlist_req cl; struct bnep_connadd_req ca; struct bnep_conndel_req cd; struct bnep_conninfo ci; struct socket *nsock; __u32 supp_feat = BIT(BNEP_SETUP_RESPONSE); int err; BT_DBG("cmd %x arg %p", cmd, argp); switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; nsock = sockfd_lookup(ca.sock, &err); if (!nsock) return err; if (nsock->sk->sk_state != BT_CONNECTED) { sockfd_put(nsock); return -EBADFD; } ca.device[sizeof(ca.device)-1] = 0; err = bnep_add_connection(&ca, nsock); if (!err) { if (copy_to_user(argp, &ca, sizeof(ca))) err = -EFAULT; } else sockfd_put(nsock); return err; case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; return bnep_del_connection(&cd); case BNEPGETCONNLIST: if (copy_from_user(&cl, argp, sizeof(cl))) return -EFAULT; if (cl.cnum <= 0) return -EINVAL; err = bnep_get_connlist(&cl); if (!err && copy_to_user(argp, &cl, sizeof(cl))) return -EFAULT; return err; case BNEPGETCONNINFO: if (copy_from_user(&ci, argp, sizeof(ci))) return -EFAULT; err = bnep_get_conninfo(&ci); if (!err && copy_to_user(argp, &ci, sizeof(ci))) return -EFAULT; return err; case BNEPGETSUPPFEAT: if (copy_to_user(argp, &supp_feat, sizeof(supp_feat))) return -EFAULT; return 0; default: return -EINVAL; } return 0; } static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return do_bnep_sock_ioctl(sock, cmd, (void __user *)arg); } #ifdef CONFIG_COMPAT static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); if (cmd == BNEPGETCONNLIST) { struct bnep_connlist_req cl; unsigned __user *p = argp; u32 uci; int err; if (get_user(cl.cnum, p) || get_user(uci, p + 1)) return -EFAULT; cl.ci = compat_ptr(uci); if (cl.cnum <= 0) return -EINVAL; err = bnep_get_connlist(&cl); if (!err && put_user(cl.cnum, p)) err = -EFAULT; return err; } return do_bnep_sock_ioctl(sock, cmd, argp); } #endif static const struct proto_ops bnep_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = bnep_sock_release, .ioctl = bnep_sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = bnep_sock_compat_ioctl, #endif .bind = sock_no_bind, .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .mmap = sock_no_mmap }; static struct proto bnep_proto = { .name = "BNEP", .owner = THIS_MODULE, .obj_size = sizeof(struct bt_sock) }; static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sk = bt_sock_alloc(net, sock, &bnep_proto, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; sock->ops = &bnep_sock_ops; sock->state = SS_UNCONNECTED; bt_sock_link(&bnep_sk_list, sk); return 0; } static const 
struct net_proto_family bnep_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = bnep_sock_create }; int __init bnep_sock_init(void) { int err; err = proto_register(&bnep_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops); if (err < 0) { BT_ERR("Can't register BNEP socket"); goto error; } err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create BNEP proc file"); bt_sock_unregister(BTPROTO_BNEP); goto error; } BT_INFO("BNEP socket layer initialized"); return 0; error: proto_unregister(&bnep_proto); return err; } void __exit bnep_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "bnep"); bt_sock_unregister(BTPROTO_BNEP); proto_unregister(&bnep_proto); }
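The ioctl interface above is driven from user space through a raw BTPROTO_BNEP control socket. A minimal sketch follows; since the BNEP ioctl constants and struct bnep_connadd_req live in the kernel-private bnep.h rather than in uapi, the definitions below are assumed copies (the kind BlueZ carries), and l2cap_fd stands for an already-connected L2CAP socket on the BNEP PSM.

/*
 * Illustrative user-space sketch (not part of the kernel sources above).
 * The BNEP ioctls are not exported through uapi, so the definitions here
 * are assumed copies of the kernel-private ones.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/types.h>

#ifndef BTPROTO_BNEP
#define BTPROTO_BNEP 4
#endif

#define BNEPCONNADD _IOW('B', 200, int)   /* assumed copy of kernel define */

struct bnep_connadd_req {                 /* assumed copy of kernel struct */
	int   sock;        /* already-connected L2CAP socket (BNEP PSM) */
	__u32 flags;
	__u16 role;        /* PANU/NAP/GN service UUID */
	char  device[16];  /* interface name filled in on success */
};

/*
 * Open the SOCK_RAW control socket served by bnep_sock_create() above and
 * ask the kernel (BNEPCONNADD, requires CAP_NET_ADMIN) to attach a BNEP
 * network interface to the given L2CAP connection.
 */
static int bnep_attach(int l2cap_fd, unsigned short role)
{
	struct bnep_connadd_req req;
	int ctl, err = 0;

	ctl = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_BNEP);
	if (ctl < 0)
		return -errno;

	memset(&req, 0, sizeof(req));
	req.sock = l2cap_fd;
	req.role = role;

	if (ioctl(ctl, BNEPCONNADD, &req) < 0)
		err = -errno;
	else
		printf("attached BNEP interface %s\n", req.device);

	close(ctl);
	return err;
}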
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Skb ref helpers.
 *
 */

#ifndef _LINUX_SKBUFF_REF_H
#define _LINUX_SKBUFF_REF_H

#include <linux/skbuff.h>

/**
 * __skb_frag_ref - take an additional reference on a paged fragment.
 * @frag: the paged fragment
 *
 * Takes an additional reference on the paged fragment @frag.
 */
static inline void __skb_frag_ref(skb_frag_t *frag)
{
	get_page(skb_frag_page(frag));
}

/**
 * skb_frag_ref - take an additional reference on a paged fragment of an skb.
 * @skb: the buffer
 * @f: the fragment offset.
 *
 * Takes an additional reference on the @f'th paged fragment of @skb.
 */
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}

bool napi_pp_put_page(netmem_ref netmem);

static inline void skb_page_unref(netmem_ref netmem, bool recycle)
{
#ifdef CONFIG_PAGE_POOL
	if (recycle && napi_pp_put_page(netmem))
		return;
#endif
	put_page(netmem_to_page(netmem));
}

/**
 * __skb_frag_unref - release a reference on a paged fragment.
 * @frag: the paged fragment
 * @recycle: recycle the page if allocated via page_pool
 *
 * Releases a reference on the paged fragment @frag
 * or recycles the page via the page_pool API.
 */
static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
{
	skb_page_unref(skb_frag_netmem(frag), recycle);
}

/**
 * skb_frag_unref - release a reference on a paged fragment of an skb.
 * @skb: the buffer
 * @f: the fragment offset
 *
 * Releases a reference on the @f'th paged fragment of @skb.
 */
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	if (!skb_zcopy_managed(skb))
		__skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
}

#endif /* _LINUX_SKBUFF_REF_H */
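A short kernel-side sketch of how these helpers are meant to be paired: share_paged_frags() below is a hypothetical helper that copies fragment descriptors from one skb to another and takes one extra page reference per fragment via skb_frag_ref(), so each skb's eventual skb_frag_unref() drops exactly one reference. page_pool (pp_recycle) and zerocopy-managed bookkeeping are deliberately ignored here.

#include <linux/skbuff.h>
#include <linux/skbuff_ref.h>

/*
 * Hypothetical helper: make @dst reference the same paged fragments as
 * @src. One extra page reference is taken per fragment so that freeing
 * either skb later releases exactly one reference per fragment.
 * (Sketch only: pp_recycle and zerocopy-managed frags are not handled.)
 */
static void share_paged_frags(struct sk_buff *dst, struct sk_buff *src)
{
	struct skb_shared_info *src_sh = skb_shinfo(src);
	struct skb_shared_info *dst_sh = skb_shinfo(dst);
	int i;

	for (i = 0; i < src_sh->nr_frags; i++) {
		dst_sh->frags[i] = src_sh->frags[i];
		skb_frag_ref(src, i);	/* get_page() on the fragment's page */
	}
	dst_sh->nr_frags = src_sh->nr_frags;
}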
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (c) 2012 Taobao.
* Written by Tao Ma <boyu.mt@taobao.com> */ #include <linux/iomap.h> #include <linux/fiemap.h> #include <linux/namei.h> #include <linux/iversion.h> #include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" #include "truncate.h" #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) #define EXT4_INLINE_DOTDOT_OFFSET 2 #define EXT4_INLINE_DOTDOT_SIZE 4 static int ext4_get_inline_size(struct inode *inode) { if (EXT4_I(inode)->i_inline_off) return EXT4_I(inode)->i_inline_size; return 0; } static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_iloc *iloc) { struct ext4_xattr_ibody_header *header; struct ext4_xattr_entry *entry; struct ext4_inode *raw_inode; void *end; int free, min_offs; if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return 0; min_offs = EXT4_SB(inode->i_sb)->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - EXT4_I(inode)->i_extra_isize - sizeof(struct ext4_xattr_ibody_header); /* * We need to subtract another sizeof(__u32) since an in-inode xattr * needs an empty 4 bytes to indicate the gap between the xattr entry * and the name/value pair. */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) return EXT4_XATTR_SIZE(min_offs - EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - EXT4_XATTR_ROUND - sizeof(__u32)); raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; /* Compute min_offs. */ while (!IS_LAST_ENTRY(entry)) { void *next = EXT4_XATTR_NEXT(entry); if (next >= end) { EXT4_ERROR_INODE(inode, "corrupt xattr in inline inode"); return 0; } if (!entry->e_value_inum && entry->e_value_size) { size_t offs = le16_to_cpu(entry->e_value_offs); if (offs < min_offs) min_offs = offs; } entry = next; } free = min_offs - ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); if (EXT4_I(inode)->i_inline_off) { entry = (struct ext4_xattr_entry *) ((void *)raw_inode + EXT4_I(inode)->i_inline_off); free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); goto out; } free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); if (free > EXT4_XATTR_ROUND) free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); else free = 0; out: return free; } /* * Get the maximum size we now can store in an inode. * If we can't find the space for a xattr entry, don't use the space * of the extents since we have no space to indicate the inline data. 
*/ int ext4_get_max_inline_size(struct inode *inode) { int error, max_inline_size; struct ext4_iloc iloc; if (EXT4_I(inode)->i_extra_isize == 0) return 0; error = ext4_get_inode_loc(inode, &iloc); if (error) { ext4_error_inode_err(inode, __func__, __LINE__, 0, -error, "can't get inode location %lu", inode->i_ino); return 0; } down_read(&EXT4_I(inode)->xattr_sem); max_inline_size = get_max_inline_xattr_value_size(inode, &iloc); up_read(&EXT4_I(inode)->xattr_sem); brelse(iloc.bh); if (!max_inline_size) return 0; return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; } /* * this function does not take xattr_sem, which is OK because it is * currently only used in a code path coming form ext4_iget, before * the new inode has been unlocked */ int ext4_find_inline_data_nolock(struct inode *inode) { struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; int error; if (EXT4_I(inode)->i_extra_isize == 0) return 0; error = ext4_get_inode_loc(inode, &is.iloc); if (error) return error; error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto out; if (!is.s.not_found) { if (is.s.here->e_value_inum) { EXT4_ERROR_INODE(inode, "inline data xattr refers " "to an external xattr inode"); error = -EFSCORRUPTED; goto out; } EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); } out: brelse(is.iloc.bh); return error; } static int ext4_read_inline_data(struct inode *inode, void *buffer, unsigned int len, struct ext4_iloc *iloc) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; int cp_len = 0; struct ext4_inode *raw_inode; if (!len) return 0; BUG_ON(len > EXT4_I(inode)->i_inline_size); cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE); raw_inode = ext4_raw_inode(iloc); memcpy(buffer, (void *)(raw_inode->i_block), cp_len); len -= cp_len; buffer += cp_len; if (!len) goto out; header = IHDR(inode, raw_inode); entry = (struct ext4_xattr_entry *)((void *)raw_inode + EXT4_I(inode)->i_inline_off); len = min_t(unsigned int, len, (unsigned int)le32_to_cpu(entry->e_value_size)); memcpy(buffer, (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); cp_len += len; out: return cp_len; } /* * write the buffer to the inline inode. * If 'create' is set, we don't need to do the extra copy in the xattr * value since it is already handled by ext4_xattr_ibody_set. * That saves us one memcpy. */ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, void *buffer, loff_t pos, unsigned int len) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; struct ext4_inode *raw_inode; int cp_len = 0; if (unlikely(ext4_forced_shutdown(inode->i_sb))) return; BUG_ON(!EXT4_I(inode)->i_inline_off); BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); raw_inode = ext4_raw_inode(iloc); buffer += pos; if (pos < EXT4_MIN_INLINE_DATA_SIZE) { cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ? 
EXT4_MIN_INLINE_DATA_SIZE - pos : len; memcpy((void *)raw_inode->i_block + pos, buffer, cp_len); len -= cp_len; buffer += cp_len; pos += cp_len; } if (!len) return; pos -= EXT4_MIN_INLINE_DATA_SIZE; header = IHDR(inode, raw_inode); entry = (struct ext4_xattr_entry *)((void *)raw_inode + EXT4_I(inode)->i_inline_off); memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos, buffer, len); } static int ext4_create_inline_data(handle_t *handle, struct inode *inode, unsigned len) { int error; void *value = NULL; struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; error = ext4_get_inode_loc(inode, &is.iloc); if (error) return error; BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, EXT4_JTR_NONE); if (error) goto out; if (len > EXT4_MIN_INLINE_DATA_SIZE) { value = EXT4_ZERO_XATTR_VALUE; len -= EXT4_MIN_INLINE_DATA_SIZE; } else { value = ""; len = 0; } /* Insert the xttr entry. */ i.value = value; i.value_len = len; error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto out; BUG_ON(!is.s.not_found); error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) { if (error == -ENOSPC) ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); goto out; } memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); get_bh(is.iloc.bh); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); out: brelse(is.iloc.bh); return error; } static int ext4_update_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { int error; void *value = NULL; struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; /* If the old space is ok, write the data directly. */ if (len <= EXT4_I(inode)->i_inline_size) return 0; error = ext4_get_inode_loc(inode, &is.iloc); if (error) return error; error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto out; BUG_ON(is.s.not_found); len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); if (!value) { error = -ENOMEM; goto out; } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); if (error < 0) goto out; BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, EXT4_JTR_NONE); if (error) goto out; /* Update the xattr entry. 
*/ i.value = value; i.value_len = len; error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); get_bh(is.iloc.bh); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); out: kfree(value); brelse(is.iloc.bh); return error; } static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) return -ENOSPC; size = ext4_get_max_inline_size(inode); if (size < len) return -ENOSPC; ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); ext4_write_unlock_xattr(inode, &no_expand); return ret; } static int ext4_destroy_inline_data_nolock(handle_t *handle, struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_xattr_ibody_find is = { .s = { .not_found = 0, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, .value = NULL, .value_len = 0, }; int error; if (!ei->i_inline_off) return 0; error = ext4_get_inode_loc(inode, &is.iloc); if (error) return error; error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto out; BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, EXT4_JTR_NONE); if (error) goto out; error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode); } } ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); get_bh(is.iloc.bh); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); EXT4_I(inode)->i_inline_off = 0; EXT4_I(inode)->i_inline_size = 0; ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); out: brelse(is.iloc.bh); if (error == -ENODATA) error = 0; return error; } static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) { void *kaddr; int ret = 0; size_t len; struct ext4_iloc iloc; BUG_ON(!folio_test_locked(folio)); BUG_ON(!ext4_has_inline_data(inode)); BUG_ON(folio->index); if (!EXT4_I(inode)->i_inline_off) { ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", inode->i_ino); goto out; } ret = ext4_get_inode_loc(inode, &iloc); if (ret) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); BUG_ON(len > PAGE_SIZE); kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); kaddr = folio_zero_tail(folio, len, kaddr + len); kunmap_local(kaddr); folio_mark_uptodate(folio); brelse(iloc.bh); out: return ret; } int ext4_readpage_inline(struct inode *inode, struct folio *folio) { int ret = 0; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { up_read(&EXT4_I(inode)->xattr_sem); return -EAGAIN; } /* * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. 
*/ if (!folio->index) ret = ext4_read_inline_folio(inode, folio); else if (!folio_test_uptodate(folio)) { folio_zero_segment(folio, 0, folio_size(folio)); folio_mark_uptodate(folio); } up_read(&EXT4_I(inode)->xattr_sem); folio_unlock(folio); return ret >= 0 ? 0 : ret; } static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode) { int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct folio *folio = NULL; unsigned from, to; struct ext4_iloc iloc; if (!ext4_has_inline_data(inode)) { /* * clear the flag so that no new write * will trap here again. */ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; } needed_blocks = ext4_writepage_trans_blocks(inode); ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; retry: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; goto out; } /* We cannot recurse into the filesystem as the transaction is already * started */ folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto out_nofolio; } ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { ret = 0; goto out; } from = 0; to = ext4_get_inline_size(inode); if (!folio_test_uptodate(folio)) { ret = ext4_read_inline_folio(inode, folio); if (ret < 0) goto out; } ret = ext4_destroy_inline_data_nolock(handle, inode); if (ret) goto out; if (ext4_should_dioread_nolock(inode)) { ret = ext4_block_write_begin(handle, folio, from, to, ext4_get_block_unwritten); } else ret = ext4_block_write_begin(handle, folio, from, to, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = ext4_walk_page_buffers(handle, inode, folio_buffers(folio), from, to, NULL, do_journal_get_write_access); } if (ret) { folio_unlock(folio); folio_put(folio); folio = NULL; ext4_orphan_add(handle, inode); ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might * still be on the orphan list; we need to * make sure the inode is removed from the * orphan list in that case. */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; if (folio) block_commit_write(&folio->page, from, to); out: if (folio) { folio_unlock(folio); folio_put(folio); } out_nofolio: if (sem_held) ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); return ret; } /* * Try to write data in the inode. * If the inode has inline data, check whether the new write can be * in the inode also. If not, create the page the handle, move the data * to the page make it update and let the later codes create extent for it. */ int ext4_try_to_write_inline_data(struct address_space *mapping, struct inode *inode, loff_t pos, unsigned len, struct folio **foliop) { int ret; handle_t *handle; struct folio *folio; struct ext4_iloc iloc; if (pos + len > ext4_get_max_inline_size(inode)) goto convert; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; /* * The possible write could happen in the inode, * so try to reserve the space in inode first. 
*/ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; goto out; } ret = ext4_prepare_inline_data(handle, inode, pos + len); if (ret && ret != -ENOSPC) goto out; /* We don't have space in inline inode, so convert it to extent. */ if (ret == -ENOSPC) { ext4_journal_stop(handle); brelse(iloc.bh); goto convert; } ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE); if (ret) goto out; folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto out; } *foliop = folio; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { ret = 0; folio_unlock(folio); folio_put(folio); goto out_up_read; } if (!folio_test_uptodate(folio)) { ret = ext4_read_inline_folio(inode, folio); if (ret < 0) { folio_unlock(folio); folio_put(folio); goto out_up_read; } } ret = 1; handle = NULL; out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; convert: return ext4_convert_inline_data_to_extent(mapping, inode); } int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct folio *folio) { handle_t *handle = ext4_journal_current_handle(); int no_expand; void *kaddr; struct ext4_iloc iloc; int ret = 0, ret2; if (unlikely(copied < len) && !folio_test_uptodate(folio)) copied = 0; if (likely(copied)) { ret = ext4_get_inode_loc(inode, &iloc); if (ret) { folio_unlock(folio); folio_put(folio); ext4_std_error(inode->i_sb, ret); goto out; } ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); /* * ei->i_inline_off may have changed since * ext4_write_begin() called * ext4_try_to_write_inline_data() */ (void) ext4_find_inline_data_nolock(inode); kaddr = kmap_local_folio(folio, 0); ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); kunmap_local(kaddr); folio_mark_uptodate(folio); /* clear dirty flag so that writepages wouldn't work for us. */ folio_clear_dirty(folio); ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); /* * It's important to update i_size while still holding folio * lock: page writeout could otherwise come in and zero * beyond i_size. */ ext4_update_inode_size(inode, pos + copied); } folio_unlock(folio); folio_put(folio); /* * Don't mark the inode dirty under folio lock. First, it unnecessarily * makes the holding time of folio lock longer. Second, it forces lock * ordering of folio lock and transaction start for journaling * filesystems. */ if (likely(copied)) mark_inode_dirty(inode); out: /* * If we didn't copy as much data as expected, we need to trim back * size of xattr containing inline data. */ if (pos + len > inode->i_size && ext4_can_truncate(inode)) ext4_orphan_add(handle, inode); ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; if (pos + len > inode->i_size) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); } return ret ? ret : copied; } /* * Try to make the page cache and handle ready for the inline data case. * We can call this function in 2 cases: * 1. The inode is created and the first write exceeds inline size. We can * clear the inode state safely. * 2. 
The inode has inline data, then we need to read the data, make it * update and dirty so that ext4_da_writepages can handle it. We don't * need to start the journal since the file's metadata isn't changed now. */ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, void **fsdata) { int ret = 0, inline_size; struct folio *folio; folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); goto out; } inline_size = ext4_get_inline_size(inode); if (!folio_test_uptodate(folio)) { ret = ext4_read_inline_folio(inode, folio); if (ret < 0) goto out; } ret = ext4_block_write_begin(NULL, folio, 0, inline_size, ext4_da_get_block_prep); if (ret) { up_read(&EXT4_I(inode)->xattr_sem); folio_unlock(folio); folio_put(folio); ext4_truncate_failed_write(inode); return ret; } folio_mark_dirty(folio); folio_mark_uptodate(folio); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); *fsdata = (void *)CONVERT_INLINE_DATA; out: up_read(&EXT4_I(inode)->xattr_sem); if (folio) { folio_unlock(folio); folio_put(folio); } return ret; } /* * Prepare the write for the inline data. * If the data can be written into the inode, we just read * the page and make it uptodate, and start the journal. * Otherwise read the page, makes it dirty so that it can be * handle in writepages(the i_disksize update is left to the * normal ext4_da_write_end). */ int ext4_da_write_inline_data_begin(struct address_space *mapping, struct inode *inode, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { int ret; handle_t *handle; struct folio *folio; struct ext4_iloc iloc; int retries = 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; retry_journal: handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } ret = ext4_prepare_inline_data(handle, inode, pos + len); if (ret && ret != -ENOSPC) goto out_journal; if (ret == -ENOSPC) { ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; goto out; } /* * We cannot recurse into the filesystem as the transaction * is already started. 
*/ folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto out_journal; } down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { ret = 0; goto out_release_page; } if (!folio_test_uptodate(folio)) { ret = ext4_read_inline_folio(inode, folio); if (ret < 0) goto out_release_page; } ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE); if (ret) goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *foliop = folio; brelse(iloc.bh); return 1; out_release_page: up_read(&EXT4_I(inode)->xattr_sem); folio_unlock(folio); folio_put(folio); out_journal: ext4_journal_stop(handle); out: brelse(iloc.bh); return ret; } #ifdef INLINE_DIR_DEBUG void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, void *inline_start, int inline_size) { int offset; unsigned short de_len; struct ext4_dir_entry_2 *de = inline_start; void *dlimit = inline_start + inline_size; trace_printk("inode %lu\n", dir->i_ino); offset = 0; while ((void *)de < dlimit) { de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", offset, de_len, de->name_len, de->name, de->name_len, le32_to_cpu(de->inode)); if (ext4_check_dir_entry(dir, NULL, de, bh, inline_start, inline_size, offset)) BUG(); offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } } #else #define ext4_show_inline_dir(dir, bh, inline_start, inline_size) #endif /* * Add a new entry into a inline dir. * It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. */ static int ext4_add_dirent_to_inline(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { int err; struct ext4_dir_entry_2 *de; err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start, inline_size, fname, &de); if (err) return err; BUFFER_TRACE(iloc->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, EXT4_JTR_NONE); if (err) return err; ext4_insert_dentry(dir, inode, de, inline_size, fname); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend * on this. * * XXX similarly, too many callers depend on * ext4_new_inode() setting the times, but error * recovery deletes the inode, so the worst that can * happen is that the times are slightly out of date * and/or different from the directory change time. */ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); inode_inc_iversion(dir); return 1; } static void *ext4_get_inline_xattr_pos(struct inode *inode, struct ext4_iloc *iloc) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; BUG_ON(!EXT4_I(inode)->i_inline_off); header = IHDR(inode, ext4_raw_inode(iloc)); entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) + EXT4_I(inode)->i_inline_off); return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs); } /* Set the final de to cover the whole block. 
*/ static void ext4_update_final_de(void *de_buf, int old_size, int new_size) { struct ext4_dir_entry_2 *de, *prev_de; void *limit; int de_len; de = de_buf; if (old_size) { limit = de_buf + old_size; do { prev_de = de; de_len = ext4_rec_len_from_disk(de->rec_len, old_size); de_buf += de_len; de = de_buf; } while (de_buf < limit); prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - old_size, new_size); } else { /* this is just created, so create an empty entry. */ de->inode = 0; de->rec_len = ext4_rec_len_to_disk(new_size, new_size); } } static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, struct ext4_iloc *iloc) { int ret; int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; int new_size = get_max_inline_xattr_value_size(dir, iloc); if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) return -ENOSPC; ret = ext4_update_inline_data(handle, dir, new_size + EXT4_MIN_INLINE_DATA_SIZE); if (ret) return ret; ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size, EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE); dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size; return 0; } static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { int ret; ret = ext4_create_inline_data(handle, inode, inline_size); if (ret) { ext4_msg(inode->i_sb, KERN_EMERG, "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", inode->i_ino, ret); return; } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } static int ext4_finish_convert_inline_dir(handle_t *handle, struct inode *inode, struct buffer_head *dir_block, void *buf, int inline_size) { int err, csum_size = 0, header_size = 0; struct ext4_dir_entry_2 *de; void *target = dir_block->b_data; /* * First create "." and ".." and then copy the dir information * back to the block. */ de = target; de = ext4_init_dot_dotdot(inode, de, inode->i_sb->s_blocksize, csum_size, le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1); header_size = (void *)de - target; memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, inline_size - EXT4_INLINE_DOTDOT_SIZE); if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); inode->i_size = inode->i_sb->s_blocksize; i_size_write(inode, inode->i_sb->s_blocksize); EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; ext4_update_final_de(dir_block->b_data, inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, inode->i_sb->s_blocksize - csum_size); if (csum_size) ext4_initialize_dirent_tail(dir_block, inode->i_sb->s_blocksize); set_buffer_uptodate(dir_block); unlock_buffer(dir_block); err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) return err; set_buffer_verified(dir_block); return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc) { int error; void *buf = NULL; struct buffer_head *data_bh = NULL; struct ext4_map_blocks map; int inline_size; inline_size = ext4_get_inline_size(inode); buf = kmalloc(inline_size, GFP_NOFS); if (!buf) { error = -ENOMEM; goto out; } error = ext4_read_inline_data(inode, buf, inline_size, iloc); if (error < 0) goto out; /* * Make sure the inline directory entries pass checks before we try to * convert them, so that we avoid touching stuff that needs fsck. 
*/ if (S_ISDIR(inode->i_mode)) { error = ext4_check_all_de(inode, iloc->bh, buf + EXT4_INLINE_DOTDOT_SIZE, inline_size - EXT4_INLINE_DOTDOT_SIZE); if (error) goto out; } error = ext4_destroy_inline_data_nolock(handle, inode); if (error) goto out; map.m_lblk = 0; map.m_len = 1; map.m_flags = 0; error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE); if (error < 0) goto out_restore; if (!(map.m_flags & EXT4_MAP_MAPPED)) { error = -EIO; goto out_restore; } data_bh = sb_getblk(inode->i_sb, map.m_pblk); if (!data_bh) { error = -ENOMEM; goto out_restore; } lock_buffer(data_bh); error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, EXT4_JTR_NONE); if (error) { unlock_buffer(data_bh); error = -EIO; goto out_restore; } memset(data_bh->b_data, 0, inode->i_sb->s_blocksize); if (!S_ISDIR(inode->i_mode)) { memcpy(data_bh->b_data, buf, inline_size); set_buffer_uptodate(data_bh); unlock_buffer(data_bh); error = ext4_handle_dirty_metadata(handle, inode, data_bh); } else { error = ext4_finish_convert_inline_dir(handle, inode, data_bh, buf, inline_size); } out_restore: if (error) ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); out: brelse(data_bh); kfree(buf); return error; } /* * Try to add the new entry to the inline data. * If succeeds, return 0. If not, extended the inline dir and copied data to * the new created block. */ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { int ret, ret2, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; ret = ext4_get_inode_loc(dir, &iloc); if (ret) return ret; ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; inline_start = (void *)ext4_raw_inode(&iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; /* check whether it can be inserted to inline xattr space. */ inline_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; if (!inline_size) { /* Try to use the xattr space.*/ ret = ext4_update_inline_dir(handle, dir, &iloc); if (ret && ret != -ENOSPC) goto out; inline_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; } if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; } /* * The inline space is filled up, so create a new block for it. * As the extent tree will be created, we have to save the inline * dir first. */ ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); out: ext4_write_unlock_xattr(dir, &no_expand); ret2 = ext4_mark_inode_dirty(handle, dir); if (unlikely(ret2 && !ret)) ret = ret2; brelse(iloc.bh); return ret; } /* * This function fills a red-black tree with information from an * inlined dir. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. 
*/ int ext4_inlinedir_to_tree(struct file *dir_file, struct inode *dir, ext4_lblk_t block, struct dx_hash_info *hinfo, __u32 start_hash, __u32 start_minor_hash, int *has_inline_data) { int err = 0, count = 0; unsigned int parent_ino; int pos; struct ext4_dir_entry_2 *de; struct inode *inode = file_inode(dir_file); int ret, inline_size = 0; struct ext4_iloc iloc; void *dir_buf = NULL; struct ext4_dir_entry_2 fake; struct fscrypt_str tmp_str; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { up_read(&EXT4_I(inode)->xattr_sem); *has_inline_data = 0; goto out; } inline_size = ext4_get_inline_size(inode); dir_buf = kmalloc(inline_size, GFP_NOFS); if (!dir_buf) { ret = -ENOMEM; up_read(&EXT4_I(inode)->xattr_sem); goto out; } ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); up_read(&EXT4_I(inode)->xattr_sem); if (ret < 0) goto out; pos = 0; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); while (pos < inline_size) { /* * As inlined dir doesn't store any information about '.' and * only the inode number of '..' is stored, we have to handle * them differently. */ if (pos == 0) { fake.inode = cpu_to_le32(inode->i_ino); fake.name_len = 1; strcpy(fake.name, "."); fake.rec_len = ext4_rec_len_to_disk( ext4_dir_rec_len(fake.name_len, NULL), inline_size); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); de = &fake; pos = EXT4_INLINE_DOTDOT_OFFSET; } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { fake.inode = cpu_to_le32(parent_ino); fake.name_len = 2; strcpy(fake.name, ".."); fake.rec_len = ext4_rec_len_to_disk( ext4_dir_rec_len(fake.name_len, NULL), inline_size); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); de = &fake; pos = EXT4_INLINE_DOTDOT_SIZE; } else { de = (struct ext4_dir_entry_2 *)(dir_buf + pos); pos += ext4_rec_len_from_disk(de->rec_len, inline_size); if (ext4_check_dir_entry(inode, dir_file, de, iloc.bh, dir_buf, inline_size, pos)) { ret = count; goto out; } } if (ext4_hash_in_dirent(dir)) { hinfo->hash = EXT4_DIRENT_HASH(de); hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); } else { err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); if (err) { ret = err; goto out; } } if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) continue; if (de->inode == 0) continue; tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); if (err) { ret = err; goto out; } count++; } ret = count; out: kfree(dir_buf); brelse(iloc.bh); return ret; } /* * So this function is called when the volume is mkfsed with * dir_index disabled. In order to keep f_pos persistent * after we convert from an inlined dir to a blocked based, * we just pretend that we are a normal dir and return the * offset as if '.' and '..' really take place. 
* */ int ext4_read_inline_dir(struct file *file, struct dir_context *ctx, int *has_inline_data) { unsigned int offset, parent_ino; int i; struct ext4_dir_entry_2 *de; struct super_block *sb; struct inode *inode = file_inode(file); int ret, inline_size = 0; struct ext4_iloc iloc; void *dir_buf = NULL; int dotdot_offset, dotdot_size, extra_offset, extra_size; struct dir_private_info *info = file->private_data; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { up_read(&EXT4_I(inode)->xattr_sem); *has_inline_data = 0; goto out; } inline_size = ext4_get_inline_size(inode); dir_buf = kmalloc(inline_size, GFP_NOFS); if (!dir_buf) { ret = -ENOMEM; up_read(&EXT4_I(inode)->xattr_sem); goto out; } ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); up_read(&EXT4_I(inode)->xattr_sem); if (ret < 0) goto out; ret = 0; sb = inode->i_sb; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); offset = ctx->pos; /* * dotdot_offset and dotdot_size is the real offset and * size for ".." and "." if the dir is block based while * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. * So we will use extra_offset and extra_size to indicate them * during the inline dir iteration. */ dotdot_offset = ext4_dir_rec_len(1, NULL); dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; extra_size = extra_offset + inline_size; /* * If the cookie has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the inline * dir to make sure. */ if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and * ".." is dotdot_offset. */ if (!i) { i = dotdot_offset; continue; } else if (i == dotdot_offset) { i = dotdot_size; continue; } /* for other entry, the real offset in * the buf has to be tuned accordingly. */ de = (struct ext4_dir_entry_2 *) (dir_buf + i - extra_offset); /* It's too expensive to do a full * dirent test each time round this * loop, but we do have to test at * least that it is non-zero. A * failure will be detected in the * dirent test below. 
*/ if (ext4_rec_len_from_disk(de->rec_len, extra_size) < ext4_dir_rec_len(1, NULL)) break; i += ext4_rec_len_from_disk(de->rec_len, extra_size); } offset = i; ctx->pos = offset; info->cookie = inode_query_iversion(inode); } while (ctx->pos < extra_size) { if (ctx->pos == 0) { if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) goto out; ctx->pos = dotdot_offset; continue; } if (ctx->pos == dotdot_offset) { if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) goto out; ctx->pos = dotdot_size; continue; } de = (struct ext4_dir_entry_2 *) (dir_buf + ctx->pos - extra_offset); if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, extra_size, ctx->pos)) goto out; if (le32_to_cpu(de->inode)) { if (!dir_emit(ctx, de->name, de->name_len, le32_to_cpu(de->inode), get_dtype(sb, de->file_type))) goto out; } ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); } out: kfree(dir_buf); brelse(iloc.bh); return ret; } void *ext4_read_inline_link(struct inode *inode) { struct ext4_iloc iloc; int ret, inline_size; void *link; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ERR_PTR(ret); ret = -ENOMEM; inline_size = ext4_get_inline_size(inode); link = kmalloc(inline_size + 1, GFP_NOFS); if (!link) goto out; ret = ext4_read_inline_data(inode, link, inline_size, &iloc); if (ret < 0) { kfree(link); goto out; } nd_terminate_link(link, inode->i_size, ret); out: if (ret < 0) link = ERR_PTR(ret); brelse(iloc.bh); return link; } struct buffer_head *ext4_get_first_inline_block(struct inode *inode, struct ext4_dir_entry_2 **parent_de, int *retval) { struct ext4_iloc iloc; *retval = ext4_get_inode_loc(inode, &iloc); if (*retval) return NULL; *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; return iloc.bh; } /* * Try to create the inline data for the new dir. * If it succeeds, return 0, otherwise return the error. * In case of ENOSPC, the caller should create the normal disk layout dir. */ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, struct inode *inode) { int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; struct ext4_iloc iloc; struct ext4_dir_entry_2 *de; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; ret = ext4_prepare_inline_data(handle, inode, inline_size); if (ret) goto out; /* * For inline dir, we only save the inode information for the ".." * and create a fake dentry to cover the left space. 
*/ de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; de->inode = cpu_to_le32(parent->i_ino); de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); de->inode = 0; de->rec_len = ext4_rec_len_to_disk( inline_size - EXT4_INLINE_DOTDOT_SIZE, inline_size); set_nlink(inode, 2); inode->i_size = EXT4_I(inode)->i_disksize = inline_size; out: brelse(iloc.bh); return ret; } struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) { struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; int ret; void *inline_start; int inline_size; ret = ext4_get_inode_loc(dir, &is.iloc); if (ret) return ERR_PTR(ret); down_read(&EXT4_I(dir)->xattr_sem); ret = ext4_xattr_ibody_find(dir, &i, &is); if (ret) goto out; if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; } inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, dir, fname, 0, res_dir); if (ret == 1) goto out_find; if (ret < 0) goto out; if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) goto out; inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, dir, fname, 0, res_dir); if (ret == 1) goto out_find; out: brelse(is.iloc.bh); if (ret < 0) is.iloc.bh = ERR_PTR(ret); else is.iloc.bh = NULL; out_find: up_read(&EXT4_I(dir)->xattr_sem); return is.iloc.bh; } int ext4_delete_inline_entry(handle_t *handle, struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, int *has_inline_data) { int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; err = ext4_get_inode_loc(dir, &iloc); if (err) return err; ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; } if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < EXT4_MIN_INLINE_DATA_SIZE) { inline_start = (void *)ext4_raw_inode(&iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; } else { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; } BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, bh, EXT4_JTR_NONE); if (err) goto out; err = ext4_generic_delete_entry(dir, de_del, bh, inline_start, inline_size, 0); if (err) goto out; ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: ext4_write_unlock_xattr(dir, &no_expand); if (likely(err == 0)) err = ext4_mark_inode_dirty(handle, dir); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); return err; } /* * Get the inline dentry at offset. 
*/ static inline struct ext4_dir_entry_2 * ext4_get_inline_entry(struct inode *inode, struct ext4_iloc *iloc, unsigned int offset, void **inline_start, int *inline_size) { void *inline_pos; BUG_ON(offset > ext4_get_inline_size(inode)); if (offset < EXT4_MIN_INLINE_DATA_SIZE) { inline_pos = (void *)ext4_raw_inode(iloc)->i_block; *inline_size = EXT4_MIN_INLINE_DATA_SIZE; } else { inline_pos = ext4_get_inline_xattr_pos(inode, iloc); offset -= EXT4_MIN_INLINE_DATA_SIZE; *inline_size = ext4_get_inline_size(inode) - EXT4_MIN_INLINE_DATA_SIZE; } if (inline_start) *inline_start = inline_pos; return (struct ext4_dir_entry_2 *)(inline_pos + offset); } bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; size_t inline_len; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; ret = true; goto out; } de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; if (!le32_to_cpu(de->inode)) { ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); goto out; } inline_len = ext4_get_inline_size(dir); offset = EXT4_INLINE_DOTDOT_SIZE; while (offset < inline_len) { de = ext4_get_inline_entry(dir, &iloc, offset, &inline_pos, &inline_size); if (ext4_check_dir_entry(dir, NULL, de, iloc.bh, inline_pos, inline_size, offset)) { ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - " "inode %u, rec_len %u, name_len %d" "inline size %d", dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); goto out; } if (le32_to_cpu(de->inode)) { goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); return ret; } int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { int ret, no_expand; ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); ext4_write_unlock_xattr(inode, &no_expand); return ret; } int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) { __u64 addr; int error = -EAGAIN; struct ext4_iloc iloc; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) goto out; error = ext4_get_inode_loc(inode, &iloc); if (error) goto out; addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; addr += offsetof(struct ext4_inode, i_block); brelse(iloc.bh); iomap->addr = addr; iomap->offset = 0; iomap->length = min_t(loff_t, ext4_get_inline_size(inode), i_size_read(inode)); iomap->type = IOMAP_INLINE; iomap->flags = 0; out: up_read(&EXT4_I(inode)->xattr_sem); return error; } int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; int inline_size, value_len, needed_blocks, no_expand, err = 0; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; needed_blocks = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); if (IS_ERR(handle)) return PTR_ERR(handle); ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { ext4_write_unlock_xattr(inode, 
&no_expand); *has_inline = 0; ext4_journal_stop(handle); return 0; } if ((err = ext4_orphan_add(handle, inode)) != 0) goto out; if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) goto out; down_write(&EXT4_I(inode)->i_data_sem); i_size = inode->i_size; inline_size = ext4_get_inline_size(inode); EXT4_I(inode)->i_disksize = i_size; if (i_size < inline_size) { /* * if there's inline data to truncate and this file was * converted to extents after that inline data was written, * the extent status cache must be cleared to avoid leaving * behind stale delayed allocated extent entries */ if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); /* Clear the content in the xattr space. */ if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) goto out_error; BUG_ON(is.s.not_found); value_len = le32_to_cpu(is.s.here->e_value_size); value = kmalloc(value_len, GFP_NOFS); if (!value) { err = -ENOMEM; goto out_error; } err = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, value_len); if (err <= 0) goto out_error; i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; err = ext4_xattr_ibody_set(handle, inode, &i, &is); if (err) goto out_error; } /* Clear the content within i_blocks. */ if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; memset(p + i_size, 0, EXT4_MIN_INLINE_DATA_SIZE - i_size); } EXT4_I(inode)->i_inline_size = i_size < EXT4_MIN_INLINE_DATA_SIZE ? EXT4_MIN_INLINE_DATA_SIZE : i_size; } out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); if (err == 0) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); err = ext4_mark_inode_dirty(handle, inode); if (IS_SYNC(inode)) ext4_handle_sync(handle); } ext4_journal_stop(handle); return err; } int ext4_convert_inline_data(struct inode *inode) { int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { /* * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is * cleared. This means we are in the middle of moving of * inline data to delay allocated block. Just force writeout * here to finish conversion. */ error = filemap_flush(inode->i_mapping); if (error) return error; if (!ext4_has_inline_data(inode)) return 0; } needed_blocks = ext4_writepage_trans_blocks(inode); iloc.bh = NULL; error = ext4_get_inode_loc(inode, &iloc); if (error) return error; handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto out_free; } ext4_write_lock_xattr(inode, &no_expand); if (ext4_has_inline_data(inode)) error = ext4_convert_inline_data_nolock(handle, inode, &iloc); ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); return error; }
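/*
 * Illustrative sketch (plain userspace C, not kernel code): the routines above
 * walk an inline directory that is split across two regions -- the first
 * EXT4_MIN_INLINE_DATA_SIZE bytes live in the inode's i_block[] array and any
 * remainder lives in the system.data xattr. The helper below mirrors the
 * offset arithmetic of ext4_get_inline_entry(); the constants 60 and 4 are the
 * values the kernel normally uses for EXT4_MIN_INLINE_DATA_SIZE and
 * EXT4_INLINE_DOTDOT_SIZE, stated here as assumptions.
 */
#include <stdio.h>

#define MIN_INLINE_DATA_SIZE	60	/* bytes kept in i_block[] */
#define INLINE_DOTDOT_SIZE	4	/* compressed ".." entry at the front */

struct inline_pos {
	int in_xattr;		/* 0: i_block region, 1: system.data xattr */
	unsigned int offset;	/* offset within that region */
};

/* Map a logical offset in the inline directory to a region + local offset. */
static struct inline_pos locate_inline_entry(unsigned int offset)
{
	struct inline_pos pos;

	if (offset < MIN_INLINE_DATA_SIZE) {
		pos.in_xattr = 0;
		pos.offset = offset;
	} else {
		pos.in_xattr = 1;
		pos.offset = offset - MIN_INLINE_DATA_SIZE;
	}
	return pos;
}

int main(void)
{
	unsigned int offsets[] = { INLINE_DOTDOT_SIZE, 32, 60, 100 };
	unsigned int i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		struct inline_pos p = locate_inline_entry(offsets[i]);

		printf("logical offset %3u -> %s + %u\n", offsets[i],
		       p.in_xattr ? "system.data xattr" : "i_block[]",
		       p.offset);
	}
	return 0;
}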
/* * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> #include <net/addrconf.h> #include "rds_single_path.h" #include "rds.h" #include "ib.h" #include "ib_mr.h" static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; static atomic_t rds_ib_unloading; module_param(rds_ib_mr_1m_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); module_param(rds_ib_mr_8k_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA"); module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); /* * we have a clumsy combination of RCU and a rwsem protecting this list * because it is used both in the get_mr fast path and while blocking in * the FMR flushing path. */ DECLARE_RWSEM(rds_ib_devices_lock); struct list_head rds_ib_devices; /* NOTE: if also grabbing ibdev lock, grab this first */ DEFINE_SPINLOCK(ib_nodev_conns_lock); LIST_HEAD(ib_nodev_conns); static void rds_ib_nodev_connect(void) { struct rds_ib_connection *ic; spin_lock(&ib_nodev_conns_lock); list_for_each_entry(ic, &ib_nodev_conns, ib_node) rds_conn_connect_if_down(ic->conn); spin_unlock(&ib_nodev_conns_lock); } static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) { struct rds_ib_connection *ic; unsigned long flags; spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) rds_conn_path_drop(&ic->conn->c_path[0], true); spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } /* * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references * from interrupt context so we push freing off into a work struct in krdsd. 
*/ static void rds_ib_dev_free(struct work_struct *work) { struct rds_ib_ipaddr *i_ipaddr, *i_next; struct rds_ib_device *rds_ibdev = container_of(work, struct rds_ib_device, free_work); if (rds_ibdev->mr_8k_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); if (rds_ibdev->mr_1m_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); kfree(i_ipaddr); } kfree(rds_ibdev->vector_load); kfree(rds_ibdev); } void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) { BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); if (refcount_dec_and_test(&rds_ibdev->refcount)) queue_work(rds_wq, &rds_ibdev->free_work); } static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) return -EOPNOTSUPP; /* Device must support FRWR */ if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->conn_list); rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); rds_ibdev->odp_capable = !!(device->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_WRITE) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_READ); rds_ibdev->max_1m_mrs = device->attrs.max_mr ? min_t(unsigned int, (device->attrs.max_mr / 2), rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; rds_ibdev->max_8k_mrs = device->attrs.max_mr ? 
min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size; rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->vector_load = kcalloc(device->num_comp_vectors, sizeof(int), GFP_KERNEL); if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); pr_info("RDS/IB: %s: added\n", device->name); down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); rds_ib_nodev_connect(); return 0; put_dev: rds_ib_dev_put(rds_ibdev); return ret; } /* * New connections use this to find the device to associate with the * connection. It's not in the fast path so we're not concerned about the * performance of the IB call. (As of this writing, it uses an interrupt * blocking spinlock to serialize walking a per-device list of all registered * clients.) * * RCU is used to handle incoming connections racing with device teardown. * Rather than use a lock to serialize removal from the client_data and * getting a new reference, we use an RCU grace period. The destruction * path removes the device from client_data and then waits for all RCU * readers to finish. * * A new connection can get NULL from this if its arriving on a * device that is in the process of being removed. */ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) { struct rds_ib_device *rds_ibdev; rcu_read_lock(); rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (rds_ibdev) refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } /* * The IB stack is letting us know that a device is going away. This can * happen if the underlying HCA driver is removed or if PCI hotplug is removing * the pci function, for example. * * This can be called at any time and can be racing with any other RDS path. */ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ ib_set_client_data(device, &rds_ib_client, NULL); down_write(&rds_ib_devices_lock); list_del_rcu(&rds_ibdev->list); up_write(&rds_ib_devices_lock); /* * This synchronize rcu is waiting for readers of both the ib * client data and the devices list to finish before we drop * both of those references. 
*/ synchronize_rcu(); rds_ib_dev_put(rds_ibdev); rds_ib_dev_put(rds_ibdev); } struct ib_client rds_ib_client = { .name = "rds_ib", .add = rds_ib_add_one, .remove = rds_ib_remove_one }; static int rds_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds_info_rdma_connection *iinfo = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; if (conn->c_isv6) return 0; iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; if (ic) { iinfo->tos = conn->c_tos; iinfo->sl = ic->i_sl; } memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, (union ib_gid *)&iinfo->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo->max_send_wr = ic->i_send_ring.w_nr; iinfo->max_recv_wr = ic->i_recv_ring.w_nr; iinfo->max_send_sge = rds_ibdev->max_sge; rds_ib_get_mr_info(rds_ibdev, iinfo); iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_conn_info_visitor(). */ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds6_info_rdma_connection *iinfo6 = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; iinfo6->src_addr = conn->c_laddr; iinfo6->dst_addr = conn->c_faddr; if (ic) { iinfo6->tos = conn->c_tos; iinfo6->sl = ic->i_sl; } memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo6->max_send_wr = ic->i_send_ring.w_nr; iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; iinfo6->max_send_sge = rds_ibdev->max_sge; rds6_ib_get_mr_info(rds_ibdev, iinfo6); iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #endif static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds_ib_conn_info_visitor, buffer, sizeof(struct rds_info_rdma_connection)); } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_ic_info(). */ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds6_ib_conn_info_visitor, buffer, sizeof(struct rds6_info_rdma_connection)); } #endif /* * Early RDS/IB was built to only bind to an address if there is an IPoIB * device with that address set. * * If it were me, I'd advocate for something more flexible. Sending and * receiving should be device-agnostic. Transports would try and maintain * connections between peers who have messages queued. Userspace would be * allowed to influence which paths have priority. We could call userspace * asserting this policy "routing". 
*/ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, __u32 scope_id) { int ret; struct rdma_cm_id *cm_id; #if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6; #endif struct sockaddr_in sin; struct sockaddr *sa; bool isv4; isv4 = ipv6_addr_v4mapped(addr); /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); if (isv4) { memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr->s6_addr32[3]; sa = (struct sockaddr *)&sin; } else { #if IS_ENABLED(CONFIG_IPV6) memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr; sin6.sin6_scope_id = scope_id; sa = (struct sockaddr *)&sin6; /* XXX Do a special IPv6 link local address check here. The * reason is that rdma_bind_addr() always succeeds with IPv6 * link local address regardless it is indeed configured in a * system. */ if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { struct net_device *dev; if (scope_id == 0) { ret = -EADDRNOTAVAIL; goto out; } /* Use init_net for now as RDS is not network * name space aware. */ dev = dev_get_by_index(&init_net, scope_id); if (!dev) { ret = -EADDRNOTAVAIL; goto out; } if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { dev_put(dev); ret = -EADDRNOTAVAIL; goto out; } dev_put(dev); } #else ret = -EADDRNOTAVAIL; goto out; #endif } /* rdma_bind_addr will only succeed for IB & iWARP devices */ ret = rdma_bind_addr(cm_id, sa); /* due to this, we will claim to support iWARP devices unless we check node_type. */ if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI6c%%%u ret %d node type %d\n", addr, scope_id, ret, cm_id->device ? cm_id->device->node_type : -1); out: rdma_destroy_id(cm_id); return ret; } static void rds_ib_unregister_client(void) { ib_unregister_client(&rds_ib_client); /* wait for rds_ib_dev_free() to complete */ flush_workqueue(rds_wq); } static void rds_ib_set_unloading(void) { atomic_set(&rds_ib_unloading, 1); } static bool rds_ib_is_unloading(struct rds_connection *conn) { struct rds_conn_path *cp = &conn->c_path[0]; return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || atomic_read(&rds_ib_unloading) != 0); } void rds_ib_exit(void) { rds_ib_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); #endif rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); rds_ib_mr_exit(); } static u8 rds_ib_get_tos_map(u8 tos) { /* 1:1 user to transport map for RDMA transport. * In future, if custom map is desired, hook can export * user configurable map. 
*/ return tos; } struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, .xmit_path_complete = rds_ib_xmit_path_complete, .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, .conn_path_connect = rds_ib_conn_path_connect, .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, .cm_initiate_connect = rds_ib_cm_initiate_connect, .cm_handle_connect = rds_ib_cm_handle_connect, .cm_connect_complete = rds_ib_cm_connect_complete, .stats_info_copy = rds_ib_stats_info_copy, .exit = rds_ib_exit, .get_mr = rds_ib_get_mr, .sync_mr = rds_ib_sync_mr, .free_mr = rds_ib_free_mr, .flush_mrs = rds_ib_flush_mrs, .get_tos_map = rds_ib_get_tos_map, .t_owner = THIS_MODULE, .t_name = "infiniband", .t_unloading = rds_ib_is_unloading, .t_type = RDS_TRANS_IB }; int rds_ib_init(void) { int ret; INIT_LIST_HEAD(&rds_ib_devices); ret = rds_ib_mr_init(); if (ret) goto out; ret = ib_register_client(&rds_ib_client); if (ret) goto out_mr_exit; ret = rds_ib_sysctl_init(); if (ret) goto out_ibreg; ret = rds_ib_recv_init(); if (ret) goto out_sysctl; rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); #endif goto out; out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); out_mr_exit: rds_ib_mr_exit(); out: return ret; } MODULE_LICENSE("GPL");
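/*
 * Illustrative sketch (userspace, hypothetical names): rds_ib_init() above
 * uses the common kernel pattern of registering several facilities in order
 * and unwinding them in reverse with goto labels when a later step fails.
 * The toy subsystem_init() below reproduces only that control-flow shape;
 * init_a/init_b/init_c and their exit counterparts are stand-ins, not real
 * APIs.
 */
#include <stdio.h>

static int init_a(void) { puts("a registered"); return 0; }
static void exit_a(void) { puts("a unregistered"); }
static int init_b(void) { puts("b registered"); return 0; }
static void exit_b(void) { puts("b unregistered"); }
static int init_c(void) { puts("c failed"); return -1; }	/* simulated failure */

static int subsystem_init(void)
{
	int ret;

	ret = init_a();
	if (ret)
		goto out;
	ret = init_b();
	if (ret)
		goto out_a;
	ret = init_c();
	if (ret)
		goto out_b;
	return 0;		/* everything registered */

out_b:
	exit_b();		/* undo completed steps in reverse order */
out_a:
	exit_a();
out:
	return ret;
}

int main(void)
{
	return subsystem_init() ? 1 : 0;
}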
/* SPDX-License-Identifier: GPL-2.0-only */ /* * VMware vSockets Driver * * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. */ #ifndef __AF_VSOCK_H__ #define __AF_VSOCK_H__ #include <linux/kernel.h> #include <linux/workqueue.h> #include <net/sock.h> #include <uapi/linux/vm_sockets.h> #include "vsock_addr.h" #define LAST_RESERVED_PORT 1023 #define VSOCK_HASH_SIZE 251 extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; extern spinlock_t vsock_table_lock; #define vsock_sk(__sk) ((struct vsock_sock *)__sk) #define sk_vsock(__vsk) (&(__vsk)->sk) struct vsock_sock { /* sk must be the first member. */ struct sock sk; const struct vsock_transport *transport; struct sockaddr_vm local_addr; struct sockaddr_vm remote_addr; /* Links for the global tables of bound and connected sockets. */ struct list_head bound_table; struct list_head connected_table; /* Accessed without the socket lock held. This means it can never be * modified outside of socket create or destruct. */ bool trusted; bool cached_peer_allow_dgram; /* Dgram communication allowed to * cached peer? */ u32 cached_peer; /* Context ID of last dgram destination check. */ const struct cred *owner; /* Rest are SOCK_STREAM only. */ long connect_timeout; /* Listening socket that this came from. */ struct sock *listener; /* Used for pending list and accept queue during connection handshake. * The listening socket is the head for both lists. Sockets created * for connection requests are placed in the pending list until they * are connected, at which point they are put in the accept queue list * so they can be accepted in accept(). If accept() cannot accept the * connection, it is marked as rejected so the cleanup function knows * to clean up the socket. */ struct list_head pending_links; struct list_head accept_queue; bool rejected; struct delayed_work connect_work; struct delayed_work pending_work; struct delayed_work close_work; bool close_work_scheduled; u32 peer_shutdown; bool sent_request; bool ignore_connecting_rst; /* Protected by lock_sock(sk) */ u64 buffer_size; u64 buffer_min_size; u64 buffer_max_size; /* Private to transport. */ void *trans; }; s64 vsock_connectible_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); void vsock_data_ready(struct sock *sk); /**** TRANSPORT ****/ struct vsock_transport_recv_notify_data { u64 data1; /* Transport-defined. */ u64 data2; /* Transport-defined.
*/ bool notify_on_block; }; struct vsock_transport_send_notify_data { u64 data1; /* Transport-defined. */ u64 data2; /* Transport-defined. */ }; /* Transport features flags */ /* Transport provides host->guest communication */ #define VSOCK_TRANSPORT_F_H2G 0x00000001 /* Transport provides guest->host communication */ #define VSOCK_TRANSPORT_F_G2H 0x00000002 /* Transport provides DGRAM communication */ #define VSOCK_TRANSPORT_F_DGRAM 0x00000004 /* Transport provides local (loopback) communication */ #define VSOCK_TRANSPORT_F_LOCAL 0x00000008 struct vsock_transport { struct module *module; /* Initialize/tear-down socket. */ int (*init)(struct vsock_sock *, struct vsock_sock *); void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); /* Cancel all pending packets sent on vsock. */ int (*cancel_pkt)(struct vsock_sock *vsk); /* Connections. */ int (*connect)(struct vsock_sock *); /* DGRAM. */ int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); bool (*dgram_allow)(u32 cid, u32 port); /* STREAM. */ /* TODO: stream_bind() */ ssize_t (*stream_dequeue)(struct vsock_sock *, struct msghdr *, size_t len, int flags); ssize_t (*stream_enqueue)(struct vsock_sock *, struct msghdr *, size_t len); s64 (*stream_has_data)(struct vsock_sock *); s64 (*stream_has_space)(struct vsock_sock *); u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, int flags); int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len); bool (*seqpacket_allow)(u32 remote_cid); u32 (*seqpacket_has_data)(struct vsock_sock *vsk); /* Notification. */ int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); int (*notify_recv_init)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_pre_block)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, struct vsock_transport_recv_notify_data *); int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, ssize_t, bool, struct vsock_transport_recv_notify_data *); int (*notify_send_init)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_pre_block)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_pre_enqueue)(struct vsock_sock *, struct vsock_transport_send_notify_data *); int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, struct vsock_transport_send_notify_data *); /* sk_lock held by the caller */ void (*notify_buffer_size)(struct vsock_sock *, u64 *); int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); /* SIOCOUTQ ioctl */ ssize_t (*unsent_bytes)(struct vsock_sock *vsk); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); /* Addressing. */ u32 (*get_local_cid)(void); /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); /* Zero-copy. 
*/ bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ int vsock_core_register(const struct vsock_transport *t, int features); void vsock_core_unregister(const struct vsock_transport *t); /* The transport may downcast this to access transport-specific functions */ const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk); /**** UTILS ****/ /* vsock_table_lock must be held */ static inline bool __vsock_in_bound_table(struct vsock_sock *vsk) { return !list_empty(&vsk->bound_table); } /* vsock_table_lock must be held */ static inline bool __vsock_in_connected_table(struct vsock_sock *vsk) { return !list_empty(&vsk->connected_table); } void vsock_add_pending(struct sock *listener, struct sock *pending); void vsock_remove_pending(struct sock *listener, struct sock *pending); void vsock_enqueue_accept(struct sock *listener, struct sock *connected); void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); /**** TAP ****/ struct vsock_tap { struct net_device *dev; struct module *module; struct list_head list; }; int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); #ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else static inline void __init vsock_bpf_build_proto(void) {} #endif static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) { return t->msgzerocopy_allow && t->msgzerocopy_allow(); } #endif /* __AF_VSOCK_H__ */
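/*
 * Userspace counterpart sketch: the header above is the in-kernel side of
 * AF_VSOCK; applications reach the same machinery through the UAPI in
 * <linux/vm_sockets.h>. The target below (host CID, port 1234) is an arbitrary
 * example and assumes a listener exists on the host; it is not defined by this
 * header.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr;
	int fd;

	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket(AF_VSOCK)");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = VMADDR_CID_HOST;	/* CID 2: the host, when run from a guest */
	addr.svm_port = 1234;		/* example port */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		close(fd);
		return 1;
	}

	/* From here it behaves like any SOCK_STREAM descriptor. */
	write(fd, "ping", 4);
	close(fd);
	return 0;
}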
// SPDX-License-Identifier: GPL-2.0 #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/export.h> #include <asm/syscall.h> static int collect_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long args[6] = { }; struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ memset(info, 0, sizeof(*info)); info->data.nr = -1; return 0; } regs = task_pt_regs(target); if (unlikely(!regs)) { put_task_stack(target); return -EAGAIN; } info->sp = user_stack_pointer(regs); info->data.instruction_pointer = instruction_pointer(regs); info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) syscall_get_arguments(target, regs, args); info->data.args[0] = args[0]; info->data.args[1] = args[1]; info->data.args[2] = args[2]; info->data.args[3] = args[3]; info->data.args[4] = args[4]; info->data.args[5] = args[5]; put_task_stack(target); return 0; } /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine * @info: structure with the following fields: * .sp - filled with user stack pointer * .data.nr - filled with system call number or -1 * .data.args - filled with the six system call arguments * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, * returns zero with *@info.data.nr set to -1 and does not fill in * @info.data.args. If so, it's now safe to examine @target using * &struct user_regset get() calls as long as we're sure @target won't return * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. */ int task_current_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long ncsw; unsigned int state; if (target == current) return collect_syscall(target, info); state = READ_ONCE(target->__state); if (unlikely(!state)) return -EAGAIN; ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; return 0; }
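/*
 * Quick way to see the data collect_syscall()/task_current_syscall() gather:
 * they back the /proc/<pid>/syscall file (on architectures that provide it).
 * This userspace sketch forks a child that blocks in pause() and then prints
 * the child's /proc/<pid>/syscall line, which contains the syscall number,
 * its arguments, the stack pointer and the program counter.
 */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	char path[64], line[256];
	pid_t child;
	FILE *f;

	child = fork();
	if (child < 0) {
		perror("fork");
		return 1;
	}
	if (child == 0) {
		pause();	/* block inside a system call */
		_exit(0);
	}

	sleep(1);		/* give the child time to block */
	snprintf(path, sizeof(path), "/proc/%d/syscall", (int)child);
	f = fopen(path, "r");
	if (f && fgets(line, sizeof(line), f))
		printf("child blocked in: %s", line);	/* "nr args... sp pc" */
	if (f)
		fclose(f);

	kill(child, SIGKILL);
	waitpid(child, NULL, 0);
	return 0;
}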
// SPDX-License-Identifier: GPL-1.0+ /* generic HDLC line discipline for Linux * * Written by Paul Fulghum paulkf@microgate.com * for
Microgate Corporation * * Microgate and SyncLink are registered trademarks of Microgate Corporation * * Adapted from ppp.c, written by Michael Callahan <callahan@maths.ox.ac.uk>, * Al Longyear <longyear@netcom.com>, * Paul Mackerras <Paul.Mackerras@cs.anu.edu.au> * * Original release 01/11/99 * * This module implements the tty line discipline N_HDLC for use with * tty device drivers that support bit-synchronous HDLC communications. * * All HDLC data is frame oriented which means: * * 1. tty write calls represent one complete transmit frame of data * The device driver should accept the complete frame or none of * the frame (busy) in the write method. Each write call should have * a byte count in the range of 2-65535 bytes (2 is min HDLC frame * with 1 addr byte and 1 ctrl byte). The max byte count of 65535 * should include any crc bytes required. For example, when using * CCITT CRC32, 4 crc bytes are required, so the maximum size frame * the application may transmit is limited to 65531 bytes. For CCITT * CRC16, the maximum application frame size would be 65533. * * * 2. receive callbacks from the device driver represents * one received frame. The device driver should bypass * the tty flip buffer and call the line discipline receive * callback directly to avoid fragmenting or concatenating * multiple frames into a single receive callback. * * The HDLC line discipline queues the receive frames in separate * buffers so complete receive frames can be returned by the * tty read calls. * * 3. tty read calls returns an entire frame of data or nothing. * * 4. all send and receive data is considered raw. No processing * or translation is performed by the line discipline, regardless * of the tty flags * * 5. When line discipline is queried for the amount of receive * data available (FIOC), 0 is returned if no data available, * otherwise the count of the next available frame is returned. * (instead of the sum of all received frame counts). * * These conventions allow the standard tty programming interface * to be used for synchronous HDLC applications when used with * this line discipline (or another line discipline that is frame * oriented such as N_PPP). * * The SyncLink driver (synclink.c) implements both asynchronous * (using standard line discipline N_TTY) and synchronous HDLC * (using N_HDLC) communications, with the latter using the above * conventions. * * This implementation is very basic and does not maintain * any statistics. The main point is to enforce the raw data * and frame orientation of HDLC communications. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/poll.h> #include <linux/in.h> #include <linux/ioctl.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/string.h> /* used in new tty drivers */ #include <linux/signal.h> /* used in new tty drivers */ #include <linux/if.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include "tty.h" /* * Buffers for individual HDLC frames */ #define MAX_HDLC_FRAME_SIZE 65535 #define DEFAULT_RX_BUF_COUNT 10 #define MAX_RX_BUF_COUNT 60 #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { struct list_head list_item; size_t count; u8 buf[]; }; struct n_hdlc_buf_list { struct list_head list; int count; spinlock_t spinlock; }; /** * struct n_hdlc - per device instance data structure * @tbusy: reentrancy flag for tx wakeup code * @woke_up: tx wakeup needs to be run again as it was called while @tbusy * @tx_buf_list: list of pending transmit frame buffers * @rx_buf_list: list of received frame buffers * @tx_free_buf_list: list unused transmit frame buffers * @rx_free_buf_list: list unused received frame buffers */ struct n_hdlc { bool tbusy; bool woke_up; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; struct n_hdlc_buf_list rx_free_buf_list; struct work_struct write_work; struct tty_struct *tty_for_write_work; }; /* * HDLC buffer list manipulation functions */ static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); /* Local functions */ static struct n_hdlc *n_hdlc_alloc(void); static void n_hdlc_tty_write_work(struct work_struct *work); /* max frame size for memory allocations */ static int maxframe = 4096; static void flush_rx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; struct n_hdlc_buf *buf; while ((buf = n_hdlc_buf_get(&n_hdlc->rx_buf_list))) n_hdlc_buf_put(&n_hdlc->rx_free_buf_list, buf); } static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; struct n_hdlc_buf *buf; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); } static void n_hdlc_free_buf_list(struct n_hdlc_buf_list *list) { struct n_hdlc_buf *buf; do { buf = n_hdlc_buf_get(list); kfree(buf); } while (buf); } /** * n_hdlc_tty_close - line discipline close * @tty: pointer to tty info structure * * Called when the line discipline is changed to something * else, the tty is closed, or the tty detects a hangup. 
*/ static void n_hdlc_tty_close(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; #if defined(TTY_NO_WRITE_SPLIT) clear_bit(TTY_NO_WRITE_SPLIT, &tty->flags); #endif tty->disc_data = NULL; /* Ensure that the n_hdlcd process is not hanging on select()/poll() */ wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); cancel_work_sync(&n_hdlc->write_work); n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->rx_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_buf_list); kfree(n_hdlc); } /* end of n_hdlc_tty_close() */ /** * n_hdlc_tty_open - called when line discipline changed to n_hdlc * @tty: pointer to tty info structure * * Returns 0 if success, otherwise error code */ static int n_hdlc_tty_open(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; pr_debug("%s() called (device=%s)\n", __func__, tty->name); /* There should not be an existing table for this slot. */ if (n_hdlc) { pr_err("%s: tty already associated!\n", __func__); return -EEXIST; } n_hdlc = n_hdlc_alloc(); if (!n_hdlc) { pr_err("%s: n_hdlc_alloc failed\n", __func__); return -ENFILE; } INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work); n_hdlc->tty_for_write_work = tty; tty->disc_data = n_hdlc; tty->receive_room = 65536; /* change tty_io write() to not split large writes into 8K chunks */ set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); /* flush receive data from driver */ tty_driver_flush_buffer(tty); return 0; } /* end of n_tty_hdlc_open() */ /** * n_hdlc_send_frames - send frames on pending send buffer list * @n_hdlc: pointer to ldisc instance data * @tty: pointer to tty instance data * * Send frames on pending send buffer list until the driver does not accept a * frame (busy) this function is called after adding a frame to the send buffer * list and by the tty wakeup callback. 
*/ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) { unsigned long flags; struct n_hdlc_buf *tbuf; ssize_t actual; check_again: spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); if (n_hdlc->tbusy) { n_hdlc->woke_up = true; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); return; } n_hdlc->tbusy = true; n_hdlc->woke_up = false; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { pr_debug("sending frame %p, count=%zu\n", tbuf, tbuf->count); /* Send the next block of data to device */ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); actual = tty->ops->write(tty, tbuf->buf, tbuf->count); /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ /* pretending it was accepted by driver */ if (actual < 0) actual = tbuf->count; if (actual == tbuf->count) { pr_debug("frame %p completed\n", tbuf); /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); /* get next pending transmit buffer */ tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); } else { pr_debug("frame %p pending\n", tbuf); /* * the buffer was not accepted by driver, * return it back into tx queue */ n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } if (!tbuf) clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); /* Clear the re-entry flag */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); n_hdlc->tbusy = false; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); if (n_hdlc->woke_up) goto check_again; } /* end of n_hdlc_send_frames() */ /** * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup * @work: pointer to work_struct * * Called when low level device driver can accept more send data. */ static void n_hdlc_tty_write_work(struct work_struct *work) { struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work); struct tty_struct *tty = n_hdlc->tty_for_write_work; n_hdlc_send_frames(n_hdlc, tty); } /* end of n_hdlc_tty_write_work() */ /** * n_hdlc_tty_wakeup - Callback for transmit wakeup * @tty: pointer to associated tty instance data * * Called when low level device driver can accept more send data. */ static void n_hdlc_tty_wakeup(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; schedule_work(&n_hdlc->write_work); } /* end of n_hdlc_tty_wakeup() */ /** * n_hdlc_tty_receive - Called by tty driver when receive data is available * @tty: pointer to tty instance data * @data: pointer to received data * @flags: pointer to flags for data * @count: count of received data in bytes * * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. 
*/ static void n_hdlc_tty_receive(struct tty_struct *tty, const u8 *data, const u8 *flags, size_t count) { register struct n_hdlc *n_hdlc = tty->disc_data; register struct n_hdlc_buf *buf; pr_debug("%s() called count=%zu\n", __func__, count); if (count > maxframe) { pr_debug("rx count>maxframesize, data discarded\n"); return; } /* get a free HDLC buffer */ buf = n_hdlc_buf_get(&n_hdlc->rx_free_buf_list); if (!buf) { /* * no buffers in free list, attempt to allocate another rx * buffer unless the maximum count has been reached */ if (n_hdlc->rx_buf_list.count < MAX_RX_BUF_COUNT) buf = kmalloc(struct_size(buf, buf, maxframe), GFP_ATOMIC); } if (!buf) { pr_debug("no more rx buffers, data discarded\n"); return; } /* copy received data to HDLC buffer */ memcpy(buf->buf, data, count); buf->count = count; /* add HDLC buffer to list of received frames */ n_hdlc_buf_put(&n_hdlc->rx_buf_list, buf); /* wake up any blocked reads and perform async signalling */ wake_up_interruptible(&tty->read_wait); if (tty->fasync != NULL) kill_fasync(&tty->fasync, SIGIO, POLL_IN); } /* end of n_hdlc_tty_receive() */ /** * n_hdlc_tty_read - Called to retrieve one frame of data (if available) * @tty: pointer to tty instance data * @file: pointer to open file object * @kbuf: pointer to returned data buffer * @nr: size of returned data buffer * @cookie: stored rbuf from previous run * @offset: offset into the data buffer * * Returns the number of bytes returned or error code. */ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, size_t nr, void **cookie, unsigned long offset) { struct n_hdlc *n_hdlc = tty->disc_data; int ret = 0; struct n_hdlc_buf *rbuf; DECLARE_WAITQUEUE(wait, current); /* Is this a repeated call for an rbuf we already found earlier? */ rbuf = *cookie; if (rbuf) goto have_rbuf; add_wait_queue(&tty->read_wait, &wait); for (;;) { if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) { ret = -EIO; break; } if (tty_hung_up_p(file)) break; set_current_state(TASK_INTERRUPTIBLE); rbuf = n_hdlc_buf_get(&n_hdlc->rx_buf_list); if (rbuf) break; /* no data */ if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; break; } schedule(); if (signal_pending(current)) { ret = -EINTR; break; } } remove_wait_queue(&tty->read_wait, &wait); __set_current_state(TASK_RUNNING); if (!rbuf) return ret; *cookie = rbuf; have_rbuf: /* Have we used it up entirely? */ if (offset >= rbuf->count) goto done_with_rbuf; /* More data to go, but can't copy any more? EOVERFLOW */ ret = -EOVERFLOW; if (!nr) goto done_with_rbuf; /* Copy as much data as possible */ ret = rbuf->count - offset; if (ret > nr) ret = nr; memcpy(kbuf, rbuf->buf+offset, ret); offset += ret; /* If we still have data left, we leave the rbuf in the cookie */ if (offset < rbuf->count) return ret; done_with_rbuf: *cookie = NULL; if (n_hdlc->rx_free_buf_list.count > DEFAULT_RX_BUF_COUNT) kfree(rbuf); else n_hdlc_buf_put(&n_hdlc->rx_free_buf_list, rbuf); return ret; } /* end of n_hdlc_tty_read() */ /** * n_hdlc_tty_write - write a single frame of data to device * @tty: pointer to associated tty device instance data * @file: pointer to file object data * @data: pointer to transmit data (one frame) * @count: size of transmit frame in bytes * * Returns the number of bytes written (or error code). 
*/ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, const u8 *data, size_t count) { struct n_hdlc *n_hdlc = tty->disc_data; DECLARE_WAITQUEUE(wait, current); struct n_hdlc_buf *tbuf; ssize_t error = 0; pr_debug("%s() called count=%zd\n", __func__, count); /* verify frame size */ if (count > maxframe) { pr_debug("%s: truncating user packet from %zu to %d\n", __func__, count, maxframe); count = maxframe; } add_wait_queue(&tty->write_wait, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); tbuf = n_hdlc_buf_get(&n_hdlc->tx_free_buf_list); if (tbuf) break; if (tty_io_nonblock(tty, file)) { error = -EAGAIN; break; } schedule(); if (signal_pending(current)) { error = -EINTR; break; } } __set_current_state(TASK_RUNNING); remove_wait_queue(&tty->write_wait, &wait); if (!error) { /* Retrieve the user's buffer */ memcpy(tbuf->buf, data, count); /* Send the data */ tbuf->count = error = count; n_hdlc_buf_put(&n_hdlc->tx_buf_list, tbuf); n_hdlc_send_frames(n_hdlc, tty); } return error; } /* end of n_hdlc_tty_write() */ /** * n_hdlc_tty_ioctl - process IOCTL system call for the tty device. * @tty: pointer to tty instance data * @cmd: IOCTL command code * @arg: argument for IOCTL call (cmd dependent) * * Returns command dependent result. */ static int n_hdlc_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct n_hdlc *n_hdlc = tty->disc_data; int error = 0; int count; unsigned long flags; struct n_hdlc_buf *buf = NULL; pr_debug("%s() called %d\n", __func__, cmd); switch (cmd) { case FIONREAD: /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock, flags); buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, struct n_hdlc_buf, list_item); if (buf) count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock, flags); error = put_user(count, (int __user *)arg); break; case TIOCOUTQ: /* get the pending tx byte count in the driver */ count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, struct n_hdlc_buf, list_item); if (buf) count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); error = put_user(count, (int __user *)arg); break; case TCFLSH: switch (arg) { case TCIOFLUSH: case TCOFLUSH: flush_tx_queue(tty); } fallthrough; /* to default */ default: error = n_tty_ioctl_helper(tty, cmd, arg); break; } return error; } /* end of n_hdlc_tty_ioctl() */ /** * n_hdlc_tty_poll - TTY callback for poll system call * @tty: pointer to tty instance data * @filp: pointer to open file object for device * @wait: wait queue for operations * * Determine which operations (read/write) will not block and return info * to caller. * Returns a bit mask containing info on which ops will not block. 
*/ static __poll_t n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait) { struct n_hdlc *n_hdlc = tty->disc_data; __poll_t mask = 0; /* * queue the current process into any wait queue that may awaken in the * future (read and write) */ poll_wait(filp, &tty->read_wait, wait); poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= EPOLLIN | EPOLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= EPOLLHUP; if (tty_hung_up_p(filp)) mask |= EPOLLHUP; if (!tty_is_writelocked(tty) && !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= EPOLLOUT | EPOLLWRNORM; /* writable */ return mask; } /* end of n_hdlc_tty_poll() */ static void n_hdlc_alloc_buf(struct n_hdlc_buf_list *list, unsigned int count, const char *name) { struct n_hdlc_buf *buf; unsigned int i; for (i = 0; i < count; i++) { buf = kmalloc(struct_size(buf, buf, maxframe), GFP_KERNEL); if (!buf) { pr_debug("%s(), kmalloc() failed for %s buffer %u\n", __func__, name, i); return; } n_hdlc_buf_put(list, buf); } } /** * n_hdlc_alloc - allocate an n_hdlc instance data structure * * Returns a pointer to newly created structure if success, otherwise %NULL */ static struct n_hdlc *n_hdlc_alloc(void) { struct n_hdlc *n_hdlc = kzalloc(sizeof(*n_hdlc), GFP_KERNEL); if (!n_hdlc) return NULL; spin_lock_init(&n_hdlc->rx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->rx_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_buf_list.spinlock); INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); n_hdlc_alloc_buf(&n_hdlc->rx_free_buf_list, DEFAULT_RX_BUF_COUNT, "rx"); n_hdlc_alloc_buf(&n_hdlc->tx_free_buf_list, DEFAULT_TX_BUF_COUNT, "tx"); return n_hdlc; } /* end of n_hdlc_alloc() */ /** * n_hdlc_buf_return - put the HDLC buffer after the head of the specified list * @buf_list: pointer to the buffer list * @buf: pointer to the buffer */ static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; spin_lock_irqsave(&buf_list->spinlock, flags); list_add(&buf->list_item, &buf_list->list); buf_list->count++; spin_unlock_irqrestore(&buf_list->spinlock, flags); } /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list * @buf_list: pointer to buffer list * @buf: pointer to buffer */ static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; spin_lock_irqsave(&buf_list->spinlock, flags); list_add_tail(&buf->list_item, &buf_list->list); buf_list->count++; spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list * @buf_list: pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. 
 */
static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list)
{
	unsigned long flags;
	struct n_hdlc_buf *buf;

	spin_lock_irqsave(&buf_list->spinlock, flags);

	buf = list_first_entry_or_null(&buf_list->list,
				       struct n_hdlc_buf, list_item);
	if (buf) {
		list_del(&buf->list_item);
		buf_list->count--;
	}

	spin_unlock_irqrestore(&buf_list->spinlock, flags);

	return buf;
}	/* end of n_hdlc_buf_get() */

static struct tty_ldisc_ops n_hdlc_ldisc = {
	.owner		= THIS_MODULE,
	.num		= N_HDLC,
	.name		= "hdlc",
	.open		= n_hdlc_tty_open,
	.close		= n_hdlc_tty_close,
	.read		= n_hdlc_tty_read,
	.write		= n_hdlc_tty_write,
	.ioctl		= n_hdlc_tty_ioctl,
	.poll		= n_hdlc_tty_poll,
	.receive_buf	= n_hdlc_tty_receive,
	.write_wakeup	= n_hdlc_tty_wakeup,
	.flush_buffer	= flush_rx_queue,
};

static int __init n_hdlc_init(void)
{
	int status;

	/* range check maxframe arg */
	maxframe = clamp(maxframe, 4096, MAX_HDLC_FRAME_SIZE);

	status = tty_register_ldisc(&n_hdlc_ldisc);
	if (!status)
		pr_info("N_HDLC line discipline registered with maxframe=%d\n",
			maxframe);
	else
		pr_err("N_HDLC: error registering line discipline: %d\n",
		       status);

	return status;
}	/* end of init_module() */

static void __exit n_hdlc_exit(void)
{
	tty_unregister_ldisc(&n_hdlc_ldisc);
}

module_init(n_hdlc_init);
module_exit(n_hdlc_exit);

MODULE_DESCRIPTION("HDLC line discipline support");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul Fulghum paulkf@microgate.com");
module_param(maxframe, int, 0);
MODULE_ALIAS_LDISC(N_HDLC);
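/*
 * Illustrative sketch (not part of n_hdlc.c): a minimal user-space program
 * showing how the line discipline registered above is typically used.  The
 * tty is switched from the default discipline to N_HDLC with TIOCSETD, after
 * which each write() queues exactly one frame (truncated to maxframe) and
 * each read() returns exactly one received frame, matching the write/read
 * paths above.  The device path and frame contents are made-up placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/tty.h>		/* N_HDLC */

int main(void)
{
	unsigned char frame[1024];
	int ldisc = N_HDLC;
	int fd, n;

	fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);	/* placeholder port */
	if (fd < 0)
		return 1;

	/* attach the HDLC line discipline to this tty */
	if (ioctl(fd, TIOCSETD, &ldisc) < 0)
		return 1;

	/* one write == one HDLC frame handed to the driver */
	memset(frame, 0xAA, 16);
	write(fd, frame, 16);

	/* one read == one complete received frame (blocks until available) */
	n = read(fd, frame, sizeof(frame));
	if (n > 0)
		printf("received %d byte frame\n", n);

	close(fd);
	return 0;
}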
// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS B-tree.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Koji Sato.
 */

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "btnode.h"
#include "btree.h"
#include "alloc.h"
#include "dat.h"

static void __nilfs_btree_init(struct nilfs_bmap *bmap);

static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
	struct nilfs_btree_path *path;
	int level = NILFS_BTREE_LEVEL_DATA;

	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
	if (path == NULL)
		goto out;

	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
		path[level].bp_bh = NULL;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index = 0;
		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_op = NULL;
	}

out:
	return path;
}

static void nilfs_btree_free_path(struct nilfs_btree_path *path)
{
	int level = NILFS_BTREE_LEVEL_DATA;

	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
		brelse(path[level].bp_bh);

	kmem_cache_free(nilfs_btree_path_cache, path);
}

/*
 * B-tree node operations
 */
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
				     __u64 ptr, struct buffer_head **bhp)
{
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btnc = btnc_inode->i_mapping;
	struct buffer_head *bh;

	bh = nilfs_btnode_create_block(btnc, ptr);
	if (IS_ERR(bh))
		return PTR_ERR(bh);

	set_buffer_nilfs_volatile(bh);
	*bhp = bh;
	return 0;
}

static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
{
	return node->bn_flags;
}

static void nilfs_btree_node_set_flags(struct nilfs_btree_node *node,
				       int flags)
{
	node->bn_flags = flags;
}

static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
}

static int nilfs_btree_node_get_level(const struct
nilfs_btree_node *node) { return node->bn_level; } static void nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) { node->bn_level = level; } static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) { return le16_to_cpu(node->bn_nchildren); } static void nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) { node->bn_nchildren = cpu_to_le16(nchildren); } static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { return i_blocksize(btree->b_inode); } static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) { return btree->b_nchildren_per_block; } static __le64 * nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) { return (__le64 *)((char *)(node + 1) + (nilfs_btree_node_root(node) ? 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); } static __le64 * nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) { return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); } static __u64 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) { return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); } static void nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) { *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); } static __u64 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, int ncmax) { return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); } static void nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, int ncmax) { *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); } static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, int level, int nchildren, int ncmax, const __u64 *keys, const __u64 *ptrs) { __le64 *dkeys; __le64 *dptrs; int i; nilfs_btree_node_set_flags(node, flags); nilfs_btree_node_set_level(node, level); nilfs_btree_node_set_nchildren(node, nchildren); dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nchildren; i++) { dkeys[i] = cpu_to_le64(keys[i]); dptrs[i] = cpu_to_le64(ptrs[i]); } } /* Assume the buffer heads corresponding to left and right are locked. */ static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); lnchildren += n; rnchildren -= n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer heads corresponding to left and right are locked. 
*/ static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); lnchildren -= n; rnchildren += n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer head corresponding to node is locked. */ static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, __u64 key, __u64 ptr, int ncmax) { __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); nchildren = nilfs_btree_node_get_nchildren(node); if (index < nchildren) { memmove(dkeys + index + 1, dkeys + index, (nchildren - index) * sizeof(*dkeys)); memmove(dptrs + index + 1, dptrs + index, (nchildren - index) * sizeof(*dptrs)); } dkeys[index] = cpu_to_le64(key); dptrs[index] = cpu_to_le64(ptr); nchildren++; nilfs_btree_node_set_nchildren(node, nchildren); } /* Assume that the buffer head corresponding to node is locked. */ static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, __u64 *keyp, __u64 *ptrp, int ncmax) { __u64 key; __u64 ptr; __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); key = le64_to_cpu(dkeys[index]); ptr = le64_to_cpu(dptrs[index]); nchildren = nilfs_btree_node_get_nchildren(node); if (keyp != NULL) *keyp = key; if (ptrp != NULL) *ptrp = ptr; if (index < nchildren - 1) { memmove(dkeys + index, dkeys + index + 1, (nchildren - index - 1) * sizeof(*dkeys)); memmove(dptrs + index, dptrs + index + 1, (nchildren - index - 1) * sizeof(*dptrs)); } nchildren--; nilfs_btree_node_set_nchildren(node, nchildren); } static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, __u64 key, int *indexp) { __u64 nkey; int index, low, high, s; /* binary search */ low = 0; high = nilfs_btree_node_get_nchildren(node) - 1; index = 0; s = 0; while (low <= high) { index = (low + high) / 2; nkey = nilfs_btree_node_get_key(node, index); if (nkey == key) { s = 0; goto out; } else if (nkey < key) { low = index + 1; s = -1; } else { high = index - 1; s = 1; } } /* adjust index */ if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { if (s > 0 && index > 0) index--; } else if (s < 0) index++; out: *indexp = index; return s == 0; } /** * nilfs_btree_node_broken - verify consistency of btree node * @node: btree node block to be examined * @size: node size (in bytes) * @inode: host inode of btree * @blocknr: block number * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 
*/ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, size_t size, struct inode *inode, sector_t blocknr) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, (unsigned long long)blocknr, level, flags, nchildren); ret = 1; } return ret; } /** * nilfs_btree_root_broken - verify consistency of btree root node * @node: btree root node to be examined * @inode: host inode of btree * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. */ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, struct inode *inode) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); ret = 1; } return ret; } int nilfs_btree_broken_node_block(struct buffer_head *bh) { struct inode *inode; int ret; if (buffer_nilfs_checked(bh)) return 0; inode = bh->b_folio->mapping->host; ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, bh->b_size, inode, bh->b_blocknr); if (likely(!ret)) set_buffer_nilfs_checked(bh); return ret; } static struct nilfs_btree_node * nilfs_btree_get_root(const struct nilfs_bmap *btree) { return (struct nilfs_btree_node *)btree->b_u.u_data; } static struct nilfs_btree_node * nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_bh->b_data; } static struct nilfs_btree_node * nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; } static int nilfs_btree_height(const struct nilfs_bmap *btree) { return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; } static struct nilfs_btree_node * nilfs_btree_get_node(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int level, int *ncmaxp) { struct nilfs_btree_node *node; if (level == nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_root(btree); *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; } else { node = nilfs_btree_get_nonroot_node(path, level); *ncmaxp = nilfs_btree_nchildren_per_block(btree); } return node; } static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, struct nilfs_btree_node *node, int level) { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); nilfs_crit(btree->b_inode->i_sb, "btree level mismatch (ino=%lu): %d != %d", btree->b_inode->i_ino, nilfs_btree_node_get_level(node), level); return 1; } return 0; } struct nilfs_btree_readahead_info { struct nilfs_btree_node *node; /* parent node */ int max_ra_blocks; /* max nof blocks to read ahead */ int index; /* current index on the parent node */ int ncmax; /* nof children in the parent node */ }; static int 
__nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { if (likely(ret == -EEXIST)) goto out_check; if (ret == -ENOENT) { /* * Block address translation failed due to invalid * value of 'ptr'. In this case, return internal code * -EINVAL (broken bmap) to notify bmap layer of fatal * metadata corruption. */ ret = -EINVAL; } return ret; } if (ra) { int i, n; __u64 ptr2; /* read ahead sibling nodes */ for (n = ra->max_ra_blocks, i = ra->index + 1; n > 0 && i < ra->ncmax; n--, i++) { ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); ret = nilfs_btnode_submit_block(btnc, ptr2, 0, REQ_OP_READ | REQ_RAHEAD, &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); else if (ret != -EBUSY) break; if (!buffer_locked(bh)) goto out_no_wait; } } wait_on_buffer(bh); out_no_wait: if (!buffer_uptodate(bh)) { nilfs_err(btree->b_inode->i_sb, "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); return -EIO; } out_check: if (nilfs_btree_broken_node_block(bh)) { clear_buffer_uptodate(bh); brelse(bh); return -EINVAL; } *bhp = bh; return 0; } static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { return __nilfs_btree_get_block(btree, ptr, bhp, NULL); } static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 key, __u64 *ptrp, int minlevel, int readahead) { struct nilfs_btree_node *node; struct nilfs_btree_readahead_info p, *ra; __u64 ptr; int level, index, found, ncmax, ret; node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(node); if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) return -ENOENT; found = nilfs_btree_node_lookup(node, key, &index); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); while (--level >= minlevel) { ra = NULL; if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = index; p.max_ra_blocks = 7; ra = &p; } ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, ra); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); else index = 0; if (index < ncmax) { ptr = nilfs_btree_node_get_ptr(node, index, ncmax); } else { WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; } path[level].bp_index = index; } if (!found) return -ENOENT; if (ptrp != NULL) *ptrp = ptr; return 0; } static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; __u64 ptr; int index, level, ncmax, ret; node = nilfs_btree_get_root(btree); index = nilfs_btree_node_get_nchildren(node) - 1; if (index < 0) return -ENOENT; level = nilfs_btree_node_get_level(node); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; 
path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); for (level--; level > 0; level--) { ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(node, index, ncmax); path[level].bp_index = index; } if (keyp != NULL) *keyp = nilfs_btree_node_get_key(node, index); if (ptrp != NULL) *ptrp = ptr; return 0; } /** * nilfs_btree_get_next_key - get next valid key from btree path array * @btree: bmap struct of btree * @path: array of nilfs_btree_path struct * @minlevel: start level * @nextkey: place to store the next valid key * * Return Value: If a next key was found, 0 is returned. Otherwise, * -ENOENT is returned. */ static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int minlevel, __u64 *nextkey) { struct nilfs_btree_node *node; int maxlevel = nilfs_btree_height(btree) - 1; int index, next_adj, level; /* Next index is already set to bp_index for leaf nodes. */ next_adj = 0; for (level = minlevel; level <= maxlevel; level++) { if (level == maxlevel) node = nilfs_btree_get_root(btree); else node = nilfs_btree_get_nonroot_node(path, level); index = path[level].bp_index + next_adj; if (index < nilfs_btree_node_get_nchildren(node)) { /* Next key is in this node */ *nextkey = nilfs_btree_node_get_key(node, index); return 0; } /* For non-leaf nodes, next index is stored at bp_index + 1. */ next_adj = 1; } return -ENOENT; } static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, __u64 key, __u64 *ptrp, unsigned int maxblocks) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; int level = NILFS_BTREE_LEVEL_NODE_MIN; int ret, cnt, index, maxlevel, ncmax; struct nilfs_btree_readahead_info p; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); if (ret < 0) goto out; if (NILFS_BMAP_USE_VBN(btree)) { dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) goto dat_error; ptr = blocknr; } cnt = 1; if (cnt == maxblocks) goto end; maxlevel = nilfs_btree_height(btree) - 1; node = nilfs_btree_get_node(btree, path, level, &ncmax); index = path[level].bp_index + 1; for (;;) { while (index < nilfs_btree_node_get_nchildren(node)) { if (nilfs_btree_node_get_key(node, index) != key + cnt) goto end; ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) goto end; index++; } if (level == maxlevel) break; /* look-up right sibling node */ p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = path[level + 1].bp_index + 1; p.max_ra_blocks = 7; if (p.index >= nilfs_btree_node_get_nchildren(p.node) || nilfs_btree_node_get_key(p.node, p.index) != key + cnt) break; ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); path[level + 1].bp_index = p.index; 
brelse(path[level].bp_bh); path[level].bp_bh = NULL; ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, &p); if (ret < 0) goto out; node = nilfs_btree_get_nonroot_node(path, level); ncmax = nilfs_btree_nchildren_per_block(btree); index = 0; path[level].bp_index = index; } end: *ptrp = ptr; ret = cnt; out: nilfs_btree_free_path(path); return ret; dat_error: if (ret == -ENOENT) ret = -EINVAL; /* Notify bmap layer of metadata corruption */ goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 key) { if (level < nilfs_btree_height(btree) - 1) { do { nilfs_btree_node_set_key( nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } /* root */ if (level == nilfs_btree_height(btree) - 1) { nilfs_btree_node_set_key(nilfs_btree_get_root(btree), path[level].bp_index, key); } } static void nilfs_btree_do_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, NILFS_BTREE_ROOT_NCHILDREN_MAX); } } static void nilfs_btree_carry_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + lnchildren + 1) / 2 - lnchildren; if (n > path[level].bp_index) { /* move insert point */ n--; move = 1; } nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_carry_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + rnchildren + 1) / 2 - rnchildren; if (n > nchildren - path[level].bp_index) { /* 
move insert point */ n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index -= nilfs_btree_node_get_nchildren(node); path[level + 1].bp_index++; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_split(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + 1) / 2; if (n > nchildren - path[level].bp_index) { n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, *keyp, *ptrp, ncblk); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } path[level + 1].bp_index++; } static void nilfs_btree_grow(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; int n, ncblk; root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(root); nilfs_btree_node_move_right(root, child, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = nilfs_btree_node_get_key(child, 0); *ptrp = path[level].bp_newreq.bpr_ptr; } static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path) { struct nilfs_btree_node *node; int level, ncmax; if (path == NULL) return NILFS_BMAP_INVALID_PTR; /* left sibling */ level = NILFS_BTREE_LEVEL_NODE_MIN; if (path[level].bp_index > 0) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index - 1, ncmax); } /* parent */ level = NILFS_BTREE_LEVEL_NODE_MIN + 1; if (level <= nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index, ncmax); } return NILFS_BMAP_INVALID_PTR; } static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, const struct 
nilfs_btree_path *path, __u64 key) { __u64 ptr; ptr = nilfs_bmap_find_target_seq(btree, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; ptr = nilfs_btree_find_near(btree, path); if (ptr != NILFS_BMAP_INVALID_PTR) /* near */ return ptr; /* block group */ return nilfs_bmap_find_target_in_group(btree); } static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, __u64 key, __u64 ptr, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; int pindex, level, ncmax, ncblk, ret; struct inode *dat = NULL; stats->bs_nblocks = 0; level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ if (NILFS_BMAP_USE_VBN(btree)) { path[level].bp_newreq.bpr_ptr = nilfs_btree_find_target_v(btree, path, key); dat = nilfs_bmap_get_dat(btree); } ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_data; ncblk = nilfs_btree_nchildren_per_block(btree); for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_node_get_nchildren(node) < ncblk) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; /* left sibling */ if (pindex > 0) { sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_left; stats->bs_nblocks++; goto out; } else { brelse(bh); } } /* right sibling */ if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_right; stats->bs_nblocks++; goto out; } else { brelse(bh); } } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, &bh); if (ret < 0) goto err_out_curr_node; stats->bs_nblocks++; sib = (struct nilfs_btree_node *)bh->b_data; nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } /* root */ node = nilfs_btree_get_root(btree); if (nilfs_btree_node_get_nchildren(node) < NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, &bh); if (ret < 0) goto err_out_curr_node; nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; level++; path[level].bp_op = nilfs_btree_do_insert; /* a newly-created node block and a data block are added */ 
stats->bs_nblocks += 2; /* success */ out: *levelp = level; return ret; /* error */ err_out_curr_node: nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { nilfs_btnode_delete(path[level].bp_sib_bh); nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); } nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_data: *levelp = level; stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, __u64 key, __u64 ptr) { struct inode *dat = NULL; int level; set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; if (NILFS_BMAP_USE_VBN(btree)) { nilfs_bmap_set_target_v(btree, key, ptr); dat = nilfs_bmap_get_dat(btree); } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { nilfs_bmap_commit_alloc_ptr(btree, &path[level - 1].bp_newreq, dat); path[level].bp_op(btree, path, level, &key, &ptr); } if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) { struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret != -ENOENT) { if (ret == 0) ret = -EEXIST; goto out; } ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); if (ret < 0) goto out; nilfs_btree_commit_insert(btree, path, level, key, ptr); nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } static void nilfs_btree_do_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, NILFS_BTREE_ROOT_NCHILDREN_MAX); } } static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + lnchildren) / 2 - nchildren; nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index += n; } static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, 
__u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + rnchildren) / 2 - nchildren; nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } static void nilfs_btree_concat_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(node); nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += nilfs_btree_node_get_nchildren(left); } static void nilfs_btree_concat_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(right); nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; } static void nilfs_btree_shrink(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); root = nilfs_btree_get_root(btree); child = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_delete(root, 0, NULL, NULL, NILFS_BTREE_ROOT_NCHILDREN_MAX); nilfs_btree_node_set_level(root, level); n = nilfs_btree_node_get_nchildren(child); nilfs_btree_node_move_left(root, child, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } static void nilfs_btree_nop(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { } static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, struct nilfs_bmap_stats *stats, struct inode *dat) { struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; int pindex, dindex, level, ncmin, ncmax, ncblk, ret; ret = 0; stats->bs_nblocks = 0; ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); ncblk = nilfs_btree_nchildren_per_block(btree); for (level = 
NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(node, dindex, ncblk); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; if (nilfs_btree_node_get_nchildren(node) > ncmin) { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; dindex = pindex; if (pindex > 0) { /* left sibling */ sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_left; stats->bs_nblocks++; goto out; } else { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_left; stats->bs_nblocks++; /* continue; */ } } else if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { /* right sibling */ sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_right; stats->bs_nblocks++; goto out; } else { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_right; stats->bs_nblocks++; /* * When merging right sibling node * into the current node, pointer to * the right sibling node must be * terminated instead. The adjustment * below is required for that. 
*/ dindex = pindex + 1; /* continue; */ } } else { /* no siblings */ /* the only child of the root node */ WARN_ON(level != nilfs_btree_height(btree) - 2); if (nilfs_btree_node_get_nchildren(node) - 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; stats->bs_nblocks += 2; level++; path[level].bp_op = nilfs_btree_nop; goto shrink_root_child; } else { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } } } /* child of the root node is deleted */ path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; shrink_root_child: node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(node, dindex, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; /* success */ out: *levelp = level; return ret; /* error */ err_out_curr_node: nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { brelse(path[level].bp_sib_bh); nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); } *levelp = level; stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, struct inode *dat) { int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); path[level].bp_op(btree, path, level, NULL, NULL); } if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) { struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; struct inode *dat; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret < 0) goto out; dat = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_bmap_get_dat(btree) : NULL; ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); if (ret < 0) goto out; nilfs_btree_commit_delete(btree, path, level, dat); nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, __u64 *keyp) { struct nilfs_btree_path *path; const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; int ret; path = nilfs_btree_alloc_path(); if (!path) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); if (!ret) *keyp = start; else if (ret == -ENOENT) ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) { struct buffer_head *bh; struct nilfs_btree_node *root, *node; __u64 maxkey, nextmaxkey; __u64 ptr; int nchildren, ret; root = nilfs_btree_get_root(btree); nchildren = nilfs_btree_node_get_nchildren(root); if (unlikely(nchildren == 0)) return 0; switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? 
nilfs_btree_node_get_key(node, nchildren - 2) : 0; brelse(bh); return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } static int nilfs_btree_gather_data(struct nilfs_bmap *btree, __u64 *keys, __u64 *ptrs, int nitems) { struct buffer_head *bh; struct nilfs_btree_node *node, *root; __le64 *dkeys; __le64 *dptrs; __u64 ptr; int nchildren, ncmax, i, ret; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; break; case 3: nchildren = nilfs_btree_node_get_nchildren(root); WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; ncmax = nilfs_btree_nchildren_per_block(btree); break; default: node = NULL; return -EINVAL; } nchildren = nilfs_btree_node_get_nchildren(node); if (nchildren < nitems) nitems = nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nitems; i++) { keys[i] = le64_to_cpu(dkeys[i]); ptrs[i] = le64_to_cpu(dptrs[i]); } brelse(bh); return nitems; } static int nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head **bhp, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; struct inode *dat = NULL; int ret; stats->bs_nblocks = 0; /* for data */ /* cannot find near ptr */ if (NILFS_BMAP_USE_VBN(btree)) { dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); dat = nilfs_bmap_get_dat(btree); } ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); if (ret < 0) return ret; ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; *bhp = NULL; stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); if (ret < 0) goto err_out_dreq; ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); if (ret < 0) goto err_out_nreq; *bhp = bh; stats->bs_nblocks++; } /* success */ return 0; /* error */ err_out_nreq: nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); err_out_dreq: nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) { struct nilfs_btree_node *node; struct inode *dat; __u64 tmpptr; int ncblk; /* free resources */ if (btree->b_ops->bop_clear != NULL) btree->b_ops->bop_clear(btree); /* ptr must be a pointer to a buffer head. */ set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_bmap_get_dat(btree) : NULL; __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); /* create child node at level 1 */ node = (struct nilfs_btree_node *)bh->b_data; ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) mark_buffer_dirty(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); tmpptr = nreq->bpr_ptr; nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, NILFS_BTREE_ROOT_NCHILDREN_MAX, &keys[0], &tmpptr); } else { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, NILFS_BTREE_ROOT_NCHILDREN_MAX); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } if (NILFS_BMAP_USE_VBN(btree)) nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); } /** * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree * @btree: NILFS B-tree structure * @key: Key of the new entry to be inserted * @ptr: Pointer (block number) associated with the key to be inserted * @keys: Array of keys to be inserted in addition to @key * @ptrs: Array of pointers associated with @keys * @n: Number of keys and pointers in @keys and @ptrs * * This function is used to insert a new entry specified by @key and @ptr, * along with additional entries specified by @keys and @ptrs arrays, into a * NILFS B-tree. * It prepares the necessary changes by allocating the required blocks and any * necessary intermediate nodes. It converts configurations from other forms of * block mapping (the one that currently exists is direct mapping) to a B-tree. * * Return: 0 on success or a negative error code on failure. 
*/ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { struct buffer_head *bh = NULL; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; struct nilfs_bmap_stats stats; int ret; if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( nilfs_btree_node_size(btree))) { di = &dreq; ni = &nreq; } else { di = NULL; ni = NULL; BUG(); } ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, &stats); if (ret < 0) return ret; nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, di, ni, bh); nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); return 0; } static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); return 0; } static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); if (ret < 0) return ret; if (buffer_nilfs_node(path[level].bp_bh)) { path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); return ret; } } return 0; } static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; int ncmax; nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req, btree->b_ptr_type == NILFS_BMAP_PTR_VS); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } set_buffer_nilfs_volatile(path[level].bp_bh); parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, path[level].bp_newreq.bpr_ptr, ncmax); } static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int *maxlevelp, struct inode *dat) { int level, ret; level = minlevel; if (!buffer_nilfs_volatile(path[level].bp_bh)) { ret = nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) return ret; } while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) { WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); ret = 
nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) goto out; } /* success */ *maxlevelp = level - 1; return 0; /* error */ out: while (--level > minlevel) nilfs_btree_abort_update_v(btree, path, level, dat); if (!buffer_nilfs_volatile(path[level].bp_bh)) nilfs_btree_abort_update_v(btree, path, level, dat); return ret; } static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int maxlevel, struct buffer_head *bh, struct inode *dat) { int level; if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) nilfs_btree_commit_update_v(btree, path, minlevel, dat); for (level = minlevel + 1; level <= maxlevel; level++) nilfs_btree_commit_update_v(btree, path, level, dat); } static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { int maxlevel = 0, ret; struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(btree); __u64 ptr; int ncmax; get_bh(bh); path[level].bp_bh = bh; ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, dat); if (ret < 0) goto out; if (buffer_nilfs_volatile(path[level].bp_bh)) { parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); ret = nilfs_dat_mark_dirty(dat, ptr); if (ret < 0) goto out; } nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); out: brelse(path[level].bp_bh); path[level].bp_bh = NULL; return ret; } static int nilfs_btree_propagate(struct nilfs_bmap *btree, struct buffer_head *bh) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; WARN_ON(!buffer_dirty(bh)); path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; if (buffer_nilfs_node(bh)) { node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { key = nilfs_bmap_data_get_key(btree, bh); level = NILFS_BTREE_LEVEL_DATA; } ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); goto out; } ret = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_btree_propagate_v(btree, path, level, bh) : nilfs_btree_propagate_p(btree, path, level, bh); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, struct buffer_head *bh) { return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); } static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, struct list_head *lists, struct buffer_head *bh) { struct list_head *head; struct buffer_head *cbh; struct nilfs_btree_node *node, *cnode; __u64 key, ckey; int level; get_bh(bh); node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); nilfs_warn(btree->b_inode->i_sb, "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", level, (unsigned long long)key, btree->b_inode->i_ino, (unsigned long long)bh->b_blocknr); return; } list_for_each(head, &lists[level]) { cbh = list_entry(head, struct buffer_head, b_assoc_buffers); cnode = (struct nilfs_btree_node *)cbh->b_data; ckey = nilfs_btree_node_get_key(cnode, 0); if (key < ckey) break; } list_add_tail(&bh->b_assoc_buffers, head); } static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct folio_batch fbatch; struct buffer_head *bh, *head; pgoff_t index = 0; int level, i; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) INIT_LIST_HEAD(&lists[level]); folio_batch_init(&fbatch); while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { bh = head = folio_buffers(fbatch.folios[i]); do { if (buffer_dirty(bh)) nilfs_btree_add_dirty_buffer(btree, lists, bh); } while ((bh = bh->b_this_page) != head); } folio_batch_release(&fbatch); cond_resched(); } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) list_splice_tail(&lists[level], listp); } static int nilfs_btree_assign_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; __u64 key; __u64 ptr; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); if (buffer_nilfs_node(*bh)) { path[level].bp_ctxt.oldkey = ptr; path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, ncmax); key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } static int nilfs_btree_assign_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(btree); __u64 key; 
__u64 ptr; union nilfs_bmap_ptr_req req; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); req.bpr_ptr = ptr; ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (ret < 0) return ret; nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } static int nilfs_btree_assign(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { key = nilfs_bmap_data_get_key(btree, *bh); level = NILFS_BTREE_LEVEL_DATA; } ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *node; __u64 key; int ret; ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, blocknr); if (ret < 0) return ret; if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; key = nilfs_btree_node_get_key(node, 0); } else key = nilfs_bmap_data_get_key(btree, *bh); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) { struct buffer_head *bh; struct nilfs_btree_path *path; __u64 ptr; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } if (!buffer_dirty(bh)) mark_buffer_dirty(bh); brelse(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); out: nilfs_btree_free_path(path); return ret; } static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign, .bop_mark = nilfs_btree_mark, .bop_seek_key = nilfs_btree_seek_key, .bop_last_key = nilfs_btree_last_key, .bop_check_insert = NULL, .bop_check_delete = nilfs_btree_check_delete, .bop_gather_data = nilfs_btree_gather_data, }; static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate_gc, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign_gc, .bop_mark = NULL, .bop_seek_key = NULL, .bop_last_key = NULL, .bop_check_insert = 
NULL, .bop_check_delete = NULL, .bop_gather_data = NULL, }; static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } int nilfs_btree_init(struct nilfs_bmap *bmap) { int ret = 0; __nilfs_btree_init(bmap); if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; else ret = nilfs_attach_btree_node_cache( &NILFS_BMAP_I(bmap)->vfs_inode); return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops_gc; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); }
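/*
 * Illustrative sketch (not part of nilfs2): the nilfs_btree_ops /
 * nilfs_btree_ops_gc tables above are wired into a bmap through b_ops, a
 * plain operations-table dispatch.  The stripped-down, self-contained
 * analogue below shows only that pattern; every demo_* name is made up.
 */
#include <stdio.h>

struct demo_map;

struct demo_map_ops {
	int (*lookup)(const struct demo_map *map, unsigned long key,
		      unsigned long *ptr);
};

struct demo_map {
	const struct demo_map_ops *ops;	/* plays the role of b_ops */
	unsigned long root_key, root_ptr;
};

static int demo_btree_lookup(const struct demo_map *map, unsigned long key,
			     unsigned long *ptr)
{
	if (key != map->root_key)
		return -1;		/* stand-in for -ENOENT */
	*ptr = map->root_ptr;
	return 0;
}

static const struct demo_map_ops demo_btree_ops = {
	.lookup = demo_btree_lookup,	/* like .bop_lookup = nilfs_btree_lookup */
};

int main(void)
{
	struct demo_map map = { .ops = &demo_btree_ops,
				.root_key = 42, .root_ptr = 4096 };
	unsigned long ptr;

	if (!map.ops->lookup(&map, 42, &ptr))	/* dispatch through the table */
		printf("key 42 -> block %lu\n", ptr);
	return 0;
}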
// SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel NETLINK Interface * * This file defines the NETLINK interface for the NetLabel system. The * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 */ #include <linux/init.h> #include <linux/types.h> #include <linux/list.h> #include <linux/socket.h> #include <linux/audit.h> #include <linux/tty.h> #include <linux/security.h> #include <linux/gfp.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/netlabel.h> #include <asm/bug.h> #include "netlabel_mgmt.h" #include "netlabel_unlabeled.h" #include "netlabel_cipso_v4.h" #include "netlabel_calipso.h" #include "netlabel_user.h" /* * NetLabel NETLINK Setup Functions */ /** * netlbl_netlink_init - Initialize the NETLINK communication channel * * Description: * Call out to the NetLabel components so they can register their families and * commands with the Generic NETLINK mechanism. Returns zero on success and * non-zero on failure. * */ int __init netlbl_netlink_init(void) { int ret_val; ret_val = netlbl_mgmt_genl_init(); if (ret_val != 0) return ret_val; ret_val = netlbl_cipsov4_genl_init(); if (ret_val != 0) return ret_val; ret_val = netlbl_calipso_genl_init(); if (ret_val != 0) return ret_val; return netlbl_unlabel_genl_init(); } /* * NetLabel Audit Functions */ /** * netlbl_audit_start_common - Start an audit message * @type: audit message type * @audit_info: NetLabel audit information * * Description: * Start an audit message using the type specified in @type and fill the audit * message with some fields common to all NetLabel audit messages. Returns * a pointer to the audit buffer on success, NULL on failure. * */ struct audit_buffer *netlbl_audit_start_common(int type, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; char *secctx; u32 secctx_len; if (audit_enabled == AUDIT_OFF) return NULL; audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, type); if (audit_buf == NULL) return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); if (audit_info->secid != 0 && security_secid_to_secctx(audit_info->secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); security_release_secctx(secctx, secctx_len); } return audit_buf; }
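/*
 * Illustrative sketch (not from the NetLabel sources): a typical caller of
 * netlbl_audit_start_common() appends its own fields after the common
 * "netlabel: auid= ses=" prefix and then closes the record with
 * audit_log_end().  The wrapper name is made up and the event type is only
 * an example; just the start/format/end pattern is taken from the code above.
 */
static void demo_netlbl_audit_event(struct netlbl_audit *audit_info,
				    const char *domain, int result)
{
	struct audit_buffer *audit_buf;

	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
	if (audit_buf == NULL)
		return;		/* auditing disabled or allocation failed */

	/* caller-specific fields follow the common prefix */
	audit_log_format(audit_buf, " nlbl_domain=%s res=%u",
			 domain, result == 0 ? 1 : 0);
	audit_log_end(audit_buf);
}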
// SPDX-License-Identifier: GPL-2.0-or-later /* * Derived from arch/ppc/mm/extable.c and arch/i386/mm/extable.c. * * Copyright (C) 2004 Paul Mackerras, IBM Corp. */ #include <linux/bsearch.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sort.h> #include <linux/uaccess.h> #include <linux/extable.h> #ifndef ARCH_HAS_RELATIVE_EXTABLE #define ex_to_insn(x) ((x)->insn) #else static inline unsigned long ex_to_insn(const struct exception_table_entry *x) { return (unsigned long)&x->insn + x->insn; } #endif #ifndef ARCH_HAS_RELATIVE_EXTABLE #define swap_ex NULL #else static void swap_ex(void *a, void *b, int size) { struct exception_table_entry *x = a, *y = b, tmp; int delta = b - a; tmp = *x; x->insn = y->insn + delta; y->insn = tmp.insn - delta; #ifdef swap_ex_entry_fixup swap_ex_entry_fixup(x, y, tmp, delta); #else x->fixup = y->fixup + delta; y->fixup = tmp.fixup - delta; #endif } #endif /* ARCH_HAS_RELATIVE_EXTABLE */ /* * The exception table needs to be sorted so that the binary * search that we use to find entries in it works properly. * This is used both for the kernel exception table and for * the exception tables of modules that get loaded. */ static int cmp_ex_sort(const void *a, const void *b) { const struct exception_table_entry *x = a, *y = b; /* avoid overflow */ if (ex_to_insn(x) > ex_to_insn(y)) return 1; if (ex_to_insn(x) < ex_to_insn(y)) return -1; return 0; } void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), cmp_ex_sort, swap_ex); } #ifdef CONFIG_MODULES /* * If the exception table is sorted, any referring to the module init * will be at the beginning or the end. */ void trim_init_extable(struct module *m) { /*trim the beginning*/ while (m->num_exentries && within_module_init(ex_to_insn(&m->extable[0]), m)) { m->extable++; m->num_exentries--; } /*trim the end*/ while (m->num_exentries && within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), m)) m->num_exentries--; } #endif /* CONFIG_MODULES */ static int cmp_ex_search(const void *key, const void *elt) { const struct exception_table_entry *_elt = elt; unsigned long _key = *(unsigned long *)key; /* avoid overflow */ if (_key > ex_to_insn(_elt)) return 1; if (_key < ex_to_insn(_elt)) return -1; return 0; } /* * Search one exception table for an entry corresponding to the * given instruction address, and return the address of the entry, * or NULL if none is found. * We use a binary search, and thus we assume that the table is * already sorted. */ const struct exception_table_entry * search_extable(const struct exception_table_entry *base, const size_t num, unsigned long value) { return bsearch(&value, base, num, sizeof(struct exception_table_entry), cmp_ex_search); }
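/*
 * Illustrative sketch (userspace, not kernel code): the same sort-once,
 * bsearch-per-lookup pattern used by sort_extable()/search_extable(),
 * applied to a toy table keyed by "instruction address".  All demo_* names
 * and the addresses are made up.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_entry {
	unsigned long insn;	/* faulting address */
	unsigned long fixup;	/* where to resume */
};

static int demo_cmp_search(const void *key, const void *elt)
{
	unsigned long k = *(const unsigned long *)key;
	const struct demo_entry *e = elt;

	if (k > e->insn)
		return 1;
	if (k < e->insn)
		return -1;
	return 0;
}

int main(void)
{
	/* must already be sorted by ->insn, as the kernel table is */
	static const struct demo_entry table[] = {
		{ 0x1000, 0x9000 }, { 0x2000, 0x9100 }, { 0x3000, 0x9200 },
	};
	unsigned long fault = 0x2000;
	const struct demo_entry *e;

	e = bsearch(&fault, table, sizeof(table) / sizeof(table[0]),
		    sizeof(table[0]), demo_cmp_search);
	printf("fault %#lx -> %s\n", fault, e ? "fixup found" : "no entry");
	return 0;
}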
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008 Intel Corporation * Author: Matthew Wilcox <willy@linux.intel.com> * * Please see kernel/locking/semaphore.c for documentation of these functions */ #ifndef __LINUX_SEMAPHORE_H #define __LINUX_SEMAPHORE_H #include <linux/list.h> #include <linux/spinlock.h> /* Please don't access any members of this structure directly */ struct semaphore { raw_spinlock_t lock; unsigned int count; struct list_head wait_list; }; #define __SEMAPHORE_INITIALIZER(name, n) \ { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ } /* * Unlike mutexes, binary semaphores do not have an owner, so up() can * be called in a different thread from the one which called down(). * It is also safe to call down_trylock() and up() from interrupt * context. */ #define DEFINE_SEMAPHORE(_name, _n) \ struct semaphore _name = __SEMAPHORE_INITIALIZER(_name, _n) static inline void sema_init(struct semaphore *sem, int val) { static struct lock_class_key __key; *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val); lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); } extern void down(struct semaphore *sem); extern int __must_check down_interruptible(struct semaphore *sem); extern int __must_check down_killable(struct semaphore *sem); extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); #endif /* __LINUX_SEMAPHORE_H */
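/*
 * Illustrative usage sketch for the API declared above (kernel context is
 * assumed; demo_sem and demo_work are made-up names).  A count of 1 gives
 * binary-semaphore behaviour, and per the comment above, up() may be called
 * from a different context than the one that called down().
 */
static DEFINE_SEMAPHORE(demo_sem, 1);

static int demo_work(void)
{
	int ret;

	ret = down_interruptible(&demo_sem);	/* returns -EINTR on signal */
	if (ret)
		return ret;

	/* ... critical section protected by demo_sem ... */

	up(&demo_sem);
	return 0;
}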
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/ch11.h> #define TEST_SE0_NAK_PID 0x0101 #define TEST_J_PID 0x0102 #define TEST_K_PID 0x0103 #define TEST_PACKET_PID 0x0104 #define TEST_HS_HOST_PORT_SUSPEND_RESUME 0x0106 #define TEST_SINGLE_STEP_GET_DEV_DESC 0x0107 #define TEST_SINGLE_STEP_SET_FEATURE 0x0108 extern const struct usb_device_id *usb_device_match_id(struct usb_device *udev, const struct usb_device_id *id); /* * A list of USB hubs which requires to disable the power * to the port before starting the testing procedures. */ static const struct usb_device_id ehset_hub_list[] = { { USB_DEVICE(0x0424, 0x4502) }, { USB_DEVICE(0x0424, 0x4913) }, { USB_DEVICE(0x0451, 0x8027) }, { } }; static int ehset_prepare_port_for_testing(struct usb_device *hub_udev, u16 portnum) { int ret = 0; /* * The USB2.0 spec chapter 11.24.2.13 says that the USB port which is * going under test needs to be put in suspend before sending the * test command. Most hubs don't enforce this precondition, but there * are some hubs which needs to disable the power to the port before * starting the test. */ if (usb_device_match_id(hub_udev, ehset_hub_list)) { ret = usb_control_msg_send(hub_udev, 0, USB_REQ_CLEAR_FEATURE, USB_RT_PORT, USB_PORT_FEAT_ENABLE, portnum, NULL, 0, 1000, GFP_KERNEL); /* * Wait for the port to be disabled. It's an arbitrary value * which worked every time. */ msleep(100); } else { /* * For the hubs which are compliant with the spec, * put the port in SUSPEND.
*/ ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); } return ret; } static int ehset_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret = -EINVAL; struct usb_device *dev = interface_to_usbdev(intf); struct usb_device *hub_udev = dev->parent; struct usb_device_descriptor buf; u8 portnum = dev->portnum; u16 test_pid = le16_to_cpu(dev->descriptor.idProduct); switch (test_pid) { case TEST_SE0_NAK_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_SE0_NAK << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_J_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_J << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_K_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_K << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_PACKET_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_PACKET << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_HS_HOST_PORT_SUSPEND_RESUME: /* Test: wait for 15secs -> suspend -> 15secs delay -> resume */ msleep(15 * 1000); ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); if (ret < 0) break; msleep(15 * 1000); ret = usb_control_msg_send(hub_udev, 0, USB_REQ_CLEAR_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_SINGLE_STEP_GET_DEV_DESC: /* Test: wait for 15secs -> GetDescriptor request */ msleep(15 * 1000); ret = usb_control_msg_recv(dev, 0, USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, &buf, USB_DT_DEVICE_SIZE, USB_CTRL_GET_TIMEOUT, GFP_KERNEL); break; case TEST_SINGLE_STEP_SET_FEATURE: /* * GetDescriptor SETUP request -> 15secs delay -> IN & STATUS * * Note, this test is only supported on root hubs since the * SetPortFeature handling can only be done inside the HCD's * hub_control callback function. 
*/ if (hub_udev != dev->bus->root_hub) { dev_err(&intf->dev, "SINGLE_STEP_SET_FEATURE test only supported on root hub\n"); break; } ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (6 << 8) | portnum, NULL, 0, 60 * 1000, GFP_KERNEL); break; default: dev_err(&intf->dev, "%s: unsupported PID: 0x%x\n", __func__, test_pid); } return ret; } static void ehset_disconnect(struct usb_interface *intf) { } static const struct usb_device_id ehset_id_table[] = { { USB_DEVICE(0x1a0a, TEST_SE0_NAK_PID) }, { USB_DEVICE(0x1a0a, TEST_J_PID) }, { USB_DEVICE(0x1a0a, TEST_K_PID) }, { USB_DEVICE(0x1a0a, TEST_PACKET_PID) }, { USB_DEVICE(0x1a0a, TEST_HS_HOST_PORT_SUSPEND_RESUME) }, { USB_DEVICE(0x1a0a, TEST_SINGLE_STEP_GET_DEV_DESC) }, { USB_DEVICE(0x1a0a, TEST_SINGLE_STEP_SET_FEATURE) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ehset_id_table); static struct usb_driver ehset_driver = { .name = "usb_ehset_test", .probe = ehset_probe, .disconnect = ehset_disconnect, .id_table = ehset_id_table, }; module_usb_driver(ehset_driver); MODULE_DESCRIPTION("USB Driver for EHSET Test Fixture"); MODULE_LICENSE("GPL v2");
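/*
 * Illustrative note (not part of the driver): for SetPortFeature(PORT_TEST)
 * the hub expects the test selector in the high byte of wValue and the port
 * number in the low byte, which is what the "(USB_TEST_x << 8) | portnum"
 * expressions above build.  A tiny standalone check of that packing; the
 * selector and port values below are arbitrary examples.
 */
#include <assert.h>

static unsigned short demo_port_test_wvalue(unsigned char selector,
					    unsigned char portnum)
{
	return (unsigned short)((selector << 8) | portnum);
}

int main(void)
{
	/* e.g. selector 0x04 (Test_Packet) on port 2 -> wValue 0x0402 */
	assert(demo_port_test_wvalue(0x04, 2) == 0x0402);
	return 0;
}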
/* SPDX-License-Identifier: GPL-2.0 */ /* * Declarations of X.25 Packet Layer type objects. * * History * nov/17/96 Jonathan Naylor Initial version. * mar/20/00 Daniela Squassoni Disabling/enabling of facilities * negotiation. */ #ifndef _X25_H #define _X25_H #include <linux/x25.h> #include <linux/slab.h> #include <linux/refcount.h> #include <net/sock.h> #define X25_ADDR_LEN 16 #define X25_MAX_L2_LEN 18 /* 802.2 LLC */ #define X25_STD_MIN_LEN 3 #define X25_EXT_MIN_LEN 4 #define X25_GFI_SEQ_MASK 0x30 #define X25_GFI_STDSEQ 0x10 #define X25_GFI_EXTSEQ 0x20 #define X25_Q_BIT 0x80 #define X25_D_BIT 0x40 #define X25_STD_M_BIT 0x10 #define X25_EXT_M_BIT 0x01 #define X25_CALL_REQUEST 0x0B #define X25_CALL_ACCEPTED 0x0F #define X25_CLEAR_REQUEST 0x13 #define X25_CLEAR_CONFIRMATION 0x17 #define X25_DATA 0x00 #define X25_INTERRUPT 0x23 #define X25_INTERRUPT_CONFIRMATION 0x27 #define X25_RR 0x01 #define X25_RNR 0x05 #define X25_REJ 0x09 #define X25_RESET_REQUEST 0x1B #define X25_RESET_CONFIRMATION 0x1F #define X25_REGISTRATION_REQUEST 0xF3 #define X25_REGISTRATION_CONFIRMATION 0xF7 #define X25_RESTART_REQUEST 0xFB #define X25_RESTART_CONFIRMATION 0xFF #define X25_DIAGNOSTIC 0xF1 #define X25_ILLEGAL 0xFD /* Define the various conditions that may exist */ #define X25_COND_ACK_PENDING 0x01 #define X25_COND_OWN_RX_BUSY 0x02 #define X25_COND_PEER_RX_BUSY 0x04 /* Define Link State constants.
*/ enum { X25_STATE_0, /* Ready */ X25_STATE_1, /* Awaiting Call Accepted */ X25_STATE_2, /* Awaiting Clear Confirmation */ X25_STATE_3, /* Data Transfer */ X25_STATE_4, /* Awaiting Reset Confirmation */ X25_STATE_5 /* Call Accepted / Call Connected pending */ }; enum { X25_LINK_STATE_0, X25_LINK_STATE_1, X25_LINK_STATE_2, X25_LINK_STATE_3 }; #define X25_DEFAULT_T20 (180 * HZ) /* Default T20 value */ #define X25_DEFAULT_T21 (200 * HZ) /* Default T21 value */ #define X25_DEFAULT_T22 (180 * HZ) /* Default T22 value */ #define X25_DEFAULT_T23 (180 * HZ) /* Default T23 value */ #define X25_DEFAULT_T2 (3 * HZ) /* Default ack holdback value */ #define X25_DEFAULT_WINDOW_SIZE 2 /* Default Window Size */ #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ #define X25_DEFAULT_THROUGHPUT 0x0A /* Default Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ #define X25_SMODULUS 8 #define X25_EMODULUS 128 /* * X.25 Facilities constants. */ #define X25_FAC_CLASS_MASK 0xC0 #define X25_FAC_CLASS_A 0x00 #define X25_FAC_CLASS_B 0x40 #define X25_FAC_CLASS_C 0x80 #define X25_FAC_CLASS_D 0xC0 #define X25_FAC_REVERSE 0x01 /* also fast select */ #define X25_FAC_THROUGHPUT 0x02 #define X25_FAC_PACKET_SIZE 0x42 #define X25_FAC_WINDOW_SIZE 0x43 #define X25_MAX_FAC_LEN 60 #define X25_MAX_CUD_LEN 128 #define X25_FAC_CALLING_AE 0xCB #define X25_FAC_CALLED_AE 0xC9 #define X25_MARKER 0x00 #define X25_DTE_SERVICES 0x0F #define X25_MAX_AE_LEN 40 /* Max num of semi-octets in AE - OSI Nw */ #define X25_MAX_DTE_FACIL_LEN 21 /* Max length of DTE facility params */ /* Bitset in x25_sock->flags for misc flags */ #define X25_Q_BIT_FLAG 0 #define X25_INTERRUPT_FLAG 1 #define X25_ACCPT_APPRV_FLAG 2 /** * struct x25_route - x25 routing entry * @node - entry in x25_list_lock * @address - Start of address range * @sigdigits - Number of sig digits * @dev - More than one for MLP * @refcnt - reference counter */ struct x25_route { struct list_head node; struct x25_address address; unsigned int sigdigits; struct net_device *dev; refcount_t refcnt; }; struct x25_neigh { struct list_head node; struct net_device *dev; unsigned int state; unsigned int extended; struct sk_buff_head queue; unsigned long t20; struct timer_list t20timer; unsigned long global_facil_mask; refcount_t refcnt; }; struct x25_sock { struct sock sk; struct x25_address source_addr, dest_addr; struct x25_neigh *neighbour; unsigned int lci, cudmatchlength; unsigned char state, condition; unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; unsigned short fraglen; unsigned long flags; struct sk_buff_head ack_queue; struct sk_buff_head fragment_queue; struct sk_buff_head interrupt_in_queue; struct sk_buff_head interrupt_out_queue; struct timer_list timer; struct x25_causediag causediag; struct x25_facilities facilities; struct x25_dte_facilities dte_facilities; struct x25_calluserdata calluserdata; unsigned long vc_facil_mask; /* inc_call facilities mask */ }; struct x25_forward { struct list_head node; unsigned int lci; struct net_device *dev1; struct net_device *dev2; atomic_t refcnt; }; #define x25_sk(ptr) container_of_const(ptr, struct x25_sock, sk) /* af_x25.c */ extern int sysctl_x25_restart_request_timeout; extern int sysctl_x25_call_request_timeout; extern int sysctl_x25_reset_request_timeout; extern int sysctl_x25_clear_request_timeout; extern int sysctl_x25_ack_holdback_timeout; extern int sysctl_x25_forward; int x25_parse_address_block(struct sk_buff *skb, struct x25_address *called_addr, struct x25_address 
*calling_addr); int x25_addr_ntoa(unsigned char *, struct x25_address *, struct x25_address *); int x25_addr_aton(unsigned char *, struct x25_address *, struct x25_address *); struct sock *x25_find_socket(unsigned int, struct x25_neigh *); void x25_destroy_socket_from_timer(struct sock *); int x25_rx_call_request(struct sk_buff *, struct x25_neigh *, unsigned int); void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ void x25_send_frame(struct sk_buff *, struct x25_neigh *); int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); void x25_establish_link(struct x25_neigh *); void x25_terminate_link(struct x25_neigh *); /* x25_facilities.c */ int x25_parse_facilities(struct sk_buff *, struct x25_facilities *, struct x25_dte_facilities *, unsigned long *); int x25_create_facilities(unsigned char *, struct x25_facilities *, struct x25_dte_facilities *, unsigned long); int x25_negotiate_facilities(struct sk_buff *, struct sock *, struct x25_facilities *, struct x25_dte_facilities *); void x25_limit_facilities(struct x25_facilities *, struct x25_neigh *); /* x25_forward.c */ void x25_clear_forward_by_lci(unsigned int lci); void x25_clear_forward_by_dev(struct net_device *); int x25_forward_data(int, struct x25_neigh *, struct sk_buff *); int x25_forward_call(struct x25_address *, struct x25_neigh *, struct sk_buff *, int); /* x25_in.c */ int x25_process_rx_frame(struct sock *, struct sk_buff *); int x25_backlog_rcv(struct sock *, struct sk_buff *); /* x25_link.c */ void x25_link_control(struct sk_buff *, struct x25_neigh *, unsigned short); void x25_link_device_up(struct net_device *); void x25_link_device_down(struct net_device *); void x25_link_established(struct x25_neigh *); void x25_link_terminated(struct x25_neigh *); void x25_transmit_clear_request(struct x25_neigh *, unsigned int, unsigned char); void x25_transmit_link(struct sk_buff *, struct x25_neigh *); int x25_subscr_ioctl(unsigned int, void __user *); struct x25_neigh *x25_get_neigh(struct net_device *); void x25_link_free(void); /* x25_neigh.c */ static __inline__ void x25_neigh_hold(struct x25_neigh *nb) { refcount_inc(&nb->refcnt); } static __inline__ void x25_neigh_put(struct x25_neigh *nb) { if (refcount_dec_and_test(&nb->refcnt)) kfree(nb); } /* x25_out.c */ int x25_output(struct sock *, struct sk_buff *); void x25_kick(struct sock *); void x25_enquiry_response(struct sock *); /* x25_route.c */ struct x25_route *x25_get_route(struct x25_address *addr); struct net_device *x25_dev_get(char *); void x25_route_device_down(struct net_device *dev); int x25_route_ioctl(unsigned int, void __user *); void x25_route_free(void); static __inline__ void x25_route_hold(struct x25_route *rt) { refcount_inc(&rt->refcnt); } static __inline__ void x25_route_put(struct x25_route *rt) { if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } /* x25_subr.c */ void x25_clear_queues(struct sock *); void x25_frames_acked(struct sock *, unsigned short); void x25_requeue_frames(struct sock *); int x25_validate_nr(struct sock *, unsigned short); void x25_write_internal(struct sock *, int); int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int *, int *); void x25_disconnect(struct sock *, int, unsigned char, unsigned char); /* x25_timer.c */ void x25_init_timers(struct sock *sk); void x25_start_heartbeat(struct sock *); void x25_start_t2timer(struct sock *); void x25_start_t21timer(struct sock *); void x25_start_t22timer(struct sock *); void x25_start_t23timer(struct sock *); 
void x25_stop_heartbeat(struct sock *); void x25_stop_timer(struct sock *); unsigned long x25_display_timer(struct sock *); void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ struct x25_skb_cb { unsigned int flags; }; #define X25_SKB_CB(s) ((struct x25_skb_cb *) ((s)->cb)) extern struct hlist_head x25_list; extern rwlock_t x25_list_lock; extern struct list_head x25_route_list; extern rwlock_t x25_route_list_lock; extern struct list_head x25_forward_list; extern rwlock_t x25_forward_list_lock; extern struct list_head x25_neigh_list; extern rwlock_t x25_neigh_list_lock; int x25_proc_init(void); void x25_proc_exit(void); #endif
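/*
 * Illustrative sketch (not from the X.25 sources): x25_neigh_hold() and
 * x25_neigh_put() above implement plain reference counting, so a user keeps
 * a reference for as long as it keeps the pointer.  demo_use_neigh() is a
 * made-up caller; that x25_get_neigh() returns an already-held reference is
 * an assumption here.
 */
static int demo_use_neigh(struct net_device *dev)
{
	struct x25_neigh *nb = x25_get_neigh(dev);

	if (!nb)
		return -ENODEV;

	/* ... use nb->dev, nb->state, etc. while the reference is held ... */

	x25_neigh_put(nb);	/* drop the reference; frees nb on last put */
	return 0;
}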
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen <j@w1.fi> * Copyright (C) 2016-2017 Intel Deutschland GmbH * Copyright (C) 2020-2023 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/compiler.h> #include <linux/ieee80211.h> #include <linux/gfp.h> #include <linux/unaligned.h> #include <net/mac80211.h> #include <crypto/aes.h> #include <crypto/utils.h> #include "ieee80211_i.h" #include "michael.h" #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" #include "aes_gmac.h" #include "aes_gcm.h" #include "wpa.h" ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { u8 *data, *key, *mic; size_t data_len; unsigned int hdrlen; struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int tail; hdr = (struct ieee80211_hdr *)skb->data; if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < hdrlen) return TX_DROP; data = skb->data + hdrlen; data_len = skb->len - hdrlen; if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) { /* Need to use software crypto for the test */ info->control.hw_key = NULL; } if (info->control.hw_key && (info->flags & IEEE80211_TX_CTL_DONTFRAG || ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) && !(tx->key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE))) { /* hwaccel - with no need for SW-generated MMIC or MIC space */ return TX_CONTINUE; } tail = MICHAEL_MIC_LEN; if (!info->control.hw_key) tail += IEEE80211_TKIP_ICV_LEN; if (WARN(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_TKIP_IV_LEN, "mmic: not enough head/tail (%d/%d,%d/%d)\n", skb_headroom(skb), IEEE80211_TKIP_IV_LEN, skb_tailroom(skb), tail)) return TX_DROP; mic = skb_put(skb, MICHAEL_MIC_LEN); if (tx->key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) { /* Zeroed MIC can help with debug */ memset(mic, 0, MICHAEL_MIC_LEN); return TX_CONTINUE; } key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) mic[0]++; return TX_CONTINUE; } ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) {
u8 *data, *key = NULL; size_t data_len; unsigned int hdrlen; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; /* * it makes no sense to check for MIC errors on anything other * than data frames. */ if (!ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; /* * No way to verify the MIC if the hardware stripped it or * the IV with the key index. In this case we have solely rely * on the driver to set RX_FLAG_MMIC_ERROR in the event of a * MIC failure report. */ if (status->flag & (RX_FLAG_MMIC_STRIPPED | RX_FLAG_IV_STRIPPED)) { if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail_no_key; if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_TKIP) goto update_iv; return RX_CONTINUE; } /* * Some hardware seems to generate Michael MIC failure reports; even * though, the frame was not encrypted with TKIP and therefore has no * MIC. Ignore the flag them to avoid triggering countermeasures. */ if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || !(status->flag & RX_FLAG_DECRYPTED)) return RX_CONTINUE; if (rx->sdata->vif.type == NL80211_IFTYPE_AP && rx->key->conf.keyidx) { /* * APs with pairwise keys should never receive Michael MIC * errors for non-zero keyidx because these are reserved for * group keys and only the AP is sending real multicast * frames in the BSS. */ return RX_DROP_U_AP_RX_GROUPCAST; } if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < hdrlen + MICHAEL_MIC_LEN) return RX_DROP_U_SHORT_MMIC; if (skb_linearize(rx->skb)) return RX_DROP_U_OOM; hdr = (void *)skb->data; data = skb->data + hdrlen; data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN)) goto mic_fail; /* remove Michael MIC from payload */ skb_trim(skb, skb->len - MICHAEL_MIC_LEN); update_iv: /* update IV in key information to be able to detect replays */ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32; rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16; return RX_CONTINUE; mic_fail: rx->key->u.tkip.mic_failures++; mic_fail_no_key: /* * In some cases the key can be unset - e.g. a multicast packet, in * a driver that supports HW encryption. Send up the key idx only if * the key is set. */ cfg80211_michael_mic_failure(rx->sdata->dev, hdr->addr2, is_multicast_ether_addr(hdr->addr1) ? NL80211_KEYTYPE_GROUP : NL80211_KEYTYPE_PAIRWISE, rx->key ? 
rx->key->conf.keyidx : -1, NULL, GFP_ATOMIC); return RX_DROP_U_MMIC_FAIL; } static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); unsigned int hdrlen; int len, tail; u64 pn; u8 *pos; if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { /* hwaccel - with no need for software-generated IV */ return 0; } hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (info->control.hw_key) tail = 0; else tail = IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return -1; pos = skb_push(skb, IEEE80211_TKIP_IV_LEN); memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen); pos += hdrlen; /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; /* Increase IV for the frame */ pn = atomic64_inc_return(&key->conf.tx_pn); pos = ieee80211_tkip_add_iv(pos, &key->conf, pn); /* hwaccel - with software IV */ if (info->control.hw_key) return 0; /* Add room for ICV */ skb_put(skb, IEEE80211_TKIP_ICV_LEN); return ieee80211_tkip_encrypt_data(&tx->local->wep_tx_ctx, key, skb, pos, len); } ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { if (tkip_encrypt_skb(tx, skb) < 0) return TX_DROP; } return TX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; int hdrlen, res, hwaccel = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) return RX_DROP_U_SHORT_TKIP; /* it may be possible to optimize this a bit more */ if (skb_linearize(rx->skb)) return RX_DROP_U_OOM; hdr = (void *)skb->data; /* * Let TKIP code verify IV, but skip decryption. * In the case where hardware checks the IV as well, * we don't even get here, see ieee80211_rx_h_decrypt() */ if (status->flag & RX_FLAG_DECRYPTED) hwaccel = 1; res = ieee80211_tkip_decrypt_data(&rx->local->wep_rx_ctx, key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->security_idx, &rx->tkip.iv32, &rx->tkip.iv16); if (res != TKIP_DECRYPT_OK) return RX_DROP_U_TKIP_FAIL; /* Trim ICV */ if (!(status->flag & RX_FLAG_ICV_STRIPPED)) skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_TKIP_IV_LEN); return RX_CONTINUE; } /* * Calculate AAD for CCMP/GCMP, returning qos_tid since we * need that in CCMP also for b_0. 
*/ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) { struct ieee80211_hdr *hdr = (void *)skb->data; __le16 mask_fc; int a4_included, mgmt; u8 qos_tid; u16 len_a = 22; /* * Mask FC: zero subtype b4 b5 b6 (if not mgmt) * Retry, PwrMgt, MoreData, Order (if Qos Data); set Protected */ mgmt = ieee80211_is_mgmt(hdr->frame_control); mask_fc = hdr->frame_control; mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); if (!mgmt) mask_fc &= ~cpu_to_le16(0x0070); mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); a4_included = ieee80211_has_a4(hdr->frame_control); if (a4_included) len_a += 6; if (ieee80211_is_data_qos(hdr->frame_control)) { qos_tid = *ieee80211_get_qos_ctl(hdr); if (spp_amsdu) qos_tid &= IEEE80211_QOS_CTL_TID_MASK | IEEE80211_QOS_CTL_A_MSDU_PRESENT; else qos_tid &= IEEE80211_QOS_CTL_TID_MASK; mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER); len_a += 2; } else { qos_tid = 0; } /* AAD (extra authenticate-only data) / masked 802.11 header * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ put_unaligned_be16(len_a, &aad[0]); put_unaligned(mask_fc, (__le16 *)&aad[2]); memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; aad[23] = 0; if (a4_included) { memcpy(&aad[24], hdr->addr4, ETH_ALEN); aad[30] = qos_tid; aad[31] = 0; } else { memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); aad[24] = qos_tid; } return qos_tid; } static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, bool spp_amsdu) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu); /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC * mode authentication are not allowed to collide, yet both are derived * from this vector b_0. We only set L := 1 here to indicate that the * data size can be represented in (L+1) bytes. The CCM layer will take * care of storing the data length in the top (L+1) bytes and setting * and clearing the other bits as is required to derive the two IVs. 
*/ b_0[0] = 0x1; /* Nonce: Nonce Flags | A2 | PN * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) */ b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); } static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id) { hdr[0] = pn[5]; hdr[1] = pn[4]; hdr[2] = 0; hdr[3] = 0x20 | (key_id << 6); hdr[4] = pn[3]; hdr[5] = pn[2]; hdr[6] = pn[1]; hdr[7] = pn[0]; } static inline void ccmp_hdr2pn(u8 *pn, u8 *hdr) { pn[0] = hdr[7]; pn[1] = hdr[6]; pn[2] = hdr[5]; pn[3] = hdr[4]; pn[4] = hdr[1]; pn[5] = hdr[0]; } static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, len, tail; u8 *pos; u8 pn[6]; u64 pn64; u8 aad[CCM_AAD_LEN]; u8 b_0[AES_BLOCK_SIZE]; if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && !((info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && ieee80211_is_mgmt(hdr->frame_control))) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields */ return 0; } hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (info->control.hw_key) tail = 0; else tail = mic_len; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) return -1; pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN); memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); pn[5] = pn64; pn[4] = pn64 >> 8; pn[3] = pn64 >> 16; pn[2] = pn64 >> 24; pn[1] = pn64 >> 32; pn[0] = pn64 >> 40; ccmp_pn2hdr(pos, pn, key->conf.keyidx); /* hwaccel - with software CCMP header */ if (info->control.hw_key) return 0; pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad, key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, skb_put(skb, mic_len)); } ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, unsigned int mic_len) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { if (ccmp_encrypt_skb(tx, skb, mic_len) < 0) return TX_DROP; } return TX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[IEEE80211_CCMP_PN_LEN]; int data_len; int queue; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!ieee80211_is_data(hdr->frame_control) && !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_U_SHORT_CCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ hdr = (void *)rx->skb->data; data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || 
data_len < 0) return RX_DROP_U_SHORT_CCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; ccmp_hdr2pn(pn, skb->data + hdrlen); queue = rx->security_idx; res = memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN); if (res < 0 || (!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) { key->u.ccmp.replays++; return RX_DROP_U_REPLAY; } if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 b_0[AES_BLOCK_SIZE]; /* hardware didn't decrypt/verify MIC */ ccmp_special_blocks(skb, pn, b_0, aad, key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, skb->data + skb->len - mic_len)) return RX_DROP_U_MIC_FAIL; } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); if (unlikely(ieee80211_is_frag(hdr))) memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove CCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_U_SHORT_CCMP_MIC; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); return RX_CONTINUE; } static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad, bool spp_amsdu) { struct ieee80211_hdr *hdr = (void *)skb->data; memcpy(j_0, hdr->addr2, ETH_ALEN); memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); j_0[13] = 0; j_0[14] = 0; j_0[AES_BLOCK_SIZE - 1] = 0x01; ccmp_gcmp_aad(skb, aad, spp_amsdu); } static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) { hdr[0] = pn[5]; hdr[1] = pn[4]; hdr[2] = 0; hdr[3] = 0x20 | (key_id << 6); hdr[4] = pn[3]; hdr[5] = pn[2]; hdr[6] = pn[1]; hdr[7] = pn[0]; } static inline void gcmp_hdr2pn(u8 *pn, const u8 *hdr) { pn[0] = hdr[7]; pn[1] = hdr[6]; pn[2] = hdr[5]; pn[3] = hdr[4]; pn[4] = hdr[1]; pn[5] = hdr[0]; } static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, len, tail; u8 *pos; u8 pn[6]; u64 pn64; u8 aad[GCM_AAD_LEN]; u8 j_0[AES_BLOCK_SIZE]; if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && !((info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && ieee80211_is_mgmt(hdr->frame_control))) { /* hwaccel has no need for preallocated room for GCMP * header or MIC fields */ return 0; } hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (info->control.hw_key) tail = 0; else tail = IEEE80211_GCMP_MIC_LEN; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_GCMP_HDR_LEN)) return -1; pos = skb_push(skb, IEEE80211_GCMP_HDR_LEN); memmove(pos, pos + IEEE80211_GCMP_HDR_LEN, hdrlen); skb_set_network_header(skb, skb_network_offset(skb) + IEEE80211_GCMP_HDR_LEN); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); pn[5] = pn64; pn[4] = pn64 >> 8; pn[3] = pn64 >> 16; pn[2] = pn64 >> 24; pn[1] = pn64 >> 32; pn[0] = pn64 >> 40; gcmp_pn2hdr(pos, pn, key->conf.keyidx); /* hwaccel - with software GCMP header */ if (info->control.hw_key) return 0; pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad, key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); return 
ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } ieee80211_tx_result ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { if (gcmp_encrypt_skb(tx, skb) < 0) return TX_DROP; } return TX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[IEEE80211_GCMP_PN_LEN]; int data_len, queue, mic_len = IEEE80211_GCMP_MIC_LEN; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!ieee80211_is_data(hdr->frame_control) && !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) return RX_DROP_U_SHORT_GCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ hdr = (void *)rx->skb->data; data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_U_SHORT_GCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; gcmp_hdr2pn(pn, skb->data + hdrlen); queue = rx->security_idx; res = memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN); if (res < 0 || (!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) { key->u.gcmp.replays++; return RX_DROP_U_REPLAY; } if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 j_0[AES_BLOCK_SIZE]; /* hardware didn't decrypt/verify MIC */ gcmp_special_blocks(skb, pn, j_0, aad, key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); if (ieee80211_aes_gcm_decrypt( key->u.gcmp.tfm, j_0, aad, skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, data_len, skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) return RX_DROP_U_MIC_FAIL; } memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); if (unlikely(ieee80211_is_frag(hdr))) memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove GCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_U_SHORT_GCMP_MIC; memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_GCMP_HDR_LEN); return RX_CONTINUE; } static void bip_aad(struct sk_buff *skb, u8 *aad) { __le16 mask_fc; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; /* BIP AAD: FC(masked) || A1 || A2 || A3 */ /* FC type/subtype */ /* Mask FC Retry, PwrMgt, MoreData flags to zero */ mask_fc = hdr->frame_control; mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); put_unaligned(mask_fc, (__le16 *) &aad[0]); /* A1 || A2 || A3 */ memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); } static inline void bip_ipn_set64(u8 *d, u64 pn) { *d++ = pn; *d++ = pn >> 8; *d++ = pn >> 16; *d++ = pn >> 24; *d++ = pn >> 32; *d = pn >> 40; } static inline void bip_ipn_swap(u8 *d, const u8 *s) { *d++ = s[5]; *d++ = s[4]; *d++ = s[3]; *d++ = s[2]; *d++ = s[1]; *d = s[0]; } ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; struct ieee80211_tx_info *info; struct ieee80211_key *key = tx->key; struct ieee80211_mmie *mmie; u8 aad[20]; u64 pn64; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; skb = skb_peek(&tx->skbs); info = IEEE80211_SKB_CB(skb); if (info->control.hw_key 
&& !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE)) return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) return TX_DROP; mmie = skb_put(skb, sizeof(*mmie)); mmie->element_id = WLAN_EID_MMIE; mmie->length = sizeof(*mmie) - 2; mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); if (info->control.hw_key) return TX_CONTINUE; bip_aad(skb, aad); /* * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64) */ ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mmie->mic); return TX_CONTINUE; } ieee80211_tx_result ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; struct ieee80211_tx_info *info; struct ieee80211_key *key = tx->key; struct ieee80211_mmie_16 *mmie; u8 aad[20]; u64 pn64; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; skb = skb_peek(&tx->skbs); info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE)) return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) return TX_DROP; mmie = skb_put(skb, sizeof(*mmie)); mmie->element_id = WLAN_EID_MMIE; mmie->length = sizeof(*mmie) - 2; mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); if (info->control.hw_key) return TX_CONTINUE; bip_aad(skb, aad); /* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mmie->mic); return TX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie *mmie; u8 aad[20], mic[8], ipn[6]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_CONTINUE; /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) return RX_DROP_U_SHORT_CMAC; mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); if (mmie->element_id != WLAN_EID_MMIE || mmie->length != sizeof(*mmie) - 2) return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */ bip_ipn_swap(ipn, mmie->sequence_number); if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { key->u.aes_cmac.replays++; return RX_DROP_U_REPLAY; } if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ bip_aad(skb, aad); ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mic); if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_cmac.icverrors++; return RX_DROP_U_MIC_FAIL; } } memcpy(key->u.aes_cmac.rx_pn, ipn, 6); /* Remove MMIE */ skb_trim(skb, skb->len - sizeof(*mmie)); return RX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; u8 aad[20], mic[16], ipn[6]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_CONTINUE; /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) return RX_DROP_U_SHORT_CMAC256; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); if 
(mmie->element_id != WLAN_EID_MMIE || mmie->length != sizeof(*mmie) - 2) return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */ bip_ipn_swap(ipn, mmie->sequence_number); if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { key->u.aes_cmac.replays++; return RX_DROP_U_REPLAY; } if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ bip_aad(skb, aad); ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mic); if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_cmac.icverrors++; return RX_DROP_U_MIC_FAIL; } } memcpy(key->u.aes_cmac.rx_pn, ipn, 6); /* Remove MMIE */ skb_trim(skb, skb->len - sizeof(*mmie)); return RX_CONTINUE; } ieee80211_tx_result ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; struct ieee80211_tx_info *info; struct ieee80211_key *key = tx->key; struct ieee80211_mmie_16 *mmie; struct ieee80211_hdr *hdr; u8 aad[GMAC_AAD_LEN]; u64 pn64; u8 nonce[GMAC_NONCE_LEN]; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; skb = skb_peek(&tx->skbs); info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE)) return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) return TX_DROP; mmie = skb_put(skb, sizeof(*mmie)); mmie->element_id = WLAN_EID_MMIE; mmie->length = sizeof(*mmie) - 2; mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); if (info->control.hw_key) return TX_CONTINUE; bip_aad(skb, aad); hdr = (struct ieee80211_hdr *)skb->data; memcpy(nonce, hdr->addr2, ETH_ALEN); bip_ipn_swap(nonce + ETH_ALEN, mmie->sequence_number); /* MIC = AES-GMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mmie->mic) < 0) return TX_DROP; return TX_CONTINUE; } ieee80211_rx_result ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; u8 aad[GMAC_AAD_LEN], *mic, ipn[6], nonce[GMAC_NONCE_LEN]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_CONTINUE; /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) return RX_DROP_U_SHORT_GMAC; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); if (mmie->element_id != WLAN_EID_MMIE || mmie->length != sizeof(*mmie) - 2) return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */ bip_ipn_swap(ipn, mmie->sequence_number); if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) { key->u.aes_gmac.replays++; return RX_DROP_U_REPLAY; } if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ bip_aad(skb, aad); memcpy(nonce, hdr->addr2, ETH_ALEN); memcpy(nonce + ETH_ALEN, ipn, 6); mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC); if (!mic) return RX_DROP_U_OOM; if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mic) < 0 || crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_gmac.icverrors++; kfree(mic); return RX_DROP_U_MIC_FAIL; } kfree(mic); } memcpy(key->u.aes_gmac.rx_pn, ipn, 6); /* Remove MMIE */ skb_trim(skb, skb->len - sizeof(*mmie)); return RX_CONTINUE; }
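The CCMP/GCMP header packing above (ccmp_pn2hdr()/gcmp_pn2hdr() and the matching *_hdr2pn() helpers) splits the 48-bit packet number around the key-ID octet, which is easy to get wrong by one byte. The standalone userspace sketch below mirrors that byte layout and asserts the round trip; the helper names, test values and the main() harness are illustrative only and are not part of mac80211.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local stand-ins mirroring ccmp_pn2hdr()/ccmp_hdr2pn() above. */
static void pn2hdr(uint8_t hdr[8], const uint8_t pn[6], int key_id)
{
	hdr[0] = pn[5];                 /* PN0 (least significant) */
	hdr[1] = pn[4];                 /* PN1 */
	hdr[2] = 0;                     /* reserved */
	hdr[3] = 0x20 | (key_id << 6);  /* ExtIV bit | KeyID in bits 6-7 */
	hdr[4] = pn[3];                 /* PN2 */
	hdr[5] = pn[2];                 /* PN3 */
	hdr[6] = pn[1];                 /* PN4 */
	hdr[7] = pn[0];                 /* PN5 (most significant) */
}

static void hdr2pn(uint8_t pn[6], const uint8_t hdr[8])
{
	pn[0] = hdr[7];
	pn[1] = hdr[6];
	pn[2] = hdr[5];
	pn[3] = hdr[4];
	pn[4] = hdr[1];
	pn[5] = hdr[0];
}

int main(void)
{
	const uint8_t pn[6] = { 0x00, 0x00, 0x00, 0x00, 0x12, 0x34 };
	uint8_t hdr[8], out[6];

	pn2hdr(hdr, pn, 1);
	hdr2pn(out, hdr);
	assert(memcmp(pn, out, sizeof(out)) == 0);   /* packing is lossless */
	printf("key_id=%d ext_iv=%d\n", hdr[3] >> 6, !!(hdr[3] & 0x20));
	return 0;
}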
// SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * ARIA Cipher Algorithm. * * Documentation of ARIA can be found in RFC 5794. * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> * * Information for ARIA * http://210.104.33.10/ARIA/index-e.html (English) * http://seed.kisa.or.kr/ (Korean) * * Public domain version is distributed above. */ #include <crypto/aria.h> static const u32 key_rc[20] = { 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0, 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0, 0xdb92371d, 0x2126e970, 0x03249775, 0x04e8c90e, 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0, 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0 }; static void aria_set_encrypt_key(struct aria_ctx *ctx, const u8 *in_key, unsigned int key_len) { const __be32 *key = (const __be32 *)in_key; u32 w0[4], w1[4], w2[4], w3[4]; u32 reg0, reg1, reg2, reg3; const u32 *ck; int rkidx = 0; ck = &key_rc[(key_len - 16) / 2]; w0[0] = be32_to_cpu(key[0]); w0[1] = be32_to_cpu(key[1]); w0[2] = be32_to_cpu(key[2]); w0[3] = be32_to_cpu(key[3]); reg0 = w0[0] ^ ck[0]; reg1 = w0[1] ^ ck[1]; reg2 = w0[2] ^ ck[2]; reg3 = w0[3] ^ ck[3]; aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3); if (key_len > 16) { w1[0] = be32_to_cpu(key[4]); w1[1] = be32_to_cpu(key[5]); if (key_len > 24) { w1[2] = be32_to_cpu(key[6]); w1[3] = be32_to_cpu(key[7]); } else { w1[2] = 0; w1[3] = 0; } } else { w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; } w1[0] ^= reg0; w1[1] ^= reg1; w1[2] ^= reg2; w1[3] ^= reg3; reg0 = w1[0]; reg1 = w1[1]; reg2 = w1[2]; reg3 = w1[3]; reg0 ^= ck[4]; reg1 ^= ck[5]; reg2 ^= ck[6]; reg3 ^= ck[7]; aria_subst_diff_even(&reg0, &reg1, &reg2, &reg3); reg0 ^= w0[0]; reg1 ^= w0[1]; reg2 ^= w0[2]; reg3 ^= w0[3]; w2[0] = reg0; w2[1] = reg1; w2[2] = reg2; w2[3] = reg3; reg0 ^= ck[8]; reg1 ^= ck[9]; reg2 ^= ck[10]; reg3 ^= ck[11]; aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3); w3[0] = reg0 ^ w1[0]; w3[1] = reg1 ^ w1[1]; w3[2] = reg2 ^ w1[2]; w3[3] = reg3 ^ w1[3]; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], 
w3, w0, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 97); if (key_len > 16) { rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 97); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 97); if (key_len > 24) { rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 97); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 109); } } } static void aria_set_decrypt_key(struct aria_ctx *ctx) { int i; for (i = 0; i < 4; i++) { ctx->dec_key[0][i] = ctx->enc_key[ctx->rounds][i]; ctx->dec_key[ctx->rounds][i] = ctx->enc_key[0][i]; } for (i = 1; i < ctx->rounds; i++) { ctx->dec_key[i][0] = aria_m(ctx->enc_key[ctx->rounds - i][0]); ctx->dec_key[i][1] = aria_m(ctx->enc_key[ctx->rounds - i][1]); ctx->dec_key[i][2] = aria_m(ctx->enc_key[ctx->rounds - i][2]); ctx->dec_key[i][3] = aria_m(ctx->enc_key[ctx->rounds - i][3]); aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); aria_diff_byte(&ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); } } int aria_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; BUILD_BUG_ON(sizeof(ctx->enc_key) != 272); BUILD_BUG_ON(sizeof(ctx->dec_key) != 272); BUILD_BUG_ON(sizeof(int) != sizeof(ctx->rounds)); ctx->key_length = key_len; ctx->rounds = (key_len + 32) / 4; aria_set_encrypt_key(ctx, in_key, key_len); aria_set_decrypt_key(ctx); return 0; } EXPORT_SYMBOL_GPL(aria_set_key); static void __aria_crypt(struct aria_ctx *ctx, u8 *out, const u8 *in, u32 key[][ARIA_RD_KEY_WORDS]) { const __be32 *src = (const __be32 *)in; __be32 *dst = (__be32 *)out; u32 reg0, reg1, reg2, reg3; int rounds, rkidx = 0; rounds = ctx->rounds; reg0 = be32_to_cpu(src[0]); reg1 = be32_to_cpu(src[1]); reg2 = be32_to_cpu(src[2]); reg3 = be32_to_cpu(src[3]); aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3); rkidx++; aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3); aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3); rkidx++; while ((rounds -= 2) > 0) { aria_subst_diff_even(&reg0, &reg1, &reg2, &reg3); aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3); rkidx++; aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3); aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3); rkidx++; } reg0 = key[rkidx][0] ^ make_u32((u8)(x1[get_u8(reg0, 0)]), (u8)(x2[get_u8(reg0, 1)] >> 8), (u8)(s1[get_u8(reg0, 2)]), (u8)(s2[get_u8(reg0, 3)])); reg1 = key[rkidx][1] ^ make_u32((u8)(x1[get_u8(reg1, 0)]), (u8)(x2[get_u8(reg1, 1)] >> 8), (u8)(s1[get_u8(reg1, 2)]), (u8)(s2[get_u8(reg1, 3)])); reg2 = key[rkidx][2] ^ make_u32((u8)(x1[get_u8(reg2, 0)]), (u8)(x2[get_u8(reg2, 1)] >> 8), (u8)(s1[get_u8(reg2, 2)]), (u8)(s2[get_u8(reg2, 3)])); reg3 = key[rkidx][3] ^ make_u32((u8)(x1[get_u8(reg3, 0)]), (u8)(x2[get_u8(reg3, 1)] >> 8), (u8)(s1[get_u8(reg3, 2)]), (u8)(s2[get_u8(reg3, 3)])); dst[0] = cpu_to_be32(reg0); dst[1] = cpu_to_be32(reg1); dst[2] = cpu_to_be32(reg2); dst[3] = cpu_to_be32(reg3); } void aria_encrypt(void *_ctx, u8 *out, const u8 *in) { struct aria_ctx *ctx = (struct aria_ctx *)_ctx; __aria_crypt(ctx, out, in, ctx->enc_key); } EXPORT_SYMBOL_GPL(aria_encrypt); void aria_decrypt(void *_ctx, u8 *out, const u8 *in) { 
struct aria_ctx *ctx = (struct aria_ctx *)_ctx; __aria_crypt(ctx, out, in, ctx->dec_key); } EXPORT_SYMBOL_GPL(aria_decrypt); static void __aria_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); __aria_crypt(ctx, out, in, ctx->enc_key); } static void __aria_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); __aria_crypt(ctx, out, in, ctx->dec_key); } static struct crypto_alg aria_alg = { .cra_name = "aria", .cra_driver_name = "aria-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = ARIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aria_ctx), .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = ARIA_MIN_KEY_SIZE, .cia_max_keysize = ARIA_MAX_KEY_SIZE, .cia_setkey = aria_set_key, .cia_encrypt = __aria_encrypt, .cia_decrypt = __aria_decrypt } } }; static int __init aria_init(void) { return crypto_register_alg(&aria_alg); } static void __exit aria_fini(void) { crypto_unregister_alg(&aria_alg); } subsys_initcall(aria_init); module_exit(aria_fini); MODULE_DESCRIPTION("ARIA Cipher Algorithm"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); MODULE_ALIAS_CRYPTO("aria"); MODULE_ALIAS_CRYPTO("aria-generic");
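Two small derived quantities drive the key schedule in aria_set_key()/aria_set_encrypt_key() above: the round count, (key_len + 32) / 4, and the starting word offset into key_rc[], (key_len - 16) / 2. The snippet below is a quick userspace check of both values; it is purely illustrative and does not use any kernel interfaces.

#include <stdio.h>

int main(void)
{
	static const unsigned int key_lens[] = { 16, 24, 32 };

	for (unsigned int i = 0; i < 3; i++) {
		unsigned int key_len = key_lens[i];
		unsigned int rounds = (key_len + 32) / 4;   /* 12, 14 or 16, as in RFC 5794 */
		unsigned int ck_off = (key_len - 16) / 2;   /* 0, 4 or 8 words into key_rc[] */

		printf("ARIA-%u: %u rounds, CK constants start at key_rc[%u]\n",
		       key_len * 8, rounds, ck_off);
	}
	return 0;
}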
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/posix_acl.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dmap.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" #include "jfs_debug.h" int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int rc = 0; rc = file_write_and_wait_range(file, start, end); if (rc) return rc; inode_lock(inode); if (!(inode->i_state & I_DIRTY_ALL) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); inode_unlock(inode); return rc; } rc |= jfs_commit_inode(inode, 1); inode_unlock(inode); return rc ? -EIO : 0; } static int jfs_open(struct inode *inode, struct file *file) { int rc; if ((rc = dquot_file_open(inode, file))) return rc; /* * We attempt to allow only one "active" file open per aggregate * group. Otherwise, appending to files in parallel can cause * fragmentation within the files. * * If the file is empty, it was probably just created and going * to be written to. If it has a size, we'll hold off until the * file is actually grown. 
*/ if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && (inode->i_size == 0)) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } return 0; } static int jfs_release(struct inode *inode, struct file *file) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag != -1) { struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; atomic_dec(&bmap->db_active[ji->active_ag]); ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); return 0; } int jfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int rc; rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (rc) return rc; if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { rc = dquot_initialize(inode); if (rc) return rc; } if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { rc = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (rc) return rc; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); rc = inode_newsize_ok(inode, iattr->ia_size); if (rc) return rc; truncate_setsize(inode, iattr->ia_size); jfs_truncate(inode); } setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return rc; } const struct inode_operations jfs_file_inode_operations = { .listxattr = jfs_listxattr, .setattr = jfs_setattr, .fileattr_get = jfs_fileattr_get, .fileattr_set = jfs_fileattr_set, #ifdef CONFIG_JFS_POSIX_ACL .get_inode_acl = jfs_get_acl, .set_acl = jfs_set_acl, #endif }; const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .unlocked_ioctl = jfs_ioctl, .compat_ioctl = compat_ptr_ioctl, };
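jfs_fsync() above serves both fsync(2) and fdatasync(2); the VFS passes datasync != 0 for the latter, which is what lets the handler skip a full inode commit when only inessential metadata is dirty. Below is a minimal userspace sketch of the two calls this handler answers; the file name is arbitrary and the program is illustrative only.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "hello\n", 6) != 6)
		perror("write");
	if (fdatasync(fd) < 0)   /* data plus the metadata needed to read it back */
		perror("fdatasync");
	if (fsync(fd) < 0)       /* data plus all metadata (e.g. timestamps) */
		perror("fsync");
	close(fd);
	return 0;
}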
// SPDX-License-Identifier: GPL-2.0-or-later /* AFS cell and server record management * * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/key.h> #include <linux/ctype.h> #include <linux/dns_resolver.h> #include <linux/sched.h> #include <linux/inet.h> #include <linux/namei.h> #include <keys/rxrpc-type.h> #include "internal.h" static unsigned __read_mostly afs_cell_gc_delay = 10; static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; static atomic_t cell_debug_id; static void afs_queue_cell_manager(struct afs_net *); static void afs_manage_cell_work(struct work_struct *); static void afs_dec_cells_outstanding(struct afs_net *net) { if (atomic_dec_and_test(&net->cells_outstanding)) wake_up_var(&net->cells_outstanding); } /* * Set the cell timer to fire after a given delay, assuming it's not already * set for an earlier time. */ static void afs_set_cell_timer(struct afs_net *net, time64_t delay) { if (net->live) { atomic_inc(&net->cells_outstanding); if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) afs_dec_cells_outstanding(net); } else { afs_queue_cell_manager(net); } } /* * Look up and get an activation reference on a cell record. The caller must * hold net->cells_lock at least read-locked. */ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, const char *name, unsigned int namesz, enum afs_cell_trace reason) { struct afs_cell *cell = NULL; struct rb_node *p; int n; _enter("%*.*s", namesz, namesz, name); if (name && namesz == 0) return ERR_PTR(-EINVAL); if (namesz > AFS_MAXCELLNAME) return ERR_PTR(-ENAMETOOLONG); if (!name) { cell = net->ws_cell; if (!cell) return ERR_PTR(-EDESTADDRREQ); goto found; } p = net->cells.rb_node; while (p) { cell = rb_entry(p, struct afs_cell, net_node); n = strncasecmp(cell->name, name, min_t(size_t, cell->name_len, namesz)); if (n == 0) n = cell->name_len - namesz; if (n < 0) p = p->rb_left; else if (n > 0) p = p->rb_right; else goto found; } return ERR_PTR(-ENOENT); found: return afs_use_cell(cell, reason); } /* * Look up and get an activation reference on a cell record. */ struct afs_cell *afs_find_cell(struct afs_net *net, const char *name, unsigned int namesz, enum afs_cell_trace reason) { struct afs_cell *cell; down_read(&net->cells_lock); cell = afs_find_cell_locked(net, name, namesz, reason); up_read(&net->cells_lock); return cell; } /* * Set up a cell record and fill in its name, VL server address list and * allocate an anonymous key */ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, const char *addresses) { struct afs_vlserver_list *vllist; struct afs_cell *cell; int i, ret; ASSERT(name); if (namelen == 0) return ERR_PTR(-EINVAL); if (namelen > AFS_MAXCELLNAME) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } /* Prohibit cell names that contain unprintable chars, '/' and '@' or * that begin with a dot. This also precludes "@cell". 
*/ if (name[0] == '.') return ERR_PTR(-EINVAL); for (i = 0; i < namelen; i++) { char ch = name[i]; if (!isprint(ch) || ch == '/' || ch == '@') return ERR_PTR(-EINVAL); } _enter("%*.*s,%s", namelen, namelen, name, addresses); cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); if (!cell) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } cell->name = kmalloc(namelen + 1, GFP_KERNEL); if (!cell->name) { kfree(cell); return ERR_PTR(-ENOMEM); } cell->net = net; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); cell->name[i] = 0; refcount_set(&cell->ref, 1); atomic_set(&cell->active, 0); INIT_WORK(&cell->manager, afs_manage_cell_work); init_rwsem(&cell->vs_lock); cell->volumes = RB_ROOT; INIT_HLIST_HEAD(&cell->proc_volumes); seqlock_init(&cell->volume_lock); cell->fs_servers = RB_ROOT; seqlock_init(&cell->fs_lock); rwlock_init(&cell->vl_servers_lock); cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); /* Provide a VL server list, filling it in if we were given a list of * addresses to use. */ if (addresses) { vllist = afs_parse_text_addrs(net, addresses, strlen(addresses), ':', VL_SERVICE, AFS_VL_PORT); if (IS_ERR(vllist)) { ret = PTR_ERR(vllist); goto parse_failed; } vllist->source = DNS_RECORD_FROM_CONFIG; vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = TIME64_MAX; } else { ret = -ENOMEM; vllist = afs_alloc_vlserver_list(0); if (!vllist) goto error; vllist->source = DNS_RECORD_UNAVAILABLE; vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = ktime_get_real_seconds(); } rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_source = vllist->source; cell->dns_status = vllist->status; smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ atomic_inc(&net->cells_outstanding); cell->debug_id = atomic_inc_return(&cell_debug_id); trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc); _leave(" = %p", cell); return cell; parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: kfree(cell->name); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); } /* * afs_lookup_cell - Look up or create a cell record. * @net: The network namespace * @name: The name of the cell. * @namesz: The strlen of the cell name. * @vllist: A colon/comma separated list of numeric IP addresses or NULL. * @excl: T if an error should be given if the cell name already exists. * * Look up a cell record by name and query the DNS for VL server addresses if * needed. Note that that actual DNS query is punted off to the manager thread * so that this function can return immediately if interrupted whilst allowing * cell records to be shared even if not yet fully constructed. */ struct afs_cell *afs_lookup_cell(struct afs_net *net, const char *name, unsigned int namesz, const char *vllist, bool excl) { struct afs_cell *cell, *candidate, *cursor; struct rb_node *parent, **pp; enum afs_cell_state state; int ret, n; _enter("%s,%s", name, vllist); if (!excl) { cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup); if (!IS_ERR(cell)) goto wait_for_cell; } /* Assume we're probably going to create a cell and preallocate and * mostly set up a candidate record. We can then use this to stash the * name, the net namespace and VL server addresses. * * We also want to do this before we hold any locks as it may involve * upcalling to userspace to make DNS queries. 
*/ candidate = afs_alloc_cell(net, name, namesz, vllist); if (IS_ERR(candidate)) { _leave(" = %ld", PTR_ERR(candidate)); return candidate; } /* Find the insertion point and check to see if someone else added a * cell whilst we were allocating. */ down_write(&net->cells_lock); pp = &net->cells.rb_node; parent = NULL; while (*pp) { parent = *pp; cursor = rb_entry(parent, struct afs_cell, net_node); n = strncasecmp(cursor->name, name, min_t(size_t, cursor->name_len, namesz)); if (n == 0) n = cursor->name_len - namesz; if (n < 0) pp = &(*pp)->rb_left; else if (n > 0) pp = &(*pp)->rb_right; else goto cell_already_exists; } cell = candidate; candidate = NULL; atomic_set(&cell->active, 2); trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert); rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); up_write(&net->cells_lock); afs_queue_cell(cell, afs_cell_trace_get_queue_new); wait_for_cell: trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active), afs_cell_trace_wait); _debug("wait_for_cell"); wait_var_event(&cell->state, ({ state = smp_load_acquire(&cell->state); /* vs error */ state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED; })); /* Check the state obtained from the wait check. */ if (state == AFS_CELL_REMOVED) { ret = cell->error; goto error; } _leave(" = %p [cell]", cell); return cell; cell_already_exists: _debug("cell exists"); cell = cursor; if (excl) { ret = -EEXIST; } else { afs_use_cell(cursor, afs_cell_trace_use_lookup); ret = 0; } up_write(&net->cells_lock); if (candidate) afs_put_cell(candidate, afs_cell_trace_put_candidate); if (ret == 0) goto wait_for_cell; goto error_noput; error: afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup); error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); } /* * set the root cell information * - can be called with a module parameter string * - can be called from a write to /proc/fs/afs/rootcell */ int afs_cell_init(struct afs_net *net, const char *rootcell) { struct afs_cell *old_root, *new_root; const char *cp, *vllist; size_t len; _enter(""); if (!rootcell) { /* module is loaded with no parameters, or built statically. * - in the future we might initialize cell DB here. */ _leave(" = 0 [no root]"); return 0; } cp = strchr(rootcell, ':'); if (!cp) { _debug("kAFS: no VL server IP addresses specified"); vllist = NULL; len = strlen(rootcell); } else { vllist = cp + 1; len = cp - rootcell; } /* allocate a cell record for the root cell */ new_root = afs_lookup_cell(net, rootcell, len, vllist, false); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); return PTR_ERR(new_root); } if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) afs_use_cell(new_root, afs_cell_trace_use_pin); /* install the new cell */ down_write(&net->cells_lock); afs_see_cell(new_root, afs_cell_trace_see_ws); old_root = net->ws_cell; net->ws_cell = new_root; up_write(&net->cells_lock); afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); _leave(" = 0"); return 0; } /* * Update a cell's VL server address list from the DNS. 
*/ static int afs_update_cell(struct afs_cell *cell) { struct afs_vlserver_list *vllist, *old = NULL, *p; unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); time64_t now, expiry = 0; int ret = 0; _enter("%s", cell->name); vllist = afs_dns_query(cell, &expiry); if (IS_ERR(vllist)) { ret = PTR_ERR(vllist); _debug("%s: fail %d", cell->name, ret); if (ret == -ENOMEM) goto out_wake; vllist = afs_alloc_vlserver_list(0); if (!vllist) { if (ret >= 0) ret = -ENOMEM; goto out_wake; } switch (ret) { case -ENODATA: case -EDESTADDRREQ: vllist->status = DNS_LOOKUP_GOT_NOT_FOUND; break; case -EAGAIN: case -ECONNREFUSED: vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE; break; default: vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE; break; } } _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status); cell->dns_status = vllist->status; now = ktime_get_real_seconds(); if (min_ttl > max_ttl) max_ttl = min_ttl; if (expiry < now + min_ttl) expiry = now + min_ttl; else if (expiry > now + max_ttl) expiry = now + max_ttl; _debug("%s: status %d", cell->name, vllist->status); if (vllist->source == DNS_RECORD_UNAVAILABLE) { switch (vllist->status) { case DNS_LOOKUP_GOT_NOT_FOUND: /* The DNS said that the cell does not exist or there * weren't any addresses to be had. */ cell->dns_expiry = expiry; break; case DNS_LOOKUP_BAD: case DNS_LOOKUP_GOT_LOCAL_FAILURE: case DNS_LOOKUP_GOT_TEMP_FAILURE: case DNS_LOOKUP_GOT_NS_FAILURE: default: cell->dns_expiry = now + 10; break; } } else { cell->dns_expiry = expiry; } /* Replace the VL server list if the new record has servers or the old * record doesn't. */ write_lock(&cell->vl_servers_lock); p = rcu_dereference_protected(cell->vl_servers, true); if (vllist->nr_servers > 0 || p->nr_servers == 0) { rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_source = vllist->source; old = p; } write_unlock(&cell->vl_servers_lock); afs_put_vlserverlist(cell->net, old); out_wake: smp_store_release(&cell->dns_lookup_count, cell->dns_lookup_count + 1); /* vs source/status */ wake_up_var(&cell->dns_lookup_count); _leave(" = %d", ret); return ret; } /* * Destroy a cell record */ static void afs_cell_destroy(struct rcu_head *rcu) { struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); struct afs_net *net = cell->net; int r; _enter("%p{%s}", cell, cell->name); r = refcount_read(&cell->ref); ASSERTCMP(r, ==, 0); trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free); afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias); key_put(cell->anonymous_key); kfree(cell->name); kfree(cell); afs_dec_cells_outstanding(net); _leave(" [destroyed]"); } /* * Queue the cell manager. */ static void afs_queue_cell_manager(struct afs_net *net) { int outstanding = atomic_inc_return(&net->cells_outstanding); _enter("%d", outstanding); if (!queue_work(afs_wq, &net->cells_manager)) afs_dec_cells_outstanding(net); } /* * Cell management timer. We have an increment on cells_outstanding that we * need to pass along to the work item. */ void afs_cells_timer(struct timer_list *timer) { struct afs_net *net = container_of(timer, struct afs_net, cells_timer); _enter(""); if (!queue_work(afs_wq, &net->cells_manager)) afs_dec_cells_outstanding(net); } /* * Get a reference on a cell record. 
*/ struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason) { int r; __refcount_inc(&cell->ref, &r); trace_afs_cell(cell->debug_id, r + 1, atomic_read(&cell->active), reason); return cell; } /* * Drop a reference on a cell record. */ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason) { if (cell) { unsigned int debug_id = cell->debug_id; unsigned int a; bool zero; int r; a = atomic_read(&cell->active); zero = __refcount_dec_and_test(&cell->ref, &r); trace_afs_cell(debug_id, r - 1, a, reason); if (zero) { a = atomic_read(&cell->active); WARN(a != 0, "Cell active count %u > 0\n", a); call_rcu(&cell->rcu, afs_cell_destroy); } } } /* * Note a cell becoming more active. */ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) { int r, a; r = refcount_read(&cell->ref); WARN_ON(r == 0); a = atomic_inc_return(&cell->active); trace_afs_cell(cell->debug_id, r, a, reason); return cell; } /* * Record a cell becoming less active. When the active counter reaches 1, it * is scheduled for destruction, but may get reactivated. */ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) { unsigned int debug_id; time64_t now, expire_delay; int r, a; if (!cell) return; _enter("%s", cell->name); now = ktime_get_real_seconds(); cell->last_inactive = now; expire_delay = 0; if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; debug_id = cell->debug_id; r = refcount_read(&cell->ref); a = atomic_dec_return(&cell->active); trace_afs_cell(debug_id, r, a, reason); WARN_ON(a == 0); if (a == 1) /* 'cell' may now be garbage collected. */ afs_set_cell_timer(net, expire_delay); } /* * Note that a cell has been seen. */ void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason) { int r, a; r = refcount_read(&cell->ref); a = atomic_read(&cell->active); trace_afs_cell(cell->debug_id, r, a, reason); } /* * Queue a cell for management, giving the workqueue a ref to hold. */ void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason) { afs_get_cell(cell, reason); if (!queue_work(afs_wq, &cell->manager)) afs_put_cell(cell, afs_cell_trace_put_queue_fail); } /* * Allocate a key to use as a placeholder for anonymous user security. */ static int afs_alloc_anon_key(struct afs_cell *cell) { struct key *key; char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp; /* Create a key to represent an anonymous user. */ memcpy(keyname, "afs@", 4); dp = keyname + 4; cp = cell->name; do { *dp++ = tolower(*cp); } while (*cp++); key = rxrpc_get_null_key(keyname); if (IS_ERR(key)) return PTR_ERR(key); cell->anonymous_key = key; _debug("anon key %p{%x}", cell->anonymous_key, key_serial(cell->anonymous_key)); return 0; } /* * Activate a cell. */ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) { struct hlist_node **p; struct afs_cell *pcell; int ret; if (!cell->anonymous_key) { ret = afs_alloc_anon_key(cell); if (ret < 0) return ret; } ret = afs_proc_cell_setup(cell); if (ret < 0) return ret; mutex_lock(&net->proc_cells_lock); for (p = &net->proc_cells.first; *p; p = &(*p)->next) { pcell = hlist_entry(*p, struct afs_cell, proc_link); if (strcmp(cell->name, pcell->name) < 0) break; } cell->proc_link.pprev = p; cell->proc_link.next = *p; rcu_assign_pointer(*p, &cell->proc_link.next); if (cell->proc_link.next) cell->proc_link.next->pprev = &cell->proc_link.next; afs_dynroot_mkdir(net, cell); mutex_unlock(&net->proc_cells_lock); return 0; } /* * Deactivate a cell. 
*/ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) { _enter("%s", cell->name); afs_proc_cell_remove(cell); mutex_lock(&net->proc_cells_lock); hlist_del_rcu(&cell->proc_link); afs_dynroot_rmdir(net, cell); mutex_unlock(&net->proc_cells_lock); _leave(""); } /* * Manage a cell record, initialising and destroying it, maintaining its DNS * records. */ static void afs_manage_cell(struct afs_cell *cell) { struct afs_net *net = cell->net; int ret, active; _enter("%s", cell->name); again: _debug("state %u", cell->state); switch (cell->state) { case AFS_CELL_INACTIVE: case AFS_CELL_FAILED: down_write(&net->cells_lock); active = 1; if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { rb_erase(&cell->net_node, &net->cells); trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0, afs_cell_trace_unuse_delete); smp_store_release(&cell->state, AFS_CELL_REMOVED); } up_write(&net->cells_lock); if (cell->state == AFS_CELL_REMOVED) { wake_up_var(&cell->state); goto final_destruction; } if (cell->state == AFS_CELL_FAILED) goto done; smp_store_release(&cell->state, AFS_CELL_UNSET); wake_up_var(&cell->state); goto again; case AFS_CELL_UNSET: smp_store_release(&cell->state, AFS_CELL_ACTIVATING); wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVATING: ret = afs_activate_cell(net, cell); if (ret < 0) goto activation_failed; smp_store_release(&cell->state, AFS_CELL_ACTIVE); wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVE: if (atomic_read(&cell->active) > 1) { if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { ret = afs_update_cell(cell); if (ret < 0) cell->error = ret; } goto done; } smp_store_release(&cell->state, AFS_CELL_DEACTIVATING); wake_up_var(&cell->state); goto again; case AFS_CELL_DEACTIVATING: if (atomic_read(&cell->active) > 1) goto reverse_deactivation; afs_deactivate_cell(net, cell); smp_store_release(&cell->state, AFS_CELL_INACTIVE); wake_up_var(&cell->state); goto again; case AFS_CELL_REMOVED: goto done; default: break; } _debug("bad state %u", cell->state); BUG(); /* Unhandled state */ activation_failed: cell->error = ret; afs_deactivate_cell(net, cell); smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */ wake_up_var(&cell->state); goto again; reverse_deactivation: smp_store_release(&cell->state, AFS_CELL_ACTIVE); wake_up_var(&cell->state); _leave(" [deact->act]"); return; done: _leave(" [done %u]", cell->state); return; final_destruction: /* The root volume is pinning the cell */ afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root); cell->root_volume = NULL; afs_put_cell(cell, afs_cell_trace_put_destroy); } static void afs_manage_cell_work(struct work_struct *work) { struct afs_cell *cell = container_of(work, struct afs_cell, manager); afs_manage_cell(cell); afs_put_cell(cell, afs_cell_trace_put_queue_work); } /* * Manage the records of cells known to a network namespace. This includes * updating the DNS records and garbage collecting unused cells that were * automatically added. * * Note that constructed cell records may only be removed from net->cells by * this work item, so it is safe for this work item to stash a cursor pointing * into the tree and then return to caller (provided it skips cells that are * still under construction). * * Note also that we were given an increment on net->cells_outstanding by * whoever queued us that we need to deal with before returning. 
*/ void afs_manage_cells(struct work_struct *work) { struct afs_net *net = container_of(work, struct afs_net, cells_manager); struct rb_node *cursor; time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; bool purging = !net->live; _enter(""); /* Trawl the cell database looking for cells that have expired from * lack of use and cells whose DNS results have expired and dispatch * their managers. */ down_read(&net->cells_lock); for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node); unsigned active; bool sched_cell = false; active = atomic_read(&cell->active); trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), active, afs_cell_trace_manage); ASSERTCMP(active, >=, 1); if (purging) { if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) { active = atomic_dec_return(&cell->active); trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), active, afs_cell_trace_unuse_pin); } } if (active == 1) { struct afs_vlserver_list *vllist; time64_t expire_at = cell->last_inactive; read_lock(&cell->vl_servers_lock); vllist = rcu_dereference_protected( cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock)); if (vllist->nr_servers > 0) expire_at += afs_cell_gc_delay; read_unlock(&cell->vl_servers_lock); if (purging || expire_at <= now) sched_cell = true; else if (expire_at < next_manage) next_manage = expire_at; } if (!purging) { if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) sched_cell = true; } if (sched_cell) afs_queue_cell(cell, afs_cell_trace_get_queue_manage); } up_read(&net->cells_lock); /* Update the timer on the way out. We have to pass an increment on * cells_outstanding in the namespace that we are in to the timer or * the work scheduler. */ if (!purging && next_manage < TIME64_MAX) { now = ktime_get_real_seconds(); if (next_manage - now <= 0) { if (queue_work(afs_wq, &net->cells_manager)) atomic_inc(&net->cells_outstanding); } else { afs_set_cell_timer(net, next_manage - now); } } afs_dec_cells_outstanding(net); _leave(" [%d]", atomic_read(&net->cells_outstanding)); } /* * Purge in-memory cell database. */ void afs_cell_purge(struct afs_net *net) { struct afs_cell *ws; _enter(""); down_write(&net->cells_lock); ws = net->ws_cell; net->ws_cell = NULL; up_write(&net->cells_lock); afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); _debug("del timer"); if (del_timer_sync(&net->cells_timer)) atomic_dec(&net->cells_outstanding); _debug("kick mgr"); afs_queue_cell_manager(net); _debug("wait"); wait_var_event(&net->cells_outstanding, !atomic_read(&net->cells_outstanding)); _leave(""); }
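The cell rb-tree in afs_find_cell_locked() and afs_lookup_cell() above orders entries with a case-insensitive compare over the shorter of the two names, then uses the length difference as the tie-break. The standalone sketch below reproduces that comparator in userspace; cell_name_cmp() is a local stand-in rather than an AFS function, and the sample cell names are only examples.

#include <stdio.h>
#include <string.h>
#include <strings.h>

static int cell_name_cmp(const char *cell_name, size_t cell_len,
			 const char *name, size_t namesz)
{
	size_t min_len = cell_len < namesz ? cell_len : namesz;
	int n = strncasecmp(cell_name, name, min_len);

	if (n == 0)
		n = (int)cell_len - (int)namesz;
	return n;   /* < 0: walk rb_left, > 0: walk rb_right, 0: match */
}

int main(void)
{
	printf("%d\n", cell_name_cmp("grand.central.org", 17,
				     "GRAND.CENTRAL.ORG", 17));  /* 0: same cell, case-insensitive */
	printf("%d\n", cell_name_cmp("example.org", 11,
				     "example.org.test", 16));   /* < 0: shorter name sorts first */
	return 0;
}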
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/capability.h>
#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <net/ipv6.h>
#include <net/compat.h>
#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_log.h>
#include "../../netfilter/xt_repldata.h"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");

void *ip6t_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ip6t, IP6T);
}
EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);

/* Returns whether matches rule or not.
*/ /* Performance critical - called for every packet */ static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, u16 *fragoff, bool *hotdrop) { unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); if (NF_INVF(ip6info, IP6T_INV_SRCIP, ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src)) || NF_INVF(ip6info, IP6T_INV_DSTIP, ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ /* look for the desired protocol header */ if (ip6info->flags & IP6T_F_PROTO) { int protohdr; unsigned short _frag_off; protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; return false; } *fragoff = _frag_off; if (ip6info->proto == protohdr) { if (ip6info->invflags & IP6T_INV_PROTO) return false; return true; } /* We need match for the '-p all', too! */ if ((ip6info->proto != 0) && !(ip6info->invflags & IP6T_INV_PROTO)) return false; } return true; } /* should be ip6 safe */ static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) return false; if (ipv6->invflags & ~IP6T_INV_MASK) return false; return true; } static unsigned int ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } static inline struct ip6t_entry * get_entry(const void *base, unsigned int offset) { return (struct ip6t_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; return e->target_offset == sizeof(struct ip6t_entry) && memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * ip6t_get_target_c(const struct ip6t_entry *e) { return ip6t_get_target((struct ip6t_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! 
*/ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { NF_IP6_TRACE_COMMENT_RULE, NF_IP6_TRACE_COMMENT_RETURN, NF_IP6_TRACE_COMMENT_POLICY, }; static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_RULE] = "rule", [NF_IP6_TRACE_COMMENT_RETURN] = "return", [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = LOGLEVEL_WARNING, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ *chainname = t->target.data; (*rulenum) = 0; } else if (s == e) { (*rulenum)++; if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] : comments[NF_IP6_TRACE_COMMENT_RETURN]; } return 1; } else (*rulenum)++; return 0; } static void trace_packet(struct net *net, const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, const struct xt_table_info *private, const struct ip6t_entry *e) { const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) if (get_chainname_rulenum(iter, e, hookname, &chainname, &comment, &rulenum) != 0) break; nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } #endif static inline struct ip6t_entry * ip6t_next_entry(const struct ip6t_entry *entry) { return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; struct ip6t_entry *e, **jumpstack; unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ stackidx = 0; indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; /* Switch to alternate jumpstack if we're being invoked via TEE. * TEE issues XT_CONTINUE verdict on original skb so we must not * clobber the jumpstack. * * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. */ if (static_key_false(&xt_tee_enabled)) jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; struct xt_counters *counter; WARN_ON(!e); acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: e = ip6t_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(state->net, skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) e = get_entry(table_base, private->underflow[hook]); else e = ip6t_next_entry(jumpstack[--stackidx]); continue; } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) e = ip6t_next_entry(e); else /* Verdict */ break; } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last big jump. 
*/ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static void cleanup_match(struct xt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; par->match = m->u.kernel.match; par->matchinfo = m->data; return xt_check_match(par, m->u.match_size - sizeof(*m), ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); } static int find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; ret = check_match(m, par); if (ret) goto err; return 0; err: module_put(m->u.kernel.match->me); return ret; } static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV6, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); } static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; xt_ematch_foreach(ematch, e) { ret = find_check_match(ematch, &mtpar); if (ret != 0) goto cleanup_matches; ++j; } t = ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; cleanup_match(ematch, net); } xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct ip6t_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; 
verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) cleanup_match(ematch, net); t = ip6t_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; xt_entry_foreach(iter, t->entries, t->size) { const struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... 
then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; const struct xt_entry_match *m; const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } for (i = sizeof(struct ip6t_entry); i < e->target_offset; i += m->u.match_size) { m = (void *)e + i; if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(AF_INET6, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(AF_INET6, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_match *ematch; const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) off += xt_compat_match_offset(ematch->u.kernel.match); t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_INET_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct ip6t_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct ip6t_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(AF_INET6, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct ip6t_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(AF_INET6); #endif t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; 
memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); memcpy(info.underflow, private->underflow, sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; strcpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; xt_table_unlock(t); module_put(t->me); } else ret = PTR_ERR(t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_unlock(AF_INET6); #endif return ret; } static int get_entries(struct net *net, struct ip6t_get_entries __user *uptr, const int *len) { int ret; struct ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else ret = -EAGAIN; module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); return ret; } static int __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { int ret; struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; struct ip6t_entry *iter; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; } t = xt_request_find_table_lock(net, AF_INET6, name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } /* You lied! */ if (valid_hooks != t->valid_hooks) { ret = -EINVAL; goto put_module; } oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); xt_table_unlock(t); get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); } vfree(counters); return 0; put_module: module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: vfree(counters); out: return ret; } static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) 
goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free; } local_bh_disable(); private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: vfree(paddc); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct compat_ip6t_replace { char name[XT_TABLE_MAXNAMELEN]; u32 valid_hooks; u32 num_entries; u32 size; u32 hook_entry[NF_INET_NUMHOOKS]; u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ struct compat_ip6t_entry entries[]; }; static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, unsigned int i) { struct xt_entry_target *t; struct compat_ip6t_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; const struct xt_entry_match *ematch; int ret = 0; origsize = *size; ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) return ret; } target_offset = e->target_offset - (origsize - *size); t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(target_offset, &ce->target_offset) != 0 || put_user(next_offset, &ce->next_offset) != 0) return -EFAULT; return 0; } static int compat_find_calc_match(struct xt_entry_match *m, const struct ip6t_ip6 *ipv6, int *size) { struct xt_match *match; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; } static void compat_release_entry(struct compat_ip6t_entry *e) { struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) module_put(ematch->u.kernel.match->me); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); } static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; 
int ret, off; if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct compat_ip6t_entry) + sizeof(struct compat_xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; } t = compat_ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) goto out; return 0; out: module_put(t->u.kernel.target->me); release_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; module_put(ematch->u.kernel.match->me); } return ret; } static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; int h; struct xt_entry_match *ematch; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) xt_compat_match_from_user(ematch, dstptr, size); de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; struct ip6t_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(AF_INET6); ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_INET_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_ip6t_entry entrytable[]; }; static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct ip6t_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } static int compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, int *len) { int ret; struct compat_ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) 
return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(AF_INET6); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(AF_INET6); return ret; } #endif static int do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case IP6T_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case IP6T_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; else target = 0; try_then_request_module(xt_find_revision(AF_INET6, rev.name, rev.revision, target, &ret), "ip6t_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct ip6t_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } if (!template_ops) return 0; num_ops = 
hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __ip6t_unregister_table(net, new_table); return ret; } void ip6t_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ip6t_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ static struct xt_target ip6t_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_IPV6, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = ip6t_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_IPV6, }, }; static struct nf_sockopt_ops ip6t_sockopts = { .pf = PF_INET6, .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, .owner = THIS_MODULE, }; static int __net_init ip6_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV6); } static void __net_exit ip6_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_IPV6); } static struct pernet_operations ip6_tables_net_ops = { .init = ip6_tables_net_init, .exit = ip6_tables_net_exit, }; static int __init ip6_tables_init(void) { int ret; ret = register_pernet_subsys(&ip6_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: unregister_pernet_subsys(&ip6_tables_net_ops); err1: return ret; } static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); module_exit(ip6_tables_fini);
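/*
 * A stand-alone user-space sketch (not taken from the ip6_tables.c sources
 * above) of the standard-target verdict encoding that ip6t_do_table() and
 * check_underflow() decode with "-v - 1": built-in verdicts are stored as
 * negative numbers so that non-negative values remain available as jump
 * offsets into the rule blob. The NF_DROP/NF_ACCEPT values below are assumed
 * to mirror the uapi <linux/netfilter.h> constants.
 */
#include <assert.h>
#include <stdio.h>

#define NF_DROP		0
#define NF_ACCEPT	1

/* Encode a built-in verdict the way a standard target stores it. */
static int encode_std_verdict(unsigned int verdict)
{
	return -(int)verdict - 1;
}

/* Decode it the way the table traversal does. */
static unsigned int decode_std_verdict(int v)
{
	return (unsigned int)(-v) - 1;
}

int main(void)
{
	assert(decode_std_verdict(encode_std_verdict(NF_ACCEPT)) == NF_ACCEPT);
	assert(decode_std_verdict(encode_std_verdict(NF_DROP)) == NF_DROP);
	printf("NF_ACCEPT stored as %d, NF_DROP stored as %d\n",
	       encode_std_verdict(NF_ACCEPT), encode_std_verdict(NF_DROP));
	return 0;
}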
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM csd

#if !defined(_TRACE_CSD_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CSD_H

#include <linux/tracepoint.h>

TRACE_EVENT(csd_queue_cpu,

	TP_PROTO(const unsigned int cpu,
		 unsigned long callsite,
		 smp_call_func_t func,
		 call_single_data_t *csd),

	TP_ARGS(cpu, callsite, func, csd),

	TP_STRUCT__entry(
		__field(unsigned int, cpu)
		__field(void *, callsite)
		__field(void *, func)
		__field(void *, csd)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->callsite = (void *)callsite;
		__entry->func = func;
		__entry->csd = csd;
	),

	TP_printk("cpu=%u callsite=%pS func=%ps csd=%p",
		  __entry->cpu, __entry->callsite, __entry->func, __entry->csd)
);

/*
 * Tracepoints for a function which is called as an effect of smp_call_function.*
 */
DECLARE_EVENT_CLASS(csd_function,

	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),

	TP_ARGS(func, csd),

	TP_STRUCT__entry(
		__field(void *, func)
		__field(void *, csd)
	),

	TP_fast_assign(
		__entry->func = func;
		__entry->csd = csd;
	),

	TP_printk("func=%ps, csd=%p", __entry->func, __entry->csd)
);

DEFINE_EVENT(csd_function, csd_function_entry,
	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
	TP_ARGS(func, csd)
);

DEFINE_EVENT(csd_function, csd_function_exit,
	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
	TP_ARGS(func, csd)
);

#endif /* _TRACE_CSD_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
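/*
 * A stand-alone sketch of enabling the csd trace events from user space via
 * tracefs. The /sys/kernel/tracing mount point is assumed (the same control
 * file may also appear under /sys/kernel/debug/tracing); the "csd" event
 * directory name follows from TRACE_SYSTEM csd in the header above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int enable_trace_events(const char *path)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}

int main(void)
{
	/* Enables csd_queue_cpu, csd_function_entry and csd_function_exit. */
	return enable_trace_events("/sys/kernel/tracing/events/csd/enable");
}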
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_MLD_H
#define LINUX_MLD_H

#include <linux/in6.h>
#include <linux/icmpv6.h>

/* MLDv1 Query/Report/Done */
struct mld_msg {
	struct icmp6hdr		mld_hdr;
	struct in6_addr		mld_mca;
};

#define mld_type		mld_hdr.icmp6_type
#define mld_code		mld_hdr.icmp6_code
#define mld_cksum		mld_hdr.icmp6_cksum
#define mld_maxdelay		mld_hdr.icmp6_maxdelay
#define mld_reserved		mld_hdr.icmp6_dataun.un_data16[1]

/* Multicast Listener Discovery version 2 headers */
/* MLDv2 Report */
struct mld2_grec {
	__u8		grec_type;
	__u8		grec_auxwords;
	__be16		grec_nsrcs;
	struct in6_addr	grec_mca;
	struct in6_addr	grec_src[];
};

struct mld2_report {
	struct icmp6hdr		mld2r_hdr;
	struct mld2_grec	mld2r_grec[];
};

#define mld2r_type		mld2r_hdr.icmp6_type
#define mld2r_resv1		mld2r_hdr.icmp6_code
#define mld2r_cksum		mld2r_hdr.icmp6_cksum
#define mld2r_resv2		mld2r_hdr.icmp6_dataun.un_data16[0]
#define mld2r_ngrec		mld2r_hdr.icmp6_dataun.un_data16[1]

/* MLDv2 Query */
struct mld2_query {
	struct icmp6hdr		mld2q_hdr;
	struct in6_addr		mld2q_mca;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8			mld2q_qrv:3,
				mld2q_suppress:1,
				mld2q_resv2:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8			mld2q_resv2:4,
				mld2q_suppress:1,
				mld2q_qrv:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8			mld2q_qqic;
	__be16			mld2q_nsrcs;
	struct in6_addr		mld2q_srcs[];
};

#define mld2q_type		mld2q_hdr.icmp6_type
#define mld2q_code		mld2q_hdr.icmp6_code
#define mld2q_cksum		mld2q_hdr.icmp6_cksum
#define mld2q_mrc		mld2q_hdr.icmp6_maxdelay
#define mld2q_resv1		mld2q_hdr.icmp6_dataun.un_data16[1]

/* RFC3810, 5.1.3. Maximum Response Code:
 *
 * If Maximum Response Code >= 32768, Maximum Response Code represents a
 * floating-point value as follows:
 *
 *  0 1 2 3 4 5 6 7 8 9 A B C D E F
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |1| exp |          mant         |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */
#define MLDV2_MRC_EXP(value)	(((value) >> 12) & 0x0007)
#define MLDV2_MRC_MAN(value)	((value) & 0x0fff)

/* RFC3810, 5.1.9. QQIC (Querier's Query Interval Code):
 *
 * If QQIC >= 128, QQIC represents a floating-point value as follows:
 *
 *  0 1 2 3 4 5 6 7
 * +-+-+-+-+-+-+-+-+
 * |1| exp | mant  |
 * +-+-+-+-+-+-+-+-+
 */
#define MLDV2_QQIC_EXP(value)	(((value) >> 4) & 0x07)
#define MLDV2_QQIC_MAN(value)	((value) & 0x0f)

#define MLD_EXP_MIN_LIMIT	32768UL
#define MLDV1_MRD_MAX_COMPAT	(MLD_EXP_MIN_LIMIT - 1)

#define MLD_MAX_QUEUE		8
#define MLD_MAX_SKBS		32

static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
{
	/* RFC3810, 5.1.3. Maximum Response Code */
	unsigned long ret, mc_mrc = ntohs(mlh2->mld2q_mrc);

	if (mc_mrc < MLD_EXP_MIN_LIMIT) {
		ret = mc_mrc;
	} else {
		unsigned long mc_man, mc_exp;

		mc_exp = MLDV2_MRC_EXP(mc_mrc);
		mc_man = MLDV2_MRC_MAN(mc_mrc);

		ret = (mc_man | 0x1000) << (mc_exp + 3);
	}

	return ret;
}

#endif
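/*
 * A stand-alone user-space sketch of the floating-point decodes declared
 * above: the Maximum Response Code expansion mirrors mldv2_mrc(), while the
 * QQIC expansion follows RFC 3810, 5.1.9 (QQI = (mant | 0x10) << (exp + 3)),
 * which is an assumption here since the header only defines the field
 * extraction macros for QQIC.
 */
#include <stdio.h>

#define EXP_MIN_LIMIT 32768UL	/* mirrors MLD_EXP_MIN_LIMIT */

/* Maximum Response Code -> maximum response delay in milliseconds. */
static unsigned long mrc_to_ms(unsigned int mrc)
{
	if (mrc < EXP_MIN_LIMIT)
		return mrc;
	/* |1| exp(3) | mant(12) |  ->  (mant | 0x1000) << (exp + 3) */
	return (unsigned long)((mrc & 0x0fff) | 0x1000) << (((mrc >> 12) & 0x7) + 3);
}

/* QQIC -> querier's query interval in seconds. */
static unsigned int qqic_to_s(unsigned int qqic)
{
	if (qqic < 128)
		return qqic;
	/* |1| exp(3) | mant(4) |  ->  (mant | 0x10) << (exp + 3) */
	return ((qqic & 0x0f) | 0x10) << (((qqic >> 4) & 0x7) + 3);
}

int main(void)
{
	printf("MRC 0x8000 -> %lu ms\n", mrc_to_ms(0x8000));	/* 32768 ms */
	printf("QQIC 0x80  -> %u s\n", qqic_to_s(0x80));	/* 128 s   */
	return 0;
}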
7 7 7 6 7 7 3 6 6 6 6 6 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 // SPDX-License-Identifier: GPL-2.0-or-later /* Iterator helpers. * * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/uio.h> #include <linux/scatterlist.h> #include <linux/netfs.h> #include "internal.h" /** * netfs_extract_user_iter - Extract the pages from a user iterator into a bvec * @orig: The original iterator * @orig_len: The amount of iterator to copy * @new: The iterator to be set up * @extraction_flags: Flags to qualify the request * * Extract the page fragments from the given amount of the source iterator and * build up a second iterator that refers to all of those bits. This allows * the original iterator to disposed of. * * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA be * allowed on the pages extracted. * * On success, the number of elements in the bvec is returned, the original * iterator will have been advanced by the amount extracted. * * The iov_iter_extract_mode() function should be used to query how cleanup * should be performed. */ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags) { struct bio_vec *bv = NULL; struct page **pages; unsigned int cur_npages; unsigned int max_pages; unsigned int npages = 0; unsigned int i; ssize_t ret; size_t count = orig_len, offset, len; size_t bv_size, pg_size; if (WARN_ON_ONCE(!iter_is_ubuf(orig) && !iter_is_iovec(orig))) return -EIO; max_pages = iov_iter_npages(orig, INT_MAX); bv_size = array_size(max_pages, sizeof(*bv)); bv = kvmalloc(bv_size, GFP_KERNEL); if (!bv) return -ENOMEM; /* Put the page list at the end of the bvec list storage. bvec * elements are larger than page pointers, so as long as we work * 0->last, we should be fine. */ pg_size = array_size(max_pages, sizeof(*pages)); pages = (void *)bv + bv_size - pg_size; while (count && npages < max_pages) { ret = iov_iter_extract_pages(orig, &pages, count, max_pages - npages, extraction_flags, &offset); if (ret < 0) { pr_err("Couldn't get user pages (rc=%zd)\n", ret); break; } if (ret > count) { pr_err("get_pages rc=%zd more than %zu\n", ret, count); break; } count -= ret; ret += offset; cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE); if (npages + cur_npages > max_pages) { pr_err("Out of bvec array capacity (%u vs %u)\n", npages + cur_npages, max_pages); break; } for (i = 0; i < cur_npages; i++) { len = ret > PAGE_SIZE ? 
PAGE_SIZE : ret; bvec_set_page(bv + npages + i, *pages++, len - offset, offset); ret -= len; offset = 0; } npages += cur_npages; } iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count); return npages; } EXPORT_SYMBOL_GPL(netfs_extract_user_iter); /* * Select the span of a bvec iterator we're going to use. Limit it by both maximum * size and maximum number of segments. Returns the size of the span in bytes. */ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs) { const struct bio_vec *bvecs = iter->bvec; unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; size_t len, span = 0, n = iter->count; size_t skip = iter->iov_offset + start_offset; if (WARN_ON(!iov_iter_is_bvec(iter)) || WARN_ON(start_offset > n) || n == 0) return 0; while (n && ix < nbv && skip) { len = bvecs[ix].bv_len; if (skip < len) break; skip -= len; n -= len; ix++; } while (n && ix < nbv) { len = min3(n, bvecs[ix].bv_len - skip, max_size); span += len; nsegs++; ix++; if (span >= max_size || nsegs >= max_segs) break; skip = 0; n -= len; } return min(span, max_size); } /* * Select the span of an xarray iterator we're going to use. Limit it by both * maximum size and maximum number of segments. It is assumed that segments * can be larger than a page in size, provided they're physically contiguous. * Returns the size of the span in bytes. */ static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs) { struct folio *folio; unsigned int nsegs = 0; loff_t pos = iter->xarray_start + iter->iov_offset; pgoff_t index = pos / PAGE_SIZE; size_t span = 0, n = iter->count; XA_STATE(xas, iter->xarray, index); if (WARN_ON(!iov_iter_is_xarray(iter)) || WARN_ON(start_offset > n) || n == 0) return 0; max_size = min(max_size, n - start_offset); rcu_read_lock(); xas_for_each(&xas, folio, ULONG_MAX) { size_t offset, flen, len; if (xas_retry(&xas, folio)) continue; if (WARN_ON(xa_is_value(folio))) break; if (WARN_ON(folio_test_hugetlb(folio))) break; flen = folio_size(folio); offset = offset_in_folio(folio, pos); len = min(max_size, flen - offset); span += len; nsegs++; if (span >= max_size || nsegs >= max_segs) break; } rcu_read_unlock(); return min(span, max_size); } /* * Select the span of a folio queue iterator we're going to use. Limit it by * both maximum size and maximum number of segments. Returns the size of the * span in bytes. 
*/ static size_t netfs_limit_folioq(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs) { const struct folio_queue *folioq = iter->folioq; unsigned int nsegs = 0; unsigned int slot = iter->folioq_slot; size_t span = 0, n = iter->count; if (WARN_ON(!iov_iter_is_folioq(iter)) || WARN_ON(start_offset > n) || n == 0) return 0; max_size = umin(max_size, n - start_offset); if (slot >= folioq_nr_slots(folioq)) { folioq = folioq->next; slot = 0; } start_offset += iter->iov_offset; do { size_t flen = folioq_folio_size(folioq, slot); if (start_offset < flen) { span += flen - start_offset; nsegs++; start_offset = 0; } else { start_offset -= flen; } if (span >= max_size || nsegs >= max_segs) break; slot++; if (slot >= folioq_nr_slots(folioq)) { folioq = folioq->next; slot = 0; } } while (folioq); return umin(span, max_size); } size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs) { if (iov_iter_is_folioq(iter)) return netfs_limit_folioq(iter, start_offset, max_size, max_segs); if (iov_iter_is_bvec(iter)) return netfs_limit_bvec(iter, start_offset, max_size, max_segs); if (iov_iter_is_xarray(iter)) return netfs_limit_xarray(iter, start_offset, max_size, max_segs); BUG(); } EXPORT_SYMBOL(netfs_limit_iter);
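/*
 * A stand-alone sketch of the two-budget walk used by netfs_limit_bvec()
 * above, simplified to a plain array of segment lengths: the span grows
 * until either the byte budget (max_size) or the segment budget (max_segs)
 * is exhausted, with the leading skip consuming whole segments first.
 */
#include <stddef.h>
#include <stdio.h>

static size_t limit_span(const size_t *seglen, size_t nsegs_total,
			 size_t skip, size_t max_size, size_t max_segs)
{
	size_t ix = 0, nsegs = 0, span = 0, len;

	/* Skip whole segments covered by the starting offset. */
	while (ix < nsegs_total && skip >= seglen[ix])
		skip -= seglen[ix++];

	/* Accumulate until either budget runs out. */
	while (ix < nsegs_total && span < max_size && nsegs < max_segs) {
		len = seglen[ix] - skip;
		if (len > max_size - span)
			len = max_size - span;
		span += len;
		nsegs++;
		skip = 0;
		ix++;
	}
	return span;
}

int main(void)
{
	const size_t segs[] = { 4096, 4096, 2048 };

	/* Start 100 bytes in, allow at most 6000 bytes over at most 2 segments. */
	printf("span = %zu\n", limit_span(segs, 3, 100, 6000, 2));
	return 0;
}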
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2008 IBM Corporation
 * Author: Mimi Zohar <zohar@us.ibm.com>
 *
 * File: integrity_audit.c
 *	Audit calls for the integrity subsystem
 */

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/audit.h>
#include "integrity.h"

static int integrity_audit_info;

/* integrity_audit_setup - enable informational auditing messages */
static int __init integrity_audit_setup(char *str)
{
	unsigned long audit;

	if (!kstrtoul(str, 0, &audit))
		integrity_audit_info = audit ? 1 : 0;
	return 1;
}
__setup("integrity_audit=", integrity_audit_setup);

void integrity_audit_msg(int audit_msgno, struct inode *inode,
			 const unsigned char *fname, const char *op,
			 const char *cause, int result, int audit_info)
{
	integrity_audit_message(audit_msgno, inode, fname, op, cause,
				result, audit_info, 0);
}

void integrity_audit_message(int audit_msgno, struct inode *inode,
			     const unsigned char *fname, const char *op,
			     const char *cause, int result, int audit_info,
			     int errno)
{
	struct audit_buffer *ab;
	char name[TASK_COMM_LEN];

	if (!integrity_audit_info && audit_info == 1)	/* Skip info messages */
		return;

	ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno);
	if (!ab)
		return;
	audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
			 task_pid_nr(current),
			 from_kuid(&init_user_ns, current_uid()),
			 from_kuid(&init_user_ns, audit_get_loginuid(current)),
			 audit_get_sessionid(current));
	audit_log_task_context(ab);
	audit_log_format(ab, " op=%s cause=%s comm=", op, cause);
	audit_log_untrustedstring(ab, get_task_comm(name, current));
	if (fname) {
		audit_log_format(ab, " name=");
		audit_log_untrustedstring(ab, fname);
	}
	if (inode) {
		audit_log_format(ab, " dev=");
		audit_log_untrustedstring(ab, inode->i_sb->s_id);
		audit_log_format(ab, " ino=%lu", inode->i_ino);
	}
	audit_log_format(ab, " res=%d errno=%d", !result, errno);
	audit_log_end(ab);
}
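/*
 * Illustrative usage sketch (not part of the file above): how a caller in
 * the integrity subsystem might emit one of these records.  The function
 * and the op/cause strings below are hypothetical; AUDIT_INTEGRITY_PCR
 * (from <linux/audit.h>) and integrity_audit_msg() are the real
 * identifiers used above.
 */
static int example_measure_file(struct inode *inode, const char *filename)
{
	int result = -ENOMEM;		/* pretend the measurement failed */

	/*
	 * The record reports res=!result (so res=0 here), and audit_info=0
	 * means the message is emitted even when the integrity_audit= boot
	 * parameter left informational auditing disabled.
	 */
	integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
			    (const unsigned char *)filename, "collect_data",
			    "alloc_failure", result, 0);
	return result;
}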
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_64_H
#define _ASM_X86_PGTABLE_64_H

#include <linux/const.h>
#include <asm/pgtable_64_types.h>

#ifndef __ASSEMBLY__

/*
 * This file contains the functions and defines necessary to modify and use
 * the x86-64 page table tree.
 */
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <asm/fixmap.h>

extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];

#define swapper_pg_dir init_top_pgt

extern void paging_init(void);
static inline void sync_initial_page_table(void) { }

#define pte_ERROR(e)					\
	pr_err("%s:%d: bad pte %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pte_val(e))
#define pmd_ERROR(e)					\
	pr_err("%s:%d: bad pmd %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pmd_val(e))
#define pud_ERROR(e)					\
	pr_err("%s:%d: bad pud %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pud_val(e))

#if CONFIG_PGTABLE_LEVELS >= 5
#define p4d_ERROR(e)					\
	pr_err("%s:%d: bad p4d %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), p4d_val(e))
#endif

#define pgd_ERROR(e)					\
	pr_err("%s:%d: bad pgd %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pgd_val(e))

struct mm_struct;

#define mm_p4d_folded mm_p4d_folded
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
	return !pgtable_l5_enabled();
}

void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);

static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}

static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	native_set_pte(ptep, native_make_pte(0));
}

static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	native_set_pte(ptep, pte);
}

static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	WRITE_ONCE(*pmdp, pmd);
}

static inline void native_pmd_clear(pmd_t *pmd)
{
	native_set_pmd(pmd, native_make_pmd(0));
}

static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
#ifdef CONFIG_SMP
	return native_make_pte(xchg(&xp->pte, 0));
#else
	/* native_local_ptep_get_and_clear,
	   but duplicated because of cyclic dependency */
	pte_t ret = *xp;
	native_pte_clear(NULL, 0,
xp); return ret; #endif } static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { #ifdef CONFIG_SMP return native_make_pmd(xchg(&xp->pmd, 0)); #else /* native_local_pmdp_get_and_clear, but duplicated because of cyclic dependency */ pmd_t ret = *xp; native_pmd_clear(xp); return ret; #endif } static inline void native_set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) { native_set_pud(pud, native_make_pud(0)); } static inline pud_t native_pudp_get_and_clear(pud_t *xp) { #ifdef CONFIG_SMP return native_make_pud(xchg(&xp->pud, 0)); #else /* native_local_pudp_get_and_clear, * but duplicated because of cyclic dependency */ pud_t ret = *xp; native_pud_clear(xp); return ret; #endif } static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) { native_set_pgd(pgd, native_make_pgd(0)); } /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ /* PGD - Level 4 access */ /* PUD - Level 3 access */ /* PMD - Level 2 access */ /* PTE - Level 1 access */ /* * Encode and de-code a swap entry * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. * * SD Bits 1-4 are not used in non-present format and available for * special use described below: * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * * F (2) in swp entry is used to record when a pagetable is * writeprotected by userfaultfd WP support. * * E (3) in swp entry is used to remember PG_anon_exclusive. * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * * The offset is inverted by a binary not operation to make the high * physical bits set. */ #define SWP_TYPE_BITS 5 #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) /* Extract the high bits for type */ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) /* Shift up (to get rid of type), then down to get value */ #define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) /* * Shift the offset up "too far" by TYPE bits, then down again * The offset is inverted by a binary not operation to make the high * physical bits set. 
*/ #define __swp_entry(type, offset) ((swp_entry_t) { \ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) (__pte((x).val)) #define __swp_entry_to_pmd(x) (__pmd((x).val)) extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 /* fs/proc/kcore.c */ #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME #define vmemmap ((struct page *)VMEMMAP_START) extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; } #include <asm/pgtable-invert.h> #else /* __ASSEMBLY__ */ #define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) L4_START_KERNEL = l4_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) #define SYM_DATA_START_PAGE_ALIGNED(name) \ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ .rept (COUNT) ; \ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ i = i + 1 ; \ .endr #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */
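/*
 * Illustrative sketch (not part of the header above): the swap-entry
 * macros round-trip a (type, offset) pair, with the type stored in the
 * top SWP_TYPE_BITS and the offset stored bitwise-inverted below it.
 * The function name and sample values are arbitrary; swp_entry_t comes
 * from <linux/mm_types.h>, and the macros above are assumed to be in
 * scope (i.e. this header has been included).
 */
static inline bool swp_entry_roundtrip_ok(void)
{
	swp_entry_t entry = __swp_entry(3, 0x1234);

	/* Decoding recovers exactly what was encoded. */
	return __swp_type(entry) == 3 && __swp_offset(entry) == 0x1234;
}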
//
SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Sony / PS2 / PS3 / PS4 BD devices. * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2012 David Dillow <dave@thedillows.org> * Copyright (c) 2006-2013 Jiri Kosina * Copyright (c) 2013 Colin Leitner <colin.leitner@gmail.com> * Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com> * Copyright (c) 2018 Todd Kelner * Copyright (c) 2020-2021 Pascal Giard <pascal.giard@etsmtl.ca> * Copyright (c) 2020 Sanjay Govind <sanjay.govind9@gmail.com> * Copyright (c) 2021 Daniel Nguyen <daniel.nguyen.1@ens.etsmtl.ca> */ /* */ /* * NOTE: in order for the Sony PS3 BD Remote Control to be found by * a Bluetooth host, the key combination Start+Enter has to be kept pressed * for about 7 seconds with the Bluetooth Host Controller in discovering mode. * * There will be no PIN request from the device. */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/leds.h> #include <linux/power_supply.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/idr.h> #include <linux/input/mt.h> #include <linux/crc32.h> #include <linux/usb.h> #include <linux/timer.h> #include <linux/unaligned.h> #include "hid-ids.h" #define VAIO_RDESC_CONSTANT BIT(0) #define SIXAXIS_CONTROLLER_USB BIT(1) #define SIXAXIS_CONTROLLER_BT BIT(2) #define BUZZ_CONTROLLER BIT(3) #define PS3REMOTE BIT(4) #define MOTION_CONTROLLER_USB BIT(5) #define MOTION_CONTROLLER_BT BIT(6) #define NAVIGATION_CONTROLLER_USB BIT(7) #define NAVIGATION_CONTROLLER_BT BIT(8) #define SINO_LITE_CONTROLLER BIT(9) #define FUTUREMAX_DANCE_MAT BIT(10) #define NSG_MR5U_REMOTE_BT BIT(11) #define NSG_MR7U_REMOTE_BT BIT(12) #define SHANWAN_GAMEPAD BIT(13) #define GH_GUITAR_CONTROLLER BIT(14) #define GHL_GUITAR_PS3WIIU BIT(15) #define GHL_GUITAR_PS4 BIT(16) #define SIXAXIS_CONTROLLER (SIXAXIS_CONTROLLER_USB | SIXAXIS_CONTROLLER_BT) #define MOTION_CONTROLLER (MOTION_CONTROLLER_USB | MOTION_CONTROLLER_BT) #define NAVIGATION_CONTROLLER (NAVIGATION_CONTROLLER_USB |\ NAVIGATION_CONTROLLER_BT) #define SONY_LED_SUPPORT (SIXAXIS_CONTROLLER | BUZZ_CONTROLLER |\ MOTION_CONTROLLER | NAVIGATION_CONTROLLER) #define SONY_BATTERY_SUPPORT (SIXAXIS_CONTROLLER | MOTION_CONTROLLER_BT | NAVIGATION_CONTROLLER) #define SONY_FF_SUPPORT (SIXAXIS_CONTROLLER | MOTION_CONTROLLER) #define SONY_BT_DEVICE (SIXAXIS_CONTROLLER_BT | MOTION_CONTROLLER_BT | NAVIGATION_CONTROLLER_BT) #define NSG_MRXU_REMOTE (NSG_MR5U_REMOTE_BT | NSG_MR7U_REMOTE_BT) #define MAX_LEDS 4 #define NSG_MRXU_MAX_X 1667 #define NSG_MRXU_MAX_Y 1868 /* The PS3/Wii U dongles require a poke every 10 seconds, but the PS4 * requires one every 8 seconds. Using 8 seconds for all for simplicity. */ #define GHL_GUITAR_POKE_INTERVAL 8 /* In seconds */ #define GUITAR_TILT_USAGE 44 /* Magic data taken from GHLtarUtility: * https://github.com/ghlre/GHLtarUtility/blob/master/PS3Guitar.cs * Note: The Wii U and PS3 dongles happen to share the same! */ static const char ghl_ps3wiiu_magic_data[] = { 0x02, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* Magic data for the PS4 dongles sniffed with a USB protocol * analyzer. 
*/ static const char ghl_ps4_magic_data[] = { 0x30, 0x02, 0x08, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* PS/3 Motion controller */ static const u8 motion_rdesc[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0x01, /* Report ID (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x15, /* Report Count (21), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x15, /* Usage Maximum (15h), */ 0x81, 0x02, /* Input (Variable), * Buttons */ 0x95, 0x0B, /* Report Count (11), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x81, 0x03, /* Input (Constant, Variable), * Padding */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0xA1, 0x00, /* Collection (Physical), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), * Trigger */ 0xC0, /* End Collection, */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x07, /* Report Count (7), * skip 7 bytes */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x75, 0x10, /* Report Size (16), */ 0x46, 0xFF, 0xFF, /* Physical Maximum (65535), */ 0x27, 0xFF, 0xFF, 0x00, 0x00, /* Logical Maximum (65535), */ 0x95, 0x03, /* Report Count (3), * 3x Accels */ 0x09, 0x33, /* Usage (rX), */ 0x09, 0x34, /* Usage (rY), */ 0x09, 0x35, /* Usage (rZ), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x95, 0x03, /* Report Count (3), * Skip Accels 2nd frame */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x01, /* Usage (Pointer), */ 0x95, 0x03, /* Report Count (3), * 3x Gyros */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x95, 0x03, /* Report Count (3), * Skip Gyros 2nd frame */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x0C, /* Report Size (12), */ 0x46, 0xFF, 0x0F, /* Physical Maximum (4095), */ 0x26, 0xFF, 0x0F, /* Logical Maximum (4095), */ 0x95, 0x04, /* Report Count (4), * Skip Temp and Magnetometers */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x95, 0x06, /* Report Count (6), * Skip Timestamp and Extension Bytes */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0x91, 0x02, /* Output (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0x02, /* Report ID (2), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0xEE, /* Report ID (238), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End 
Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0xEF, /* Report ID (239), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static const u8 ps3remote_rdesc[] = { 0x05, 0x01, /* GUsagePage Generic Desktop */ 0x09, 0x05, /* LUsage 0x05 [Game Pad] */ 0xA1, 0x01, /* MCollection Application (mouse, keyboard) */ /* Use collection 1 for joypad buttons */ 0xA1, 0x02, /* MCollection Logical (interrelated data) */ /* * Ignore the 1st byte, maybe it is used for a controller * number but it's not needed for correct operation */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ /* * Bytes from 2nd to 4th are a bitmap for joypad buttons, for these * buttons multiple keypresses are allowed */ 0x05, 0x09, /* GUsagePage Button */ 0x19, 0x01, /* LUsageMinimum 0x01 [Button 1 (primary/trigger)] */ 0x29, 0x18, /* LUsageMaximum 0x18 [Button 24] */ 0x14, /* GLogicalMinimum [0] */ 0x25, 0x01, /* GLogicalMaximum 0x01 [1] */ 0x75, 0x01, /* GReportSize 0x01 [1] */ 0x95, 0x18, /* GReportCount 0x18 [24] */ 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ 0xC0, /* MEndCollection */ /* Use collection 2 for remote control buttons */ 0xA1, 0x02, /* MCollection Logical (interrelated data) */ /* 5th byte is used for remote control buttons */ 0x05, 0x09, /* GUsagePage Button */ 0x18, /* LUsageMinimum [No button pressed] */ 0x29, 0xFE, /* LUsageMaximum 0xFE [Button 254] */ 0x14, /* GLogicalMinimum [0] */ 0x26, 0xFE, 0x00, /* GLogicalMaximum 0x00FE [254] */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x80, /* MInput */ /* * Ignore bytes from 6th to 11th, 6th to 10th are always constant at * 0xff and 11th is for press indication */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x06, /* GReportCount 0x06 [6] */ 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ /* 12th byte is for battery strength */ 0x05, 0x06, /* GUsagePage Generic Device Controls */ 0x09, 0x20, /* LUsage 0x20 [Battery Strength] */ 0x14, /* GLogicalMinimum [0] */ 0x25, 0x05, /* GLogicalMaximum 0x05 [5] */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ 0xC0, /* MEndCollection */ 0xC0 /* MEndCollection [Game Pad] */ }; static const unsigned int ps3remote_keymap_joypad_buttons[] = { [0x01] = KEY_SELECT, [0x02] = BTN_THUMBL, /* L3 */ [0x03] = BTN_THUMBR, /* R3 */ [0x04] = BTN_START, [0x05] = KEY_UP, [0x06] = KEY_RIGHT, [0x07] = KEY_DOWN, [0x08] = KEY_LEFT, [0x09] = BTN_TL2, /* L2 */ [0x0a] = BTN_TR2, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = BTN_TR, /* R1 */ [0x0d] = KEY_OPTION, /* options/triangle */ [0x0e] = KEY_BACK, /* back/circle */ [0x0f] = BTN_0, /* cross */ [0x10] = KEY_SCREEN, /* view/square */ [0x11] = KEY_HOMEPAGE, /* PS button */ [0x14] = KEY_ENTER, }; static const unsigned int ps3remote_keymap_remote_buttons[] = { [0x00] = KEY_1, [0x01] = KEY_2, [0x02] = KEY_3, [0x03] = KEY_4, [0x04] = KEY_5, [0x05] = KEY_6, [0x06] = KEY_7, [0x07] = KEY_8, [0x08] = KEY_9, [0x09] = KEY_0, [0x0e] = KEY_ESC, /* return */ [0x0f] = KEY_CLEAR, [0x16] = KEY_EJECTCD, [0x1a] = KEY_MENU, /* top menu */ [0x28] = KEY_TIME, [0x30] = KEY_PREVIOUS, [0x31] = KEY_NEXT, [0x32] = KEY_PLAY, [0x33] = KEY_REWIND, /* scan back */ [0x34] = KEY_FORWARD, /* scan forward */ [0x38] = KEY_STOP, [0x39] = KEY_PAUSE, [0x40] 
= KEY_CONTEXT_MENU, /* pop up/menu */ [0x60] = KEY_FRAMEBACK, /* slow/step back */ [0x61] = KEY_FRAMEFORWARD, /* slow/step forward */ [0x63] = KEY_SUBTITLE, [0x64] = KEY_AUDIO, [0x65] = KEY_ANGLE, [0x70] = KEY_INFO, /* display */ [0x80] = KEY_BLUE, [0x81] = KEY_RED, [0x82] = KEY_GREEN, [0x83] = KEY_YELLOW, }; static const unsigned int buzz_keymap[] = { /* * The controller has 4 remote buzzers, each with one LED and 5 * buttons. * * We use the mapping chosen by the controller, which is: * * Key Offset * ------------------- * Buzz 1 * Blue 5 * Orange 4 * Green 3 * Yellow 2 * * So, for example, the orange button on the third buzzer is mapped to * BTN_TRIGGER_HAPPY14 */ [1] = BTN_TRIGGER_HAPPY1, [2] = BTN_TRIGGER_HAPPY2, [3] = BTN_TRIGGER_HAPPY3, [4] = BTN_TRIGGER_HAPPY4, [5] = BTN_TRIGGER_HAPPY5, [6] = BTN_TRIGGER_HAPPY6, [7] = BTN_TRIGGER_HAPPY7, [8] = BTN_TRIGGER_HAPPY8, [9] = BTN_TRIGGER_HAPPY9, [10] = BTN_TRIGGER_HAPPY10, [11] = BTN_TRIGGER_HAPPY11, [12] = BTN_TRIGGER_HAPPY12, [13] = BTN_TRIGGER_HAPPY13, [14] = BTN_TRIGGER_HAPPY14, [15] = BTN_TRIGGER_HAPPY15, [16] = BTN_TRIGGER_HAPPY16, [17] = BTN_TRIGGER_HAPPY17, [18] = BTN_TRIGGER_HAPPY18, [19] = BTN_TRIGGER_HAPPY19, [20] = BTN_TRIGGER_HAPPY20, }; /* The Navigation controller is a partial DS3 and uses the same HID report * and hence the same keymap indices, however not all axes/buttons * are physically present. We use the same axis and button mapping as * the DS3, which uses the Linux gamepad spec. */ static const unsigned int navigation_absmap[] = { [0x30] = ABS_X, [0x31] = ABS_Y, [0x33] = ABS_Z, /* L2 */ }; /* Buttons not physically available on the device, but still available * in the reports are explicitly set to 0 for documentation purposes. */ static const unsigned int navigation_keymap[] = { [0x01] = 0, /* Select */ [0x02] = BTN_THUMBL, /* L3 */ [0x03] = 0, /* R3 */ [0x04] = 0, /* Start */ [0x05] = BTN_DPAD_UP, /* Up */ [0x06] = BTN_DPAD_RIGHT, /* Right */ [0x07] = BTN_DPAD_DOWN, /* Down */ [0x08] = BTN_DPAD_LEFT, /* Left */ [0x09] = BTN_TL2, /* L2 */ [0x0a] = 0, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = 0, /* R1 */ [0x0d] = BTN_NORTH, /* Triangle */ [0x0e] = BTN_EAST, /* Circle */ [0x0f] = BTN_SOUTH, /* Cross */ [0x10] = BTN_WEST, /* Square */ [0x11] = BTN_MODE, /* PS */ }; static const unsigned int sixaxis_absmap[] = { [0x30] = ABS_X, [0x31] = ABS_Y, [0x32] = ABS_RX, /* right stick X */ [0x35] = ABS_RY, /* right stick Y */ }; static const unsigned int sixaxis_keymap[] = { [0x01] = BTN_SELECT, /* Select */ [0x02] = BTN_THUMBL, /* L3 */ [0x03] = BTN_THUMBR, /* R3 */ [0x04] = BTN_START, /* Start */ [0x05] = BTN_DPAD_UP, /* Up */ [0x06] = BTN_DPAD_RIGHT, /* Right */ [0x07] = BTN_DPAD_DOWN, /* Down */ [0x08] = BTN_DPAD_LEFT, /* Left */ [0x09] = BTN_TL2, /* L2 */ [0x0a] = BTN_TR2, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = BTN_TR, /* R1 */ [0x0d] = BTN_NORTH, /* Triangle */ [0x0e] = BTN_EAST, /* Circle */ [0x0f] = BTN_SOUTH, /* Cross */ [0x10] = BTN_WEST, /* Square */ [0x11] = BTN_MODE, /* PS */ }; static enum power_supply_property sony_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_STATUS, }; struct sixaxis_led { u8 time_enabled; /* the total time the led is active (0xff means forever) */ u8 duty_length; /* how long a cycle is in deciseconds (0 means "really fast") */ u8 enabled; u8 duty_off; /* % of duty_length the led is off (0xff means 100%) */ u8 duty_on; /* % of duty_length the led is on (0xff mean 100%) */ } __packed; struct sixaxis_rumble { u8 padding; 
u8 right_duration; /* Right motor duration (0xff means forever) */ u8 right_motor_on; /* Right (small) motor on/off, only supports values of 0 or 1 (off/on) */ u8 left_duration; /* Left motor duration (0xff means forever) */ u8 left_motor_force; /* left (large) motor, supports force values from 0 to 255 */ } __packed; struct sixaxis_output_report { u8 report_id; struct sixaxis_rumble rumble; u8 padding[4]; u8 leds_bitmap; /* bitmap of enabled LEDs: LED_1 = 0x02, LED_2 = 0x04, ... */ struct sixaxis_led led[4]; /* LEDx at (4 - x) */ struct sixaxis_led _reserved; /* LED5, not actually soldered */ } __packed; union sixaxis_output_report_01 { struct sixaxis_output_report data; u8 buf[36]; }; struct motion_output_report_02 { u8 type, zero; u8 r, g, b; u8 zero2; u8 rumble; }; #define SIXAXIS_REPORT_0xF2_SIZE 17 #define SIXAXIS_REPORT_0xF5_SIZE 8 #define MOTION_REPORT_0x02_SIZE 49 #define SENSOR_SUFFIX " Motion Sensors" #define TOUCHPAD_SUFFIX " Touchpad" #define SIXAXIS_INPUT_REPORT_ACC_X_OFFSET 41 #define SIXAXIS_ACC_RES_PER_G 113 static DEFINE_SPINLOCK(sony_dev_list_lock); static LIST_HEAD(sony_device_list); static DEFINE_IDA(sony_device_id_allocator); enum sony_worker { SONY_WORKER_STATE }; struct sony_sc { spinlock_t lock; struct list_head list_node; struct hid_device *hdev; struct input_dev *touchpad; struct input_dev *sensor_dev; struct led_classdev *leds[MAX_LEDS]; unsigned long quirks; struct work_struct state_worker; void (*send_output_report)(struct sony_sc *); struct power_supply *battery; struct power_supply_desc battery_desc; int device_id; u8 *output_report_dmabuf; #ifdef CONFIG_SONY_FF u8 left; u8 right; #endif u8 mac_address[6]; u8 state_worker_initialized; u8 defer_initialization; u8 battery_capacity; int battery_status; u8 led_state[MAX_LEDS]; u8 led_delay_on[MAX_LEDS]; u8 led_delay_off[MAX_LEDS]; u8 led_count; /* GH Live */ struct urb *ghl_urb; struct timer_list ghl_poke_timer; }; static void sony_set_leds(struct sony_sc *sc); static inline void sony_schedule_work(struct sony_sc *sc, enum sony_worker which) { unsigned long flags; switch (which) { case SONY_WORKER_STATE: spin_lock_irqsave(&sc->lock, flags); if (!sc->defer_initialization && sc->state_worker_initialized) schedule_work(&sc->state_worker); spin_unlock_irqrestore(&sc->lock, flags); break; } } static void ghl_magic_poke_cb(struct urb *urb) { struct sony_sc *sc = urb->context; if (urb->status < 0) hid_err(sc->hdev, "URB transfer failed : %d", urb->status); mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ); } static void ghl_magic_poke(struct timer_list *t) { int ret; struct sony_sc *sc = from_timer(sc, t, ghl_poke_timer); ret = usb_submit_urb(sc->ghl_urb, GFP_ATOMIC); if (ret < 0) hid_err(sc->hdev, "usb_submit_urb failed: %d", ret); } static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev, const char ghl_magic_data[], u16 poke_size) { struct usb_ctrlrequest *cr; u8 *databuf; unsigned int pipe; u16 ghl_magic_value = (((HID_OUTPUT_REPORT + 1) << 8) | ghl_magic_data[0]); pipe = usb_sndctrlpipe(usbdev, 0); cr = devm_kzalloc(&sc->hdev->dev, sizeof(*cr), GFP_ATOMIC); if (cr == NULL) return -ENOMEM; databuf = devm_kzalloc(&sc->hdev->dev, poke_size, GFP_ATOMIC); if (databuf == NULL) return -ENOMEM; cr->bRequestType = USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT; cr->bRequest = USB_REQ_SET_CONFIGURATION; cr->wValue = cpu_to_le16(ghl_magic_value); cr->wIndex = 0; cr->wLength = cpu_to_le16(poke_size); memcpy(databuf, ghl_magic_data, poke_size); usb_fill_control_urb( sc->ghl_urb, usbdev, 
pipe, (unsigned char *) cr, databuf, poke_size, ghl_magic_poke_cb, sc); return 0; } static int guitar_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) { unsigned int abs = usage->hid & HID_USAGE; if (abs == GUITAR_TILT_USAGE) { hid_map_usage_clear(hi, usage, bit, max, EV_ABS, ABS_RY); return 1; } } return 0; } static const u8 *motion_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { *rsize = sizeof(motion_rdesc); return motion_rdesc; } static const u8 *ps3remote_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { *rsize = sizeof(ps3remote_rdesc); return ps3remote_rdesc; } static int ps3remote_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned int key = usage->hid & HID_USAGE; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_BUTTON) return -1; switch (usage->collection_index) { case 1: if (key >= ARRAY_SIZE(ps3remote_keymap_joypad_buttons)) return -1; key = ps3remote_keymap_joypad_buttons[key]; if (!key) return -1; break; case 2: if (key >= ARRAY_SIZE(ps3remote_keymap_remote_buttons)) return -1; key = ps3remote_keymap_remote_buttons[key]; if (!key) return -1; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } static int navigation_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { unsigned int key = usage->hid & HID_USAGE; if (key >= ARRAY_SIZE(sixaxis_keymap)) return -1; key = navigation_keymap[key]; if (!key) return -1; hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } else if (usage->hid == HID_GD_POINTER) { /* See comment in sixaxis_mapping, basically the L2 (and R2) * triggers are reported through GD Pointer. * In addition we ignore any analog button 'axes' and only * support digital buttons. */ switch (usage->usage_index) { case 8: /* L2 */ usage->hid = HID_GD_Z; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_ABS, usage->hid & 0xf); return 1; } else if ((usage->hid & HID_USAGE_PAGE) == HID_UP_GENDESK) { unsigned int abs = usage->hid & HID_USAGE; if (abs >= ARRAY_SIZE(navigation_absmap)) return -1; abs = navigation_absmap[abs]; hid_map_usage_clear(hi, usage, bit, max, EV_ABS, abs); return 1; } return -1; } static int sixaxis_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { unsigned int key = usage->hid & HID_USAGE; if (key >= ARRAY_SIZE(sixaxis_keymap)) return -1; key = sixaxis_keymap[key]; hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } else if (usage->hid == HID_GD_POINTER) { /* The DS3 provides analog values for most buttons and even * for HAT axes through GD Pointer. L2 and R2 are reported * among these as well instead of as GD Z / RZ. Remap L2 * and R2 and ignore other analog 'button axes' as there is * no good way for reporting them. 
*/ switch (usage->usage_index) { case 8: /* L2 */ usage->hid = HID_GD_Z; break; case 9: /* R2 */ usage->hid = HID_GD_RZ; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_ABS, usage->hid & 0xf); return 1; } else if ((usage->hid & HID_USAGE_PAGE) == HID_UP_GENDESK) { unsigned int abs = usage->hid & HID_USAGE; if (abs >= ARRAY_SIZE(sixaxis_absmap)) return -1; abs = sixaxis_absmap[abs]; hid_map_usage_clear(hi, usage, bit, max, EV_ABS, abs); return 1; } return -1; } static const u8 *sony_report_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & (SINO_LITE_CONTROLLER | FUTUREMAX_DANCE_MAT)) return rdesc; /* * Some Sony RF receivers wrongly declare the mouse pointer as a * a constant non-data variable. */ if ((sc->quirks & VAIO_RDESC_CONSTANT) && *rsize >= 56 && /* usage page: generic desktop controls */ /* rdesc[0] == 0x05 && rdesc[1] == 0x01 && */ /* usage: mouse */ rdesc[2] == 0x09 && rdesc[3] == 0x02 && /* input (usage page for x,y axes): constant, variable, relative */ rdesc[54] == 0x81 && rdesc[55] == 0x07) { hid_info(hdev, "Fixing up Sony RF Receiver report descriptor\n"); /* input: data, variable, relative */ rdesc[55] = 0x06; } if (sc->quirks & MOTION_CONTROLLER) return motion_fixup(hdev, rdesc, rsize); if (sc->quirks & PS3REMOTE) return ps3remote_fixup(hdev, rdesc, rsize); /* * Some knock-off USB dongles incorrectly report their button count * as 13 instead of 16 causing three non-functional buttons. */ if ((sc->quirks & SIXAXIS_CONTROLLER_USB) && *rsize >= 45 && /* Report Count (13) */ rdesc[23] == 0x95 && rdesc[24] == 0x0D && /* Usage Maximum (13) */ rdesc[37] == 0x29 && rdesc[38] == 0x0D && /* Report Count (3) */ rdesc[43] == 0x95 && rdesc[44] == 0x03) { hid_info(hdev, "Fixing up USB dongle report descriptor\n"); rdesc[24] = 0x10; rdesc[38] = 0x10; rdesc[44] = 0x00; } return rdesc; } static void sixaxis_parse_report(struct sony_sc *sc, u8 *rd, int size) { static const u8 sixaxis_battery_capacity[] = { 0, 1, 25, 50, 75, 100 }; unsigned long flags; int offset; u8 battery_capacity; int battery_status; /* * The sixaxis is charging if the battery value is 0xee * and it is fully charged if the value is 0xef. * It does not report the actual level while charging so it * is set to 100% while charging is in progress. */ offset = (sc->quirks & MOTION_CONTROLLER) ? 12 : 30; if (rd[offset] >= 0xee) { battery_capacity = 100; battery_status = (rd[offset] & 0x01) ? POWER_SUPPLY_STATUS_FULL : POWER_SUPPLY_STATUS_CHARGING; } else { u8 index = rd[offset] <= 5 ? rd[offset] : 5; battery_capacity = sixaxis_battery_capacity[index]; battery_status = POWER_SUPPLY_STATUS_DISCHARGING; } spin_lock_irqsave(&sc->lock, flags); sc->battery_capacity = battery_capacity; sc->battery_status = battery_status; spin_unlock_irqrestore(&sc->lock, flags); if (sc->quirks & SIXAXIS_CONTROLLER) { int val; offset = SIXAXIS_INPUT_REPORT_ACC_X_OFFSET; val = ((rd[offset+1] << 8) | rd[offset]) - 511; input_report_abs(sc->sensor_dev, ABS_X, val); /* Y and Z are swapped and inversed */ val = 511 - ((rd[offset+5] << 8) | rd[offset+4]); input_report_abs(sc->sensor_dev, ABS_Y, val); val = 511 - ((rd[offset+3] << 8) | rd[offset+2]); input_report_abs(sc->sensor_dev, ABS_Z, val); input_sync(sc->sensor_dev); } } static void nsg_mrxu_parse_report(struct sony_sc *sc, u8 *rd, int size) { int n, offset, relx, rely; u8 active; /* * The NSG-MRxU multi-touch trackpad data starts at offset 1 and * the touch-related data starts at offset 2. 
* For the first byte, bit 0 is set when touchpad button is pressed. * Bit 2 is set when a touch is active and the drag (Fn) key is pressed. * This drag key is mapped to BTN_LEFT. It is operational only when a * touch point is active. * Bit 4 is set when only the first touch point is active. * Bit 6 is set when only the second touch point is active. * Bits 5 and 7 are set when both touch points are active. * The next 3 bytes are two 12 bit X/Y coordinates for the first touch. * The following byte, offset 5, has the touch width and length. * Bits 0-4=X (width), bits 5-7=Y (length). * A signed relative X coordinate is at offset 6. * The bytes at offset 7-9 are the second touch X/Y coordinates. * Offset 10 has the second touch width and length. * Offset 11 has the relative Y coordinate. */ offset = 1; input_report_key(sc->touchpad, BTN_LEFT, rd[offset] & 0x0F); active = (rd[offset] >> 4); relx = (s8) rd[offset+5]; rely = ((s8) rd[offset+10]) * -1; offset++; for (n = 0; n < 2; n++) { u16 x, y; u8 contactx, contacty; x = rd[offset] | ((rd[offset+1] & 0x0F) << 8); y = ((rd[offset+1] & 0xF0) >> 4) | (rd[offset+2] << 4); input_mt_slot(sc->touchpad, n); input_mt_report_slot_state(sc->touchpad, MT_TOOL_FINGER, active & 0x03); if (active & 0x03) { contactx = rd[offset+3] & 0x0F; contacty = rd[offset+3] >> 4; input_report_abs(sc->touchpad, ABS_MT_TOUCH_MAJOR, max(contactx, contacty)); input_report_abs(sc->touchpad, ABS_MT_TOUCH_MINOR, min(contactx, contacty)); input_report_abs(sc->touchpad, ABS_MT_ORIENTATION, (bool) (contactx > contacty)); input_report_abs(sc->touchpad, ABS_MT_POSITION_X, x); input_report_abs(sc->touchpad, ABS_MT_POSITION_Y, NSG_MRXU_MAX_Y - y); /* * The relative coordinates belong to the first touch * point, when present, or to the second touch point * when the first is not active. */ if ((n == 0) || ((n == 1) && (active & 0x01))) { input_report_rel(sc->touchpad, REL_X, relx); input_report_rel(sc->touchpad, REL_Y, rely); } } offset += 5; active >>= 2; } input_mt_sync_frame(sc->touchpad); input_sync(sc->touchpad); } static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *rd, int size) { struct sony_sc *sc = hid_get_drvdata(hdev); /* * Sixaxis HID report has acclerometers/gyro with MSByte first, this * has to be BYTE_SWAPPED before passing up to joystick interface */ if ((sc->quirks & SIXAXIS_CONTROLLER) && rd[0] == 0x01 && size == 49) { /* * When connected via Bluetooth the Sixaxis occasionally sends * a report with the second byte 0xff and the rest zeroed. * * This report does not reflect the actual state of the * controller must be ignored to avoid generating false input * events. 
*/ if (rd[1] == 0xff) return -EINVAL; swap(rd[41], rd[42]); swap(rd[43], rd[44]); swap(rd[45], rd[46]); swap(rd[47], rd[48]); sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & MOTION_CONTROLLER_BT) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02) { nsg_mrxu_parse_report(sc, rd, size); return 1; } if (sc->defer_initialization) { sc->defer_initialization = 0; sony_schedule_work(sc, SONY_WORKER_STATE); } return 0; } static int sony_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & BUZZ_CONTROLLER) { unsigned int key = usage->hid & HID_USAGE; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_BUTTON) return -1; switch (usage->collection_index) { case 1: if (key >= ARRAY_SIZE(buzz_keymap)) return -1; key = buzz_keymap[key]; if (!key) return -1; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } if (sc->quirks & PS3REMOTE) return ps3remote_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & NAVIGATION_CONTROLLER) return navigation_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & SIXAXIS_CONTROLLER) return sixaxis_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & GH_GUITAR_CONTROLLER) return guitar_mapping(hdev, hi, field, usage, bit, max); /* Let hid-core decide for the others */ return 0; } static int sony_register_touchpad(struct sony_sc *sc, int touch_count, int w, int h, int touch_major, int touch_minor, int orientation) { size_t name_sz; char *name; int ret; sc->touchpad = devm_input_allocate_device(&sc->hdev->dev); if (!sc->touchpad) return -ENOMEM; input_set_drvdata(sc->touchpad, sc); sc->touchpad->dev.parent = &sc->hdev->dev; sc->touchpad->phys = sc->hdev->phys; sc->touchpad->uniq = sc->hdev->uniq; sc->touchpad->id.bustype = sc->hdev->bus; sc->touchpad->id.vendor = sc->hdev->vendor; sc->touchpad->id.product = sc->hdev->product; sc->touchpad->id.version = sc->hdev->version; /* This suffix was originally apended when hid-sony also * supported DS4 devices. The DS4 was implemented using multiple * evdev nodes and hence had the need to separete them out using * a suffix. Other devices which were added later like Sony TV remotes * inhirited this suffix. */ name_sz = strlen(sc->hdev->name) + sizeof(TOUCHPAD_SUFFIX); name = devm_kzalloc(&sc->hdev->dev, name_sz, GFP_KERNEL); if (!name) return -ENOMEM; snprintf(name, name_sz, "%s" TOUCHPAD_SUFFIX, sc->hdev->name); sc->touchpad->name = name; /* We map the button underneath the touchpad to BTN_LEFT. 
*/ __set_bit(EV_KEY, sc->touchpad->evbit); __set_bit(BTN_LEFT, sc->touchpad->keybit); __set_bit(INPUT_PROP_BUTTONPAD, sc->touchpad->propbit); input_set_abs_params(sc->touchpad, ABS_MT_POSITION_X, 0, w, 0, 0); input_set_abs_params(sc->touchpad, ABS_MT_POSITION_Y, 0, h, 0, 0); if (touch_major > 0) { input_set_abs_params(sc->touchpad, ABS_MT_TOUCH_MAJOR, 0, touch_major, 0, 0); if (touch_minor > 0) input_set_abs_params(sc->touchpad, ABS_MT_TOUCH_MINOR, 0, touch_minor, 0, 0); if (orientation > 0) input_set_abs_params(sc->touchpad, ABS_MT_ORIENTATION, 0, orientation, 0, 0); } if (sc->quirks & NSG_MRXU_REMOTE) { __set_bit(EV_REL, sc->touchpad->evbit); } ret = input_mt_init_slots(sc->touchpad, touch_count, INPUT_MT_POINTER); if (ret < 0) return ret; ret = input_register_device(sc->touchpad); if (ret < 0) return ret; return 0; } static int sony_register_sensors(struct sony_sc *sc) { size_t name_sz; char *name; int ret; sc->sensor_dev = devm_input_allocate_device(&sc->hdev->dev); if (!sc->sensor_dev) return -ENOMEM; input_set_drvdata(sc->sensor_dev, sc); sc->sensor_dev->dev.parent = &sc->hdev->dev; sc->sensor_dev->phys = sc->hdev->phys; sc->sensor_dev->uniq = sc->hdev->uniq; sc->sensor_dev->id.bustype = sc->hdev->bus; sc->sensor_dev->id.vendor = sc->hdev->vendor; sc->sensor_dev->id.product = sc->hdev->product; sc->sensor_dev->id.version = sc->hdev->version; /* Append a suffix to the controller name as there are various * DS4 compatible non-Sony devices with different names. */ name_sz = strlen(sc->hdev->name) + sizeof(SENSOR_SUFFIX); name = devm_kzalloc(&sc->hdev->dev, name_sz, GFP_KERNEL); if (!name) return -ENOMEM; snprintf(name, name_sz, "%s" SENSOR_SUFFIX, sc->hdev->name); sc->sensor_dev->name = name; if (sc->quirks & SIXAXIS_CONTROLLER) { /* For the DS3 we only support the accelerometer, which works * quite well even without calibration. The device also has * a 1-axis gyro, but it is very difficult to manage from within * the driver even to get data, the sensor is inaccurate and * the behavior is very different between hardware revisions. */ input_set_abs_params(sc->sensor_dev, ABS_X, -512, 511, 4, 0); input_set_abs_params(sc->sensor_dev, ABS_Y, -512, 511, 4, 0); input_set_abs_params(sc->sensor_dev, ABS_Z, -512, 511, 4, 0); input_abs_set_res(sc->sensor_dev, ABS_X, SIXAXIS_ACC_RES_PER_G); input_abs_set_res(sc->sensor_dev, ABS_Y, SIXAXIS_ACC_RES_PER_G); input_abs_set_res(sc->sensor_dev, ABS_Z, SIXAXIS_ACC_RES_PER_G); } __set_bit(INPUT_PROP_ACCELEROMETER, sc->sensor_dev->propbit); ret = input_register_device(sc->sensor_dev); if (ret < 0) return ret; return 0; } /* * Sending HID_REQ_GET_REPORT changes the operation mode of the ps3 controller * to "operational". Without this, the ps3 controller will not report any * events. */ static int sixaxis_set_operational_usb(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); const int buf_size = max(SIXAXIS_REPORT_0xF2_SIZE, SIXAXIS_REPORT_0xF5_SIZE); u8 *buf; int ret; buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { hid_err(hdev, "can't set operational mode: step 1\n"); goto out; } /* * Some compatible controllers like the Speedlink Strike FX and * Gasia need another query plus an USB interrupt to get operational. 
*/ ret = hid_hw_raw_request(hdev, 0xf5, buf, SIXAXIS_REPORT_0xF5_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { hid_err(hdev, "can't set operational mode: step 2\n"); goto out; } /* * But the USB interrupt would cause SHANWAN controllers to * start rumbling non-stop, so skip step 3 for these controllers. */ if (sc->quirks & SHANWAN_GAMEPAD) goto out; ret = hid_hw_output_report(hdev, buf, 1); if (ret < 0) { hid_info(hdev, "can't set operational mode: step 3, ignoring\n"); ret = 0; } out: kfree(buf); return ret; } static int sixaxis_set_operational_bt(struct hid_device *hdev) { static const u8 report[] = { 0xf4, 0x42, 0x03, 0x00, 0x00 }; u8 *buf; int ret; buf = kmemdup(report, sizeof(report), GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(report), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(buf); return ret; } static void sixaxis_set_leds_from_id(struct sony_sc *sc) { static const u8 sixaxis_leds[10][4] = { { 0x01, 0x00, 0x00, 0x00 }, { 0x00, 0x01, 0x00, 0x00 }, { 0x00, 0x00, 0x01, 0x00 }, { 0x00, 0x00, 0x00, 0x01 }, { 0x01, 0x00, 0x00, 0x01 }, { 0x00, 0x01, 0x00, 0x01 }, { 0x00, 0x00, 0x01, 0x01 }, { 0x01, 0x00, 0x01, 0x01 }, { 0x00, 0x01, 0x01, 0x01 }, { 0x01, 0x01, 0x01, 0x01 } }; int id = sc->device_id; BUILD_BUG_ON(MAX_LEDS < ARRAY_SIZE(sixaxis_leds[0])); if (id < 0) return; id %= 10; memcpy(sc->led_state, sixaxis_leds[id], sizeof(sixaxis_leds[id])); } static void buzz_set_leds(struct sony_sc *sc) { struct hid_device *hdev = sc->hdev; struct list_head *report_list = &hdev->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); s32 *value = report->field[0]->value; BUILD_BUG_ON(MAX_LEDS < 4); value[0] = 0x00; value[1] = sc->led_state[0] ? 0xff : 0x00; value[2] = sc->led_state[1] ? 0xff : 0x00; value[3] = sc->led_state[2] ? 0xff : 0x00; value[4] = sc->led_state[3] ? 0xff : 0x00; value[5] = 0x00; value[6] = 0x00; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); } static void sony_set_leds(struct sony_sc *sc) { if (!(sc->quirks & BUZZ_CONTROLLER)) sony_schedule_work(sc, SONY_WORKER_STATE); else buzz_set_leds(sc); } static void sony_led_set_brightness(struct led_classdev *led, enum led_brightness value) { struct device *dev = led->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct sony_sc *drv_data; int n; int force_update; drv_data = hid_get_drvdata(hdev); if (!drv_data) { hid_err(hdev, "No device data\n"); return; } /* * The Sixaxis on USB will override any LED settings sent to it * and keep flashing all of the LEDs until the PS button is pressed. * Updates, even if redundant, must be always be sent to the * controller to avoid having to toggle the state of an LED just to * stop the flashing later on. 
*/ force_update = !!(drv_data->quirks & SIXAXIS_CONTROLLER_USB); for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n] && (force_update || (value != drv_data->led_state[n] || drv_data->led_delay_on[n] || drv_data->led_delay_off[n]))) { drv_data->led_state[n] = value; /* Setting the brightness stops the blinking */ drv_data->led_delay_on[n] = 0; drv_data->led_delay_off[n] = 0; sony_set_leds(drv_data); break; } } } static enum led_brightness sony_led_get_brightness(struct led_classdev *led) { struct device *dev = led->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct sony_sc *drv_data; int n; drv_data = hid_get_drvdata(hdev); if (!drv_data) { hid_err(hdev, "No device data\n"); return LED_OFF; } for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n]) return drv_data->led_state[n]; } return LED_OFF; } static int sony_led_blink_set(struct led_classdev *led, unsigned long *delay_on, unsigned long *delay_off) { struct device *dev = led->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct sony_sc *drv_data = hid_get_drvdata(hdev); int n; u8 new_on, new_off; if (!drv_data) { hid_err(hdev, "No device data\n"); return -EINVAL; } /* Max delay is 255 deciseconds or 2550 milliseconds */ if (*delay_on > 2550) *delay_on = 2550; if (*delay_off > 2550) *delay_off = 2550; /* Blink at 1 Hz if both values are zero */ if (!*delay_on && !*delay_off) *delay_on = *delay_off = 500; new_on = *delay_on / 10; new_off = *delay_off / 10; for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n]) break; } /* This LED is not registered on this device */ if (n >= drv_data->led_count) return -EINVAL; /* Don't schedule work if the values didn't change */ if (new_on != drv_data->led_delay_on[n] || new_off != drv_data->led_delay_off[n]) { drv_data->led_delay_on[n] = new_on; drv_data->led_delay_off[n] = new_off; sony_schedule_work(drv_data, SONY_WORKER_STATE); } return 0; } static int sony_leds_init(struct sony_sc *sc) { struct hid_device *hdev = sc->hdev; int n, ret = 0; int use_color_names; struct led_classdev *led; size_t name_sz; char *name; size_t name_len; const char *name_fmt; static const char * const color_name_str[] = { "red", "green", "blue", "global" }; u8 max_brightness[MAX_LEDS] = { [0 ... (MAX_LEDS - 1)] = 1 }; u8 use_hw_blink[MAX_LEDS] = { 0 }; BUG_ON(!(sc->quirks & SONY_LED_SUPPORT)); if (sc->quirks & BUZZ_CONTROLLER) { sc->led_count = 4; use_color_names = 0; name_len = strlen("::buzz#"); name_fmt = "%s::buzz%d"; /* Validate expected report characteristics. */ if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 7)) return -ENODEV; } else if (sc->quirks & MOTION_CONTROLLER) { sc->led_count = 3; memset(max_brightness, 255, 3); use_color_names = 1; name_len = 0; name_fmt = "%s:%s"; } else if (sc->quirks & NAVIGATION_CONTROLLER) { static const u8 navigation_leds[4] = {0x01, 0x00, 0x00, 0x00}; memcpy(sc->led_state, navigation_leds, sizeof(navigation_leds)); sc->led_count = 1; memset(use_hw_blink, 1, 4); use_color_names = 0; name_len = strlen("::sony#"); name_fmt = "%s::sony%d"; } else { sixaxis_set_leds_from_id(sc); sc->led_count = 4; memset(use_hw_blink, 1, 4); use_color_names = 0; name_len = strlen("::sony#"); name_fmt = "%s::sony%d"; } /* * Clear LEDs as we have no way of reading their initial state. 
This is * only relevant if the driver is loaded after somebody actively set the * LEDs to on */ sony_set_leds(sc); name_sz = strlen(dev_name(&hdev->dev)) + name_len + 1; for (n = 0; n < sc->led_count; n++) { if (use_color_names) name_sz = strlen(dev_name(&hdev->dev)) + strlen(color_name_str[n]) + 2; led = devm_kzalloc(&hdev->dev, sizeof(struct led_classdev) + name_sz, GFP_KERNEL); if (!led) { hid_err(hdev, "Couldn't allocate memory for LED %d\n", n); return -ENOMEM; } name = (void *)(&led[1]); if (use_color_names) snprintf(name, name_sz, name_fmt, dev_name(&hdev->dev), color_name_str[n]); else snprintf(name, name_sz, name_fmt, dev_name(&hdev->dev), n + 1); led->name = name; led->brightness = sc->led_state[n]; led->max_brightness = max_brightness[n]; led->flags = LED_CORE_SUSPENDRESUME; led->brightness_get = sony_led_get_brightness; led->brightness_set = sony_led_set_brightness; if (use_hw_blink[n]) led->blink_set = sony_led_blink_set; sc->leds[n] = led; ret = devm_led_classdev_register(&hdev->dev, led); if (ret) { hid_err(hdev, "Failed to register LED %d\n", n); return ret; } } return 0; } static void sixaxis_send_output_report(struct sony_sc *sc) { static const union sixaxis_output_report_01 default_report = { .buf = { 0x01, 0x01, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00 } }; struct sixaxis_output_report *report = (struct sixaxis_output_report *)sc->output_report_dmabuf; int n; /* Initialize the report with default values */ memcpy(report, &default_report, sizeof(struct sixaxis_output_report)); #ifdef CONFIG_SONY_FF report->rumble.right_motor_on = sc->right ? 1 : 0; report->rumble.left_motor_force = sc->left; #endif report->leds_bitmap |= sc->led_state[0] << 1; report->leds_bitmap |= sc->led_state[1] << 2; report->leds_bitmap |= sc->led_state[2] << 3; report->leds_bitmap |= sc->led_state[3] << 4; /* Set flag for all leds off, required for 3rd party INTEC controller */ if ((report->leds_bitmap & 0x1E) == 0) report->leds_bitmap |= 0x20; /* * The LEDs in the report are indexed in reverse order to their * corresponding light on the controller. * Index 0 = LED 4, index 1 = LED 3, etc... * * In the case of both delay values being zero (blinking disabled) the * default report values should be used or the controller LED will be * always off. 
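* The duty_on/duty_off values copied below are in deciseconds, matching the millisecond-to-decisecond conversion done in sony_led_blink_set().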
*/ for (n = 0; n < 4; n++) { if (sc->led_delay_on[n] || sc->led_delay_off[n]) { report->led[3 - n].duty_off = sc->led_delay_off[n]; report->led[3 - n].duty_on = sc->led_delay_on[n]; } } /* SHANWAN controllers require output reports via intr channel */ if (sc->quirks & SHANWAN_GAMEPAD) hid_hw_output_report(sc->hdev, (u8 *)report, sizeof(struct sixaxis_output_report)); else hid_hw_raw_request(sc->hdev, report->report_id, (u8 *)report, sizeof(struct sixaxis_output_report), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); } static void motion_send_output_report(struct sony_sc *sc) { struct hid_device *hdev = sc->hdev; struct motion_output_report_02 *report = (struct motion_output_report_02 *)sc->output_report_dmabuf; memset(report, 0, MOTION_REPORT_0x02_SIZE); report->type = 0x02; /* set leds */ report->r = sc->led_state[0]; report->g = sc->led_state[1]; report->b = sc->led_state[2]; #ifdef CONFIG_SONY_FF report->rumble = max(sc->right, sc->left); #endif hid_hw_output_report(hdev, (u8 *)report, MOTION_REPORT_0x02_SIZE); } #ifdef CONFIG_SONY_FF static inline void sony_send_output_report(struct sony_sc *sc) { if (sc->send_output_report) sc->send_output_report(sc); } #endif static void sony_state_worker(struct work_struct *work) { struct sony_sc *sc = container_of(work, struct sony_sc, state_worker); sc->send_output_report(sc); } static int sony_allocate_output_report(struct sony_sc *sc) { if ((sc->quirks & SIXAXIS_CONTROLLER) || (sc->quirks & NAVIGATION_CONTROLLER)) sc->output_report_dmabuf = devm_kmalloc(&sc->hdev->dev, sizeof(union sixaxis_output_report_01), GFP_KERNEL); else if (sc->quirks & MOTION_CONTROLLER) sc->output_report_dmabuf = devm_kmalloc(&sc->hdev->dev, MOTION_REPORT_0x02_SIZE, GFP_KERNEL); else return 0; if (!sc->output_report_dmabuf) return -ENOMEM; return 0; } #ifdef CONFIG_SONY_FF static int sony_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct sony_sc *sc = hid_get_drvdata(hid); if (effect->type != FF_RUMBLE) return 0; sc->left = effect->u.rumble.strong_magnitude / 256; sc->right = effect->u.rumble.weak_magnitude / 256; sony_schedule_work(sc, SONY_WORKER_STATE); return 0; } static int sony_init_ff(struct sony_sc *sc) { struct hid_input *hidinput; struct input_dev *input_dev; if (list_empty(&sc->hdev->inputs)) { hid_err(sc->hdev, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list); input_dev = hidinput->input; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, sony_play_effect); } #else static int sony_init_ff(struct sony_sc *sc) { return 0; } #endif static int sony_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct sony_sc *sc = power_supply_get_drvdata(psy); unsigned long flags; int ret = 0; u8 battery_capacity; int battery_status; spin_lock_irqsave(&sc->lock, flags); battery_capacity = sc->battery_capacity; battery_status = sc->battery_status; spin_unlock_irqrestore(&sc->lock, flags); switch (psp) { case POWER_SUPPLY_PROP_PRESENT: val->intval = 1; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery_capacity; break; case POWER_SUPPLY_PROP_STATUS: val->intval = battery_status; break; default: ret = -EINVAL; break; } return ret; } static int sony_battery_probe(struct sony_sc *sc, int append_dev_id) { const char *battery_str_fmt = append_dev_id ? 
"sony_controller_battery_%pMR_%i" : "sony_controller_battery_%pMR"; struct power_supply_config psy_cfg = { .drv_data = sc, }; struct hid_device *hdev = sc->hdev; int ret; /* * Set the default battery level to 100% to avoid low battery warnings * if the battery is polled before the first device report is received. */ sc->battery_capacity = 100; sc->battery_desc.properties = sony_battery_props; sc->battery_desc.num_properties = ARRAY_SIZE(sony_battery_props); sc->battery_desc.get_property = sony_battery_get_property; sc->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; sc->battery_desc.use_for_apm = 0; sc->battery_desc.name = devm_kasprintf(&hdev->dev, GFP_KERNEL, battery_str_fmt, sc->mac_address, sc->device_id); if (!sc->battery_desc.name) return -ENOMEM; sc->battery = devm_power_supply_register(&hdev->dev, &sc->battery_desc, &psy_cfg); if (IS_ERR(sc->battery)) { ret = PTR_ERR(sc->battery); hid_err(hdev, "Unable to register battery device\n"); return ret; } power_supply_powers(sc->battery, &hdev->dev); return 0; } /* * If a controller is plugged in via USB while already connected via Bluetooth * it will show up as two devices. A global list of connected controllers and * their MAC addresses is maintained to ensure that a device is only connected * once. * * Some USB-only devices masquerade as Sixaxis controllers and all have the * same dummy Bluetooth address, so a comparison of the connection type is * required. Devices are only rejected in the case where two devices have * matching Bluetooth addresses on different bus types. */ static inline int sony_compare_connection_type(struct sony_sc *sc0, struct sony_sc *sc1) { const int sc0_not_bt = !(sc0->quirks & SONY_BT_DEVICE); const int sc1_not_bt = !(sc1->quirks & SONY_BT_DEVICE); return sc0_not_bt == sc1_not_bt; } static int sony_check_add_dev_list(struct sony_sc *sc) { struct sony_sc *entry; unsigned long flags; int ret; spin_lock_irqsave(&sony_dev_list_lock, flags); list_for_each_entry(entry, &sony_device_list, list_node) { ret = memcmp(sc->mac_address, entry->mac_address, sizeof(sc->mac_address)); if (!ret) { if (sony_compare_connection_type(sc, entry)) { ret = 1; } else { ret = -EEXIST; hid_info(sc->hdev, "controller with MAC address %pMR already connected\n", sc->mac_address); } goto unlock; } } ret = 0; list_add(&(sc->list_node), &sony_device_list); unlock: spin_unlock_irqrestore(&sony_dev_list_lock, flags); return ret; } static void sony_remove_dev_list(struct sony_sc *sc) { unsigned long flags; if (sc->list_node.next) { spin_lock_irqsave(&sony_dev_list_lock, flags); list_del(&(sc->list_node)); spin_unlock_irqrestore(&sony_dev_list_lock, flags); } } static int sony_get_bt_devaddr(struct sony_sc *sc) { int ret; /* HIDP stores the device MAC address as a string in the uniq field. */ ret = strlen(sc->hdev->uniq); if (ret != 17) return -EINVAL; ret = sscanf(sc->hdev->uniq, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", &sc->mac_address[5], &sc->mac_address[4], &sc->mac_address[3], &sc->mac_address[2], &sc->mac_address[1], &sc->mac_address[0]); if (ret != 6) return -EINVAL; return 0; } static int sony_check_add(struct sony_sc *sc) { u8 *buf = NULL; int n, ret; if ((sc->quirks & MOTION_CONTROLLER_BT) || (sc->quirks & NAVIGATION_CONTROLLER_BT) || (sc->quirks & SIXAXIS_CONTROLLER_BT)) { /* * sony_get_bt_devaddr() attempts to parse the Bluetooth MAC * address from the uniq string where HIDP stores it. * As uniq cannot be guaranteed to be a MAC address in all cases * a failure of this function should not prevent the connection. 
*/ if (sony_get_bt_devaddr(sc) < 0) { hid_warn(sc->hdev, "UNIQ does not contain a MAC address; duplicate check skipped\n"); return 0; } } else if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { buf = kmalloc(SIXAXIS_REPORT_0xF2_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The MAC address of a Sixaxis controller connected via USB can * be retrieved with feature report 0xf2. The address begins at * offset 4. */ ret = hid_hw_raw_request(sc->hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != SIXAXIS_REPORT_0xF2_SIZE) { hid_err(sc->hdev, "failed to retrieve feature report 0xf2 with the Sixaxis MAC address\n"); ret = ret < 0 ? ret : -EINVAL; goto out_free; } /* * The Sixaxis device MAC in the report is big-endian and must * be byte-swapped. */ for (n = 0; n < 6; n++) sc->mac_address[5-n] = buf[4+n]; snprintf(sc->hdev->uniq, sizeof(sc->hdev->uniq), "%pMR", sc->mac_address); } else { return 0; } ret = sony_check_add_dev_list(sc); out_free: kfree(buf); return ret; } static int sony_set_device_id(struct sony_sc *sc) { int ret; /* * Only Sixaxis controllers get an id. * All others are set to -1. */ if (sc->quirks & SIXAXIS_CONTROLLER) { ret = ida_alloc(&sony_device_id_allocator, GFP_KERNEL); if (ret < 0) { sc->device_id = -1; return ret; } sc->device_id = ret; } else { sc->device_id = -1; } return 0; } static void sony_release_device_id(struct sony_sc *sc) { if (sc->device_id >= 0) { ida_free(&sony_device_id_allocator, sc->device_id); sc->device_id = -1; } } static inline void sony_init_output_report(struct sony_sc *sc, void (*send_output_report)(struct sony_sc *)) { sc->send_output_report = send_output_report; if (!sc->state_worker_initialized) INIT_WORK(&sc->state_worker, sony_state_worker); sc->state_worker_initialized = 1; } static inline void sony_cancel_work_sync(struct sony_sc *sc) { unsigned long flags; if (sc->state_worker_initialized) { spin_lock_irqsave(&sc->lock, flags); sc->state_worker_initialized = 0; spin_unlock_irqrestore(&sc->lock, flags); cancel_work_sync(&sc->state_worker); } } static int sony_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct sony_sc *sc = hid_get_drvdata(hdev); int append_dev_id; int ret; ret = sony_set_device_id(sc); if (ret < 0) { hid_err(hdev, "failed to allocate the device id\n"); goto err_stop; } ret = append_dev_id = sony_check_add(sc); if (ret < 0) goto err_stop; ret = sony_allocate_output_report(sc); if (ret < 0) { hid_err(hdev, "failed to allocate the output report buffer\n"); goto err_stop; } if (sc->quirks & NAVIGATION_CONTROLLER_USB) { /* * The Sony Sixaxis does not handle HID Output Reports on the * Interrupt EP like it could, so we need to force HID Output * Reports to use HID_REQ_SET_REPORT on the Control EP. * * There is also another issue about HID Output Reports via USB, * the Sixaxis does not want the report_id as part of the data * packet, so we have to discard buf[0] when sending the actual * control message, even for numbered reports, humpf! * * Additionally, the Sixaxis on USB isn't properly initialized * until the PS logo button is pressed and as such won't retain * any state set by an output report, so the initial * configuration report is deferred until the first input * report arrives. 
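* The defer_initialization flag set below implements that deferral: the initial output report is held back until the first input report comes in.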
*/ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; hdev->quirks |= HID_QUIRK_SKIP_OUTPUT_REPORT_ID; sc->defer_initialization = 1; ret = sixaxis_set_operational_usb(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & NAVIGATION_CONTROLLER_BT) { /* * The Navigation controller wants output reports sent on the ctrl * endpoint when connected via Bluetooth. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; ret = sixaxis_set_operational_bt(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & SIXAXIS_CONTROLLER_USB) { /* * The Sony Sixaxis does not handle HID Output Reports on the * Interrupt EP and the device only becomes active when the * PS button is pressed. See comment for Navigation controller * above for more details. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; hdev->quirks |= HID_QUIRK_SKIP_OUTPUT_REPORT_ID; sc->defer_initialization = 1; ret = sixaxis_set_operational_usb(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } ret = sony_register_sensors(sc); if (ret) { hid_err(sc->hdev, "Unable to initialize motion sensors: %d\n", ret); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & SIXAXIS_CONTROLLER_BT) { /* * The Sixaxis wants output reports sent on the ctrl endpoint * when connected via Bluetooth. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; ret = sixaxis_set_operational_bt(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } ret = sony_register_sensors(sc); if (ret) { hid_err(sc->hdev, "Unable to initialize motion sensors: %d\n", ret); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & NSG_MRXU_REMOTE) { /* * The NSG-MRxU touchpad supports 2 touches and has a * resolution of 1667x1868 */ ret = sony_register_touchpad(sc, 2, NSG_MRXU_MAX_X, NSG_MRXU_MAX_Y, 15, 15, 1); if (ret) { hid_err(sc->hdev, "Unable to initialize multi-touch slots: %d\n", ret); goto err_stop; } } else if (sc->quirks & MOTION_CONTROLLER) { sony_init_output_report(sc, motion_send_output_report); } if (sc->quirks & SONY_LED_SUPPORT) { ret = sony_leds_init(sc); if (ret < 0) goto err_stop; } if (sc->quirks & SONY_BATTERY_SUPPORT) { ret = sony_battery_probe(sc, append_dev_id); if (ret < 0) goto err_stop; /* Open the device to receive reports with battery info */ ret = hid_hw_open(hdev); if (ret < 0) { hid_err(hdev, "hw open failed\n"); goto err_stop; } } if (sc->quirks & SONY_FF_SUPPORT) { ret = sony_init_ff(sc); if (ret < 0) goto err_close; } return 0; err_close: hid_hw_close(hdev); err_stop: sony_cancel_work_sync(sc); sony_remove_dev_list(sc); sony_release_device_id(sc); return ret; } static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; unsigned long quirks = id->driver_data; struct sony_sc *sc; struct usb_device *usbdev; unsigned int connect_mask = HID_CONNECT_DEFAULT; if (!strcmp(hdev->name, "FutureMax Dance Mat")) quirks |= FUTUREMAX_DANCE_MAT; if (!strcmp(hdev->name, "SHANWAN PS3 GamePad") || !strcmp(hdev->name, "ShanWan PS(R) Ga`epad")) quirks |= SHANWAN_GAMEPAD; sc = devm_kzalloc(&hdev->dev, sizeof(*sc), GFP_KERNEL); if (sc == NULL) { hid_err(hdev, "can't alloc sony 
descriptor\n"); return -ENOMEM; } spin_lock_init(&sc->lock); sc->quirks = quirks; hid_set_drvdata(hdev, sc); sc->hdev = hdev; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } if (sc->quirks & VAIO_RDESC_CONSTANT) connect_mask |= HID_CONNECT_HIDDEV_FORCE; else if (sc->quirks & SIXAXIS_CONTROLLER) connect_mask |= HID_CONNECT_HIDDEV_FORCE; /* Patch the hw version on DS3 compatible devices, so applications can * distinguish between the default HID mappings and the mappings defined * by the Linux game controller spec. This is important for the SDL2 * library, which has a game controller database, which uses device ids * in combination with version as a key. */ if (sc->quirks & SIXAXIS_CONTROLLER) hdev->version |= 0x8000; ret = hid_hw_start(hdev, connect_mask); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } /* sony_input_configured can fail, but this doesn't result * in hid_hw_start failures (intended). Check whether * the HID layer claimed the device else fail. * We don't know the actual reason for the failure, most * likely it is due to EEXIST in case of double connection * of USB and Bluetooth, but could have been due to ENOMEM * or other reasons as well. */ if (!(hdev->claimed & HID_CLAIMED_INPUT)) { hid_err(hdev, "failed to claim input\n"); ret = -ENODEV; goto err; } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { if (!hid_is_usb(hdev)) { ret = -EINVAL; goto err; } usbdev = to_usb_device(sc->hdev->dev.parent->parent); sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); if (!sc->ghl_urb) { ret = -ENOMEM; goto err; } if (sc->quirks & GHL_GUITAR_PS3WIIU) ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data, ARRAY_SIZE(ghl_ps3wiiu_magic_data)); else if (sc->quirks & GHL_GUITAR_PS4) ret = ghl_init_urb(sc, usbdev, ghl_ps4_magic_data, ARRAY_SIZE(ghl_ps4_magic_data)); if (ret) { hid_err(hdev, "error preparing URB\n"); goto err; } timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0); mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ); } return ret; err: usb_free_urb(sc->ghl_urb); hid_hw_stop(hdev); return ret; } static void sony_remove(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { del_timer_sync(&sc->ghl_poke_timer); usb_free_urb(sc->ghl_urb); } hid_hw_close(hdev); sony_cancel_work_sync(sc); sony_remove_dev_list(sc); sony_release_device_id(sc); hid_hw_stop(hdev); } #ifdef CONFIG_PM static int sony_suspend(struct hid_device *hdev, pm_message_t message) { #ifdef CONFIG_SONY_FF /* On suspend stop any running force-feedback events */ if (SONY_FF_SUPPORT) { struct sony_sc *sc = hid_get_drvdata(hdev); sc->left = sc->right = 0; sony_send_output_report(sc); } #endif return 0; } static int sony_resume(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); /* * The Sixaxis and navigation controllers on USB need to be * reinitialized on resume or they won't behave properly. 
*/ if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { sixaxis_set_operational_usb(sc->hdev); sc->defer_initialization = 1; } return 0; } #endif static const struct hid_device_id sony_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_USB }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER), .driver_data = NAVIGATION_CONTROLLER_USB }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER), .driver_data = NAVIGATION_CONTROLLER_BT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER), .driver_data = MOTION_CONTROLLER_USB }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER), .driver_data = MOTION_CONTROLLER_BT }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_BT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE), .driver_data = VAIO_RDESC_CONSTANT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE), .driver_data = VAIO_RDESC_CONSTANT }, /* * Wired Buzz Controller. Reported as Sony Hub from its USB ID and as * Logitech joystick from the device descriptor. */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER), .driver_data = BUZZ_CONTROLLER }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER), .driver_data = BUZZ_CONTROLLER }, /* PS3 BD Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_BDREMOTE), .driver_data = PS3REMOTE }, /* Logitech Harmony Adapter for PS3 */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3), .driver_data = PS3REMOTE }, /* SMK-Link PS3 BD Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE), .driver_data = PS3REMOTE }, /* Nyko Core Controller for PS3 */ { HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_USB | SINO_LITE_CONTROLLER }, /* SMK-Link NSG-MR5U Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR5U_REMOTE), .driver_data = NSG_MR5U_REMOTE_BT }, /* SMK-Link NSG-MR7U Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR7U_REMOTE), .driver_data = NSG_MR7U_REMOTE_BT }, /* Guitar Hero Live PS3 and Wii U guitar dongles */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE), .driver_data = GHL_GUITAR_PS3WIIU | GH_GUITAR_CONTROLLER }, /* Guitar Hero PC Guitar Dongle */ { HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_GUITAR_DONGLE), .driver_data = GH_GUITAR_CONTROLLER }, /* Guitar Hero PS3 World Tour Guitar Dongle */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3_GUITAR_DONGLE), .driver_data = GH_GUITAR_CONTROLLER }, /* Guitar Hero Live PS4 guitar dongles */ { HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_PS4_GHLIVE_DONGLE), .driver_data = GHL_GUITAR_PS4 | GH_GUITAR_CONTROLLER }, { } }; MODULE_DEVICE_TABLE(hid, sony_devices); static struct hid_driver sony_driver = { .name = "sony", .id_table = sony_devices, .input_mapping = sony_mapping, .input_configured = sony_input_configured, .probe = sony_probe, .remove = sony_remove, .report_fixup = sony_report_fixup, .raw_event = sony_raw_event, #ifdef CONFIG_PM .suspend = sony_suspend, .resume = sony_resume, .reset_resume = sony_resume, 
#endif }; static int __init sony_init(void) { dbg_hid("Sony:%s\n", __func__); return hid_register_driver(&sony_driver); } static void __exit sony_exit(void) { dbg_hid("Sony:%s\n", __func__); hid_unregister_driver(&sony_driver); ida_destroy(&sony_device_id_allocator); } module_init(sony_init); module_exit(sony_exit); MODULE_DESCRIPTION("HID driver for Sony / PS2 / PS3 / PS4 BD devices"); MODULE_LICENSE("GPL");
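The rumble path above (sony_play_effect() scales the ff_effect strong/weak magnitudes down by 256 before the state worker sends them to the controller) can be exercised through the standard Linux force-feedback interface. The following is a minimal userspace sketch, not part of the driver; the /dev/input/event5 path is illustrative and must be replaced with the event node the controller actually registers.

/* Userspace sketch: upload and play an FF_RUMBLE effect on the controller's
 * input node. The device path below is an assumption, not a fixed name.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

int main(void)
{
	struct ff_effect effect;
	struct input_event play;
	int fd = open("/dev/input/event5", O_RDWR);	/* illustrative path */

	if (fd < 0)
		return 1;

	memset(&effect, 0, sizeof(effect));
	effect.type = FF_RUMBLE;
	effect.id = -1;					/* let the kernel assign an id */
	effect.u.rumble.strong_magnitude = 0xc000;	/* divided by 256 in sony_play_effect() */
	effect.u.rumble.weak_magnitude = 0x4000;
	effect.replay.length = 2000;			/* milliseconds */
	effect.replay.delay = 0;

	if (ioctl(fd, EVIOCSFF, &effect) < 0)		/* upload the effect */
		return 1;

	memset(&play, 0, sizeof(play));
	play.type = EV_FF;
	play.code = effect.id;				/* id assigned by the upload */
	play.value = 1;					/* start playback */
	if (write(fd, &play, sizeof(play)) != sizeof(play))
		return 1;

	sleep(3);					/* let the effect run */
	close(fd);
	return 0;
}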
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for NXP PN533 NFC Chip - USB transport layer * * Copyright (C) 2011 Instituto Nokia de Tecnologia * Copyright (C) 2012-2013 Tieto Poland */ #include <linux/device.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/nfc.h> #include <linux/netdevice.h> #include <net/nfc/nfc.h> #include "pn533.h" #define VERSION "0.1" #define PN533_VENDOR_ID 0x4CC #define PN533_PRODUCT_ID 0x2533 #define SCM_VENDOR_ID 0x4E6 #define SCL3711_PRODUCT_ID 0x5591 #define SONY_VENDOR_ID 0x054c #define PASORI_PRODUCT_ID 0x02e1 #define ACS_VENDOR_ID 0x072f #define ACR122U_PRODUCT_ID 0x2200 static const struct usb_device_id pn533_usb_table[] = { { USB_DEVICE(PN533_VENDOR_ID, PN533_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SCM_VENDOR_ID, SCL3711_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SONY_VENDOR_ID, PASORI_PRODUCT_ID), .driver_info =
PN533_DEVICE_PASORI }, { USB_DEVICE(ACS_VENDOR_ID, ACR122U_PRODUCT_ID), .driver_info = PN533_DEVICE_ACR122U }, { } }; MODULE_DEVICE_TABLE(usb, pn533_usb_table); struct pn533_usb_phy { struct usb_device *udev; struct usb_interface *interface; struct urb *out_urb; struct urb *in_urb; struct urb *ack_urb; u8 *ack_buffer; struct pn533 *priv; }; static void pn533_recv_response(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct sk_buff *skb = NULL; if (!urb->status) { skb = alloc_skb(urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&phy->udev->dev, "failed to alloc memory\n"); } else { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); } } pn533_recv_frame(phy->priv, skb, urb->status); } static int pn533_submit_urb_for_response(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_response; return usb_submit_urb(phy->in_urb, flags); } static void pn533_recv_ack(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct pn533 *priv = phy->priv; struct pn533_cmd *cmd = priv->cmd; struct pn533_std_frame *in_frame; int rc; cmd->status = urb->status; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); goto sched_wq; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); goto sched_wq; } in_frame = phy->in_urb->transfer_buffer; if (!pn533_rx_frame_is_ack(in_frame)) { nfc_err(&phy->udev->dev, "Received an invalid ack\n"); cmd->status = -EIO; goto sched_wq; } rc = pn533_submit_urb_for_response(phy, GFP_ATOMIC); if (rc) { nfc_err(&phy->udev->dev, "usb_submit_urb failed with result %d\n", rc); cmd->status = rc; goto sched_wq; } return; sched_wq: queue_work(priv->wq, &priv->cmd_complete_work); } static int pn533_submit_urb_for_ack(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_ack; return usb_submit_urb(phy->in_urb, flags); } static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; static const u8 ack[6] = {0x00, 0x00, 0xff, 0x00, 0xff, 0x00}; /* spec 7.1.1.3: Preamble, SoPC (2), ACK Code (2), Postamble */ if (!phy->ack_buffer) { phy->ack_buffer = kmemdup(ack, sizeof(ack), flags); if (!phy->ack_buffer) return -ENOMEM; } phy->ack_urb->transfer_buffer = phy->ack_buffer; phy->ack_urb->transfer_buffer_length = sizeof(ack); return usb_submit_urb(phy->ack_urb, flags); } struct pn533_out_arg { struct pn533_usb_phy *phy; struct completion done; }; static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; struct pn533_out_arg arg; void *cntx; int rc; if (phy->priv == NULL) phy->priv = dev; phy->out_urb->transfer_buffer = out->data; phy->out_urb->transfer_buffer_length = out->len; print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); arg.phy = phy; init_completion(&arg.done); cntx = phy->out_urb->context; phy->out_urb->context = &arg; rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; wait_for_completion(&arg.done); phy->out_urb->context = cntx; if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); if (rc) goto error; } else if (dev->protocol_type == PN533_PROTO_REQ_ACK_RESP) { /* request for ACK if that's the case */ rc = pn533_submit_urb_for_ack(phy, GFP_KERNEL); if (rc) goto error; } return 0; error: 
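/* Arming the response/ack URB failed: cancel the output URB as a precaution and propagate the error. */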
usb_unlink_urb(phy->out_urb); return rc; } static void pn533_usb_abort_cmd(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; /* ACR122U does not support any command which aborts the last * issued command (i.e. the ACK used by the standard PN533). Additionally, * it behaves strangely, sending broken or incorrect responses, * when we cancel the urb before the chip has sent its response. */ if (dev->device_type == PN533_DEVICE_ACR122U) return; /* An ack will cancel the last issued command */ pn533_usb_send_ack(dev, flags); /* cancel the urb request */ usb_kill_urb(phy->in_urb); } /* ACR122 specific structs and functions */ /* ACS ACR122 pn533 frame definitions */ #define PN533_ACR122_TX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_tx_frame) \ + 2) #define PN533_ACR122_TX_FRAME_TAIL_LEN 0 #define PN533_ACR122_RX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_rx_frame) \ + 2) #define PN533_ACR122_RX_FRAME_TAIL_LEN 2 #define PN533_ACR122_FRAME_MAX_PAYLOAD_LEN PN533_STD_FRAME_MAX_PAYLOAD_LEN /* CCID message types */ #define PN533_ACR122_PC_TO_RDR_ICCPOWERON 0x62 #define PN533_ACR122_PC_TO_RDR_ESCAPE 0x6B #define PN533_ACR122_RDR_TO_PC_ESCAPE 0x83 struct pn533_acr122_ccid_hdr { u8 type; u32 datalen; u8 slot; u8 seq; /* * 3 msg specific bytes or status, error and 1 specific * byte for response msg */ u8 params[3]; } __packed; struct pn533_acr122_apdu_hdr { u8 class; u8 ins; u8 p1; u8 p2; } __packed; struct pn533_acr122_tx_frame { struct pn533_acr122_ccid_hdr ccid; struct pn533_acr122_apdu_hdr apdu; u8 datalen; u8 data[]; /* pn533 frame: TFI ... */ } __packed; struct pn533_acr122_rx_frame { struct pn533_acr122_ccid_hdr ccid; u8 data[]; /* pn533 frame: TFI ... */ } __packed; static void pn533_acr122_tx_frame_init(void *_frame, u8 cmd_code) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.type = PN533_ACR122_PC_TO_RDR_ESCAPE; /* sizeof(apdu_hdr) + sizeof(datalen) */ frame->ccid.datalen = sizeof(frame->apdu) + 1; frame->ccid.slot = 0; frame->ccid.seq = 0; frame->ccid.params[0] = 0; frame->ccid.params[1] = 0; frame->ccid.params[2] = 0; frame->data[0] = PN533_STD_FRAME_DIR_OUT; frame->data[1] = cmd_code; frame->datalen = 2; /* data[0] + data[1] */ frame->apdu.class = 0xFF; frame->apdu.ins = 0; frame->apdu.p1 = 0; frame->apdu.p2 = 0; } static void pn533_acr122_tx_frame_finish(void *_frame) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.datalen += frame->datalen; } static void pn533_acr122_tx_update_payload_len(void *_frame, int len) { struct pn533_acr122_tx_frame *frame = _frame; frame->datalen += len; } static bool pn533_acr122_is_rx_frame_valid(void *_frame, struct pn533 *dev) { struct pn533_acr122_rx_frame *frame = _frame; if (frame->ccid.type != 0x83) return false; if (!frame->ccid.datalen) return false; if (frame->data[frame->ccid.datalen - 2] == 0x63) return false; return true; } static int pn533_acr122_rx_frame_size(void *frame) { struct pn533_acr122_rx_frame *f = frame; /* f->ccid.datalen already includes tail length */ return sizeof(struct pn533_acr122_rx_frame) + f->ccid.datalen; } static u8 pn533_acr122_get_cmd_code(void *frame) { struct pn533_acr122_rx_frame *f = frame; return PN533_FRAME_CMD(f); } static struct pn533_frame_ops pn533_acr122_frame_ops = { .tx_frame_init = pn533_acr122_tx_frame_init, .tx_frame_finish = pn533_acr122_tx_frame_finish, .tx_update_payload_len = pn533_acr122_tx_update_payload_len, .tx_header_len = PN533_ACR122_TX_FRAME_HEADER_LEN, .tx_tail_len = PN533_ACR122_TX_FRAME_TAIL_LEN, .rx_is_frame_valid = pn533_acr122_is_rx_frame_valid, .rx_header_len =
PN533_ACR122_RX_FRAME_HEADER_LEN, .rx_tail_len = PN533_ACR122_RX_FRAME_TAIL_LEN, .rx_frame_size = pn533_acr122_rx_frame_size, .max_payload_len = PN533_ACR122_FRAME_MAX_PAYLOAD_LEN, .get_cmd_code = pn533_acr122_get_cmd_code, }; struct pn533_acr122_poweron_rdr_arg { int rc; struct completion done; }; static void pn533_acr122_poweron_rdr_resp(struct urb *urb) { struct pn533_acr122_poweron_rdr_arg *arg = urb->context; print_hex_dump_debug("ACR122 RX: ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->transfer_buffer_length, false); arg->rc = urb->status; complete(&arg->done); } static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) { /* Power on the reader (CCID cmd) */ u8 cmd[10] = {PN533_ACR122_PC_TO_RDR_ICCPOWERON, 0, 0, 0, 0, 0, 0, 3, 0, 0}; char *buffer; int transferred; int rc; void *cntx; struct pn533_acr122_poweron_rdr_arg arg; buffer = kmemdup(cmd, sizeof(cmd), GFP_KERNEL); if (!buffer) return -ENOMEM; init_completion(&arg.done); cntx = phy->in_urb->context; /* backup context */ phy->in_urb->complete = pn533_acr122_poweron_rdr_resp; phy->in_urb->context = &arg; print_hex_dump_debug("ACR122 TX: ", DUMP_PREFIX_NONE, 16, 1, cmd, sizeof(cmd), false); rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd), &transferred, 5000); kfree(buffer); if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, "Reader power on cmd error %d\n", rc); return rc; } rc = usb_submit_urb(phy->in_urb, GFP_KERNEL); if (rc) { nfc_err(&phy->udev->dev, "Can't submit reader poweron cmd response %d\n", rc); return rc; } wait_for_completion(&arg.done); phy->in_urb->context = cntx; /* restore context */ return arg.rc; } static void pn533_out_complete(struct urb *urb) { struct pn533_out_arg *arg = urb->context; struct pn533_usb_phy *phy = arg->phy; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } complete(&arg->done); } static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } } static const struct pn533_phy_ops usb_phy_ops = { .send_frame = pn533_usb_send_frame, .send_ack = pn533_usb_send_ack, .abort_cmd = pn533_usb_abort_cmd, }; static int pn533_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct pn533 *priv; struct pn533_usb_phy *phy; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int in_endpoint = 0; int out_endpoint = 0; int rc = -ENOMEM; int i; u32 protocols; enum pn533_protocol_type protocol_type = PN533_PROTO_REQ_ACK_RESP; struct pn533_frame_ops *fops = NULL; unsigned char *in_buf; int in_buf_len = PN533_EXT_FRAME_HEADER_LEN + PN533_STD_FRAME_MAX_PAYLOAD_LEN + PN533_STD_FRAME_TAIL_LEN; phy = devm_kzalloc(&interface->dev, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; in_buf = kzalloc(in_buf_len, GFP_KERNEL); if (!in_buf) return -ENOMEM; phy->udev = usb_get_dev(interface_to_usbdev(interface)); phy->interface = interface; iface_desc = interface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (!in_endpoint &&
usb_endpoint_is_bulk_in(endpoint)) in_endpoint = endpoint->bEndpointAddress; if (!out_endpoint && usb_endpoint_is_bulk_out(endpoint)) out_endpoint = endpoint->bEndpointAddress; } if (!in_endpoint || !out_endpoint) { nfc_err(&interface->dev, "Could not find bulk-in or bulk-out endpoint\n"); rc = -ENODEV; goto error; } phy->in_urb = usb_alloc_urb(0, GFP_KERNEL); phy->out_urb = usb_alloc_urb(0, GFP_KERNEL); phy->ack_urb = usb_alloc_urb(0, GFP_KERNEL); if (!phy->in_urb || !phy->out_urb || !phy->ack_urb) goto error; usb_fill_bulk_urb(phy->in_urb, phy->udev, usb_rcvbulkpipe(phy->udev, in_endpoint), in_buf, in_buf_len, NULL, phy); usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: protocols = PN533_ALL_PROTOCOLS; break; case PN533_DEVICE_PASORI: protocols = PN533_NO_TYPE_B_PROTOCOLS; break; case PN533_DEVICE_ACR122U: protocols = PN533_NO_TYPE_B_PROTOCOLS; fops = &pn533_acr122_frame_ops; protocol_type = PN533_PROTO_REQ_RESP; rc = pn533_acr122_poweron_rdr(phy); if (rc < 0) { nfc_err(&interface->dev, "Couldn't poweron the reader (error %d)\n", rc); goto error; } break; default: nfc_err(&interface->dev, "Unknown device type %lu\n", id->driver_info); rc = -EINVAL; goto error; } priv = pn53x_common_init(id->driver_info, protocol_type, phy, &usb_phy_ops, fops, &phy->udev->dev); if (IS_ERR(priv)) { rc = PTR_ERR(priv); goto error; } phy->priv = priv; rc = pn533