/*	$NetBSD: keccak.c,v 1.1 2017/11/30 05:47:24 riastradh Exp $	*/

/*-
 * Copyright (c) 2015 Taylor R. Campbell
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Keccak-f[1600]: the 1600-bit permutation underlying SHA-3 (FIPS 202),
 * operating on a 5x5 state of 64-bit lanes, A[x + 5*y].  Each round
 * applies theta, rho+pi, chi (below), and iota (the round-constant XOR,
 * done by the caller loop in keccakf1600()).
 */

#include <sys/cdefs.h>

#if defined(_KERNEL) || defined(_STANDALONE)
__KERNEL_RCSID(0, "$NetBSD: keccak.c,v 1.1 2017/11/30 05:47:24 riastradh Exp $");
#include <sys/types.h>
#else
__RCSID("$NetBSD: keccak.c,v 1.1 2017/11/30 05:47:24 riastradh Exp $");
#include <stdint.h>
#endif

#include "keccak.h"

/*
 * Marker for secret-dependent data: must not feed variable-time
 * operations, and should be zeroed when no longer needed.  (Annotation
 * only -- it expands to nothing.)
 */
#define	secret	/* can't use in variable-time operations, should zero */

/*
 * Execute STMT five times with X = 0..4.  Manually unrolled so the
 * compiler need not be trusted to unroll the row/column loops.
 */
#define	FOR5(X, STMT) do						\
{									\
	(X) = 0; STMT;							\
	(X) = 1; STMT;							\
	(X) = 2; STMT;							\
	(X) = 3; STMT;							\
	(X) = 4; STMT;							\
} while (0)

/*
 * Rotate v left by c bits.  c must be in 1..63: c == 0 would make the
 * right shift (64 - c) shift by the full width, which is undefined
 * behaviour.  Every call site in this file uses c in 1..62.
 */
static inline secret uint64_t
rol64(secret uint64_t v, unsigned c)
{

	return ((v << c) | (v >> (64 - c)));
}

/*
 * Theta step: XOR each lane with the parities of two nearby columns.
 * C[x] is the parity of column x; lane (x,y) absorbs
 * C[x-1] ^ rol(C[x+1], 1) (indices mod 5).
 */
static inline void
keccakf1600_theta(secret uint64_t A[25])
{
	secret uint64_t C0, C1, C2, C3, C4;
	unsigned y;

	/* Column parities. */
	C0 = C1 = C2 = C3 = C4 = 0;
	FOR5(y, {
		C0 ^= A[0 + 5*y];
		C1 ^= A[1 + 5*y];
		C2 ^= A[2 + 5*y];
		C3 ^= A[3 + 5*y];
		C4 ^= A[4 + 5*y];
	});

	/* Mix parities back into every lane of every row. */
	FOR5(y, {
		A[0 + 5*y] ^= C4 ^ rol64(C1, 1);
		A[1 + 5*y] ^= C0 ^ rol64(C2, 1);
		A[2 + 5*y] ^= C1 ^ rol64(C3, 1);
		A[3 + 5*y] ^= C2 ^ rol64(C4, 1);
		A[4 + 5*y] ^= C3 ^ rol64(C0, 1);
	});
}

/*
 * Combined rho (lane rotations) and pi (lane permutation) steps,
 * implemented as a single 24-element cycle through the state.
 */
static inline void
keccakf1600_rho_pi(secret uint64_t A[25])
{
	secret uint64_t T, U;

	/*
	 * Permute by (x,y) |---> (y, 2x + 3y mod 5) starting at (1,0),
	 * rotate the ith element by (i + 1)(i + 2)/2 mod 64.
	 */
	U = A[ 1];			T = U;
	U = A[10]; A[10] = rol64(T,  1); T = U;
	U = A[ 7]; A[ 7] = rol64(T,  3); T = U;
	U = A[11]; A[11] = rol64(T,  6); T = U;
	U = A[17]; A[17] = rol64(T, 10); T = U;
	U = A[18]; A[18] = rol64(T, 15); T = U;
	U = A[ 3]; A[ 3] = rol64(T, 21); T = U;
	U = A[ 5]; A[ 5] = rol64(T, 28); T = U;
	U = A[16]; A[16] = rol64(T, 36); T = U;
	U = A[ 8]; A[ 8] = rol64(T, 45); T = U;
	U = A[21]; A[21] = rol64(T, 55); T = U;
	U = A[24]; A[24] = rol64(T,  2); T = U;
	U = A[ 4]; A[ 4] = rol64(T, 14); T = U;
	U = A[15]; A[15] = rol64(T, 27); T = U;
	U = A[23]; A[23] = rol64(T, 41); T = U;
	U = A[19]; A[19] = rol64(T, 56); T = U;
	U = A[13]; A[13] = rol64(T,  8); T = U;
	U = A[12]; A[12] = rol64(T, 25); T = U;
	U = A[ 2]; A[ 2] = rol64(T, 43); T = U;
	U = A[20]; A[20] = rol64(T, 62); T = U;
	U = A[14]; A[14] = rol64(T, 18); T = U;
	U = A[22]; A[22] = rol64(T, 39); T = U;
	U = A[ 9]; A[ 9] = rol64(T, 61); T = U;
	U = A[ 6]; A[ 6] = rol64(T, 20); T = U;
		   A[ 1] = rol64(T, 44);
}

/*
 * Chi step: the only nonlinear step.  Each lane is XORed with
 * (~next & next-next) within its row.  Row lanes are snapshotted into
 * B0..B4 first so the updates within a row don't feed each other.
 */
static inline void
keccakf1600_chi(secret uint64_t A[25])
{
	secret uint64_t B0, B1, B2, B3, B4;
	unsigned y;

	FOR5(y, {
		B0 = A[0 + 5*y];
		B1 = A[1 + 5*y];
		B2 = A[2 + 5*y];
		B3 = A[3 + 5*y];
		B4 = A[4 + 5*y];
		A[0 + 5*y] ^= ~B1 & B2;
		A[1 + 5*y] ^= ~B2 & B3;
		A[2 + 5*y] ^= ~B3 & B4;
		A[3 + 5*y] ^= ~B4 & B0;
		A[4 + 5*y] ^= ~B0 & B1;
	});
}

/*
 * One round minus iota; the caller XORs the round constant into A[0].
 */
static void
keccakf1600_round(secret uint64_t A[25])
{

	keccakf1600_theta(A);
	keccakf1600_rho_pi(A);
	keccakf1600_chi(A);
}

/*
 * Apply all 24 rounds of Keccak-f[1600] to the state A, in place.
 */
void
keccakf1600(secret uint64_t A[25])
{
	/*
	 * RC[i] = \sum_{j = 0,...,6} rc(j + 7i) 2^(2^j - 1),
	 * rc(t) = (x^t mod x^8 + x^6 + x^5 + x^4 + 1) mod x in GF(2)[x]
	 */
	static const uint64_t RC[24] = {
		0x0000000000000001ULL,
		0x0000000000008082ULL,
		0x800000000000808aULL,
		0x8000000080008000ULL,
		0x000000000000808bULL,
		0x0000000080000001ULL,
		0x8000000080008081ULL,
		0x8000000000008009ULL,
		0x000000000000008aULL,
		0x0000000000000088ULL,
		0x0000000080008009ULL,
		0x000000008000000aULL,
		0x000000008000808bULL,
		0x800000000000008bULL,
		0x8000000000008089ULL,
		0x8000000000008003ULL,
		0x8000000000008002ULL,
		0x8000000000000080ULL,
		0x000000000000800aULL,
		0x800000008000000aULL,
		0x8000000080008081ULL,
		0x8000000000008080ULL,
		0x0000000080000001ULL,
		0x8000000080008008ULL,
	};
	unsigned i;

	for (i = 0; i < 24; i++) {
		keccakf1600_round(A);
		A[0] ^= RC[i];	/* iota step */
	}
}
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 | /* $NetBSD: uftdi.c,v 1.76 2021/08/07 16:19:17 thorpej Exp $ */ /* * Copyright (c) 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uftdi.c,v 1.76 2021/08/07 16:19:17 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/tty.h> #include <dev/usb/usb.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/ucomvar.h> #include <dev/usb/uftdireg.h> #ifdef UFTDI_DEBUG #define DPRINTF(x) if (uftdidebug) printf x #define DPRINTFN(n,x) if (uftdidebug>(n)) printf x int uftdidebug = 0; #else #define DPRINTF(x) #define DPRINTFN(n,x) #endif #define UFTDI_CONFIG_NO 1 /* * These are the default number of bytes transferred per frame if the * endpoint doesn't tell us. The output buffer size is a hard limit * for devices that use a 6-bit size encoding. */ #define UFTDIIBUFSIZE 64 #define UFTDIOBUFSIZE 64 /* * Magic constants! Where do these come from? They're what Linux uses... 
*/ #define UFTDI_MAX_IBUFSIZE 512 #define UFTDI_MAX_OBUFSIZE 256 struct uftdi_softc { device_t sc_dev; /* base device */ struct usbd_device * sc_udev; /* device */ struct usbd_interface * sc_iface; /* interface */ int sc_iface_no; enum uftdi_type sc_type; u_int sc_flags; #define FLAGS_BAUDCLK_12M 0x00000001 #define FLAGS_ROUNDOFF_232A 0x00000002 #define FLAGS_BAUDBITS_HINDEX 0x00000004 u_int sc_hdrlen; u_int sc_chiptype; u_char sc_msr; u_char sc_lsr; device_t sc_subdev; bool sc_dying; u_int last_lcr; }; static void uftdi_get_status(void *, int, u_char *, u_char *); static void uftdi_set(void *, int, int, int); static int uftdi_param(void *, int, struct termios *); static int uftdi_open(void *, int); static void uftdi_read(void *, int, u_char **, uint32_t *); static void uftdi_write(void *, int, u_char *, u_char *, uint32_t *); static void uftdi_break(void *, int, int); static const struct ucom_methods uftdi_methods = { .ucom_get_status = uftdi_get_status, .ucom_set = uftdi_set, .ucom_param = uftdi_param, .ucom_open = uftdi_open, .ucom_read = uftdi_read, .ucom_write = uftdi_write, }; /* * The devices default to UFTDI_TYPE_8U232AM. 
* Remember to update uftdi_attach() if it should be UFTDI_TYPE_SIO instead */ static const struct usb_devno uftdi_devs[] = { { USB_VENDOR_BBELECTRONICS, USB_PRODUCT_BBELECTRONICS_USOTL4 }, { USB_VENDOR_FALCOM, USB_PRODUCT_FALCOM_TWIST }, { USB_VENDOR_FALCOM, USB_PRODUCT_FALCOM_SAMBA }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_230X }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_232H }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_232RL }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_2232C }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_4232H }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_8U100AX }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SERIAL_8U232AM }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_KW }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_YS }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_Y6 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_Y8 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_IC }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_DB9 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_RS232 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MHAM_Y9 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_COASTAL_TNCX }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_CTI_485_MINI }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_CTI_NANO_485 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_SEMC_DSS20 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_LK202_24_USB }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_LK204_24_USB }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_MX200_USB }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_MX4_MX5_USB }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_CFA_631 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_CFA_632 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_CFA_633 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_CFA_634 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_LCD_CFA_635 }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_OPENRD_JTAGKEY }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_BEAGLEBONE }, { USB_VENDOR_FTDI, USB_PRODUCT_FTDI_MAXSTREAM_PKG_U }, { USB_VENDOR_xxFTDI, USB_PRODUCT_xxFTDI_SHEEVAPLUG_JTAG }, { USB_VENDOR_INTREPIDCS, USB_PRODUCT_INTREPIDCS_VALUECAN }, { 
USB_VENDOR_INTREPIDCS, USB_PRODUCT_INTREPIDCS_NEOVI }, { USB_VENDOR_MELCO, USB_PRODUCT_MELCO_PCOPRS1 }, { USB_VENDOR_RATOC, USB_PRODUCT_RATOC_REXUSB60F }, { USB_VENDOR_RTSYS, USB_PRODUCT_RTSYS_CT57A }, { USB_VENDOR_RTSYS, USB_PRODUCT_RTSYS_RTS03 }, { USB_VENDOR_SEALEVEL, USB_PRODUCT_SEALEVEL_USBSERIAL }, { USB_VENDOR_SEALEVEL, USB_PRODUCT_SEALEVEL_SEAPORT4P1 }, { USB_VENDOR_SEALEVEL, USB_PRODUCT_SEALEVEL_SEAPORT4P2 }, { USB_VENDOR_SEALEVEL, USB_PRODUCT_SEALEVEL_SEAPORT4P3 }, { USB_VENDOR_SEALEVEL, USB_PRODUCT_SEALEVEL_SEAPORT4P4 }, { USB_VENDOR_SIIG2, USB_PRODUCT_SIIG2_US2308 }, { USB_VENDOR_MISC, USB_PRODUCT_MISC_TELLSTICK }, { USB_VENDOR_MISC, USB_PRODUCT_MISC_TELLSTICK_DUO }, }; #define uftdi_lookup(v, p) usb_lookup(uftdi_devs, v, p) static int uftdi_match(device_t, cfdata_t, void *); static void uftdi_attach(device_t, device_t, void *); static void uftdi_childdet(device_t, device_t); static int uftdi_detach(device_t, int); CFATTACH_DECL2_NEW(uftdi, sizeof(struct uftdi_softc), uftdi_match, uftdi_attach, uftdi_detach, NULL, NULL, uftdi_childdet); static int uftdi_match(device_t parent, cfdata_t match, void *aux) { struct usbif_attach_arg *uiaa = aux; DPRINTFN(20,("uftdi: vendor=%#x, product=%#x\n", uiaa->uiaa_vendor, uiaa->uiaa_product)); if (uiaa->uiaa_configno != UFTDI_CONFIG_NO) return UMATCH_NONE; return uftdi_lookup(uiaa->uiaa_vendor, uiaa->uiaa_product) != NULL ? 
UMATCH_VENDOR_PRODUCT_CONF_IFACE : UMATCH_NONE; } static void uftdi_attach(device_t parent, device_t self, void *aux) { struct uftdi_softc *sc = device_private(self); struct usbif_attach_arg *uiaa = aux; struct usbd_device *dev = uiaa->uiaa_device; struct usbd_interface *iface = uiaa->uiaa_iface; usb_device_descriptor_t *ddesc; usb_interface_descriptor_t *id; usb_endpoint_descriptor_t *ed; char *devinfop; int i; struct ucom_attach_args ucaa; DPRINTFN(10,("\nuftdi_attach: sc=%p\n", sc)); aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(dev, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); sc->sc_dev = self; sc->sc_udev = dev; sc->sc_dying = false; sc->sc_iface_no = uiaa->uiaa_ifaceno; sc->sc_type = UFTDI_TYPE_8U232AM; /* most devices are post-8U232AM */ sc->sc_hdrlen = 0; ddesc = usbd_get_device_descriptor(dev); sc->sc_chiptype = UGETW(ddesc->bcdDevice); switch (sc->sc_chiptype) { case 0x0200: if (ddesc->iSerialNumber != 0) sc->sc_flags |= FLAGS_ROUNDOFF_232A; ucaa.ucaa_portno = 0; break; case 0x0400: ucaa.ucaa_portno = 0; break; case 0x0500: sc->sc_flags |= FLAGS_BAUDBITS_HINDEX; ucaa.ucaa_portno = FTDI_PIT_SIOA + sc->sc_iface_no; break; case 0x0600: ucaa.ucaa_portno = 0; break; case 0x0700: case 0x0800: case 0x0900: sc->sc_flags |= FLAGS_BAUDCLK_12M; sc->sc_flags |= FLAGS_BAUDBITS_HINDEX; ucaa.ucaa_portno = FTDI_PIT_SIOA + sc->sc_iface_no; break; case 0x1000: sc->sc_flags |= FLAGS_BAUDBITS_HINDEX; ucaa.ucaa_portno = FTDI_PIT_SIOA + sc->sc_iface_no; break; default: if (sc->sc_chiptype < 0x0200) { sc->sc_type = UFTDI_TYPE_SIO; sc->sc_hdrlen = 1; } ucaa.ucaa_portno = 0; break; } id = usbd_get_interface_descriptor(iface); sc->sc_iface = iface; ucaa.ucaa_bulkin = ucaa.ucaa_bulkout = -1; ucaa.ucaa_ibufsize = ucaa.ucaa_obufsize = 0; for (i = 0; i < id->bNumEndpoints; i++) { int addr, dir, attr; ed = usbd_interface2endpoint_descriptor(iface, i); if (ed == NULL) { aprint_error_dev(self, "could not read endpoint 
descriptor\n"); goto bad; } addr = ed->bEndpointAddress; dir = UE_GET_DIR(ed->bEndpointAddress); attr = ed->bmAttributes & UE_XFERTYPE; if (dir == UE_DIR_IN && attr == UE_BULK) { ucaa.ucaa_bulkin = addr; ucaa.ucaa_ibufsize = UGETW(ed->wMaxPacketSize); if (ucaa.ucaa_ibufsize >= UFTDI_MAX_IBUFSIZE) ucaa.ucaa_ibufsize = UFTDI_MAX_IBUFSIZE; } else if (dir == UE_DIR_OUT && attr == UE_BULK) { ucaa.ucaa_bulkout = addr; ucaa.ucaa_obufsize = UGETW(ed->wMaxPacketSize) - sc->sc_hdrlen; if (ucaa.ucaa_obufsize >= UFTDI_MAX_OBUFSIZE) ucaa.ucaa_obufsize = UFTDI_MAX_OBUFSIZE; /* Limit length if we have a 6-bit header. */ if ((sc->sc_hdrlen > 0) && (ucaa.ucaa_obufsize > UFTDIOBUFSIZE)) ucaa.ucaa_obufsize = UFTDIOBUFSIZE; } else { aprint_error_dev(self, "unexpected endpoint\n"); goto bad; } } if (ucaa.ucaa_bulkin == -1) { aprint_error_dev(self, "Could not find data bulk in\n"); goto bad; } if (ucaa.ucaa_bulkout == -1) { aprint_error_dev(self, "Could not find data bulk out\n"); goto bad; } /* ucaa_bulkin, ucaa_bulkout set above */ if (ucaa.ucaa_ibufsize == 0) ucaa.ucaa_ibufsize = UFTDIIBUFSIZE; ucaa.ucaa_ibufsizepad = ucaa.ucaa_ibufsize; if (ucaa.ucaa_obufsize == 0) ucaa.ucaa_obufsize = UFTDIOBUFSIZE - sc->sc_hdrlen; ucaa.ucaa_opkthdrlen = sc->sc_hdrlen; ucaa.ucaa_device = dev; ucaa.ucaa_iface = iface; ucaa.ucaa_methods = &uftdi_methods; ucaa.ucaa_arg = sc; ucaa.ucaa_info = NULL; DPRINTF(("uftdi: in=%#x out=%#x isize=%#x osize=%#x\n", ucaa.ucaa_bulkin, ucaa.ucaa_bulkout, ucaa.ucaa_ibufsize, ucaa.ucaa_obufsize)); sc->sc_subdev = config_found(self, &ucaa, ucomprint, CFARGS(.submatch = ucomsubmatch)); usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); return; bad: DPRINTF(("uftdi_attach: ATTACH ERROR\n")); sc->sc_dying = true; return; } static void uftdi_childdet(device_t self, device_t child) { struct uftdi_softc *sc = device_private(self); KASSERT(child == 
sc->sc_subdev); sc->sc_subdev = NULL; } static int uftdi_detach(device_t self, int flags) { struct uftdi_softc *sc = device_private(self); int rv = 0; DPRINTF(("uftdi_detach: sc=%p flags=%d\n", sc, flags)); sc->sc_dying = true; if (sc->sc_subdev != NULL) { rv = config_detach(sc->sc_subdev, flags); sc->sc_subdev = NULL; } usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev); return rv; } static int uftdi_open(void *vsc, int portno) { struct uftdi_softc *sc = vsc; usb_device_request_t req; usbd_status err; struct termios t; DPRINTF(("uftdi_open: sc=%p\n", sc)); if (sc->sc_dying) return EIO; /* Perform a full reset on the device */ req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_RESET; USETW(req.wValue, FTDI_SIO_RESET_SIO); USETW(req.wIndex, portno); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; /* Set 9600 baud, 2 stop bits, no parity, 8 bits */ t.c_ospeed = 9600; t.c_cflag = CSTOPB | CS8; (void)uftdi_param(sc, portno, &t); /* Turn on RTS/CTS flow control */ req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_FLOW_CTRL; USETW(req.wValue, 0); USETW2(req.wIndex, FTDI_SIO_RTS_CTS_HS, portno); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; return 0; } static void uftdi_read(void *vsc, int portno, u_char **ptr, uint32_t *count) { struct uftdi_softc *sc = vsc; u_char msr, lsr; DPRINTFN(15,("uftdi_read: sc=%p, port=%d count=%d\n", sc, portno, *count)); msr = FTDI_GET_MSR(*ptr); lsr = FTDI_GET_LSR(*ptr); #ifdef UFTDI_DEBUG if (*count != 2) DPRINTFN(10,("uftdi_read: sc=%p, port=%d count=%d data[0]=" "0x%02x\n", sc, portno, *count, (*ptr)[2])); #endif if (sc->sc_msr != msr || (sc->sc_lsr & FTDI_LSR_MASK) != (lsr & FTDI_LSR_MASK)) { DPRINTF(("uftdi_read: status change msr=0x%02x(0x%02x) " "lsr=0x%02x(0x%02x)\n", msr, sc->sc_msr, lsr, sc->sc_lsr)); sc->sc_msr = msr; sc->sc_lsr = lsr; ucom_status_change(device_private(sc->sc_subdev)); 
} /* Adjust buffer pointer to skip status prefix */ *ptr += 2; } static void uftdi_write(void *vsc, int portno, u_char *to, u_char *from, uint32_t *count) { struct uftdi_softc *sc = vsc; DPRINTFN(10,("uftdi_write: sc=%p, port=%d count=%u data[0]=0x%02x\n", vsc, portno, *count, from[0])); /* Make length tag and copy data */ if (sc->sc_hdrlen > 0) *to = FTDI_OUT_TAG(*count, portno); memcpy(to + sc->sc_hdrlen, from, *count); *count += sc->sc_hdrlen; } static void uftdi_set(void *vsc, int portno, int reg, int onoff) { struct uftdi_softc *sc = vsc; usb_device_request_t req; int ctl; DPRINTF(("uftdi_set: sc=%p, port=%d reg=%d onoff=%d\n", vsc, portno, reg, onoff)); if (sc->sc_dying) return; switch (reg) { case UCOM_SET_DTR: ctl = onoff ? FTDI_SIO_SET_DTR_HIGH : FTDI_SIO_SET_DTR_LOW; break; case UCOM_SET_RTS: ctl = onoff ? FTDI_SIO_SET_RTS_HIGH : FTDI_SIO_SET_RTS_LOW; break; case UCOM_SET_BREAK: uftdi_break(sc, portno, onoff); return; default: return; } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_MODEM_CTRL; USETW(req.wValue, ctl); USETW(req.wIndex, portno); USETW(req.wLength, 0); DPRINTFN(2,("uftdi_set: reqtype=0x%02x req=0x%02x value=0x%04x " "index=0x%04x len=%d\n", req.bmRequestType, req.bRequest, UGETW(req.wValue), UGETW(req.wIndex), UGETW(req.wLength))); (void)usbd_do_request(sc->sc_udev, &req, NULL); } /* * Return true if the given speed is within operational tolerance of the target * speed. FTDI recommends that the hardware speed be within 3% of nominal. 
*/ static inline bool uftdi_baud_within_tolerance(uint64_t speed, uint64_t target) { return ((speed >= (target * 100) / 103) && (speed <= (target * 100) / 97)); } static int uftdi_encode_baudrate(struct uftdi_softc *sc, int speed, int *rate, int *ratehi) { static const uint8_t encoded_fraction[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; static const uint8_t roundoff_232a[16] = { 0, 1, 0, 1, 0, -1, 2, 1, 0, -1, -2, -3, 4, 3, 2, 1, }; uint32_t clk, divisor, fastclk_flag, frac, hwspeed; /* * If this chip has the fast clock capability and the speed is within * range, use the 12MHz clock, otherwise the standard clock is 3MHz. */ if ((sc->sc_flags & FLAGS_BAUDCLK_12M) && speed >= 1200) { clk = 12000000; fastclk_flag = (1 << 17); } else { clk = 3000000; fastclk_flag = 0; } /* * Make sure the requested speed is reachable with the available clock * and a 14-bit divisor. */ if (speed < (clk >> 14) || speed > clk) return -1; /* * Calculate the divisor, initially yielding a fixed point number with a * 4-bit (1/16ths) fraction, then round it to the nearest fraction the * hardware can handle. When the integral part of the divisor is * greater than one, the fractional part is in 1/8ths of the base clock. * The FT8U232AM chips can handle only 0.125, 0.250, and 0.5 fractions. * Later chips can handle all 1/8th fractions. * * If the integral part of the divisor is 1, a special rule applies: the * fractional part can only be .0 or .5 (this is a limitation of the * hardware). We handle this by truncating the fraction rather than * rounding, because this only applies to the two fastest speeds the * chip can achieve and rounding doesn't matter, either you've asked for * that exact speed or you've asked for something the chip can't do. * * For the FT8U232AM chips, use a roundoff table to adjust the result * to the nearest 1/8th fraction that is supported by the hardware, * leaving a fixed-point number with a 3-bit fraction which exactly * represents the math the hardware divider will do. 
For later-series * chips that support all 8 fractional divisors, just round 16ths to * 8ths by adding 1 and dividing by 2. */ divisor = (clk << 4) / speed; if ((divisor & 0xf) == 1) divisor &= 0xfffffff8; else if (sc->sc_flags & FLAGS_ROUNDOFF_232A) divisor += roundoff_232a[divisor & 0x0f]; else divisor += 1; /* Rounds odd 16ths up to next 8th. */ divisor >>= 1; /* * Ensure the resulting hardware speed will be within operational * tolerance (within 3% of nominal). */ hwspeed = (clk << 3) / divisor; if (!uftdi_baud_within_tolerance(hwspeed, speed)) return -1; /* * Re-pack the divisor into hardware format. The lower 14-bits hold the * integral part, while the upper bits specify the fraction by indexing * a table of fractions within the hardware which is laid out as: * {0.0, 0.5, 0.25, 0.125, 0.325, 0.625, 0.725, 0.875} * The A-series chips only have the first four table entries; the * roundoff table logic above ensures that the fractional part for those * chips will be one of the first four values. * * When the divisor is 1 a special encoding applies: 1.0 is encoded as * 0.0, and 1.5 is encoded as 1.0. The rounding logic above has already * ensured that the fraction is either .0 or .5 if the integral is 1. */ frac = divisor & 0x07; divisor >>= 3; if (divisor == 1) { if (frac == 0) divisor = 0; /* 1.0 becomes 0.0 */ else frac = 0; /* 1.5 becomes 1.0 */ } divisor |= (encoded_fraction[frac] << 14) | fastclk_flag; *rate = (uint16_t)divisor; *ratehi = (uint16_t)(divisor >> 16); /* * If this chip requires the baud bits to be in the high byte of the * index word, move the bits up to that location. 
*/ if (sc->sc_flags & FLAGS_BAUDBITS_HINDEX) *ratehi <<= 8; return 0; } static int uftdi_param(void *vsc, int portno, struct termios *t) { struct uftdi_softc *sc = vsc; usb_device_request_t req; usbd_status err; int rate, ratehi, rerr, data, flow; DPRINTF(("uftdi_param: sc=%p\n", sc)); if (sc->sc_dying) return EIO; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_BITMODE; USETW(req.wValue, FTDI_BITMODE_RESET << 8 | 0x00); USETW(req.wIndex, portno); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; switch (sc->sc_type) { case UFTDI_TYPE_SIO: switch (t->c_ospeed) { case 300: rate = ftdi_sio_b300; break; case 600: rate = ftdi_sio_b600; break; case 1200: rate = ftdi_sio_b1200; break; case 2400: rate = ftdi_sio_b2400; break; case 4800: rate = ftdi_sio_b4800; break; case 9600: rate = ftdi_sio_b9600; break; case 19200: rate = ftdi_sio_b19200; break; case 38400: rate = ftdi_sio_b38400; break; case 57600: rate = ftdi_sio_b57600; break; case 115200: rate = ftdi_sio_b115200; break; default: return EINVAL; } ratehi = 0; break; case UFTDI_TYPE_8U232AM: rerr = uftdi_encode_baudrate(sc, t->c_ospeed, &rate, &ratehi); if (rerr != 0) return EINVAL; break; default: return EINVAL; } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_BAUD_RATE; USETW(req.wValue, rate); USETW(req.wIndex, portno | ratehi); USETW(req.wLength, 0); DPRINTFN(2,("uftdi_param: reqtype=0x%02x req=0x%02x value=0x%04x " "index=0x%04x len=%d\n", req.bmRequestType, req.bRequest, UGETW(req.wValue), UGETW(req.wIndex), UGETW(req.wLength))); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; if (ISSET(t->c_cflag, CSTOPB)) data = FTDI_SIO_SET_DATA_STOP_BITS_2; else data = FTDI_SIO_SET_DATA_STOP_BITS_1; if (ISSET(t->c_cflag, PARENB)) { if (ISSET(t->c_cflag, PARODD)) data |= FTDI_SIO_SET_DATA_PARITY_ODD; else data |= FTDI_SIO_SET_DATA_PARITY_EVEN; } else data |= FTDI_SIO_SET_DATA_PARITY_NONE; switch (ISSET(t->c_cflag, 
CSIZE)) { case CS5: data |= FTDI_SIO_SET_DATA_BITS(5); break; case CS6: data |= FTDI_SIO_SET_DATA_BITS(6); break; case CS7: data |= FTDI_SIO_SET_DATA_BITS(7); break; case CS8: data |= FTDI_SIO_SET_DATA_BITS(8); break; } sc->last_lcr = data; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_DATA; USETW(req.wValue, data); USETW(req.wIndex, portno); USETW(req.wLength, 0); DPRINTFN(2,("uftdi_param: reqtype=0x%02x req=0x%02x value=0x%04x " "index=0x%04x len=%d\n", req.bmRequestType, req.bRequest, UGETW(req.wValue), UGETW(req.wIndex), UGETW(req.wLength))); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; if (ISSET(t->c_cflag, CRTSCTS)) { flow = FTDI_SIO_RTS_CTS_HS; USETW(req.wValue, 0); } else if (ISSET(t->c_iflag, IXON) && ISSET(t->c_iflag, IXOFF)) { flow = FTDI_SIO_XON_XOFF_HS; USETW2(req.wValue, t->c_cc[VSTOP], t->c_cc[VSTART]); } else { flow = FTDI_SIO_DISABLE_FLOW_CTRL; USETW(req.wValue, 0); } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_FLOW_CTRL; USETW2(req.wIndex, flow, portno); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, NULL); if (err) return EIO; return 0; } static void uftdi_get_status(void *vsc, int portno, u_char *lsr, u_char *msr) { struct uftdi_softc *sc = vsc; DPRINTF(("uftdi_status: msr=0x%02x lsr=0x%02x\n", sc->sc_msr, sc->sc_lsr)); if (sc->sc_dying) return; *msr = sc->sc_msr; *lsr = sc->sc_lsr; } static void uftdi_break(void *vsc, int portno, int onoff) { struct uftdi_softc *sc = vsc; usb_device_request_t req; int data; DPRINTF(("uftdi_break: sc=%p, port=%d onoff=%d\n", vsc, portno, onoff)); if (onoff) { data = sc->last_lcr | FTDI_SIO_SET_BREAK; } else { data = sc->last_lcr; } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = FTDI_SIO_SET_DATA; USETW(req.wValue, data); USETW(req.wIndex, portno); USETW(req.wLength, 0); (void)usbd_do_request(sc->sc_udev, &req, NULL); } |
| 155 150 157 145 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | /* $NetBSD: kern_module_hook.c,v 1.4 2019/12/13 08:02:53 skrll Exp $ */ /*- * Copyright (c) 2019 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Kernel module support. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_module_hook.c,v 1.4 2019/12/13 08:02:53 skrll Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/module_hook.h>
#include <sys/mutex.h>
#include <sys/pserialize.h>

#include <uvm/uvm_extern.h>

/* Locking/synchronization stuff for module hooks */
static struct {
	kmutex_t	mtx;	/* serializes unset against drain/exit */
	kcondvar_t	cv;	/* wakeup channel for localcount_drain() */
	pserialize_t	psz;	/* publishes/retires the *hooked flag */
} module_hook __cacheline_aligned;

/*
 * We use pserialize_perform() to issue a memory barrier on the current
 * CPU and on all other CPUs so that all prior memory operations on the
 * current CPU globally happen before all subsequent memory operations
 * on the current CPU, as perceived by any other CPU.
 *
 * pserialize_perform() might be rather heavy-weight here, but it only
 * happens during module loading, and it allows MODULE_HOOK_CALL() to
 * work without any other memory barriers.
 */

/*
 * Publish a module hook: initialize its localcount, make sure that
 * initialization is globally visible, then flip *hooked so that
 * module_hook_tryenter() starts admitting callers.  Caller must hold
 * the kernel configuration lock (asserted).
 */
void
module_hook_set(bool *hooked, struct localcount *lc)
{
	KASSERT(kernconfig_is_held());
	KASSERT(!*hooked);

	localcount_init(lc);

	/* Wait until setup has been witnessed by all CPUs. */
	pserialize_perform(module_hook.psz);

	/* Let others use it */
	atomic_store_relaxed(hooked, true);
}

/*
 * Retract a module hook and wait for all in-flight users to leave.
 * The order below is load-bearing: clear the flag, wait out readers
 * who may have already seen it set, then drain the localcount held by
 * callers between tryenter and exit.  Caller must hold the kernel
 * configuration lock (asserted).
 */
void
module_hook_unset(bool *hooked, struct localcount *lc)
{
	KASSERT(kernconfig_is_held());
	KASSERT(*hooked);

	/* Get exclusive with pserialize and localcount. */
	mutex_enter(&module_hook.mtx);

	/* Prevent new calls to module_hook_tryenter(). */
	atomic_store_relaxed(hooked, false);

	/* Wait for existing calls to module_hook_tryenter(). */
	pserialize_perform(module_hook.psz);

	/* Wait for module_hook_exit. */
	localcount_drain(lc, &module_hook.cv, &module_hook.mtx);

	/* All done! */
	mutex_exit(&module_hook.mtx);
	localcount_fini(lc);
}

/*
 * Try to enter a hook: under a pserialize read section, test the
 * published flag and, if set, take a localcount reference that keeps
 * the hook alive until module_hook_exit().  Returns true iff the
 * caller may invoke the hook (and must later call module_hook_exit()).
 */
bool
module_hook_tryenter(bool *hooked, struct localcount *lc)
{
	bool call_hook;
	int s;

	s = pserialize_read_enter();
	call_hook = atomic_load_relaxed(hooked);
	if (call_hook)
		localcount_acquire(lc);
	pserialize_read_exit(s);

	return call_hook;
}

/*
 * Release the reference taken by a successful module_hook_tryenter(),
 * waking module_hook_unset() if it is draining.
 */
void
module_hook_exit(struct localcount *lc)
{
	localcount_release(lc, &module_hook.cv, &module_hook.mtx);
}

/*
 * One-time initialization of the shared module-hook synchronization
 * state.  Must run before any of the routines above are used.
 */
void
module_hook_init(void)
{
	mutex_init(&module_hook.mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&module_hook.cv, "mod_hook");
	module_hook.psz = pserialize_create();
}
| 8 8 7 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | /* $NetBSD: bt_proto.c,v 1.16 2016/01/21 15:41:30 riastradh Exp $ */ /*- * Copyright (c) 2005 Iain Hibbert. * Copyright (c) 2006 Itronix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of Itronix Inc. may not be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bt_proto.c,v 1.16 2016/01/21 15:41:30 riastradh Exp $");

#include <sys/param.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/systm.h>

#include <net/route.h>

#include <netbt/bluetooth.h>
#include <netbt/hci.h>
#include <netbt/l2cap.h>
#include <netbt/rfcomm.h>
#include <netbt/sco.h>

DOMAIN_DEFINE(btdomain);	/* forward declare and add to link set */

static void bt_init(void);

/*
 * Wrap the per-protocol ctloutput handlers so they run with the
 * appropriate locking, then substitute the wrappers for the raw
 * handlers in the protosw entries below.
 */
PR_WRAP_CTLOUTPUT(hci_ctloutput)
PR_WRAP_CTLOUTPUT(sco_ctloutput)
PR_WRAP_CTLOUTPUT(l2cap_ctloutput)
PR_WRAP_CTLOUTPUT(rfcomm_ctloutput)

#define	hci_ctloutput		hci_ctloutput_wrapper
#define	sco_ctloutput		sco_ctloutput_wrapper
#define	l2cap_ctloutput		l2cap_ctloutput_wrapper
#define	rfcomm_ctloutput	rfcomm_ctloutput_wrapper

/*
 * Protocol switch table for AF_BLUETOOTH: one entry per Bluetooth
 * socket protocol (raw HCI, SCO audio, L2CAP, RFCOMM).
 */
static const struct protosw btsw[] = {
	{ /* raw HCI commands */
		.pr_type = SOCK_RAW,
		.pr_domain = &btdomain,
		.pr_protocol = BTPROTO_HCI,
		.pr_flags = (PR_ADDR | PR_ATOMIC),
		.pr_init = hci_init,
		.pr_ctloutput = hci_ctloutput,
		.pr_usrreqs = &hci_usrreqs,
	},
	{ /* HCI SCO data (audio) */
		.pr_type = SOCK_SEQPACKET,
		.pr_domain = &btdomain,
		.pr_protocol = BTPROTO_SCO,
		.pr_flags = (PR_CONNREQUIRED | PR_ATOMIC | PR_LISTEN),
		.pr_ctloutput = sco_ctloutput,
		.pr_usrreqs = &sco_usrreqs,
	},
	{ /* L2CAP Connection Oriented */
		.pr_type = SOCK_SEQPACKET,
		.pr_domain = &btdomain,
		.pr_protocol = BTPROTO_L2CAP,
		.pr_flags = (PR_CONNREQUIRED | PR_ATOMIC | PR_LISTEN),
		.pr_ctloutput = l2cap_ctloutput,
		.pr_usrreqs = &l2cap_usrreqs,
		.pr_init = l2cap_init,
	},
	{ /* RFCOMM */
		.pr_type = SOCK_STREAM,
		.pr_domain = &btdomain,
		.pr_protocol = BTPROTO_RFCOMM,
		.pr_flags = (PR_CONNREQUIRED | PR_LISTEN | PR_WANTRCVD),
		.pr_ctloutput = rfcomm_ctloutput,
		.pr_usrreqs = &rfcomm_usrreqs,
		.pr_init = rfcomm_init,
	},
};

/* Domain descriptor registering the table above with the socket layer. */
struct domain btdomain = {
	.dom_family = AF_BLUETOOTH,
	.dom_name = "bluetooth",
	.dom_init = bt_init,
	.dom_protosw = btsw,
	.dom_protoswNPROTOSW = &btsw[__arraycount(btsw)],
};

/* Big lock protecting the Bluetooth protocol stack. */
kmutex_t *bt_lock;

/*
 * Domain initialization: allocate the global Bluetooth stack lock.
 * Called once via btdomain.dom_init.
 */
static void
bt_init(void)
{
	bt_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}
| 2 2 2 2 2 2 2 2 2 2 6 6 5 4 3 3 3 1 3 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 | /* $NetBSD: wskbdutil.c,v 1.19 2017/11/03 19:20:27 maya Exp $ */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Juergen Hannken-Illjes. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: wskbdutil.c,v 1.19 2017/11/03 19:20:27 maya Exp $");

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <dev/wscons/wsksymdef.h>
#include <dev/wscons/wsksymvar.h>

/*
 * Table of two-keysym compose sequences and the keysym each produces.
 * Declared in source order; it is sorted in place on first use by
 * wskbd_compose_value() so that a binary search can be used afterwards.
 */
static struct compose_tab_s {
	keysym_t elem[2];
	keysym_t result;
} compose_tab[] = {
	{ { KS_plus,			KS_plus },	KS_numbersign },
	{ { KS_a,			KS_a },		KS_at },
	{ { KS_parenleft,		KS_parenleft },	KS_bracketleft },
	{ { KS_slash,			KS_slash },	KS_backslash },
	{ { KS_parenright,		KS_parenright }, KS_bracketright },
	{ { KS_parenleft,		KS_minus },	KS_braceleft },
	{ { KS_slash,			KS_minus },	KS_bar },
	{ { KS_parenright,		KS_minus },	KS_braceright },
	{ { KS_exclam,			KS_exclam },	KS_exclamdown },
	{ { KS_c,			KS_slash },	KS_cent },
	{ { KS_l,			KS_minus },	KS_sterling },
	{ { KS_y,			KS_minus },	KS_yen },
	{ { KS_s,			KS_o },		KS_section },
	{ { KS_x,			KS_o },		KS_currency },
	{ { KS_c,			KS_o },		KS_copyright },
	{ { KS_less,			KS_less },	KS_guillemotleft },
	{ { KS_greater,			KS_greater },	KS_guillemotright },
	{ { KS_question,		KS_question },	KS_questiondown },
	{ { KS_dead_acute,		KS_space },	KS_acute },
	{ { KS_dead_grave,		KS_space },	KS_grave },
	{ { KS_dead_tilde,		KS_space },	KS_asciitilde },
	{ { KS_dead_circumflex,		KS_space },	KS_asciicircum },
	{ { KS_dead_circumflex,		KS_A },		KS_Acircumflex },
	{ { KS_dead_diaeresis,		KS_A },		KS_Adiaeresis },
	{ { KS_dead_grave,		KS_A },		KS_Agrave },
	{ { KS_dead_abovering,		KS_A },		KS_Aring },
	{ { KS_dead_tilde,		KS_A },		KS_Atilde },
	{ { KS_dead_cedilla,		KS_C },		KS_Ccedilla },
	{ { KS_dead_acute,		KS_E },		KS_Eacute },
	{ { KS_dead_circumflex,		KS_E },		KS_Ecircumflex },
	{ { KS_dead_diaeresis,		KS_E },		KS_Ediaeresis },
	{ { KS_dead_grave,		KS_E },		KS_Egrave },
	{ { KS_dead_acute,		KS_I },		KS_Iacute },
	{ { KS_dead_circumflex,		KS_I },		KS_Icircumflex },
	{ { KS_dead_diaeresis,		KS_I },		KS_Idiaeresis },
	{ { KS_dead_grave,		KS_I },		KS_Igrave },
	{ { KS_dead_tilde,		KS_N },		KS_Ntilde },
	{ { KS_dead_acute,		KS_O },		KS_Oacute },
	{ { KS_dead_circumflex,		KS_O },		KS_Ocircumflex },
	{ { KS_dead_diaeresis,		KS_O },		KS_Odiaeresis },
	{ { KS_dead_grave,		KS_O },		KS_Ograve },
	{ { KS_dead_tilde,		KS_O },		KS_Otilde },
	{ { KS_dead_acute,		KS_U },		KS_Uacute },
	{ { KS_dead_circumflex,		KS_U },		KS_Ucircumflex },
	{ { KS_dead_diaeresis,		KS_U },		KS_Udiaeresis },
	{ { KS_dead_grave,		KS_U },		KS_Ugrave },
	{ { KS_dead_acute,		KS_Y },		KS_Yacute },
	{ { KS_dead_acute,		KS_a },		KS_aacute },
	{ { KS_dead_circumflex,		KS_a },		KS_acircumflex },
	{ { KS_dead_diaeresis,		KS_a },		KS_adiaeresis },
	{ { KS_dead_grave,		KS_a },		KS_agrave },
	{ { KS_dead_abovering,		KS_a },		KS_aring },
	{ { KS_dead_tilde,		KS_a },		KS_atilde },
	{ { KS_dead_cedilla,		KS_c },		KS_ccedilla },
	{ { KS_dead_acute,		KS_e },		KS_eacute },
	{ { KS_dead_circumflex,		KS_e },		KS_ecircumflex },
	{ { KS_dead_diaeresis,		KS_e },		KS_ediaeresis },
	{ { KS_dead_grave,		KS_e },		KS_egrave },
	{ { KS_dead_acute,		KS_i },		KS_iacute },
	{ { KS_dead_circumflex,		KS_i },		KS_icircumflex },
	{ { KS_dead_diaeresis,		KS_i },		KS_idiaeresis },
	{ { KS_dead_grave,		KS_i },		KS_igrave },
	{ { KS_dead_tilde,		KS_n },		KS_ntilde },
	{ { KS_dead_acute,		KS_o },		KS_oacute },
	{ { KS_dead_circumflex,		KS_o },		KS_ocircumflex },
	{ { KS_dead_diaeresis,		KS_o },		KS_odiaeresis },
	{ { KS_dead_grave,		KS_o },		KS_ograve },
	{ { KS_dead_tilde,		KS_o },		KS_otilde },
	{ { KS_dead_acute,		KS_u },		KS_uacute },
	{ { KS_dead_circumflex,		KS_u },		KS_ucircumflex },
	{ { KS_dead_diaeresis,		KS_u },		KS_udiaeresis },
	{ { KS_dead_grave,		KS_u },		KS_ugrave },
	{ { KS_dead_acute,		KS_y },		KS_yacute },
	{ { KS_dead_diaeresis,		KS_y },		KS_ydiaeresis },
	{ { KS_quotedbl,		KS_A },		KS_Adiaeresis },
	{ { KS_quotedbl,		KS_E },		KS_Ediaeresis },
	{ { KS_quotedbl,		KS_I },		KS_Idiaeresis },
	{ { KS_quotedbl,		KS_O },		KS_Odiaeresis },
	{ { KS_quotedbl,		KS_U },		KS_Udiaeresis },
	{ { KS_quotedbl,		KS_a },		KS_adiaeresis },
	{ { KS_quotedbl,		KS_e },		KS_ediaeresis },
	{ { KS_quotedbl,		KS_i },		KS_idiaeresis },
	{ { KS_quotedbl,		KS_o },		KS_odiaeresis },
	{ { KS_quotedbl,		KS_u },		KS_udiaeresis },
	{ { KS_quotedbl,		KS_y },		KS_ydiaeresis },
	{ { KS_acute,			KS_A },		KS_Aacute },
	{ { KS_asciicircum,		KS_A },		KS_Acircumflex },
	{ { KS_grave,			KS_A },		KS_Agrave },
	{ { KS_asterisk,		KS_A },		KS_Aring },
	{ { KS_asciitilde,		KS_A },		KS_Atilde },
	{ { KS_cedilla,			KS_C },		KS_Ccedilla },
	{ { KS_acute,			KS_E },		KS_Eacute },
	{ { KS_asciicircum,		KS_E },		KS_Ecircumflex },
	{ { KS_grave,			KS_E },		KS_Egrave },
	{ { KS_acute,			KS_I },		KS_Iacute },
	{ { KS_asciicircum,		KS_I },		KS_Icircumflex },
	{ { KS_grave,			KS_I },		KS_Igrave },
	{ { KS_asciitilde,		KS_N },		KS_Ntilde },
	{ { KS_acute,			KS_O },		KS_Oacute },
	{ { KS_asciicircum,		KS_O },		KS_Ocircumflex },
	{ { KS_grave,			KS_O },		KS_Ograve },
	{ { KS_asciitilde,		KS_O },		KS_Otilde },
	{ { KS_acute,			KS_U },		KS_Uacute },
	{ { KS_asciicircum,		KS_U },		KS_Ucircumflex },
	{ { KS_grave,			KS_U },		KS_Ugrave },
	{ { KS_acute,			KS_Y },		KS_Yacute },
	{ { KS_acute,			KS_a },		KS_aacute },
	{ { KS_asciicircum,		KS_a },		KS_acircumflex },
	{ { KS_grave,			KS_a },		KS_agrave },
	{ { KS_asterisk,		KS_a },		KS_aring },
	{ { KS_asciitilde,		KS_a },		KS_atilde },
	{ { KS_cedilla,			KS_c },		KS_ccedilla },
	{ { KS_acute,			KS_e },		KS_eacute },
	{ { KS_asciicircum,		KS_e },		KS_ecircumflex },
	{ { KS_grave,			KS_e },		KS_egrave },
	{ { KS_acute,			KS_i },		KS_iacute },
	{ { KS_asciicircum,		KS_i },		KS_icircumflex },
	{ { KS_grave,			KS_i },		KS_igrave },
	{ { KS_asciitilde,		KS_n },		KS_ntilde },
	{ { KS_acute,			KS_o },		KS_oacute },
	{ { KS_asciicircum,		KS_o },		KS_ocircumflex },
	{ { KS_grave,			KS_o },		KS_ograve },
	{ { KS_asciitilde,		KS_o },		KS_otilde },
	{ { KS_acute,			KS_u },		KS_uacute },
	{ { KS_asciicircum,		KS_u },		KS_ucircumflex },
	{ { KS_grave,			KS_u },		KS_ugrave },
	{ { KS_acute,			KS_y },		KS_yacute },
	/* Greek accented vowels (tonos/dialytika). */
	{ { KS_dead_semi,		KS_gr_A },	KS_gr_At },
	{ { KS_dead_semi,		KS_gr_E },	KS_gr_Et },
	{ { KS_dead_semi,		KS_gr_H },	KS_gr_Ht },
	{ { KS_dead_semi,		KS_gr_I },	KS_gr_It },
	{ { KS_dead_semi,		KS_gr_O },	KS_gr_Ot },
	{ { KS_dead_semi,		KS_gr_Y },	KS_gr_Yt },
	{ { KS_dead_semi,		KS_gr_V },	KS_gr_Vt },
	{ { KS_dead_colon,		KS_gr_I },	KS_gr_Id },
	{ { KS_dead_colon,		KS_gr_Y },	KS_gr_Yd },
	{ { KS_dead_semi,		KS_gr_a },	KS_gr_at },
	{ { KS_dead_semi,		KS_gr_e },	KS_gr_et },
	{ { KS_dead_semi,		KS_gr_h },	KS_gr_ht },
	{ { KS_dead_semi,		KS_gr_i },	KS_gr_it },
	{ { KS_dead_semi,		KS_gr_o },	KS_gr_ot },
	{ { KS_dead_semi,		KS_gr_y },	KS_gr_yt },
	{ { KS_dead_semi,		KS_gr_v },	KS_gr_vt },
	{ { KS_dead_colon,		KS_gr_i },	KS_gr_id },
	{ { KS_dead_colon,		KS_gr_y },	KS_gr_yd },
	/* Latin 2 */
	{ { KS_dead_acute,		KS_S },		KS_Sacute },
	{ { KS_dead_acute,		KS_Z },		KS_Zacute },
	{ { KS_dead_acute,		KS_s },		KS_sacute },
	{ { KS_dead_acute,		KS_z },		KS_zacute },
	{ { KS_dead_acute,		KS_R },		KS_Racute },
	{ { KS_dead_acute,		KS_A },		KS_Aacute },
	{ { KS_dead_acute,		KS_L },		KS_Lacute },
	{ { KS_dead_acute,		KS_C },		KS_Cacute },
	{ { KS_dead_acute,		KS_E },		KS_Eacute },
	{ { KS_dead_acute,		KS_I },		KS_Iacute },
	{ { KS_dead_acute,		KS_N },		KS_Nacute },
	{ { KS_dead_acute,		KS_O },		KS_Oacute },
	{ { KS_dead_acute,		KS_U },		KS_Uacute },
	{ { KS_dead_acute,		KS_Y },		KS_Yacute },
	{ { KS_dead_acute,		KS_r },		KS_racute },
	{ { KS_dead_acute,		KS_a },		KS_aacute },
	{ { KS_dead_acute,		KS_l },		KS_lacute },
	{ { KS_dead_acute,		KS_c },		KS_cacute },
	{ { KS_dead_acute,		KS_e },		KS_eacute },
	{ { KS_dead_acute,		KS_i },		KS_iacute },
	{ { KS_dead_acute,		KS_n },		KS_nacute },
	{ { KS_dead_acute,		KS_o },		KS_oacute },
	{ { KS_dead_acute,		KS_u },		KS_uacute },
	{ { KS_dead_acute,		KS_y },		KS_yacute },
	{ { KS_dead_breve,		KS_A },		KS_Abreve },
	{ { KS_dead_breve,		KS_a },		KS_abreve },
	{ { KS_dead_caron,		KS_L },		KS_Lcaron },
	{ { KS_dead_caron,		KS_S },		KS_Scaron },
	{ { KS_dead_caron,		KS_T },		KS_Tcaron },
	{ { KS_dead_caron,		KS_Z },		KS_Zcaron },
	{ { KS_dead_caron,		KS_l },		KS_lcaron },
	{ { KS_dead_caron,		KS_s },		KS_scaron },
	{ { KS_dead_caron,		KS_t },		KS_tcaron },
	{ { KS_dead_caron,		KS_z },		KS_zcaron },
	{ { KS_dead_caron,		KS_C },		KS_Ccaron },
	{ { KS_dead_caron,		KS_E },		KS_Ecaron },
	{ { KS_dead_caron,		KS_D },		KS_Dcaron },
	{ { KS_dead_caron,		KS_N },		KS_Ncaron },
	{ { KS_dead_caron,		KS_R },		KS_Rcaron },
	{ { KS_dead_caron,		KS_c },		KS_ccaron },
	{ { KS_dead_caron,		KS_e },		KS_ecaron },
	{ { KS_dead_caron,		KS_d },		KS_dcaron },
	{ { KS_dead_caron,		KS_n },		KS_ncaron },
	{ { KS_dead_caron,		KS_r },		KS_rcaron },
	{ { KS_dead_cedilla,		KS_S },		KS_Scedilla },
	{ { KS_dead_cedilla,		KS_s },		KS_scedilla },
	{ { KS_dead_cedilla,		KS_C },		KS_Ccedilla },
	{ { KS_dead_cedilla,		KS_T },		KS_Tcedilla },
	{ { KS_dead_cedilla,		KS_c },		KS_ccedilla },
	{ { KS_dead_cedilla,		KS_t },		KS_tcedilla },
	{ { KS_dead_circumflex,		KS_A },		KS_Acircumflex },
	{ { KS_dead_circumflex,		KS_I },		KS_Icircumflex },
	{ { KS_dead_circumflex,		KS_O },		KS_Ocircumflex },
	{ { KS_dead_circumflex,		KS_a },		KS_acircumflex },
	{ { KS_dead_circumflex,		KS_i },		KS_icircumflex },
	{ { KS_dead_circumflex,		KS_o },		KS_ocircumflex },
	{ { KS_dead_diaeresis,		KS_A },		KS_Adiaeresis },
	{ { KS_dead_diaeresis,		KS_E },		KS_Ediaeresis },
	{ { KS_dead_diaeresis,		KS_O },		KS_Odiaeresis },
	{ { KS_dead_diaeresis,		KS_U },		KS_Udiaeresis },
	{ { KS_dead_diaeresis,		KS_a },		KS_adiaeresis },
	{ { KS_dead_diaeresis,		KS_e },		KS_ediaeresis },
	{ { KS_dead_diaeresis,		KS_o },		KS_odiaeresis },
	{ { KS_dead_diaeresis,		KS_u },		KS_udiaeresis },
	{ { KS_dead_dotaccent,		KS_Z },		KS_Zabovedot },
	{ { KS_dead_dotaccent,		KS_z },		KS_zabovedot },
	{ { KS_dead_hungarumlaut,	KS_O },		KS_Odoubleacute },
	{ { KS_dead_hungarumlaut,	KS_U },		KS_Udoubleacute },
	{ { KS_dead_hungarumlaut,	KS_o },		KS_odoubleacute },
	{ { KS_dead_hungarumlaut,	KS_u },		KS_udoubleacute },
	{ { KS_dead_ogonek,		KS_A },		KS_Aogonek },
	{ { KS_dead_ogonek,		KS_a },		KS_aogonek },
	{ { KS_dead_ogonek,		KS_E },		KS_Eogonek },
	{ { KS_dead_ogonek,		KS_e },		KS_eogonek },
	{ { KS_dead_abovering,		KS_U },		KS_Uabovering },
	{ { KS_dead_abovering,		KS_u },		KS_uabovering },
	{ { KS_dead_slash,		KS_L },		KS_Lstroke },
	{ { KS_dead_slash,		KS_l },		KS_lstroke }
};

#define COMPOSE_SIZE	__arraycount(compose_tab)

/* Nonzero once compose_tab[] has been sorted (lazily, on first use). */
static int compose_tab_inorder = 0;

static inline int compose_tab_cmp(struct compose_tab_s *,
	struct compose_tab_s *);
static keysym_t ksym_upcase(keysym_t);
static void fillmapentry(const keysym_t *, int, struct wscons_keymap *);

/*
 * Order two compose entries lexicographically by (elem[0], elem[1]).
 * Returns <0, 0, or >0 in the usual comparator convention.
 */
static inline int
compose_tab_cmp(struct compose_tab_s *i, struct compose_tab_s *j)
{
	if (i->elem[0] == j->elem[0])
		return(i->elem[1] - j->elem[1]);
	else
		return(i->elem[0] - j->elem[0]);
}

/*
 * Look up the keysym produced by the two-keysym sequence in
 * compose_buf[0..1].  On first call the table is insertion-sorted in
 * place; thereafter a binary search is used.  Returns KS_voidSymbol
 * if the pair is not a known compose sequence.
 *
 * NOTE(review): the lazy sort mutates shared state without locking;
 * presumably all callers run serialized under the wskbd layer --
 * verify before calling from a new context.
 */
keysym_t
wskbd_compose_value(keysym_t *compose_buf)
{
	int i, j, r;
	struct compose_tab_s v;

	if (! compose_tab_inorder) {
		/* Insertion sort. */
		for (i = 1; i < COMPOSE_SIZE; i++) {
			v = compose_tab[i];
			/* find correct slot, moving others up */
			for (j = i; --j >= 0 &&
			    compose_tab_cmp(& v, & compose_tab[j]) < 0; )
				compose_tab[j + 1] = compose_tab[j];
			compose_tab[j + 1] = v;
		}
		compose_tab_inorder = 1;
	}

	/* Binary search over the sorted table. */
	for (j = 0, i = COMPOSE_SIZE; i != 0; i /= 2) {
		if (compose_tab[j + i/2].elem[0] == compose_buf[0]) {
			if (compose_tab[j + i/2].elem[1] == compose_buf[1])
				return(compose_tab[j + i/2].result);
			r = compose_tab[j + i/2].elem[1] < compose_buf[1];
		} else
			r = compose_tab[j + i/2].elem[0] < compose_buf[0];
		if (r) {
			j += i/2 + 1;
			i--;
		}
	}

	return(KS_voidSymbol);
}

/*
 * Latin-1 lowercase -> uppercase map; 0x00 means "no uppercase form"
 * (entries for non-letters, and for y-diaeresis/division at 0xf7/0xff,
 * are 0x00).
 */
static const u_char latin1_to_upper[256] = {
/*	0  8  1  9  2  a  3  b  4  c  5  d  6  e  7  f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 0 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 0 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 1 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 1 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 3 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 3 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 4 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 4 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 5 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 5 */
	0x00, 'A',  'B',  'C',  'D',  'E',  'F',  'G',		/* 6 */
	'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',		/* 6 */
	'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',		/* 7 */
	'X',  'Y',  'Z',  0x00, 0x00, 0x00, 0x00, 0x00,		/* 7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 8 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 8 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 9 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 9 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* a */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* a */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* b */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* b */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* c */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* c */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* d */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* d */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,		/* e */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,		/* e */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0x00,		/* f */
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x00,		/* f */
};

/*
 * Return the uppercase counterpart of a keysym: function keys f1-f20
 * map to F1-F20, plain Latin-1 letters map through latin1_to_upper[],
 * and anything else is returned unchanged.
 */
static keysym_t
ksym_upcase(keysym_t ksym)
{
	if (ksym >= KS_f1 && ksym <= KS_f20)
		return(KS_F1 - KS_f1 + ksym);

	if (KS_GROUP(ksym) == KS_GROUP_Plain && ksym <= 0xff &&
	    latin1_to_upper[ksym] != 0x00)
		return(latin1_to_upper[ksym]);

	return(ksym);
}

/*
 * Fill a keymap entry from `len' keysyms (0..4).  Missing positions
 * get defaults: an absent shifted symbol is derived via ksym_upcase(),
 * and an absent group 2 duplicates group 1.
 */
static void
fillmapentry(const keysym_t *kp, int len, struct wscons_keymap *mapentry)
{
	switch (len) {
	case 0:
		mapentry->group1[0] = KS_voidSymbol;
		mapentry->group1[1] = KS_voidSymbol;
		mapentry->group2[0] = KS_voidSymbol;
		mapentry->group2[1] = KS_voidSymbol;
		break;

	case 1:
		mapentry->group1[0] = kp[0];
		mapentry->group1[1] = ksym_upcase(kp[0]);
		mapentry->group2[0] = mapentry->group1[0];
		mapentry->group2[1] = mapentry->group1[1];
		break;

	case 2:
		mapentry->group1[0] = kp[0];
		mapentry->group1[1] = kp[1];
		mapentry->group2[0] = mapentry->group1[0];
		mapentry->group2[1] = mapentry->group1[1];
		break;

	case 3:
		mapentry->group1[0] = kp[0];
		mapentry->group1[1] = kp[1];
		mapentry->group2[0] = kp[2];
		mapentry->group2[1] = ksym_upcase(kp[2]);
		break;

	case 4:
		mapentry->group1[0] = kp[0];
		mapentry->group1[1] = kp[1];
		mapentry->group2[0] = kp[2];
		mapentry->group2[1] = kp[3];
		break;
	}
}

/*
 * Look up keycode `kc' in the layout named by mapdata->layout,
 * following base-layout links until a definition is found, and fill
 * *mapentry.  If the keycode is not defined anywhere in the chain the
 * entry is left as all KS_voidSymbol.
 */
void
wskbd_get_mapentry(const struct wskbd_mapdata *mapdata, int kc,
	struct wscons_keymap *mapentry)
{
	kbd_t cur;
	const keysym_t *kp;
	const struct wscons_keydesc *mp;
	int l;

	mapentry->command = KS_voidSymbol;
	mapentry->group1[0] = KS_voidSymbol;
	mapentry->group1[1] = KS_voidSymbol;
	mapentry->group2[0] = KS_voidSymbol;
	mapentry->group2[1] = KS_voidSymbol;

	for (cur = mapdata->layout & ~KB_HANDLEDBYWSKBD; cur != 0; ) {
		mp = mapdata->keydesc;
		while (mp->map_size > 0) {
			if (mp->name == cur)
				break;
			mp++;
		}

		/* If map not found, return */
		if (mp->map_size <= 0)
			return;

		for (kp = mp->map; kp < mp->map + mp->map_size; kp++)
			if (KS_GROUP(*kp) == KS_GROUP_Keycode &&
			    KS_VALUE(*kp) == kc) {
				/* First skip keycode and possible command */
				kp++;
				if (KS_GROUP(*kp) == KS_GROUP_Command ||
				    *kp == KS_Cmd || *kp == KS_Cmd1 ||
				    *kp == KS_Cmd2)
					mapentry->command = *kp++;

				/* Count keysyms up to the next keycode. */
				for (l = 0; kp + l < mp->map + mp->map_size;
				    l++)
					if (KS_GROUP(kp[l]) ==
					    KS_GROUP_Keycode)
						break;
				if (l > 4)
					panic("wskbd_get_mapentry: %d(%d): "
					    "bad entry", mp->name, *kp);
				fillmapentry(kp, l, mapentry);
				return;
			}

		/* Not in this map; fall back to its base layout. */
		cur = mp->base;
	}
}

/*
 * (Re)size *map to `newlen' entries (freeing/reallocating with M_TEMP
 * as needed -- may sleep) and reset every entry to KS_voidSymbol.
 */
void
wskbd_init_keymap(int newlen, struct wscons_keymap **map, int *maplen)
{
	int i;

	if (newlen != *maplen) {
		if (*maplen > 0)
			free(*map, M_TEMP);
		*maplen = newlen;
		*map = malloc(newlen * sizeof(struct wscons_keymap),
		    M_TEMP, M_WAITOK);
	}

	for (i = 0; i < *maplen; i++) {
		(*map)[i].command = KS_voidSymbol;
		(*map)[i].group1[0] = KS_voidSymbol;
		(*map)[i].group1[1] = KS_voidSymbol;
		(*map)[i].group2[0] = KS_voidSymbol;
		(*map)[i].group2[1] = KS_voidSymbol;
	}
}

/*
 * Build a complete keymap for mapdata->layout into *map/*maplen:
 * collect the chain of layouts (most-derived first, bounded by a
 * fixed-depth stack), size the map to the largest keycode seen, then
 * apply the chain base-first so derived layouts override their bases.
 * Returns 0 on success or EINVAL if a named layout is missing.
 */
int
wskbd_load_keymap(const struct wskbd_mapdata *mapdata,
	struct wscons_keymap **map, int *maplen)
{
	int i, s, kc, stack_ptr;
	const keysym_t *kp;
	const struct wscons_keydesc *mp, *stack[10];
	kbd_t cur;

	/* Walk the base-layout chain, pushing each descriptor. */
	for (cur = mapdata->layout & ~KB_HANDLEDBYWSKBD, stack_ptr = 0;
	    cur != 0; stack_ptr++) {
		mp = mapdata->keydesc;
		while (mp->map_size > 0) {
			if (cur == 0 || mp->name == cur) {
				break;
			}
			mp++;
		}

		if (stack_ptr == __arraycount(stack))
			panic("wskbd_load_keymap: %d: recursion too deep",
			    mapdata->layout);
		if (mp->map_size <= 0)
			return(EINVAL);

		stack[stack_ptr] = mp;
		cur = mp->base;
	}

	/* Find the largest keycode to size the map. */
	for (i = 0, s = stack_ptr - 1; s >= 0; s--) {
		mp = stack[s];
		for (kp = mp->map; kp < mp->map + mp->map_size; kp++)
			if (KS_GROUP(*kp) == KS_GROUP_Keycode &&
			    KS_VALUE(*kp) > i)
				i = KS_VALUE(*kp);
	}

	wskbd_init_keymap(i + 1, map, maplen);

	/* Apply layouts base-first so derived entries win. */
	for (s = stack_ptr - 1; s >= 0; s--) {
		mp = stack[s];
		for (kp = mp->map; kp < mp->map + mp->map_size; ) {
			if (KS_GROUP(*kp) != KS_GROUP_Keycode)
				panic("wskbd_load_keymap: %d(%d): bad entry",
				    mp->name, *kp);

			kc = KS_VALUE(*kp);
			kp++;

			if (KS_GROUP(*kp) == KS_GROUP_Command ||
			    *kp == KS_Cmd || *kp == KS_Cmd1 ||
			    *kp == KS_Cmd2) {
				(*map)[kc].command = *kp;
				kp++;
			}

			for (i = 0; kp + i < mp->map + mp->map_size; i++)
				if (KS_GROUP(kp[i]) == KS_GROUP_Keycode)
					break;

			if (i > 4)
				panic("wskbd_load_keymap: %d(%d): bad entry",
				    mp->name, *kp);

			fillmapentry(kp, i, &(*map)[kc]);
			kp += i;
		}
	}

	return(0);
}
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | /* $NetBSD: pmap_pvt.c,v 1.15 2022/05/08 22:03:02 rin Exp $ */ /*- * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Taylor R. Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __RCSID("$NetBSD: pmap_pvt.c,v 1.15 2022/05/08 22:03:02 rin Exp $"); #include <sys/param.h> #include <sys/atomic.h> #include <sys/kmem.h> #include <sys/pserialize.h> #include <uvm/uvm.h> #include <uvm/pmap/pmap_pvt.h> #if !defined(PMAP_PV_TRACK_ONLY_STUBS) /* * unmanaged pv-tracked ranges * * This is a linear list for now because the only user are the DRM * graphics drivers, with a single tracked range per device, for the * graphics aperture, so there are expected to be few of them. * * This is used only after the VM system is initialized well enough * that we can use kmem_alloc. */ struct pv_track { paddr_t pvt_start; psize_t pvt_size; struct pv_track *pvt_next; struct pmap_page pvt_pages[]; }; static struct { kmutex_t lock; pserialize_t psz; struct pv_track *list; } pv_unmanaged __cacheline_aligned; void pmap_pv_init(void) { mutex_init(&pv_unmanaged.lock, MUTEX_DEFAULT, IPL_NONE); pv_unmanaged.psz = pserialize_create(); pv_unmanaged.list = NULL; } void pmap_pv_track(paddr_t start, psize_t size) { struct pv_track *pvt; size_t npages; KASSERT(start == trunc_page(start)); KASSERT(size == trunc_page(size)); /* We may sleep for allocation. 
*/ ASSERT_SLEEPABLE(); npages = size >> PAGE_SHIFT; pvt = kmem_zalloc(offsetof(struct pv_track, pvt_pages[npages]), KM_SLEEP); pvt->pvt_start = start; pvt->pvt_size = size; #ifdef PMAP_PAGE_INIT for (size_t i = 0; i < npages; i++) PMAP_PAGE_INIT(&pvt->pvt_pages[i]); #endif mutex_enter(&pv_unmanaged.lock); pvt->pvt_next = pv_unmanaged.list; atomic_store_release(&pv_unmanaged.list, pvt); mutex_exit(&pv_unmanaged.lock); } void pmap_pv_untrack(paddr_t start, psize_t size) { struct pv_track **pvtp, *pvt; size_t npages; KASSERT(start == trunc_page(start)); KASSERT(size == trunc_page(size)); /* We may sleep for pserialize_perform. */ ASSERT_SLEEPABLE(); mutex_enter(&pv_unmanaged.lock); for (pvtp = &pv_unmanaged.list; (pvt = *pvtp) != NULL; pvtp = &pvt->pvt_next) { if (pvt->pvt_start != start) continue; if (pvt->pvt_size != size) panic("pmap_pv_untrack: pv-tracking at 0x%"PRIxPADDR ": 0x%"PRIxPSIZE" bytes, not 0x%"PRIxPSIZE" bytes", pvt->pvt_start, pvt->pvt_size, size); /* * Remove from list. Readers can safely see the old * and new states of the list. */ atomic_store_relaxed(pvtp, pvt->pvt_next); /* Wait for readers who can see the old state to finish. */ pserialize_perform(pv_unmanaged.psz); /* * We now have exclusive access to pvt and can destroy * it. Poison it to catch bugs. 
*/ explicit_memset(&pvt->pvt_next, 0x1a, sizeof pvt->pvt_next); goto out; } panic("pmap_pv_untrack: pages not pv-tracked at 0x%"PRIxPADDR " (0x%"PRIxPSIZE" bytes)", start, size); out: mutex_exit(&pv_unmanaged.lock); npages = size >> PAGE_SHIFT; kmem_free(pvt, offsetof(struct pv_track, pvt_pages[npages])); } struct pmap_page * pmap_pv_tracked(paddr_t pa) { struct pv_track *pvt; size_t pgno; int s; KASSERT(pa == trunc_page(pa)); s = pserialize_read_enter(); for (pvt = atomic_load_consume(&pv_unmanaged.list); pvt != NULL; pvt = pvt->pvt_next) { if ((pvt->pvt_start <= pa) && ((pa - pvt->pvt_start) < pvt->pvt_size)) break; } pserialize_read_exit(s); if (pvt == NULL) return NULL; KASSERT(pvt->pvt_start <= pa); KASSERT((pa - pvt->pvt_start) < pvt->pvt_size); pgno = (pa - pvt->pvt_start) >> PAGE_SHIFT; return &pvt->pvt_pages[pgno]; } #else /* PMAP_PV_TRACK_ONLY_STUBS */ /* * Provide empty stubs just for MODULAR kernels. */ void pmap_pv_init(void) { } struct pmap_page * pmap_pv_tracked(paddr_t pa) { return NULL; } #if notdef /* * pmap_pv_{,un}track() are intentionally commented out. If modules * call these functions, the result should be an inconsistent state. * * Such modules require real PV-tracking support. Let us make the * two symbols undefined, and prevent these modules from loaded. */ void pmap_pv_track(paddr_t start, psize_t size) { panic("PV-tracking not supported"); } void pmap_pv_untrack(paddr_t start, psize_t size) { panic("PV-tracking not supported"); } #endif /* notdef */ #endif /* PMAP_PV_TRACK_ONLY_STUBS */ |
| 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | /* $NetBSD: chacha_sse2.c,v 1.2 2020/07/27 20:48:18 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/types.h>
#include <sys/endian.h>

#include "immintrin.h"

#include "chacha_sse2.h"

/*
 * rol32(x, n)
 *
 *	Rotate each 32-bit lane of x left by n bits.  Valid only for
 *	0 < n < 32 (a shift by 32 - n would otherwise be out of range);
 *	all callers here use n in {7, 8, 12, 16}.
 */
static inline __m128i
rol32(__m128i x, uint8_t n)
{

	return _mm_slli_epi32(x, n) | _mm_srli_epi32(x, 32 - n);
}

/*
 * chacha_permute(p0, p1, p2, p3, nr)
 *
 *	Apply nr rounds (nr must be even; processed two at a time) of
 *	the ChaCha permutation to the four-row state *p0..*p3, in place.
 *	The first batch of quarter-rounds works on columns; the shuffles
 *	by 0x39/0x4e/0x93 then realign the rows so the second batch works
 *	on the diagonals, and the inverse shuffles restore row order.
 */
static inline void
chacha_permute(__m128i *p0, __m128i *p1, __m128i *p2, __m128i *p3,
    unsigned nr)
{
	__m128i r0, r1, r2, r3;
	__m128i c0, c1, c2, c3;

	r0 = *p0;
	r1 = *p1;
	r2 = *p2;
	r3 = *p3;

	for (; nr > 0; nr -= 2) {
		/* Column round: quarter-round on each column in parallel. */
		r0 = _mm_add_epi32(r0, r1); r3 ^= r0; r3 = rol32(r3, 16);
		r2 = _mm_add_epi32(r2, r3); r1 ^= r2; r1 = rol32(r1, 12);
		r0 = _mm_add_epi32(r0, r1); r3 ^= r0; r3 = rol32(r3, 8);
		r2 = _mm_add_epi32(r2, r3); r1 ^= r2; r1 = rol32(r1, 7);

		/* Rotate rows so the diagonals line up as columns. */
		c0 = r0;
		c1 = _mm_shuffle_epi32(r1, 0x39);
		c2 = _mm_shuffle_epi32(r2, 0x4e);
		c3 = _mm_shuffle_epi32(r3, 0x93);

		/* Diagonal round. */
		c0 = _mm_add_epi32(c0, c1); c3 ^= c0; c3 = rol32(c3, 16);
		c2 = _mm_add_epi32(c2, c3); c1 ^= c2; c1 = rol32(c1, 12);
		c0 = _mm_add_epi32(c0, c1); c3 ^= c0; c3 = rol32(c3, 8);
		c2 = _mm_add_epi32(c2, c3); c1 ^= c2; c1 = rol32(c1, 7);

		/* Undo the row rotation. */
		r0 = c0;
		r1 = _mm_shuffle_epi32(c1, 0x93);
		r2 = _mm_shuffle_epi32(c2, 0x4e);
		r3 = _mm_shuffle_epi32(c3, 0x39);
	}

	*p0 = r0;
	*p1 = r1;
	*p2 = r2;
	*p3 = r3;
}

/*
 * chacha_core_sse2(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block: load constant c, key k, and
 *	input/counter block in; permute nr rounds; add the initial state
 *	back in (the feedforward) and store the result to out.
 */
void
chacha_core_sse2(uint8_t out[restrict static 64],
    const uint8_t in[static 16],
    const uint8_t k[static 32],
    const uint8_t c[static 16],
    unsigned nr)
{
	__m128i in0, in1, in2, in3;
	__m128i r0, r1, r2, r3;

	r0 = in0 = _mm_loadu_si128((const __m128i *)c);
	r1 = in1 = _mm_loadu_si128((const __m128i *)k);
	r2 = in2 = _mm_loadu_si128((const __m128i *)k + 1);
	r3 = in3 = _mm_loadu_si128((const __m128i *)in);

	chacha_permute(&r0, &r1, &r2, &r3, nr);

	_mm_storeu_si128((__m128i *)out + 0, _mm_add_epi32(r0, in0));
	_mm_storeu_si128((__m128i *)out + 1, _mm_add_epi32(r1, in1));
	_mm_storeu_si128((__m128i *)out + 2, _mm_add_epi32(r2, in2));
	_mm_storeu_si128((__m128i *)out + 3, _mm_add_epi32(r3, in3));
}

/*
 * hchacha_sse2(out, in, k, c, nr)
 *
 *	HChaCha core: like chacha_core_sse2 but with no feedforward,
 *	and only the first and last rows of the permuted state (r0, r3)
 *	are emitted as the 32-byte output (subkey derivation).
 */
void
hchacha_sse2(uint8_t out[restrict static 32],
    const uint8_t in[static 16],
    const uint8_t k[static 32],
    const uint8_t c[static 16],
    unsigned nr)
{
	__m128i r0, r1, r2, r3;

	r0 = _mm_loadu_si128((const __m128i *)c);
	r1 = _mm_loadu_si128((const __m128i *)k);
	r2 = _mm_loadu_si128((const __m128i *)k + 1);
	r3 = _mm_loadu_si128((const __m128i *)in);

	chacha_permute(&r0, &r1, &r2, &r3, nr);

	_mm_storeu_si128((__m128i *)out + 0, r0);
	_mm_storeu_si128((__m128i *)out + 1, r3);
}

/*
 * One ChaCha quarter-round on four vector registers.
 *
 * NOTE(review): the right-hand occurrences of a and c in "(d) ^= a"
 * and "(b) ^= c" are not parenthesized; harmless for the simple
 * variable arguments used below, but a latent macro-hygiene hazard.
 * Arguments are evaluated multiple times -- pass only plain variables.
 */
#define CHACHA_QUARTERROUND(a, b, c, d) do \
{ \
	(a) = _mm_add_epi32((a), (b)); (d) ^= a; (d) = rol32((d), 16); \
	(c) = _mm_add_epi32((c), (d)); (b) ^= c; (b) = rol32((b), 12); \
	(a) = _mm_add_epi32((a), (b)); (d) ^= a; (d) = rol32((d), 8); \
	(c) = _mm_add_epi32((c), (d)); (b) ^= c; (b) = rol32((b), 7); \
} while (/*CONSTCOND*/0)

/* Broadcast the 32-bit word at p into all four lanes. */
static inline __m128i
load1_epi32(const void *p)
{
	return (__m128i)_mm_load1_ps(p);
}

/* Unaligned 128-bit load. */
static inline __m128i
loadu_epi32(const void *p)
{
	return _mm_loadu_si128(p);
}

/* Unaligned 128-bit store. */
static inline void
storeu_epi32(void *p, __m128i v)
{
	return _mm_storeu_si128(p, v);
}

/*
 * unpack[0-3]_epi32 gather lane i of the four inputs into one vector:
 * unpack0 returns (a[0], b[0], c[0], d[0]), and so on.  Together they
 * form a 4x4 transpose used to de-interleave four parallel blocks.
 */
static inline __m128i
unpack0_epi32(__m128i a, __m128i b, __m128i c, __m128i d)
{
	__m128 lo = (__m128)_mm_unpacklo_epi32(a, b); /* (a[0], b[0], ...) */
	__m128 hi = (__m128)_mm_unpacklo_epi32(c, d); /* (c[0], d[0], ...) */

	/* (lo[0]=a[0], lo[1]=b[0], hi[0]=c[0], hi[1]=d[0]) */
	return (__m128i)_mm_movelh_ps(lo, hi);
}

static inline __m128i
unpack1_epi32(__m128i a, __m128i b, __m128i c, __m128i d)
{
	__m128 lo = (__m128)_mm_unpacklo_epi32(a, b); /* (..., a[1], b[1]) */
	__m128 hi = (__m128)_mm_unpacklo_epi32(c, d); /* (..., c[1], d[1]) */

	/* (lo[2]=a[1], lo[3]=b[1], hi[2]=c[1], hi[3]=d[1]) */
	return (__m128i)_mm_movehl_ps(hi, lo);
}

static inline __m128i
unpack2_epi32(__m128i a, __m128i b, __m128i c, __m128i d)
{
	__m128 lo = (__m128)_mm_unpackhi_epi32(a, b); /* (a[2], b[2], ...) */
	__m128 hi = (__m128)_mm_unpackhi_epi32(c, d); /* (c[2], d[2], ...)
 */

	/* (lo[0]=a[2], lo[1]=b[2], hi[0]=c[2], hi[1]=d[2]) */
	return (__m128i)_mm_movelh_ps(lo, hi);
}

static inline __m128i
unpack3_epi32(__m128i a, __m128i b, __m128i c, __m128i d)
{
	__m128 lo = (__m128)_mm_unpackhi_epi32(a, b); /* (..., a[3], b[3]) */
	__m128 hi = (__m128)_mm_unpackhi_epi32(c, d); /* (..., c[3], d[3]) */

	/* (lo[2]=a[3], lo[3]=b[3], hi[2]=c[3], hi[3]=d[3]) */
	return (__m128i)_mm_movehl_ps(hi, lo);
}

/*
 * chacha_stream_sse2(s, n, blkno, nonce, k, nr)
 *
 *	Generate n bytes of ChaCha keystream into s, starting at 64-byte
 *	block number blkno.  While at least 256 bytes remain, four blocks
 *	are computed in parallel, one state word per vector lane, then
 *	transposed back with the unpack helpers.  The remainder falls to
 *	a one-block-at-a-time loop, with a bounce buffer for a final
 *	partial block.
 */
void
chacha_stream_sse2(uint8_t *restrict s, size_t n,
    uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t k[static 32],
    unsigned nr)
{
	__m128i x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
	__m128i y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
	__m128i z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15;
	unsigned r;

	if (n < 256)
		goto out;

	/* Broadcast each state word into all four lanes (4 blocks). */
	x0 = load1_epi32(chacha_const32 + 0);
	x1 = load1_epi32(chacha_const32 + 4);
	x2 = load1_epi32(chacha_const32 + 8);
	x3 = load1_epi32(chacha_const32 + 12);
	x4 = load1_epi32(k + 0);
	x5 = load1_epi32(k + 4);
	x6 = load1_epi32(k + 8);
	x7 = load1_epi32(k + 12);
	x8 = load1_epi32(k + 16);
	x9 = load1_epi32(k + 20);
	x10 = load1_epi32(k + 24);
	x11 = load1_epi32(k + 28);
	/* x12 set in the loop */
	x13 = load1_epi32(nonce + 0);
	x14 = load1_epi32(nonce + 4);
	x15 = load1_epi32(nonce + 8);

	for (; n >= 256; s += 256, n -= 256, blkno += 4) {
		/* Per-lane block counters: blkno, blkno+1, ..., blkno+3. */
		x12 = _mm_add_epi32(_mm_set1_epi32(blkno),
		    _mm_set_epi32(3,2,1,0));
		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (r = nr; r > 0; r -= 2) {
			/* Column round... */
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			/* ...then diagonal round. */
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		/* Feedforward: add the initial state back in. */
		y0 = _mm_add_epi32(y0, x0);
		y1 = _mm_add_epi32(y1, x1);
		y2 = _mm_add_epi32(y2, x2);
		y3 = _mm_add_epi32(y3, x3);
		y4 = _mm_add_epi32(y4, x4);
		y5 = _mm_add_epi32(y5, x5);
		y6 = _mm_add_epi32(y6, x6);
		y7 = _mm_add_epi32(y7, x7);
		y8 = _mm_add_epi32(y8, x8);
		y9 = _mm_add_epi32(y9, x9);
		y10 = _mm_add_epi32(y10, x10);
		y11 = _mm_add_epi32(y11, x11);
		y12 = _mm_add_epi32(y12, x12);
		y13 = _mm_add_epi32(y13, x13);
		y14 = _mm_add_epi32(y14, x14);
		y15 = _mm_add_epi32(y15, x15);

		/* Transpose lane-major y back into block-major z. */
		z0 = unpack0_epi32(y0, y1, y2, y3);
		z1 = unpack0_epi32(y4, y5, y6, y7);
		z2 = unpack0_epi32(y8, y9, y10, y11);
		z3 = unpack0_epi32(y12, y13, y14, y15);
		z4 = unpack1_epi32(y0, y1, y2, y3);
		z5 = unpack1_epi32(y4, y5, y6, y7);
		z6 = unpack1_epi32(y8, y9, y10, y11);
		z7 = unpack1_epi32(y12, y13, y14, y15);
		z8 = unpack2_epi32(y0, y1, y2, y3);
		z9 = unpack2_epi32(y4, y5, y6, y7);
		z10 = unpack2_epi32(y8, y9, y10, y11);
		z11 = unpack2_epi32(y12, y13, y14, y15);
		z12 = unpack3_epi32(y0, y1, y2, y3);
		z13 = unpack3_epi32(y4, y5, y6, y7);
		z14 = unpack3_epi32(y8, y9, y10, y11);
		z15 = unpack3_epi32(y12, y13, y14, y15);

		storeu_epi32(s + 16*0, z0);
		storeu_epi32(s + 16*1, z1);
		storeu_epi32(s + 16*2, z2);
		storeu_epi32(s + 16*3, z3);
		storeu_epi32(s + 16*4, z4);
		storeu_epi32(s + 16*5, z5);
		storeu_epi32(s + 16*6, z6);
		storeu_epi32(s + 16*7, z7);
		storeu_epi32(s + 16*8, z8);
		storeu_epi32(s + 16*9, z9);
		storeu_epi32(s + 16*10, z10);
		storeu_epi32(s + 16*11, z11);
		storeu_epi32(s + 16*12, z12);
		storeu_epi32(s + 16*13, z13);
		storeu_epi32(s + 16*14, z14);
		storeu_epi32(s + 16*15, z15);
	}

out:	/* Remaining < 256 bytes: one block at a time. */
	if (n) {
		const __m128i blkno_inc = _mm_set_epi32(0,0,0,1);
		__m128i in0, in1, in2, in3;
		__m128i r0, r1, r2, r3;

		in0 = _mm_loadu_si128((const __m128i *)chacha_const32);
		in1 = _mm_loadu_si128((const __m128i *)k);
		in2 = _mm_loadu_si128((const __m128i *)k + 1);
		in3 = _mm_set_epi32(le32dec(nonce + 8), le32dec(nonce + 4),
		    le32dec(nonce), blkno);

		for (; n; s += 64, n -= 64) {
			r0 = in0;
			r1 = in1;
			r2 = in2;
			r3 = in3;
			chacha_permute(&r0, &r1, &r2, &r3, nr);
			r0 = _mm_add_epi32(r0, in0);
			r1 = _mm_add_epi32(r1, in1);
			r2 = _mm_add_epi32(r2, in2);
			r3 = _mm_add_epi32(r3, in3);
			/* Partial final block: stage in a bounce buffer. */
			if (n < 64) {
				uint8_t buf[64] __aligned(16);

				_mm_storeu_si128((__m128i *)buf + 0, r0);
				_mm_storeu_si128((__m128i *)buf + 1, r1);
				_mm_storeu_si128((__m128i *)buf + 2, r2);
				_mm_storeu_si128((__m128i *)buf + 3, r3);
				memcpy(s, buf, n);
				break;
			}
			_mm_storeu_si128((__m128i *)s + 0, r0);
			_mm_storeu_si128((__m128i *)s + 1, r1);
			_mm_storeu_si128((__m128i *)s + 2, r2);
			_mm_storeu_si128((__m128i *)s + 3, r3);
			in3 = _mm_add_epi32(in3, blkno_inc);
		}
	}
}

/*
 * chacha_stream_xor_sse2(s, p, n, blkno, nonce, k, nr)
 *
 *	Encrypt/decrypt: XOR n bytes of plaintext p with the ChaCha
 *	keystream (same schedule as chacha_stream_sse2) into s.
 */
void
chacha_stream_xor_sse2(uint8_t *s, const uint8_t *p, size_t n,
    uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t k[static 32],
    unsigned nr)
{
	__m128i x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
	__m128i y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
	__m128i z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15;
	unsigned r;

	if (n < 256)
		goto out;

	/* Broadcast each state word into all four lanes (4 blocks). */
	x0 = load1_epi32(chacha_const32 + 0);
	x1 = load1_epi32(chacha_const32 + 4);
	x2 = load1_epi32(chacha_const32 + 8);
	x3 = load1_epi32(chacha_const32 + 12);
	x4 = load1_epi32(k + 0);
	x5 = load1_epi32(k + 4);
	x6 = load1_epi32(k + 8);
	x7 = load1_epi32(k + 12);
	x8 = load1_epi32(k + 16);
	x9 = load1_epi32(k + 20);
	x10 = load1_epi32(k + 24);
	x11 = load1_epi32(k + 28);
	/* x12 set in the loop */
	x13 = load1_epi32(nonce + 0);
	x14 = load1_epi32(nonce + 4);
	x15 = load1_epi32(nonce + 8);

	for (; n >= 256; s += 256, p += 256, n -= 256, blkno += 4) {
		/* Per-lane block counters: blkno, blkno+1, ..., blkno+3. */
		x12 = _mm_add_epi32(_mm_set1_epi32(blkno),
		    _mm_set_epi32(3,2,1,0));
		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (r = nr; r > 0; r -= 2) {
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		/* Feedforward: add the initial state back in. */
		y0 = _mm_add_epi32(y0, x0);
		y1 = _mm_add_epi32(y1, x1);
		y2 = _mm_add_epi32(y2, x2);
		y3 = _mm_add_epi32(y3, x3);
		y4 = _mm_add_epi32(y4, x4);
		y5 = _mm_add_epi32(y5, x5);
		y6 = _mm_add_epi32(y6, x6);
		y7 = _mm_add_epi32(y7, x7);
		y8 = _mm_add_epi32(y8, x8);
		y9 = _mm_add_epi32(y9, x9);
		y10 = _mm_add_epi32(y10, x10);
		y11 = _mm_add_epi32(y11, x11);
		y12 = _mm_add_epi32(y12, x12);
		y13 = _mm_add_epi32(y13, x13);
		y14 = _mm_add_epi32(y14, x14);
		y15 = _mm_add_epi32(y15, x15);

		/* Transpose lane-major y back into block-major z. */
		z0 = unpack0_epi32(y0, y1, y2, y3);
		z1 = unpack0_epi32(y4, y5, y6, y7);
		z2 = unpack0_epi32(y8, y9, y10, y11);
		z3 = unpack0_epi32(y12, y13, y14, y15);
		z4 = unpack1_epi32(y0, y1, y2, y3);
		z5 = unpack1_epi32(y4, y5, y6, y7);
		z6 = unpack1_epi32(y8, y9, y10, y11);
		z7 = unpack1_epi32(y12, y13, y14, y15);
		z8 = unpack2_epi32(y0, y1, y2, y3);
		z9 = unpack2_epi32(y4, y5, y6, y7);
		z10 = unpack2_epi32(y8, y9, y10, y11);
		z11 = unpack2_epi32(y12, y13, y14, y15);
		z12 = unpack3_epi32(y0, y1, y2, y3);
		z13 = unpack3_epi32(y4, y5, y6, y7);
		z14 = unpack3_epi32(y8, y9, y10, y11);
		z15 = unpack3_epi32(y12, y13, y14, y15);

		/* XOR the keystream into the plaintext as we store. */
		storeu_epi32(s + 16*0, loadu_epi32(p + 16*0) ^ z0);
		storeu_epi32(s + 16*1, loadu_epi32(p + 16*1) ^ z1);
		storeu_epi32(s + 16*2, loadu_epi32(p + 16*2) ^ z2);
		storeu_epi32(s + 16*3, loadu_epi32(p + 16*3) ^ z3);
		storeu_epi32(s + 16*4, loadu_epi32(p + 16*4) ^ z4);
		storeu_epi32(s + 16*5, loadu_epi32(p + 16*5) ^ z5);
		storeu_epi32(s + 16*6, loadu_epi32(p + 16*6) ^ z6);
		storeu_epi32(s + 16*7, loadu_epi32(p + 16*7) ^ z7);
		storeu_epi32(s + 16*8, loadu_epi32(p + 16*8) ^ z8);
		storeu_epi32(s + 16*9, loadu_epi32(p + 16*9) ^ z9);
		storeu_epi32(s + 16*10, loadu_epi32(p + 16*10) ^ z10);
		storeu_epi32(s + 16*11, loadu_epi32(p + 16*11) ^ z11);
		storeu_epi32(s + 16*12, loadu_epi32(p + 16*12) ^ z12);
		storeu_epi32(s + 16*13, loadu_epi32(p + 16*13) ^ z13);
		storeu_epi32(s + 16*14, loadu_epi32(p + 16*14) ^ z14);
		storeu_epi32(s + 16*15, loadu_epi32(p + 16*15) ^ z15);
	}

out:	/* Remaining < 256 bytes: one block at a time. */
	if (n) {
		const __m128i blkno_inc = _mm_set_epi32(0,0,0,1);
		__m128i in0, in1, in2, in3;
		__m128i r0, r1, r2, r3;

		in0 = _mm_loadu_si128((const __m128i *)chacha_const32);
		in1 = _mm_loadu_si128((const __m128i *)k);
		in2 = _mm_loadu_si128((const __m128i *)k + 1);
		in3 = _mm_set_epi32(le32dec(nonce + 8), le32dec(nonce + 4),
		    le32dec(nonce), blkno);

		for (; n; s += 64, p += 64, n -= 64) {
			r0 = in0;
			r1 = in1;
			r2 = in2;
			r3 = in3;
			chacha_permute(&r0, &r1, &r2, &r3, nr);
			r0 = _mm_add_epi32(r0, in0);
			r1 = _mm_add_epi32(r1, in1);
			r2 = _mm_add_epi32(r2, in2);
			r3 = _mm_add_epi32(r3, in3);
			/*
			 * Partial final block: stage the keystream in a
			 * bounce buffer, then XOR word-at-a-time (and
			 * byte-at-a-time for a ragged tail) into s.
			 */
			if (n < 64) {
				uint8_t buf[64] __aligned(16);
				unsigned i;

				_mm_storeu_si128((__m128i *)buf + 0, r0);
				_mm_storeu_si128((__m128i *)buf + 1, r1);
				_mm_storeu_si128((__m128i *)buf + 2, r2);
				_mm_storeu_si128((__m128i *)buf + 3, r3);

				for (i = 0; i < n - n%4; i += 4)
					le32enc(s + i,
					    le32dec(p + i) ^ le32dec(buf + i));
				for (; i < n; i++)
					s[i] = p[i] ^ buf[i];
				break;
			}
			r0 ^= _mm_loadu_si128((const __m128i *)p + 0);
			r1 ^= _mm_loadu_si128((const __m128i *)p + 1);
			r2 ^= _mm_loadu_si128((const __m128i *)p + 2);
			r3 ^= _mm_loadu_si128((const __m128i *)p + 3);
			_mm_storeu_si128((__m128i *)s + 0, r0);
			_mm_storeu_si128((__m128i *)s + 1, r1);
			_mm_storeu_si128((__m128i *)s + 2, r2);
			_mm_storeu_si128((__m128i *)s + 3, r3);
			in3 = _mm_add_epi32(in3, blkno_inc);
		}
	}
}

/*
 * xchacha_stream_sse2(s, nbytes, blkno, nonce, k, nr)
 *
 *	XChaCha keystream: derive a subkey from k and the first 16 bytes
 *	of the 24-byte nonce via HChaCha, then run ChaCha with the
 *	remaining 8 nonce bytes (zero-padded to 12).
 */
void
xchacha_stream_sse2(uint8_t *restrict s, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 24],
    const uint8_t k[static 32],
    unsigned nr)
{
	uint8_t subkey[32];
	uint8_t subnonce[12];

	hchacha_sse2(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
	memset(subnonce, 0, 4);
	memcpy(subnonce + 4, nonce + 16, 8);
	chacha_stream_sse2(s, nbytes, blkno, subnonce, subkey, nr);
}

/*
 * xchacha_stream_xor_sse2(c, p, nbytes, blkno, nonce, k, nr)
 *
 *	XChaCha encrypt/decrypt: same subkey/subnonce derivation as
 *	xchacha_stream_sse2, applied to the XOR variant.
 */
void
xchacha_stream_xor_sse2(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 24],
    const uint8_t k[static 32],
    unsigned nr)
{
	uint8_t subkey[32];
	uint8_t subnonce[12];

	hchacha_sse2(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
	memset(subnonce, 0, 4);
	memcpy(subnonce + 4, nonce + 16, 8);
	chacha_stream_xor_sse2(c, p, nbytes, blkno, subnonce, subkey, nr);
}
| 4 4 1 1 1 1 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | /* $NetBSD: vfs_hooks.c,v 1.6 2009/03/15 17:14:40 cegger Exp $ */ /*- * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * VFS hooks.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_hooks.c,v 1.6 2009/03/15 17:14:40 cegger Exp $");

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/mutex.h>

/* Global list of registered hook sets, guarded by vfs_hooks_lock. */
LIST_HEAD(vfs_hooks_head, vfs_hooks) vfs_hooks_head =
    LIST_HEAD_INITIALIZER(vfs_hooks_head);
kmutex_t vfs_hooks_lock;

/* Initialize the lock protecting the hooks list. */
void
vfs_hooks_init(void)
{

	mutex_init(&vfs_hooks_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Register a set of VFS hooks.  Always succeeds (returns 0); the
 * caller retains ownership of the vfs_hooks structure.
 */
int
vfs_hooks_attach(struct vfs_hooks *vfs_hooks)
{
	mutex_enter(&vfs_hooks_lock);
	LIST_INSERT_HEAD(&vfs_hooks_head, vfs_hooks, vfs_hooks_list);
	mutex_exit(&vfs_hooks_lock);
	return (0);
}

/*
 * Unregister a previously attached set of VFS hooks.  Returns 0 on
 * success, or ESRCH if the set was never attached (hp is NULL when
 * LIST_FOREACH runs off the end without matching).
 */
int
vfs_hooks_detach(struct vfs_hooks *vfs_hooks)
{
	struct vfs_hooks *hp;
	int ret = 0;

	mutex_enter(&vfs_hooks_lock);
	LIST_FOREACH(hp, &vfs_hooks_head, vfs_hooks_list) {
		if (hp == vfs_hooks) {
			LIST_REMOVE(hp, vfs_hooks_list);
			break;
		}
	}
	if (hp == NULL)
		ret = ESRCH;
	mutex_exit(&vfs_hooks_lock);

	return (ret);
}

/*
 * Macro to be used in one of the vfs_hooks_* function for hooks that
 * return an error code.  Calls will stop as soon as one of the hooks
 * fails.
 *
 * Note: if no hook set defines the member, the generated function
 * returns EJUSTRETURN (no hook ran).
 */
#define VFS_HOOKS_W_ERROR(func, fargs, hook, hargs)		\
int								\
func fargs							\
{								\
	int error;						\
	struct vfs_hooks *hp;					\
								\
	error = EJUSTRETURN;					\
								\
	mutex_enter(&vfs_hooks_lock);				\
	LIST_FOREACH(hp, &vfs_hooks_head, vfs_hooks_list) {	\
		if (hp-> hook != NULL) {			\
			error = hp-> hook hargs;		\
			if (error != 0)				\
				break;				\
		}						\
	}							\
	mutex_exit(&vfs_hooks_lock);				\
								\
	return error;						\
}

/*
 * Macro to be used in one of the vfs_hooks_* function for hooks that
 * do not return any error code.  All hooks will be executed
 * unconditionally.
 */
#define VFS_HOOKS_WO_ERROR(func, fargs, hook, hargs)		\
void								\
func fargs							\
{								\
	struct vfs_hooks *hp;					\
								\
	mutex_enter(&vfs_hooks_lock);				\
	LIST_FOREACH(hp, &vfs_hooks_head, vfs_hooks_list) {	\
		if (hp-> hook != NULL)				\
			hp-> hook hargs;			\
	}							\
	mutex_exit(&vfs_hooks_lock);				\
}

/*
 * Routines to iterate over VFS hooks lists and execute them.
 */

VFS_HOOKS_WO_ERROR(vfs_hooks_unmount, (struct mount *mp), vh_unmount, (mp));
VFS_HOOKS_W_ERROR(vfs_hooks_reexport,
    (struct mount *mp, const char *path, void *data),
    vh_reexport, (mp, path, data));
| 35 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 | /* $NetBSD: proc.h,v 1.370 2022/05/09 13:27:24 wiz Exp $ */ /*- * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 *	@(#)proc.h	8.15 (Berkeley) 5/19/95
 */

#ifndef _SYS_PROC_H_
#define _SYS_PROC_H_

#include <sys/lwp.h>

#if defined(_KMEMUSER) || defined(_KERNEL)

#if defined(_KERNEL_OPT)
#include "opt_multiprocessor.h"
#include "opt_kstack.h"
#include "opt_lockdebug.h"
#endif

#include <machine/proc.h>	/* Machine-dependent proc substruct */
#include <machine/pcb.h>
#include <sys/aio.h>
#include <sys/idtype.h>
#include <sys/rwlock.h>
#include <sys/mqueue.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <sys/radixtree.h>
#include <sys/signalvar.h>
#include <sys/siginfo.h>
#include <sys/event.h>
#include <sys/specificdata.h>

#ifndef _KERNEL
#include <sys/time.h>
#include <sys/resource.h>
#endif

/*
 * One structure allocated per session.
 */
struct session {
	int		s_count;	/* Ref cnt; pgrps in session */
	u_int		s_flags;
#define	S_LOGIN_SET	1		/* s_login set in this session */
	struct proc	*s_leader;	/* Session leader */
	struct vnode	*s_ttyvp;	/* Vnode of controlling terminal */
	struct tty	*s_ttyp;	/* Controlling terminal */
	char		s_login[MAXLOGNAME]; /* Setlogin() name */
	pid_t		s_sid;		/* Session ID (pid of leader) */
};

/*
 * One structure allocated per process group.
 */
struct pgrp {
	LIST_HEAD(, proc) pg_members;	/* Pointer to pgrp members */
	struct session	*pg_session;	/* Pointer to session */
	pid_t		pg_id;		/* Pgrp id */
	int		pg_jobc;	/*
					 * Number of processes qualifying
					 * pgrp for job control
					 */
};

/*
 * Autoloadable syscall definition
 */
struct sc_autoload {
	u_int		al_code;	/* syscall number to autoload for */
	const char	*al_module;	/* module implementing it */
};

/*
 * One structure allocated per emulation.
 */
struct exec_package;
struct ras;
struct kauth_cred;

struct emul {
	const char	*e_name;	/* Symbolic name */
	const char	*e_path;	/* Extra emulation path (NULL if none)*/
#ifndef __HAVE_MINIMAL_EMUL
	int		e_flags;	/* Miscellaneous flags, see above */
					/* Syscall handling function */
	const int	*e_errno;	/* Errno array */
	int		e_nosys;	/* Offset of the nosys() syscall */
	int		e_nsysent;	/* Number of system call entries */
#endif
	struct sysent	*e_sysent;	/* System call array */
	const uint32_t	*e_nomodbits;	/* sys_nosys/sys_nomodule flags
					 * for syscall_disestablish() */
	const char * const *e_syscallnames; /* System call name array */
	struct sc_autoload *e_sc_autoload; /* List of autoloadable syscalls */
					/* Signal sending function */
	void		(*e_sendsig)(const struct ksiginfo *,
					  const sigset_t *);
	void		(*e_trapsignal)(struct lwp *, struct ksiginfo *);
	char		*e_sigcode;	/* Start of sigcode */
	char		*e_esigcode;	/* End of sigcode */
					/* Set registers before execution */
	struct uvm_object **e_sigobject;/* shared sigcode object */
	void		(*e_setregs)(struct lwp *, struct exec_package *,
					  vaddr_t);

	/* Per-process hooks */
	void		(*e_proc_exec)(struct proc *, struct exec_package *);
	void		(*e_proc_fork)(struct proc *, struct lwp *, int);
	void		(*e_proc_exit)(struct proc *);
	void		(*e_lwp_fork)(struct lwp *, struct lwp *);
	void		(*e_lwp_exit)(struct lwp *);

#ifdef __HAVE_SYSCALL_INTERN
	void		(*e_syscall_intern)(struct proc *);
#else
	void		(*e_syscall)(void);
#endif
	/* Emulation specific sysctl data */
	struct sysctlnode *e_sysctlovly;

	vaddr_t		(*e_vm_default_addr)(struct proc *, vaddr_t, vsize_t,
					  int);

	/* Emulation-specific hook for userspace page faults */
	int		(*e_usertrap)(struct lwp *, vaddr_t, void *);

	size_t		e_ucsize;	/* size of ucontext_t */
	void		(*e_startlwp)(void *);

	/* Dtrace syscall probe */
	void		(*e_dtrace_syscall)(uint32_t, register_t,
					  const struct sysent *, const void *,
					  const register_t *, int);

	/* Emulation specific support for ktracing signal posts */
	void		(*e_ktrpsig)(int, sig_t,
					  const sigset_t *,
					  const struct ksiginfo *);
};

/*
 * Emulation miscellaneous flags
 */
#define	EMUL_HAS_SYS___syscall	0x001	/* Has SYS___syscall */

/*
 * Description of a process.
 *
 * This structure contains the information needed to manage a thread of
 * control, known in UN*X as a process; it has references to substructures
 * containing descriptions of things that the process uses, but may share
 * with related processes.  The process structure and the substructures
 * are always addressable except for those marked "(PROC ONLY)" below,
 * which might be addressable only on a processor on which the process
 * is running.
 *
 * Field markings and the corresponding locks:
 *
 * a:	p_auxlock
 * k:	ktrace_mutex
 * l:	proc_lock
 * t:	p_stmutex
 * p:	p_lock
 * (:	updated atomically
 * ::	unlocked, stable
 */
struct vmspace;

struct proc {
	LIST_ENTRY(proc) p_list;	/* l: List of all processes */
	kmutex_t	*p_lock;	/* :: general mutex */
	kcondvar_t	p_waitcv;	/* p: wait, stop CV on children */
	kcondvar_t	p_lwpcv;	/* p: wait, stop CV on LWPs */

	/* Substructures: */
	struct kauth_cred *p_cred;	/* p: Master copy of credentials */
	struct filedesc	*p_fd;		/* :: Ptr to open files structure */
	struct cwdinfo	*p_cwdi;	/* :: cdir/rdir/cmask info */
	struct pstats	*p_stats;	/* :: Accounting/stats (PROC ONLY) */
	struct plimit	*p_limit;	/* :: Process limits */
	struct vmspace	*p_vmspace;	/* :: Address space */
	struct sigacts	*p_sigacts;	/* :: Process sigactions */
	struct aioproc	*p_aio;		/* p: Asynchronous I/O data */
	u_int		p_mqueue_cnt;	/* (: Count of open message queues */
	specificdata_reference p_specdataref; /* subsystem proc-specific data */

	int		p_exitsig;	/* l: signal to send to parent on exit */
	int		p_flag;		/* p: PK_* flags */
	int		p_sflag;	/* p: PS_* flags */
	int		p_slflag;	/* p, l: PSL_* flags */
	int		p_lflag;	/* l: PL_* flags */
	int		p_stflag;	/* t: PST_* flags */
	char		p_stat;		/* p: S* process status. */
	char		p_trace_enabled;/* p: cached by syscall_intern() */
	char		p_pad1[2];	/* unused */

	pid_t		p_pid;		/* :: Process identifier. */
	LIST_ENTRY(proc) p_pglist;	/* l: List of processes in pgrp. */
	struct proc 	*p_pptr;	/* l: Pointer to parent process. */
	LIST_ENTRY(proc) p_sibling;	/* l: List of sibling processes. */
	LIST_HEAD(, proc) p_children;	/* l: List of children. */
	LIST_HEAD(, lwp) p_lwps;	/* p: List of LWPs. */
	struct ras	*p_raslist;	/* a: List of RAS entries */

/* The following fields are all zeroed upon creation in fork. */
#define	p_startzero	p_nlwps
	int		p_nlwps;	/* p: Number of LWPs */
	int		p_nzlwps;	/* p: Number of zombie LWPs */
	int		p_nrlwps;	/* p: Number running/sleeping LWPs */
	int		p_nlwpwait;	/* p: Number of LWPs in lwp_wait1() */
	int		p_ndlwps;	/* p: Number of detached LWPs */
	u_int		p_nstopchild;	/* l: Count of stopped/dead children */
	u_int		p_waited;	/* l: parent has waited on child */
	struct lwp	*p_zomblwp;	/* p: detached LWP to be reaped */
	struct lwp	*p_vforklwp;	/* p: parent LWP waiting at vfork() */

	/* scheduling */
	void		*p_sched_info;	/* p: Scheduler-specific structure */
	fixpt_t		p_estcpu;	/* p: Time avg. value of p_cpticks */
	fixpt_t		p_estcpu_inherited; /* p: cpu inherited from children */
	unsigned int	p_forktime;
	fixpt_t		p_pctcpu;	/* p: %cpu from dead LWPs */

	struct proc	*p_opptr;	/* l: save parent during ptrace.
*/ struct ptimers *p_timers; /* Timers: real, virtual, profiling */ struct bintime p_rtime; /* p: real time */ u_quad_t p_uticks; /* t: Statclock hits in user mode */ u_quad_t p_sticks; /* t: Statclock hits in system mode */ u_quad_t p_iticks; /* t: Statclock hits processing intr */ uint64_t p_xutime; /* p: utime exposed to userspace */ uint64_t p_xstime; /* p: stime exposed to userspace */ int p_traceflag; /* k: Kernel trace points */ void *p_tracep; /* k: Trace private data */ struct vnode *p_textvp; /* :: Vnode of executable */ struct emul *p_emul; /* :: emulation information */ void *p_emuldata; /* :: per-proc emul data, or NULL */ const struct execsw *p_execsw; /* :: exec package information */ struct klist p_klist; /* p: knotes attached to proc */ LIST_HEAD(, lwp) p_sigwaiters; /* p: LWPs waiting for signals */ sigpend_t p_sigpend; /* p: pending signals */ struct lcproc *p_lwpctl; /* p, a: _lwp_ctl() information */ pid_t p_ppid; /* :: cached parent pid */ pid_t p_oppid; /* :: cached original parent pid */ char *p_path; /* :: full pathname of executable */ /* * End area that is zeroed on creation */ #define p_endzero p_startcopy /* * The following fields are all copied upon creation in fork. */ #define p_startcopy p_sigctx struct sigctx p_sigctx; /* p: Shared signal state */ u_char p_nice; /* p: Process "nice" value */ char p_comm[MAXCOMLEN+1]; /* p: basename of last exec file */ struct pgrp *p_pgrp; /* l: Pointer to process group */ vaddr_t p_psstrp; /* :: address of process's ps_strings */ u_int p_pax; /* :: PAX flags */ int p_xexit; /* p: exit code */ /* * End area that is copied on creation */ #define p_endcopy p_xsig u_short p_xsig; /* p: stop signal */ u_short p_acflag; /* p: Acc. flags; see struct lwp also */ struct mdproc p_md; /* p: Any machine-dependent fields */ vaddr_t p_stackbase; /* :: ASLR randomized stack base */ struct kdtrace_proc *p_dtrace; /* :: DTrace-specific data. */ /* * Locks in their own cache line towards the end. 
*/ kmutex_t p_auxlock /* :: secondary, longer term lock */ __aligned(COHERENCY_UNIT); kmutex_t p_stmutex; /* :: mutex on profiling state */ krwlock_t p_reflock; /* :: lock for debugger, procfs */ }; #define p_rlimit p_limit->pl_rlimit #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #endif /* _KMEMUSER || _KERNEL */ /* * Status values. */ #define SIDL 1 /* Process being created by fork */ #define SACTIVE 2 /* Process is not stopped */ #define SDYING 3 /* About to die */ #define SSTOP 4 /* Process debugging or suspension */ #define SZOMB 5 /* Awaiting collection by parent */ #define SDEAD 6 /* Almost a zombie */ #define P_ZOMBIE(p) \ ((p)->p_stat == SZOMB || (p)->p_stat == SDYING || (p)->p_stat == SDEAD) /* * These flags are kept in p_flag and are protected by p_lock. Access from * process context only. */ #define PK_ADVLOCK 0x00000001 /* Process may hold a POSIX advisory lock */ #define PK_SYSTEM 0x00000002 /* System process (kthread) */ #define PK_SYSVSEM 0x00000004 /* Used SysV semaphores */ #define PK_SUGID 0x00000100 /* Had set id privileges since last exec */ #define PK_KMEM 0x00000200 /* Has kmem access */ #define PK_EXEC 0x00004000 /* Process called exec */ #define PK_NOCLDWAIT 0x00020000 /* No zombies if child dies */ #define PK_32 0x00040000 /* 32-bit process (used on 64-bit kernels) */ #define PK_CLDSIGIGN 0x00080000 /* Process is ignoring SIGCHLD */ #define PK_MARKER 0x80000000 /* Is a dummy marker process */ /* * These flags are kept in p_sflag and are protected by p_lock. Access from * process context only. 
*/ #define PS_NOCLDSTOP 0x00000008 /* No SIGCHLD when children stop */ #define PS_RUMP_LWPEXIT 0x00000400 /* LWPs in RUMP kernel should exit for GC */ #define PS_WCORE 0x00001000 /* Process needs to dump core */ #define PS_WEXIT 0x00002000 /* Working on exiting */ #define PS_STOPFORK 0x00800000 /* Child will be stopped on fork(2) */ #define PS_STOPEXEC 0x01000000 /* Will be stopped on exec(2) */ #define PS_STOPEXIT 0x02000000 /* Will be stopped at process exit */ #define PS_COREDUMP 0x20000000 /* Process core-dumped */ #define PS_CONTINUED 0x40000000 /* Process is continued */ #define PS_STOPPING 0x80000000 /* Transitioning SACTIVE -> SSTOP */ /* * These flags are kept in p_slflag and are protected by the proc_lock * and p_lock. Access from process context only. */ #define PSL_TRACEFORK 0x00000001 /* traced process wants fork events */ #define PSL_TRACEVFORK 0x00000002 /* traced process wants vfork events */ #define PSL_TRACEVFORK_DONE \ 0x00000004 /* traced process wants vfork done events */ #define PSL_TRACELWP_CREATE \ 0x00000008 /* traced process wants LWP create events */ #define PSL_TRACELWP_EXIT \ 0x00000010 /* traced process wants LWP exit events */ #define PSL_TRACEPOSIX_SPAWN \ 0x00000020 /* traced process wants posix_spawn events */ #define PSL_TRACED 0x00000800 /* Debugged process being traced */ #define PSL_TRACEDCHILD 0x00001000 /* Report process birth */ #define PSL_CHTRACED 0x00400000 /* Child has been traced & reparented */ #define PSL_SYSCALL 0x04000000 /* process has PT_SYSCALL enabled */ #define PSL_SYSCALLEMU 0x08000000 /* cancel in-progress syscall */ /* * Kept in p_stflag and protected by p_stmutex. */ #define PST_PROFIL 0x00000020 /* Has started profiling */ /* * Kept in p_lflag and protected by the proc_lock. Access * from process context only. 
*/ #define PL_CONTROLT 0x00000002 /* Has a controlling terminal */ #define PL_PPWAIT 0x00000010 /* Parent is waiting for child exec/exit */ #define PL_SIGCOMPAT 0x00000200 /* Has used compat signal trampoline */ #define PL_ORPHANPG 0x20000000 /* Member of an orphaned pgrp */ #if defined(_KMEMUSER) || defined(_KERNEL) /* * Macro to compute the exit signal to be delivered. */ #define P_EXITSIG(p) \ (((p)->p_slflag & PSL_TRACED) ? SIGCHLD : p->p_exitsig) /* * Compute a wait(2) 16 bit exit status code */ #define P_WAITSTATUS(p) W_EXITCODE((p)->p_xexit, ((p)->p_xsig | \ (((p)->p_sflag & PS_COREDUMP) ? WCOREFLAG : 0))) LIST_HEAD(proclist, proc); /* A list of processes */ /* * This structure associates a proclist with its lock. */ struct proclist_desc { struct proclist *pd_list; /* The list */ /* * XXX Add a pointer to the proclist's lock eventually. */ }; #ifdef _KERNEL /* * We use process IDs <= PID_MAX until there are > 16k processes. * NO_PGID is used to represent "no process group" for a tty. */ #define PID_MAX 30000 #define NO_PGID ((pid_t)-1) #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) /* * Flags passed to fork1(). 
*/ #define FORK_PPWAIT 0x0001 /* Block parent until child exit */ #define FORK_SHAREVM 0x0002 /* Share vmspace with parent */ #define FORK_SHARECWD 0x0004 /* Share cdir/rdir/cmask */ #define FORK_SHAREFILES 0x0008 /* Share file descriptors */ #define FORK_SHARESIGS 0x0010 /* Share signal actions */ #define FORK_NOWAIT 0x0020 /* Make init the parent of the child */ #define FORK_CLEANFILES 0x0040 /* Start with a clean descriptor set */ #define FORK_SYSTEM 0x0080 /* Fork a kernel thread */ extern struct proc proc0; /* Process slot for swapper */ extern u_int nprocs; /* Current number of procs */ extern int maxproc; /* Max number of procs */ #define vmspace_kernel() (proc0.p_vmspace) extern kmutex_t proc_lock; extern struct proclist allproc; /* List of all processes */ extern struct proclist zombproc; /* List of zombie processes */ extern struct proc *initproc; /* Process slots for init, pager */ extern const struct proclist_desc proclists[]; int proc_find_locked(struct lwp *, struct proc **, pid_t); proc_t * proc_find_raw(pid_t); proc_t * proc_find(pid_t); /* Find process by ID */ proc_t * proc_find_lwpid(pid_t); /* Find process by LWP ID */ struct lwp * proc_find_lwp(proc_t *, pid_t); /* Find LWP in proc by ID */ struct lwp * proc_find_lwp_unlocked(proc_t *, pid_t); /* Find LWP, acquire proc */ struct lwp * proc_find_lwp_acquire_proc(pid_t, proc_t **); struct pgrp * pgrp_find(pid_t); /* Find process group by ID */ void procinit(void); void procinit_sysctl(void); int proc_enterpgrp(struct proc *, pid_t, pid_t, bool); void proc_leavepgrp(struct proc *); void proc_sesshold(struct session *); void proc_sessrele(struct session *); void fixjobc(struct proc *, struct pgrp *, int); int tsleep(wchan_t, pri_t, const char *, int); int mtsleep(wchan_t, pri_t, const char *, int, kmutex_t *); void wakeup(wchan_t); int kpause(const char *, bool, int, kmutex_t *); void exit1(struct lwp *, int, int) __dead; int kill1(struct lwp *l, pid_t pid, ksiginfo_t *ksi, register_t *retval); int 
do_sys_wait(int *, int *, int, struct rusage *); int do_sys_waitid(idtype_t, id_t, int *, int *, int, struct wrusage *, siginfo_t *); struct proc *proc_alloc(void); void proc0_init(void); pid_t proc_alloc_pid(struct proc *); void proc_free_pid(pid_t); pid_t proc_alloc_lwpid(struct proc *, struct lwp *); void proc_free_lwpid(struct proc *, pid_t); void proc_free_mem(struct proc *); void exit_lwps(struct lwp *l); int fork1(struct lwp *, int, int, void *, size_t, void (*)(void *), void *, register_t *); int pgid_in_session(struct proc *, pid_t); void cpu_lwp_fork(struct lwp *, struct lwp *, void *, size_t, void (*)(void *), void *); void cpu_lwp_free(struct lwp *, int); void cpu_lwp_free2(struct lwp *); void cpu_spawn_return(struct lwp*); #ifdef __HAVE_SYSCALL_INTERN void syscall_intern(struct proc *); #endif void md_child_return(struct lwp *); void child_return(void *); int proc_isunder(struct proc *, struct lwp *); int proc_uidmatch(kauth_cred_t, kauth_cred_t); int proc_vmspace_getref(struct proc *, struct vmspace **); void proc_crmod_leave(kauth_cred_t, kauth_cred_t, bool); void proc_crmod_enter(void); int proc_getauxv(struct proc *, void **, size_t *); int proc_specific_key_create(specificdata_key_t *, specificdata_dtor_t); void proc_specific_key_delete(specificdata_key_t); void proc_initspecific(struct proc *); void proc_finispecific(struct proc *); void * proc_getspecific(struct proc *, specificdata_key_t); void proc_setspecific(struct proc *, specificdata_key_t, void *); int proc_compare(const struct proc *, const struct lwp *, const struct proc *, const struct lwp *); /* * Special handlers for delivering EVFILT_PROC notifications. These * exist to handle some of the special locking considerations around * processes. 
 */
void	knote_proc_exec(struct proc *);
void	knote_proc_fork(struct proc *, struct proc *);
void	knote_proc_exit(struct proc *);

int	proclist_foreach_call(struct proclist *,
    int (*)(struct proc *, void *arg), void *);

/*
 * Skip over PK_MARKER placeholder entries (dummy procs used as iteration
 * markers) starting at p0; returns the first real process on the list,
 * or NULL if only markers remain.
 */
static __inline struct proc *
_proclist_skipmarker(struct proc *p0)
{
	struct proc *p = p0;

	while (p != NULL && p->p_flag & PK_MARKER)
		p = LIST_NEXT(p, p_list);

	return p;
}

/*
 * Sizes of pointer/register blobs as seen by the given process: a PK_32
 * (32-bit on a 64-bit kernel) process gets the 32-bit layouts.
 */
#define PROC_PTRSZ(p)	(((p)->p_flag & PK_32) ? sizeof(int) : sizeof(void *))
#define PROC_REGSZ(p)	(((p)->p_flag & PK_32) ? \
    sizeof(process_reg32) : sizeof(struct reg))
#define PROC_FPREGSZ(p)	(((p)->p_flag & PK_32) ? \
    sizeof(process_fpreg32) : sizeof(struct fpreg))
#define PROC_DBREGSZ(p)	(((p)->p_flag & PK_32) ? \
    sizeof(process_dbreg32) : sizeof(struct dbreg))

/*
 * PROCLIST_FOREACH: iterate on the given proclist, skipping PK_MARKER ones.
 */
#define PROCLIST_FOREACH(var, head)					\
	for ((var) = LIST_FIRST(head);					\
	    ((var) = _proclist_skipmarker(var)) != NULL;		\
	    (var) = LIST_NEXT(var, p_list))

#ifdef KSTACK_CHECK_MAGIC
void	kstack_setup_magic(const struct lwp *);
void	kstack_check_magic(const struct lwp *);
#else
/* Stack-magic checking compiled out: calls become no-ops. */
#define kstack_setup_magic(x)
#define kstack_check_magic(x)
#endif

extern struct emul emul_netbsd;

#endif	/* _KERNEL */

/*
 * Kernel stack parameters.
 *
 * KSTACK_LOWEST_ADDR: return the lowest address of the LWP's kernel stack,
 * excluding red-zone.
 *
 * KSTACK_SIZE: the size kernel stack for a LWP, excluding red-zone.
 *
 * if <machine/proc.h> provides the MD definition, it will be used.
 */
#ifndef KSTACK_LOWEST_ADDR
#define KSTACK_LOWEST_ADDR(l)	((void *)ALIGN((struct pcb *)((l)->l_addr) + 1))
#endif
#ifndef KSTACK_SIZE
#define KSTACK_SIZE		(USPACE - ALIGN(sizeof(struct pcb)))
#endif

#endif	/* _KMEMUSER || _KERNEL */

#endif	/* !_SYS_PROC_H_ */
| 1 1 1 1 1 2 3 2 1 2 2 2 2 2 5 5 5 5 4 4 4 4 2 4 1 1 2 2 2 1 2 2 2 2 2 2 2 21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | /* $NetBSD: ddp_usrreq.c,v 1.75 2021/09/21 15:01:59 christos Exp $ */ /* * Copyright (c) 1990,1991 Regents of The University of Michigan. * All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby granted, * provided that the above copyright notice appears in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation, and that the name of The University * of Michigan not be used in advertising or publicity pertaining to * distribution of the software without specific, written prior * permission. This software is supplied as is without expressed or * implied warranties of any kind. * * This product includes software developed by the University of * California, Berkeley and its contributors. * * Research Systems Unix Group * The University of Michigan * c/o Wesley Craig * 535 W. 
William Street * Ann Arbor, Michigan * +1-313-764-2278 * netatalk@umich.edu */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ddp_usrreq.c,v 1.75 2021/09/21 15:01:59 christos Exp $"); #include "opt_mbuftrace.h" #include "opt_atalk.h" #include <sys/param.h> #include <sys/errno.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/ioctl.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/protosw.h> #include <sys/kauth.h> #include <sys/kmem.h> #include <sys/sysctl.h> #include <net/if.h> #include <net/route.h> #include <net/if_ether.h> #include <net/net_stats.h> #include <netinet/in.h> #include <netatalk/at.h> #include <netatalk/at_var.h> #include <netatalk/ddp_var.h> #include <netatalk/ddp_private.h> #include <netatalk/aarp.h> #include <netatalk/at_extern.h> static void at_pcbdisconnect(struct ddpcb *); static void at_sockaddr(struct ddpcb *, struct sockaddr_at *); static int at_pcbsetaddr(struct ddpcb *, struct sockaddr_at *); static int at_pcbconnect(struct ddpcb *, struct sockaddr_at *); static void ddp_detach(struct socket *); struct ifqueue atintrq1, atintrq2; struct ddpcb *ddp_ports[ATPORT_LAST]; struct ddpcb *ddpcb = NULL; percpu_t *ddpstat_percpu; struct at_ifaddrhead at_ifaddr; /* Here as inited in this file */ u_long ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */ u_long ddp_recvspace = 25 * (587 + sizeof(struct sockaddr_at)); #ifdef MBUFTRACE struct mowner atalk_rx_mowner = MOWNER_INIT("atalk", "rx"); struct mowner atalk_tx_mowner = MOWNER_INIT("atalk", "tx"); #endif static void at_sockaddr(struct ddpcb *ddp, struct sockaddr_at *addr) { *addr = ddp->ddp_lsat; } static int at_pcbsetaddr(struct ddpcb *ddp, struct sockaddr_at *sat) { struct sockaddr_at lsat; struct at_ifaddr *aa; struct ddpcb *ddpp; if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT) { /* shouldn't be bound */ return (EINVAL); } if (NULL != sat) { /* validate passed address */ if (sat->sat_family != AF_APPLETALK) return (EAFNOSUPPORT); 
if (sat->sat_len != sizeof(*sat)) return EINVAL; if (sat->sat_addr.s_node != ATADDR_ANYNODE || sat->sat_addr.s_net != ATADDR_ANYNET) { TAILQ_FOREACH(aa, &at_ifaddr, aa_list) { if ((sat->sat_addr.s_net == AA_SAT(aa)->sat_addr.s_net) && (sat->sat_addr.s_node == AA_SAT(aa)->sat_addr.s_node)) break; } if (!aa) return (EADDRNOTAVAIL); } if (sat->sat_port != ATADDR_ANYPORT) { int error; if (sat->sat_port < ATPORT_FIRST || sat->sat_port >= ATPORT_LAST) return (EINVAL); if (sat->sat_port < ATPORT_RESERVED && (error = kauth_authorize_network( kauth_cred_get(), KAUTH_NETWORK_BIND, KAUTH_REQ_NETWORK_BIND_PRIVPORT, ddpcb->ddp_socket, sat, NULL)) != 0) return (error); } } else { memset((void *) & lsat, 0, sizeof(struct sockaddr_at)); lsat.sat_len = sizeof(struct sockaddr_at); lsat.sat_addr.s_node = ATADDR_ANYNODE; lsat.sat_addr.s_net = ATADDR_ANYNET; lsat.sat_family = AF_APPLETALK; sat = &lsat; } if (sat->sat_addr.s_node == ATADDR_ANYNODE && sat->sat_addr.s_net == ATADDR_ANYNET) { if (TAILQ_EMPTY(&at_ifaddr)) return EADDRNOTAVAIL; sat->sat_addr = AA_SAT(TAILQ_FIRST(&at_ifaddr))->sat_addr; } ddp->ddp_lsat = *sat; /* * Choose port. 
*/ if (sat->sat_port == ATADDR_ANYPORT) { for (sat->sat_port = ATPORT_RESERVED; sat->sat_port < ATPORT_LAST; sat->sat_port++) { if (ddp_ports[sat->sat_port - 1] == 0) break; } if (sat->sat_port == ATPORT_LAST) { return (EADDRNOTAVAIL); } ddp->ddp_lsat.sat_port = sat->sat_port; ddp_ports[sat->sat_port - 1] = ddp; } else { for (ddpp = ddp_ports[sat->sat_port - 1]; ddpp; ddpp = ddpp->ddp_pnext) { if (ddpp->ddp_lsat.sat_addr.s_net == sat->sat_addr.s_net && ddpp->ddp_lsat.sat_addr.s_node == sat->sat_addr.s_node) break; } if (ddpp != NULL) return (EADDRINUSE); ddp->ddp_pnext = ddp_ports[sat->sat_port - 1]; ddp_ports[sat->sat_port - 1] = ddp; if (ddp->ddp_pnext) ddp->ddp_pnext->ddp_pprev = ddp; } return 0; } static int at_pcbconnect(struct ddpcb *ddp, struct sockaddr_at *sat) { struct rtentry *rt; const struct sockaddr_at *cdst; struct route *ro; struct at_ifaddr *aa; struct ifnet *ifp; u_short hintnet = 0, net; if (sat->sat_family != AF_APPLETALK) return EAFNOSUPPORT; if (sat->sat_len != sizeof(*sat)) return EINVAL; /* * Under phase 2, network 0 means "the network". We take "the * network" to mean the network the control block is bound to. * If the control block is not bound, there is an error. */ if (sat->sat_addr.s_net == ATADDR_ANYNET && sat->sat_addr.s_node != ATADDR_ANYNODE) { if (ddp->ddp_lsat.sat_port == ATADDR_ANYPORT) { return EADDRNOTAVAIL; } hintnet = ddp->ddp_lsat.sat_addr.s_net; } ro = &ddp->ddp_route; /* * If we've got an old route for this pcb, check that it is valid. * If we've changed our address, we may have an old "good looking" * route here. Attempt to detect it. 
*/ if ((rt = rtcache_validate(ro)) != NULL || (rt = rtcache_update(ro, 1)) != NULL) { if (hintnet) { net = hintnet; } else { net = sat->sat_addr.s_net; } if ((ifp = rt->rt_ifp) != NULL) { TAILQ_FOREACH(aa, &at_ifaddr, aa_list) { if (aa->aa_ifp == ifp && ntohs(net) >= ntohs(aa->aa_firstnet) && ntohs(net) <= ntohs(aa->aa_lastnet)) { break; } } } else aa = NULL; cdst = satocsat(rtcache_getdst(ro)); if (aa == NULL || (cdst->sat_addr.s_net != (hintnet ? hintnet : sat->sat_addr.s_net) || cdst->sat_addr.s_node != sat->sat_addr.s_node)) { rtcache_unref(rt, ro); rtcache_free(ro); rt = NULL; } } /* * If we've got no route for this interface, try to find one. */ if (rt == NULL) { union { struct sockaddr dst; struct sockaddr_at dsta; } u; sockaddr_at_init(&u.dsta, &sat->sat_addr, 0); if (hintnet) u.dsta.sat_addr.s_net = hintnet; rt = rtcache_lookup(ro, &u.dst); } /* * Make sure any route that we have has a valid interface. */ if (rt != NULL && (ifp = rt->rt_ifp) != NULL) { TAILQ_FOREACH(aa, &at_ifaddr, aa_list) { if (aa->aa_ifp == ifp) break; } } else aa = NULL; rtcache_unref(rt, ro); if (aa == NULL) return ENETUNREACH; ddp->ddp_fsat = *sat; if (ddp->ddp_lsat.sat_port == ATADDR_ANYPORT) return at_pcbsetaddr(ddp, NULL); return 0; } static void at_pcbdisconnect(struct ddpcb *ddp) { ddp->ddp_fsat.sat_addr.s_net = ATADDR_ANYNET; ddp->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE; ddp->ddp_fsat.sat_port = ATADDR_ANYPORT; } static int ddp_attach(struct socket *so, int proto) { struct ddpcb *ddp; int error; KASSERT(sotoddpcb(so) == NULL); sosetlock(so); #ifdef MBUFTRACE so->so_rcv.sb_mowner = &atalk_rx_mowner; so->so_snd.sb_mowner = &atalk_tx_mowner; #endif error = soreserve(so, ddp_sendspace, ddp_recvspace); if (error) { return error; } ddp = kmem_zalloc(sizeof(*ddp), KM_SLEEP); ddp->ddp_lsat.sat_port = ATADDR_ANYPORT; ddp->ddp_next = ddpcb; ddp->ddp_prev = NULL; ddp->ddp_pprev = NULL; ddp->ddp_pnext = NULL; if (ddpcb) { ddpcb->ddp_prev = ddp; } ddpcb = ddp; ddp->ddp_socket = so; 
so->so_pcb = ddp; return 0; } static void ddp_detach(struct socket *so) { struct ddpcb *ddp = sotoddpcb(so); soisdisconnected(so); so->so_pcb = NULL; /* sofree drops the lock */ sofree(so); mutex_enter(softnet_lock); /* remove ddp from ddp_ports list */ if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT && ddp_ports[ddp->ddp_lsat.sat_port - 1] != NULL) { if (ddp->ddp_pprev != NULL) { ddp->ddp_pprev->ddp_pnext = ddp->ddp_pnext; } else { ddp_ports[ddp->ddp_lsat.sat_port - 1] = ddp->ddp_pnext; } if (ddp->ddp_pnext != NULL) { ddp->ddp_pnext->ddp_pprev = ddp->ddp_pprev; } } rtcache_free(&ddp->ddp_route); if (ddp->ddp_prev) { ddp->ddp_prev->ddp_next = ddp->ddp_next; } else { ddpcb = ddp->ddp_next; } if (ddp->ddp_next) { ddp->ddp_next->ddp_prev = ddp->ddp_prev; } kmem_free(ddp, sizeof(*ddp)); } static int ddp_accept(struct socket *so, struct sockaddr *nam) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) { KASSERT(solocked(so)); KASSERT(sotoddpcb(so) != NULL); return at_pcbsetaddr(sotoddpcb(so), (struct sockaddr_at *)nam); } static int ddp_listen(struct socket *so, struct lwp *l) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l) { struct ddpcb *ddp = sotoddpcb(so); int error = 0; KASSERT(solocked(so)); KASSERT(ddp != NULL); KASSERT(nam != NULL); if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) return EISCONN; error = at_pcbconnect(ddp, (struct sockaddr_at *)nam); if (error == 0) soisconnected(so); return error; } static int ddp_connect2(struct socket *so, struct socket *so2) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_disconnect(struct socket *so) { struct ddpcb *ddp = sotoddpcb(so); KASSERT(solocked(so)); KASSERT(ddp != NULL); if (ddp->ddp_fsat.sat_addr.s_node == ATADDR_ANYNODE) return ENOTCONN; at_pcbdisconnect(ddp); soisdisconnected(so); return 0; } static int ddp_shutdown(struct socket *so) { KASSERT(solocked(so)); 
socantsendmore(so); return 0; } static int ddp_abort(struct socket *so) { KASSERT(solocked(so)); soisdisconnected(so); ddp_detach(so); return 0; } static int ddp_ioctl(struct socket *so, u_long cmd, void *addr, struct ifnet *ifp) { return at_control(cmd, addr, ifp); } static int ddp_stat(struct socket *so, struct stat *ub) { KASSERT(solocked(so)); /* stat: don't bother with a blocksize. */ return 0; } static int ddp_peeraddr(struct socket *so, struct sockaddr *nam) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_sockaddr(struct socket *so, struct sockaddr *nam) { KASSERT(solocked(so)); KASSERT(sotoddpcb(so) != NULL); KASSERT(nam != NULL); at_sockaddr(sotoddpcb(so), (struct sockaddr_at *)nam); return 0; } static int ddp_rcvd(struct socket *so, int flags, struct lwp *l) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_recvoob(struct socket *so, struct mbuf *m, int flags) { KASSERT(solocked(so)); return EOPNOTSUPP; } static int ddp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct lwp *l) { struct ddpcb *ddp = sotoddpcb(so); int error = 0; int s = 0; /* XXX gcc 4.8 warns on sgimips */ KASSERT(solocked(so)); KASSERT(ddp != NULL); if (nam) { if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) return EISCONN; s = splnet(); error = at_pcbconnect(ddp, (struct sockaddr_at *)nam); if (error) { splx(s); return error; } } else { if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT) return ENOTCONN; } error = ddp_output(m, ddp); m = NULL; if (nam) { at_pcbdisconnect(ddp); splx(s); } return error; } static int ddp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control) { KASSERT(solocked(so)); m_freem(m); m_freem(control); return EOPNOTSUPP; } static int ddp_purgeif(struct socket *so, struct ifnet *ifp) { mutex_enter(softnet_lock); at_purgeif(ifp); mutex_exit(softnet_lock); return 0; } /* * For the moment, this just find the pcb with the correct local address. 
* In the future, this will actually do some real searching, so we can use * the sender's address to do de-multiplexing on a single port to many * sockets (pcbs). */ struct ddpcb * ddp_search( struct sockaddr_at *from, struct sockaddr_at *to, struct at_ifaddr *aa) { struct ddpcb *ddp; /* * Check for bad ports. */ if (to->sat_port < ATPORT_FIRST || to->sat_port >= ATPORT_LAST) return NULL; /* * Make sure the local address matches the sent address. What about * the interface? */ for (ddp = ddp_ports[to->sat_port - 1]; ddp; ddp = ddp->ddp_pnext) { /* XXX should we handle 0.YY? */ /* XXXX.YY to socket on destination interface */ if (to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net && to->sat_addr.s_node == ddp->ddp_lsat.sat_addr.s_node) { break; } /* 0.255 to socket on receiving interface */ if (to->sat_addr.s_node == ATADDR_BCAST && (to->sat_addr.s_net == 0 || to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net) && ddp->ddp_lsat.sat_addr.s_net == AA_SAT(aa)->sat_addr.s_net) { break; } /* XXXX.0 to socket on destination interface */ if (to->sat_addr.s_net == aa->aa_firstnet && to->sat_addr.s_node == 0 && ntohs(ddp->ddp_lsat.sat_addr.s_net) >= ntohs(aa->aa_firstnet) && ntohs(ddp->ddp_lsat.sat_addr.s_net) <= ntohs(aa->aa_lastnet)) { break; } } return (ddp); } /* * Initialize all the ddp & appletalk stuff */ void ddp_init(void) { ddpstat_percpu = percpu_alloc(sizeof(uint64_t) * DDP_NSTATS); TAILQ_INIT(&at_ifaddr); atintrq1.ifq_maxlen = IFQ_MAXLEN; atintrq2.ifq_maxlen = IFQ_MAXLEN; IFQ_LOCK_INIT(&atintrq1); IFQ_LOCK_INIT(&atintrq2); MOWNER_ATTACH(&atalk_tx_mowner); MOWNER_ATTACH(&atalk_rx_mowner); MOWNER_ATTACH(&aarp_mowner); } PR_WRAP_USRREQS(ddp) #define ddp_attach ddp_attach_wrapper #define ddp_detach ddp_detach_wrapper #define ddp_accept ddp_accept_wrapper #define ddp_bind ddp_bind_wrapper #define ddp_listen ddp_listen_wrapper #define ddp_connect ddp_connect_wrapper #define ddp_connect2 ddp_connect2_wrapper #define ddp_disconnect ddp_disconnect_wrapper #define 
ddp_shutdown ddp_shutdown_wrapper #define ddp_abort ddp_abort_wrapper #define ddp_ioctl ddp_ioctl_wrapper #define ddp_stat ddp_stat_wrapper #define ddp_peeraddr ddp_peeraddr_wrapper #define ddp_sockaddr ddp_sockaddr_wrapper #define ddp_rcvd ddp_rcvd_wrapper #define ddp_recvoob ddp_recvoob_wrapper #define ddp_send ddp_send_wrapper #define ddp_sendoob ddp_sendoob_wrapper #define ddp_purgeif ddp_purgeif_wrapper const struct pr_usrreqs ddp_usrreqs = { .pr_attach = ddp_attach, .pr_detach = ddp_detach, .pr_accept = ddp_accept, .pr_bind = ddp_bind, .pr_listen = ddp_listen, .pr_connect = ddp_connect, .pr_connect2 = ddp_connect2, .pr_disconnect = ddp_disconnect, .pr_shutdown = ddp_shutdown, .pr_abort = ddp_abort, .pr_ioctl = ddp_ioctl, .pr_stat = ddp_stat, .pr_peeraddr = ddp_peeraddr, .pr_sockaddr = ddp_sockaddr, .pr_rcvd = ddp_rcvd, .pr_recvoob = ddp_recvoob, .pr_send = ddp_send, .pr_sendoob = ddp_sendoob, .pr_purgeif = ddp_purgeif, }; static int sysctl_net_atalk_ddp_stats(SYSCTLFN_ARGS) { return (NETSTAT_SYSCTL(ddpstat_percpu, DDP_NSTATS)); } /* * Sysctl for DDP variables. */ SYSCTL_SETUP(sysctl_net_atalk_ddp_setup, "sysctl net.atalk.ddp subtree setup") { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "atalk", NULL, NULL, 0, NULL, 0, CTL_NET, PF_APPLETALK, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "ddp", SYSCTL_DESCR("DDP related settings"), NULL, 0, NULL, 0, CTL_NET, PF_APPLETALK, ATPROTO_DDP, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("DDP statistics"), sysctl_net_atalk_ddp_stats, 0, NULL, 0, CTL_NET, PF_APPLETALK, ATPROTO_DDP, CTL_CREATE, CTL_EOL); } |
| 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | /* $NetBSD: strnlen.c,v 1.2 2014/01/09 11:25:11 apb Exp $ */ /*- * Copyright (c) 2009 David Schultz <das@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
/* (end of license text above) */

#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: strnlen.c,v 1.2 2014/01/09 11:25:11 apb Exp $");
#endif /* LIBC_SCCS and not lint */

/* FreeBSD: src/lib/libc/string/strnlen.c,v 1.1 2009/02/28 06:00:58 das Exp */

#if !defined(_KERNEL) && !defined(_STANDALONE)
#include <string.h>
#else
#include <lib/libkern/libkern.h>
#endif

#if !HAVE_STRNLEN
/*
 * strnlen --
 *	Return the number of characters in the string s, not counting the
 *	terminating NUL, but never scan more than maxlen characters.
 *	If no NUL appears in the first maxlen characters, return maxlen.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
	const char *p = s;

	/* Walk forward until the budget runs out or a NUL is found. */
	while (maxlen-- != 0 && *p != '\0')
		p++;

	return (size_t)(p - s);
}
#endif /* !HAVE_STRNLEN */
| 29 38 16 14 10 9 10 10 14 9 8 10 10 7 14 6 5 6 3 4 3 2 10 7 7 4 10 9 9 10 3 18 16 16 4 3 2 17 30 19 7 5 10 8 10 1 8 8 8 1 7 7 7 2 2 7 8 8 1 9 28 10 3 8 7 4 5 5 8 9 3 2 3 3 9 7 1 6 6 5 7 16 16 12 12 10 5 5 11 11 2 9 15 11 11 5 1 1 5 5 5 5 5 1 15 9 9 5 4 2 2 4 3 8 4 2 16 16 16 16 5 15 14 24 12 13 9 23 16 9 16 10 7 9 23 21 22 19 19 12 4 17 16 18 17 16 12 3 2 16 17 8 16 10 2 11 15 15 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 
1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 | /* $NetBSD: kern_time.c,v 1.217 2022/07/01 21:22:44 riastradh Exp $ */ /*- * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009, 2020 * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christopher G. Demetriou, by Andrew Doran, and by Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.217 2022/07/01 21:22:44 riastradh Exp $");

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/kauth.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>

/* Spin mutex protecting all interval-timer state (see itimer_lock()). */
kmutex_t itimer_mutex __cacheline_aligned;	/* XXX static */
/* CLOCK_REALTIME timers to notify when the wall clock is stepped. */
static struct itlist itimer_realtime_changed_notify;

static void ptimer_intr(void *);
static void *ptimer_sih __read_mostly;	/* softint handle for ptimer_intr */
static TAILQ_HEAD(, ptimer) ptimer_queue;	/* timers pending signal delivery */

/* True for the clocks whose timers are kept as hardclock()-driven deltas. */
#define	CLOCK_VIRTUAL_P(clockid)	\
	((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF)

/* The ITIMER_* and CLOCK_* namespaces are used interchangeably below. */
CTASSERT(ITIMER_REAL == CLOCK_REALTIME);
CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL);
CTASSERT(ITIMER_PROF == CLOCK_PROF);
CTASSERT(ITIMER_MONOTONIC == CLOCK_MONOTONIC);

#define	DELAYTIMER_MAX	32

/*
 * Initialize timekeeping: the interval-timer lock, the realtime-change
 * notification list, the pending-signal queue, and the soft interrupt
 * used to deliver timer signals.
 */
void
time_init(void)
{

	mutex_init(&itimer_mutex, MUTEX_DEFAULT, IPL_SCHED);
	LIST_INIT(&itimer_realtime_changed_notify);
	TAILQ_INIT(&ptimer_queue);
	ptimer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
	    ptimer_intr, NULL);
}

/*
 * Check if the time will wrap if set to ts.
 *
 * ts - timespec describing the new time
 * delta - the delta between the current time and ts
 *
 * Returns true when the proposed time is close enough to the tv_sec
 * rollover point (or the delta is negative) to be rejected.
 */
bool
time_wraps(struct timespec *ts, struct timespec *delta)
{

	/*
	 * Don't allow the time to be set forward so far it
	 * will wrap and become negative, thus allowing an
	 * attacker to bypass the next check below.  The
	 * cutoff is 1 year before rollover occurs, so even
	 * if the attacker uses adjtime(2) to move the time
	 * past the cutoff, it will take a very long time
	 * to get to the wrap point.
	 */
	if ((ts->tv_sec > LLONG_MAX - 365*24*60*60) ||
	    (delta->tv_sec < 0 || delta->tv_nsec < 0))
		return true;

	return false;
}

/*
 * itimer_lock:
 *
 *	Acquire the interval timer data lock.
*/
void
itimer_lock(void)
{
	mutex_spin_enter(&itimer_mutex);
}

/*
 * itimer_unlock:
 *
 *	Release the interval timer data lock.
 */
void
itimer_unlock(void)
{
	mutex_spin_exit(&itimer_mutex);
}

/*
 * itimer_lock_held:
 *
 *	Check that the interval timer lock is held for diagnostic
 *	assertions.
 */
inline bool __diagused
itimer_lock_held(void)
{
	return mutex_owned(&itimer_mutex);
}

/*
 * Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

/* This function is used by clock_settime and settimeofday */
static int
settime1(struct proc *p, const struct timespec *ts, bool check_kauth)
{
	struct timespec delta, now;

	/*
	 * The time being set to an unreasonable value will cause
	 * unreasonable system behaviour.
	 */
	if (ts->tv_sec < 0 || ts->tv_sec > (1LL << 36))
		return EINVAL;

	nanotime(&now);
	timespecsub(ts, &now, &delta);

	/*
	 * When check_kauth is set, ask the security framework whether
	 * the current credential may step the system clock.
	 */
	if (check_kauth && kauth_authorize_system(kauth_cred_get(),
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts),
	    &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) {
		return EPERM;
	}

#ifdef notyet
	if ((delta.tv_sec < 86400) && securelevel > 0) {
		/* XXX elad - notyet */
		return EPERM;
	}
#endif

	tc_setclock(ts);

	resettodr();

	/*
	 * Notify pending CLOCK_REALTIME timers about the real time change.
	 * There may be inactive timers on this list, but this happens
	 * comparatively less often than timers firing, and so it's better
	 * to put the extra checks here than to complicate the other code
	 * path.
	 */
	struct itimer *it;
	itimer_lock();
	LIST_FOREACH(it, &itimer_realtime_changed_notify, it_rtchgq) {
		KASSERT(it->it_ops->ito_realtime_changed != NULL);
		if (timespecisset(&it->it_time.it_value)) {
			(*it->it_ops->ito_realtime_changed)(it);
		}
	}
	itimer_unlock();

	return 0;
}

/* Set the wall clock, always performing the kauth permission check. */
int
settime(struct proc *p, struct timespec *ts)
{
	return settime1(p, ts, true);
}

/* ARGSUSED */
int
sys___clock_gettime50(struct lwp *l,
    const struct sys___clock_gettime50_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */
	int error;
	struct timespec ats;

	error = clock_gettime1(SCARG(uap, clock_id), &ats);
	if (error != 0)
		return error;

	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
}

/* ARGSUSED */
int
sys___clock_settime50(struct lwp *l,
    const struct sys___clock_settime50_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
	} */
	int error;
	struct timespec ats;

	if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0)
		return error;

	return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true);
}

/*
 * Common implementation of clock_settime(2): validate the timespec and
 * dispatch on the clock id.  Only CLOCK_REALTIME is settable.
 */
int
clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp,
    bool check_kauth)
{
	int error;

	if (tp->tv_nsec < 0 || tp->tv_nsec >= 1000000000L)
		return EINVAL;

	switch (clock_id) {
	case CLOCK_REALTIME:
		if ((error = settime1(p, tp, check_kauth)) != 0)
			return error;
		break;
	case CLOCK_MONOTONIC:
		return EINVAL;	/* read-only clock */
	default:
		return EINVAL;
	}

	return 0;
}

int
sys___clock_getres50(struct lwp *l,
    const struct sys___clock_getres50_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */
	struct timespec ts;
	int error;

	if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0)
		return error;

	/* A NULL tp is allowed: the call then only validates clock_id. */
	if (SCARG(uap, tp))
		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));

	return error;
}

/*
 * Report the resolution of the given clock, derived from the current
 * timecounter frequency (floored at 1 ns).
 */
int
clock_getres1(clockid_t clock_id, struct timespec *ts)
{

	switch (clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
		ts->tv_sec
		    = 0;
		if (tc_getfrequency() > 1000000000)
			ts->tv_nsec = 1;	/* sub-ns counter: clamp to 1 ns */
		else
			ts->tv_nsec = 1000000000 / tc_getfrequency();
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/* ARGSUSED */
int
sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct timespec *) rqtp;
		syscallarg(struct timespec *) rmtp;
	} */
	struct timespec rmt, rqt;
	int error, error1;

	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
	if (error)
		return error;

	error = nanosleep1(l, CLOCK_MONOTONIC, 0, &rqt,
	    SCARG(uap, rmtp) ? &rmt : NULL);
	/* Only report the remaining time when interrupted (EINTR). */
	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
		return error;

	error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
	return error1 ? error1 : error;
}

/* ARGSUSED */
int
sys_clock_nanosleep(struct lwp *l, const struct sys_clock_nanosleep_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(int) flags;
		syscallarg(struct timespec *) rqtp;
		syscallarg(struct timespec *) rmtp;
	} */
	struct timespec rmt, rqt;
	int error, error1;

	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
	if (error)
		goto out;

	error = nanosleep1(l, SCARG(uap, clock_id), SCARG(uap, flags), &rqt,
	    SCARG(uap, rmtp) ? &rmt : NULL);
	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
		goto out;

	/* Absolute sleeps have no meaningful remaining time to report. */
	if ((SCARG(uap, flags) & TIMER_ABSTIME) == 0 &&
	    (error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt))) != 0)
		error = error1;
out:
	/* POSIX returns the error as the value, not via errno here. */
	*retval = error;
	return 0;
}

/*
 * Common sleep loop for nanosleep(2)/clock_nanosleep(2).  Sleeps in
 * tick-sized chunks via kpause(), re-checking the target clock each
 * time around, and fills in *rmt with the remaining time if requested.
 */
int
nanosleep1(struct lwp *l, clockid_t clock_id, int flags, struct timespec *rqt,
    struct timespec *rmt)
{
	struct timespec rmtstart;
	int error, timo;

	if ((error = ts2timo(clock_id, flags, rqt, &timo, &rmtstart)) != 0) {
		if (error == ETIMEDOUT) {
			/* Already past the deadline: done, zero remaining. */
			error = 0;
			if (rmt != NULL)
				rmt->tv_sec = rmt->tv_nsec = 0;
		}
		return error;
	}

	/*
	 * Avoid inadvertently sleeping forever
	 */
	if (timo == 0)
		timo = 1;
again:
	error = kpause("nanoslp", true, timo, NULL);
	if (error == EWOULDBLOCK)
		error = 0;	/* timeout is the normal wakeup path */
	if (rmt != NULL || error == 0) {
		struct timespec rmtend;
		struct timespec t0;
		struct timespec *t;
		int err;

		err = clock_gettime1(clock_id, &rmtend);
		if (err != 0)
			return err;

		t = (rmt != NULL) ? rmt : &t0;
		if (flags & TIMER_ABSTIME) {
			timespecsub(rqt, &rmtend, t);
		} else {
			if (timespeccmp(&rmtend, &rmtstart, <))
				timespecclear(t); /* clock wound back */
			else
				timespecsub(&rmtend, &rmtstart, t);
			if (timespeccmp(rqt, t, <))
				timespecclear(t);
			else
				timespecsub(rqt, t, t);
		}
		if (t->tv_sec < 0)
			timespecclear(t);
		if (error == 0) {
			/* Time left: convert it to ticks and sleep again. */
			timo = tstohz(t);
			if (timo > 0)
				goto again;
		}
	}

	if (error == ERESTART)
		error = EINTR;

	return error;
}

/*
 * Map a process or LWP id to the corresponding CPU-time clock id.
 */
int
sys_clock_getcpuclockid2(struct lwp *l,
    const struct sys_clock_getcpuclockid2_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(idtype_t idtype;
		syscallarg(id_t id);
		syscallarg(clockid_t *)clock_id;
	} */
	pid_t pid;
	lwpid_t lid;
	clockid_t clock_id;
	id_t id = SCARG(uap, id);

	switch (SCARG(uap, idtype)) {
	case P_PID:
		/* id == 0 means "the calling process". */
		pid = id == 0 ? l->l_proc->p_pid : id;
		clock_id = CLOCK_PROCESS_CPUTIME_ID | pid;
		break;
	case P_LWPID:
		/* id == 0 means "the calling LWP". */
		lid = id == 0 ?
		    l->l_lid : id;
		clock_id = CLOCK_THREAD_CPUTIME_ID | lid;
		break;
	default:
		return EINVAL;
	}
	return copyout(&clock_id, SCARG(uap, clock_id), sizeof(clock_id));
}

/* ARGSUSED */
int
sys___gettimeofday50(struct lwp *l,
    const struct sys___gettimeofday50_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct timeval *) tp;
		syscallarg(void *) tzp;		really "struct timezone *";
	} */
	struct timeval atv;
	int error = 0;
	struct timezone tzfake;

	if (SCARG(uap, tp)) {
		/* memset() avoids leaking stack padding to userland. */
		memset(&atv, 0, sizeof(atv));
		microtime(&atv);
		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
		if (error)
			return error;
	}
	if (SCARG(uap, tzp)) {
		/*
		 * NetBSD has no kernel notion of time zone, so we just
		 * fake up a timezone struct and return it if demanded.
		 */
		tzfake.tz_minuteswest = 0;
		tzfake.tz_dsttime = 0;
		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
	}
	return error;
}

/* ARGSUSED */
int
sys___settimeofday50(struct lwp *l,
    const struct sys___settimeofday50_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct timeval *) tv;
		syscallarg(const void *) tzp; really "const struct timezone *";
	} */

	return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
}

/*
 * Common implementation of settimeofday(2).  "userspace" selects whether
 * utv must be copied in from user memory; "check_kauth" selects whether
 * the credential check in settime1() is performed.
 */
int
settimeofday1(const struct timeval *utv, bool userspace, const void *utzp,
    struct lwp *l, bool check_kauth)
{
	struct timeval atv;
	struct timespec ts;
	int error;

	/* Verify all parameters before changing time. */

	/*
	 * NetBSD has no kernel notion of time zone, and only an
	 * obsolete program would try to set it, so we log a warning.
	 */
	if (utzp)
		log(LOG_WARNING, "pid %d attempted to set the "
		    "(obsolete) kernel time zone\n", l->l_proc->p_pid);

	if (utv == NULL)
		return 0;

	if (userspace) {
		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
			return error;
		utv = &atv;
	}

	if (utv->tv_usec < 0 || utv->tv_usec >= 1000000)
		return EINVAL;

	TIMEVAL_TO_TIMESPEC(utv, &ts);
	return settime1(l->l_proc, &ts, check_kauth);
}

int	time_adjusted;			/* set if an adjustment is made */

/* ARGSUSED */
int
sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct timeval *) delta;
		syscallarg(struct timeval *) olddelta;
	} */
	int error;
	struct timeval atv, oldatv;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
		return error;

	if (SCARG(uap, delta)) {
		error = copyin(SCARG(uap, delta), &atv,
		    sizeof(*SCARG(uap, delta)));
		if (error)
			return error;
	}
	adjtime1(SCARG(uap, delta) ? &atv : NULL,
	    SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc);
	if (SCARG(uap, olddelta))
		error = copyout(&oldatv, SCARG(uap, olddelta),
		    sizeof(*SCARG(uap, olddelta)));
	return error;
}

/*
 * Common implementation of adjtime(2): report the outstanding
 * adjustment in *olddelta (if non-NULL) and install *delta (if
 * non-NULL) as the new one, saturating at INT64_MIN/INT64_MAX
 * microseconds.  Updates the NTP time_adjtime variable under
 * timecounter_lock.
 */
void
adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
{
	extern int64_t time_adjtime;  /* in kern_ntptime.c */

	if (olddelta) {
		memset(olddelta, 0, sizeof(*olddelta));
		mutex_spin_enter(&timecounter_lock);
		olddelta->tv_sec = time_adjtime / 1000000;
		olddelta->tv_usec = time_adjtime % 1000000;
		/* Normalize so tv_usec is non-negative. */
		if (olddelta->tv_usec < 0) {
			olddelta->tv_usec += 1000000;
			olddelta->tv_sec--;
		}
		mutex_spin_exit(&timecounter_lock);
	}

	if (delta) {
		mutex_spin_enter(&timecounter_lock);
		/*
		 * XXX This should maybe just report failure to
		 * userland for nonsense deltas.
		 */
		if (delta->tv_sec > INT64_MAX/1000000 - 1) {
			time_adjtime = INT64_MAX;
		} else if (delta->tv_sec < INT64_MIN/1000000 + 1) {
			time_adjtime = INT64_MIN;
		} else {
			/* tv_usec is clamped to one second's worth. */
			time_adjtime = delta->tv_sec * 1000000
			    + MAX(-999999, MIN(999999, delta->tv_usec));
		}

		if (time_adjtime) {
			/* We need to save the system time during shutdown */
			time_adjusted |= 1;
		}
		mutex_spin_exit(&timecounter_lock);
	}
}

/*
 * Interval timer support.
 *
 * The itimer_*() routines provide generic support for interval timers,
 * both real (CLOCK_REALTIME, CLOCK_MONOTONIC), and virtual (CLOCK_VIRTUAL,
 * CLOCK_PROF).
 *
 * Real timers keep their deadline as an absolute time, and are fired
 * by a callout.  Virtual timers are kept as a linked-list of deltas,
 * and are processed by hardclock().
 *
 * Because the real time timer callout may be delayed in real time due
 * to interrupt processing on the system, it is possible for the real
 * time timeout routine (itimer_callout()) to run after its deadline.
 * It does not suffice, therefore, to reload the real timer .it_value
 * from the timer's .it_interval.  Rather, we compute the next deadline
 * in absolute time based on the current time and the .it_interval value,
 * and report any overruns.
 *
 * Note that while the virtual timers are supported in a generic fashion
 * here, they only (currently) make sense as per-process timers, and thus
 * only really work for that case.
 */

/*
 * itimer_init:
 *
 *	Initialize the common data for an interval timer.
*/
void
itimer_init(struct itimer * const it, const struct itimer_ops * const ops,
    clockid_t const id, struct itlist * const itl)
{

	KASSERT(itimer_lock_held());
	KASSERT(ops != NULL);

	timespecclear(&it->it_time.it_value);
	it->it_ops = ops;
	it->it_clockid = id;
	it->it_overruns = 0;
	it->it_dying = false;
	if (!CLOCK_VIRTUAL_P(id)) {
		/* Real timer: backed by a callout; itl must not be given. */
		KASSERT(itl == NULL);
		callout_init(&it->it_ch, CALLOUT_MPSAFE);
		if (id == CLOCK_REALTIME && ops->ito_realtime_changed != NULL) {
			/* Register for wall-clock step notifications. */
			LIST_INSERT_HEAD(&itimer_realtime_changed_notify,
			    it, it_rtchgq);
		}
	} else {
		/* Virtual timer: lives on the caller-supplied delta list. */
		KASSERT(itl != NULL);
		it->it_vlist = itl;
		it->it_active = false;
	}
}

/*
 * itimer_poison:
 *
 *	Poison an interval timer, preventing it from being scheduled
 *	or processed, in preparation for freeing the timer.
 */
void
itimer_poison(struct itimer * const it)
{

	KASSERT(itimer_lock_held());

	it->it_dying = true;

	/*
	 * For non-virtual timers, stop the callout, or wait for it to
	 * run if it has already fired.  It cannot restart again after
	 * this point: the callout won't restart itself when dying, no
	 * other users holding the lock can restart it, and any other
	 * users waiting for callout_halt concurrently (itimer_settime)
	 * will restart from the top.
	 */
	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		callout_halt(&it->it_ch, &itimer_mutex);
		if (it->it_clockid == CLOCK_REALTIME &&
		    it->it_ops->ito_realtime_changed != NULL) {
			LIST_REMOVE(it, it_rtchgq);
		}
	}
}

/*
 * itimer_fini:
 *
 *	Release resources used by an interval timer.
 *
 *	N.B. itimer_lock must be held on entry, and is released on exit.
 */
void
itimer_fini(struct itimer * const it)
{

	KASSERT(itimer_lock_held());

	/* All done with the global state. */
	itimer_unlock();

	/* Destroy the callout, if needed. */
	if (!CLOCK_VIRTUAL_P(it->it_clockid))
		callout_destroy(&it->it_ch);
}

/*
 * itimer_decr:
 *
 *	Decrement an interval timer by a specified number of nanoseconds,
 *	which must be less than a second, i.e. < 1000000000.  If the timer
 *	expires, then reload it.  In this case, carry over (nsec - old value)
 *	to reduce the value reloaded into the timer so that the timer does
 *	not drift.  This routine assumes that it is called in a context where
 *	the timers on which it is operating cannot change in value.
 *
 *	Returns true if the timer has expired.
 */
static bool
itimer_decr(struct itimer *it, int nsec)
{
	struct itimerspec *itp;
	int error __diagused;

	KASSERT(itimer_lock_held());
	KASSERT(CLOCK_VIRTUAL_P(it->it_clockid));

	itp = &it->it_time;
	if (itp->it_value.tv_nsec < nsec) {
		if (itp->it_value.tv_sec == 0) {
			/* expired, and already in next interval */
			nsec -= itp->it_value.tv_nsec;
			goto expire;
		}
		/* Borrow a second so the subtraction below stays >= 0. */
		itp->it_value.tv_nsec += 1000000000;
		itp->it_value.tv_sec--;
	}
	itp->it_value.tv_nsec -= nsec;
	nsec = 0;
	if (timespecisset(&itp->it_value))
		return false;
	/* expired, exactly at end of interval */
expire:
	if (timespecisset(&itp->it_interval)) {
		/* Periodic timer: reload, minus the carried-over overshoot. */
		itp->it_value = itp->it_interval;
		itp->it_value.tv_nsec -= nsec;
		if (itp->it_value.tv_nsec < 0) {
			itp->it_value.tv_nsec += 1000000000;
			itp->it_value.tv_sec--;
		}
		error = itimer_settime(it);
		KASSERT(error == 0); /* virtual, never fails */
	} else
		itp->it_value.tv_nsec = 0;	/* sec is already 0 */
	return true;
}

static void itimer_callout(void *);

/*
 * itimer_arm_real:
 *
 *	Arm a non-virtual timer.
 */
static void
itimer_arm_real(struct itimer * const it)
{
	/*
	 * Don't need to check tshzto() return value, here.
	 * callout_reset() does it for us.
	 */
	callout_reset(&it->it_ch,
	    (it->it_clockid == CLOCK_MONOTONIC ?
		tshztoup(&it->it_time.it_value) :
		tshzto(&it->it_time.it_value)),
	    itimer_callout, it);
}

/*
 * itimer_callout:
 *
 *	Callout to expire a non-virtual timer.  Queue it up for processing,
 *	and then reload, if it is configured to do so.
 *
 *	N.B. A delay in processing this callout causes multiple
 *	SIGALRM calls to be compressed into one.
*/
static void
itimer_callout(void *arg)
{
	uint64_t last_val, next_val, interval, now_ns;
	struct timespec now, next;
	struct itimer * const it = arg;
	int backwards;

	itimer_lock();
	(*it->it_ops->ito_fire)(it);

	/* One-shot timer: clear the deadline and we are done. */
	if (!timespecisset(&it->it_time.it_interval)) {
		timespecclear(&it->it_time.it_value);
		itimer_unlock();
		return;
	}

	if (it->it_clockid == CLOCK_MONOTONIC) {
		getnanouptime(&now);
	} else {
		getnanotime(&now);
	}
	/* Deadline still in the future => the clock was stepped back. */
	backwards = (timespeccmp(&it->it_time.it_value, &now, >));

	/* Nonnegative interval guaranteed by itimerfix. */
	KASSERT(it->it_time.it_interval.tv_sec >= 0);
	KASSERT(it->it_time.it_interval.tv_nsec >= 0);

	/* Handle the easy case of non-overflown timers first. */
	if (!backwards &&
	    timespecaddok(&it->it_time.it_value, &it->it_time.it_interval)) {
		timespecadd(&it->it_time.it_value, &it->it_time.it_interval,
		    &next);
		it->it_time.it_value = next;
	} else {
		/*
		 * Slow path: do the arithmetic in 64-bit nanoseconds,
		 * keeping the next deadline phase-locked to the original
		 * schedule and accounting for overruns.
		 */
		now_ns = timespec2ns(&now);
		last_val = timespec2ns(&it->it_time.it_value);
		interval = timespec2ns(&it->it_time.it_interval);

		next_val = now_ns +
		    (now_ns - last_val + interval - 1) % interval;

		if (backwards)
			next_val += interval;
		else
			it->it_overruns += (now_ns - last_val) / interval;

		it->it_time.it_value.tv_sec = next_val / 1000000000;
		it->it_time.it_value.tv_nsec = next_val % 1000000000;
	}

	/*
	 * Reset the callout, if it's not going away.
	 */
	if (!it->it_dying)
		itimer_arm_real(it);
	itimer_unlock();
}

/*
 * itimer_settime:
 *
 *	Set up the given interval timer.  The value in it->it_time.it_value
 *	is taken to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC
 *	timers and a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
 *
 *	If the callout had already fired but not yet run, fails with
 *	ERESTART -- caller must restart from the top to look up a timer.
 */
int
itimer_settime(struct itimer *it)
{
	struct itimer *itn, *pitn;
	struct itlist *itl;

	KASSERT(itimer_lock_held());

	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		/*
		 * Try to stop the callout.  However, if it had already
		 * fired, we have to drop the lock to wait for it, so
		 * the world may have changed and pt may not be there
		 * any more.  In that case, tell the caller to start
		 * over from the top.
		 */
		if (callout_halt(&it->it_ch, &itimer_mutex))
			return ERESTART;

		/* Now we can touch it and start it up again. */
		if (timespecisset(&it->it_time.it_value))
			itimer_arm_real(it);
	} else {
		if (it->it_active) {
			/*
			 * Unlink from the delta list, folding our delta
			 * back into the successors so their absolute
			 * expirations are preserved.
			 */
			itn = LIST_NEXT(it, it_list);
			LIST_REMOVE(it, it_list);
			for ( ; itn; itn = LIST_NEXT(itn, it_list))
				timespecadd(&it->it_time.it_value,
				    &itn->it_time.it_value,
				    &itn->it_time.it_value);
		}
		if (timespecisset(&it->it_time.it_value)) {
			/*
			 * Walk the list, subtracting each predecessor's
			 * delta, to find our insertion point.
			 */
			itl = it->it_vlist;
			for (itn = LIST_FIRST(itl), pitn = NULL;
			     itn && timespeccmp(&it->it_time.it_value,
				 &itn->it_time.it_value, >);
			     pitn = itn, itn = LIST_NEXT(itn, it_list))
				timespecsub(&it->it_time.it_value,
				    &itn->it_time.it_value,
				    &it->it_time.it_value);

			if (pitn)
				LIST_INSERT_AFTER(pitn, it, it_list);
			else
				LIST_INSERT_HEAD(itl, it, it_list);

			/* Re-normalize the deltas of our successors. */
			for ( ; itn ; itn = LIST_NEXT(itn, it_list))
				timespecsub(&itn->it_time.it_value,
				    &it->it_time.it_value,
				    &itn->it_time.it_value);

			it->it_active = true;
		} else {
			it->it_active = false;
		}
	}

	/* Success! */
	return 0;
}

/*
 * itimer_gettime:
 *
 *	Return the remaining time of an interval timer.
 */
void
itimer_gettime(const struct itimer *it, struct itimerspec *aits)
{
	struct timespec now;
	struct itimer *itn;

	KASSERT(itimer_lock_held());

	*aits = it->it_time;
	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		/*
		 * Convert from absolute to relative time in .it_value
		 * part of real time timer.  If time for real time
		 * timer has passed return 0, else return difference
		 * between current time and time for the timer to go
		 * off.
		 */
		if (timespecisset(&aits->it_value)) {
			if (it->it_clockid == CLOCK_REALTIME) {
				getnanotime(&now);
			} else { /* CLOCK_MONOTONIC */
				getnanouptime(&now);
			}
			if (timespeccmp(&aits->it_value, &now, <))
				timespecclear(&aits->it_value);
			else
				timespecsub(&aits->it_value, &now,
				    &aits->it_value);
		}
	} else if (it->it_active) {
		/* Virtual: sum the deltas of all predecessors plus ours. */
		for (itn = LIST_FIRST(it->it_vlist); itn && itn != it;
		     itn = LIST_NEXT(itn, it_list))
			timespecadd(&aits->it_value,
			    &itn->it_time.it_value, &aits->it_value);
		KASSERT(itn != NULL); /* it should be findable on the list */
	} else
		timespecclear(&aits->it_value);
}

/*
 * Per-process timer support.
 *
 * Both the BSD getitimer() family and the POSIX timer_*() family of
 * routines are supported.
 *
 * All timers are kept in an array pointed to by p_timers, which is
 * allocated on demand - many processes don't use timers at all.  The
 * first four elements in this array are reserved for the BSD timers:
 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
 * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC.  The rest may be
 * allocated by the timer_create() syscall.
 *
 * These timers are a "sub-class" of interval timer.
 */

/*
 * ptimer_free:
 *
 *	Free the per-process timer at the specified index.
 */
static void
ptimer_free(struct ptimers *pts, int index)
{
	struct itimer *it;
	struct ptimer *pt;

	KASSERT(itimer_lock_held());

	it = pts->pts_timers[index];
	pt = container_of(it, struct ptimer, pt_itimer);
	pts->pts_timers[index] = NULL;
	itimer_poison(it);

	/*
	 * Remove it from the queue to be signalled.  Must be done
	 * after itimer is poisoned, because we may have had to wait
	 * for the callout to complete.
	 */
	if (pt->pt_queued) {
		TAILQ_REMOVE(&ptimer_queue, pt, pt_chain);
		pt->pt_queued = false;
	}

	itimer_fini(it);	/* releases itimer_lock */

	kmem_free(pt, sizeof(*pt));
}

/*
 * ptimers_alloc:
 *
 *	Allocate a ptimers for the specified process.
*/ static struct ptimers * ptimers_alloc(struct proc *p) { struct ptimers *pts; int i; pts = kmem_alloc(sizeof(*pts), KM_SLEEP); LIST_INIT(&pts->pts_virtual); LIST_INIT(&pts->pts_prof); for (i = 0; i < TIMER_MAX; i++) pts->pts_timers[i] = NULL; itimer_lock(); if (p->p_timers == NULL) { p->p_timers = pts; itimer_unlock(); return pts; } itimer_unlock(); kmem_free(pts, sizeof(*pts)); return p->p_timers; } /* * ptimers_free: * * Clean up the per-process timers. If "which" is set to TIMERS_ALL, * then clean up all timers and free all the data structures. If * "which" is set to TIMERS_POSIX, only clean up the timers allocated * by timer_create(), not the BSD setitimer() timers, and only free the * structure if none of those remain. * * This function is exported because it is needed in the exec and * exit code paths. */ void ptimers_free(struct proc *p, int which) { struct ptimers *pts; struct itimer *itn; struct timespec ts; int i; if (p->p_timers == NULL) return; pts = p->p_timers; itimer_lock(); if (which == TIMERS_ALL) { p->p_timers = NULL; i = 0; } else { timespecclear(&ts); for (itn = LIST_FIRST(&pts->pts_virtual); itn && itn != pts->pts_timers[ITIMER_VIRTUAL]; itn = LIST_NEXT(itn, it_list)) { KASSERT(itn->it_clockid == CLOCK_VIRTUAL); timespecadd(&ts, &itn->it_time.it_value, &ts); } LIST_FIRST(&pts->pts_virtual) = NULL; if (itn) { KASSERT(itn->it_clockid == CLOCK_VIRTUAL); timespecadd(&ts, &itn->it_time.it_value, &itn->it_time.it_value); LIST_INSERT_HEAD(&pts->pts_virtual, itn, it_list); } timespecclear(&ts); for (itn = LIST_FIRST(&pts->pts_prof); itn && itn != pts->pts_timers[ITIMER_PROF]; itn = LIST_NEXT(itn, it_list)) { KASSERT(itn->it_clockid == CLOCK_PROF); timespecadd(&ts, &itn->it_time.it_value, &ts); } LIST_FIRST(&pts->pts_prof) = NULL; if (itn) { KASSERT(itn->it_clockid == CLOCK_PROF); timespecadd(&ts, &itn->it_time.it_value, &itn->it_time.it_value); LIST_INSERT_HEAD(&pts->pts_prof, itn, it_list); } i = TIMER_MIN; } for ( ; i < TIMER_MAX; i++) { if 
(pts->pts_timers[i] != NULL) { /* Free the timer and release the lock. */ ptimer_free(pts, i); /* Reacquire the lock for the next one. */ itimer_lock(); } } if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL && pts->pts_timers[2] == NULL && pts->pts_timers[3] == NULL) { p->p_timers = NULL; itimer_unlock(); kmem_free(pts, sizeof(*pts)); } else itimer_unlock(); } /* * ptimer_fire: * * Fire a per-process timer. */ static void ptimer_fire(struct itimer *it) { struct ptimer *pt = container_of(it, struct ptimer, pt_itimer); KASSERT(itimer_lock_held()); /* * XXX Can overrun, but we don't do signal queueing yet, anyway. * XXX Relying on the clock interrupt is stupid. */ if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) { return; } if (!pt->pt_queued) { TAILQ_INSERT_TAIL(&ptimer_queue, pt, pt_chain); pt->pt_queued = true; softint_schedule(ptimer_sih); } } /* * Operations vector for per-process timers (BSD and POSIX). */ static const struct itimer_ops ptimer_itimer_ops = { .ito_fire = ptimer_fire, }; /* * sys_timer_create: * * System call to create a POSIX timer. 
*/ int sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap, register_t *retval) { /* { syscallarg(clockid_t) clock_id; syscallarg(struct sigevent *) evp; syscallarg(timer_t *) timerid; } */ return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id), SCARG(uap, evp), copyin, l); } int timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp, copyin_t fetch_event, struct lwp *l) { int error; timer_t timerid; struct itlist *itl; struct ptimers *pts; struct ptimer *pt; struct proc *p; p = l->l_proc; if ((u_int)id > CLOCK_MONOTONIC) return EINVAL; if ((pts = p->p_timers) == NULL) pts = ptimers_alloc(p); pt = kmem_zalloc(sizeof(*pt), KM_SLEEP); if (evp != NULL) { if (((error = (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) || ((pt->pt_ev.sigev_notify < SIGEV_NONE) || (pt->pt_ev.sigev_notify > SIGEV_SA)) || (pt->pt_ev.sigev_notify == SIGEV_SIGNAL && (pt->pt_ev.sigev_signo <= 0 || pt->pt_ev.sigev_signo >= NSIG))) { kmem_free(pt, sizeof(*pt)); return (error ? error : EINVAL); } } /* Find a free timer slot, skipping those reserved for setitimer(). 
*/ itimer_lock(); for (timerid = TIMER_MIN; timerid < TIMER_MAX; timerid++) if (pts->pts_timers[timerid] == NULL) break; if (timerid == TIMER_MAX) { itimer_unlock(); kmem_free(pt, sizeof(*pt)); return EAGAIN; } if (evp == NULL) { pt->pt_ev.sigev_notify = SIGEV_SIGNAL; switch (id) { case CLOCK_REALTIME: case CLOCK_MONOTONIC: pt->pt_ev.sigev_signo = SIGALRM; break; case CLOCK_VIRTUAL: pt->pt_ev.sigev_signo = SIGVTALRM; break; case CLOCK_PROF: pt->pt_ev.sigev_signo = SIGPROF; break; } pt->pt_ev.sigev_value.sival_int = timerid; } switch (id) { case CLOCK_VIRTUAL: itl = &pts->pts_virtual; break; case CLOCK_PROF: itl = &pts->pts_prof; break; default: itl = NULL; } itimer_init(&pt->pt_itimer, &ptimer_itimer_ops, id, itl); pt->pt_proc = p; pt->pt_poverruns = 0; pt->pt_entry = timerid; pt->pt_queued = false; pts->pts_timers[timerid] = &pt->pt_itimer; itimer_unlock(); return copyout(&timerid, tid, sizeof(timerid)); } /* * sys_timer_delete: * * System call to delete a POSIX timer. */ int sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; } */ struct proc *p = l->l_proc; timer_t timerid; struct ptimers *pts; struct itimer *it, *itn; timerid = SCARG(uap, timerid); pts = p->p_timers; if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) return EINVAL; itimer_lock(); if ((it = pts->pts_timers[timerid]) == NULL) { itimer_unlock(); return EINVAL; } if (CLOCK_VIRTUAL_P(it->it_clockid)) { if (it->it_active) { itn = LIST_NEXT(it, it_list); LIST_REMOVE(it, it_list); for ( ; itn; itn = LIST_NEXT(itn, it_list)) timespecadd(&it->it_time.it_value, &itn->it_time.it_value, &itn->it_time.it_value); it->it_active = false; } } /* Free the timer and release the lock. */ ptimer_free(pts, timerid); return 0; } /* * sys___timer_settime50: * * System call to set/arm a POSIX timer. 
*/ int sys___timer_settime50(struct lwp *l, const struct sys___timer_settime50_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; syscallarg(int) flags; syscallarg(const struct itimerspec *) value; syscallarg(struct itimerspec *) ovalue; } */ int error; struct itimerspec value, ovalue, *ovp = NULL; if ((error = copyin(SCARG(uap, value), &value, sizeof(struct itimerspec))) != 0) return error; if (SCARG(uap, ovalue)) ovp = &ovalue; if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp, SCARG(uap, flags), l->l_proc)) != 0) return error; if (ovp) return copyout(&ovalue, SCARG(uap, ovalue), sizeof(struct itimerspec)); return 0; } int dotimer_settime(int timerid, struct itimerspec *value, struct itimerspec *ovalue, int flags, struct proc *p) { struct timespec now; struct itimerspec val, oval; struct ptimers *pts; struct itimer *it; int error; pts = p->p_timers; if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) return EINVAL; val = *value; if ((error = itimespecfix(&val.it_value)) != 0 || (error = itimespecfix(&val.it_interval)) != 0) return error; itimer_lock(); restart: if ((it = pts->pts_timers[timerid]) == NULL) { itimer_unlock(); return EINVAL; } oval = it->it_time; it->it_time = val; /* * If we've been passed a relative time for a realtime timer, * convert it to absolute; if an absolute time for a virtual * timer, convert it to relative and make sure we don't set it * to zero, which would cancel the timer, or let it go * negative, which would confuse the comparison tests. 
*/ if (timespecisset(&it->it_time.it_value)) { if (!CLOCK_VIRTUAL_P(it->it_clockid)) { if ((flags & TIMER_ABSTIME) == 0) { if (it->it_clockid == CLOCK_REALTIME) { getnanotime(&now); } else { /* CLOCK_MONOTONIC */ getnanouptime(&now); } timespecadd(&it->it_time.it_value, &now, &it->it_time.it_value); } } else { if ((flags & TIMER_ABSTIME) != 0) { getnanotime(&now); timespecsub(&it->it_time.it_value, &now, &it->it_time.it_value); if (!timespecisset(&it->it_time.it_value) || it->it_time.it_value.tv_sec < 0) { it->it_time.it_value.tv_sec = 0; it->it_time.it_value.tv_nsec = 1; } } } } error = itimer_settime(it); if (error == ERESTART) { KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid)); goto restart; } KASSERT(error == 0); itimer_unlock(); if (ovalue) *ovalue = oval; return 0; } /* * sys___timer_gettime50: * * System call to return the time remaining until a POSIX timer fires. */ int sys___timer_gettime50(struct lwp *l, const struct sys___timer_gettime50_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; syscallarg(struct itimerspec *) value; } */ struct itimerspec its; int error; if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc, &its)) != 0) return error; return copyout(&its, SCARG(uap, value), sizeof(its)); } int dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its) { struct itimer *it; struct ptimers *pts; pts = p->p_timers; if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) return EINVAL; itimer_lock(); if ((it = pts->pts_timers[timerid]) == NULL) { itimer_unlock(); return EINVAL; } itimer_gettime(it, its); itimer_unlock(); return 0; } /* * sys_timer_getoverrun: * * System call to return the number of times a POSIX timer has * expired while a notification was already pending. The counter * is reset when a timer expires and a notification can be posted. 
*/ int sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; } */ struct proc *p = l->l_proc; struct ptimers *pts; int timerid; struct itimer *it; struct ptimer *pt; timerid = SCARG(uap, timerid); pts = p->p_timers; if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX) return EINVAL; itimer_lock(); if ((it = pts->pts_timers[timerid]) == NULL) { itimer_unlock(); return EINVAL; } pt = container_of(it, struct ptimer, pt_itimer); *retval = pt->pt_poverruns; if (*retval >= DELAYTIMER_MAX) *retval = DELAYTIMER_MAX; itimer_unlock(); return 0; } /* * sys___getitimer50: * * System call to get the time remaining before a BSD timer fires. */ int sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap, register_t *retval) { /* { syscallarg(int) which; syscallarg(struct itimerval *) itv; } */ struct proc *p = l->l_proc; struct itimerval aitv; int error; memset(&aitv, 0, sizeof(aitv)); error = dogetitimer(p, SCARG(uap, which), &aitv); if (error) return error; return copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)); } int dogetitimer(struct proc *p, int which, struct itimerval *itvp) { struct ptimers *pts; struct itimer *it; struct itimerspec its; if ((u_int)which > ITIMER_MONOTONIC) return EINVAL; itimer_lock(); pts = p->p_timers; if (pts == NULL || (it = pts->pts_timers[which]) == NULL) { timerclear(&itvp->it_value); timerclear(&itvp->it_interval); } else { itimer_gettime(it, &its); TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value); TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval); } itimer_unlock(); return 0; } /* * sys___setitimer50: * * System call to set/arm a BSD timer. 
*/ int sys___setitimer50(struct lwp *l, const struct sys___setitimer50_args *uap, register_t *retval) { /* { syscallarg(int) which; syscallarg(const struct itimerval *) itv; syscallarg(struct itimerval *) oitv; } */ struct proc *p = l->l_proc; int which = SCARG(uap, which); struct sys___getitimer50_args getargs; const struct itimerval *itvp; struct itimerval aitv; int error; itvp = SCARG(uap, itv); if (itvp && (error = copyin(itvp, &aitv, sizeof(struct itimerval))) != 0) return error; if (SCARG(uap, oitv) != NULL) { SCARG(&getargs, which) = which; SCARG(&getargs, itv) = SCARG(uap, oitv); if ((error = sys___getitimer50(l, &getargs, retval)) != 0) return error; } if (itvp == 0) return 0; return dosetitimer(p, which, &aitv); } int dosetitimer(struct proc *p, int which, struct itimerval *itvp) { struct timespec now; struct ptimers *pts; struct ptimer *spare; struct itimer *it; struct itlist *itl; int error; if ((u_int)which > ITIMER_MONOTONIC) return EINVAL; if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval)) return EINVAL; /* * Don't bother allocating data structures if the process just * wants to clear the timer. 
*/ spare = NULL; pts = p->p_timers; retry: if (!timerisset(&itvp->it_value) && (pts == NULL || pts->pts_timers[which] == NULL)) return 0; if (pts == NULL) pts = ptimers_alloc(p); itimer_lock(); restart: it = pts->pts_timers[which]; if (it == NULL) { struct ptimer *pt; if (spare == NULL) { itimer_unlock(); spare = kmem_zalloc(sizeof(*spare), KM_SLEEP); goto retry; } pt = spare; spare = NULL; it = &pt->pt_itimer; pt->pt_ev.sigev_notify = SIGEV_SIGNAL; pt->pt_ev.sigev_value.sival_int = which; switch (which) { case ITIMER_REAL: case ITIMER_MONOTONIC: itl = NULL; pt->pt_ev.sigev_signo = SIGALRM; break; case ITIMER_VIRTUAL: itl = &pts->pts_virtual; pt->pt_ev.sigev_signo = SIGVTALRM; break; case ITIMER_PROF: itl = &pts->pts_prof; pt->pt_ev.sigev_signo = SIGPROF; break; default: panic("%s: can't happen %d", __func__, which); } itimer_init(it, &ptimer_itimer_ops, which, itl); pt->pt_proc = p; pt->pt_entry = which; pts->pts_timers[which] = it; } TIMEVAL_TO_TIMESPEC(&itvp->it_value, &it->it_time.it_value); TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &it->it_time.it_interval); error = 0; if (timespecisset(&it->it_time.it_value)) { /* Convert to absolute time */ /* XXX need to wrap in splclock for timecounters case? */ switch (which) { case ITIMER_REAL: getnanotime(&now); if (!timespecaddok(&it->it_time.it_value, &now)) { error = EINVAL; goto out; } timespecadd(&it->it_time.it_value, &now, &it->it_time.it_value); break; case ITIMER_MONOTONIC: getnanouptime(&now); if (!timespecaddok(&it->it_time.it_value, &now)) { error = EINVAL; goto out; } timespecadd(&it->it_time.it_value, &now, &it->it_time.it_value); break; default: break; } } error = itimer_settime(it); if (error == ERESTART) { KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid)); goto restart; } KASSERT(error == 0); out: itimer_unlock(); if (spare != NULL) kmem_free(spare, sizeof(*spare)); return error; } /* * ptimer_tick: * * Called from hardclock() to decrement per-process virtual timers. 
*/ void ptimer_tick(lwp_t *l, bool user) { struct ptimers *pts; struct itimer *it; proc_t *p; p = l->l_proc; if (p->p_timers == NULL) return; itimer_lock(); if ((pts = l->l_proc->p_timers) != NULL) { /* * Run current process's virtual and profile time, as needed. */ if (user && (it = LIST_FIRST(&pts->pts_virtual)) != NULL) if (itimer_decr(it, tick * 1000)) (*it->it_ops->ito_fire)(it); if ((it = LIST_FIRST(&pts->pts_prof)) != NULL) if (itimer_decr(it, tick * 1000)) (*it->it_ops->ito_fire)(it); } itimer_unlock(); } /* * ptimer_intr: * * Software interrupt handler for processing per-process * timer expiration. */ static void ptimer_intr(void *cookie) { ksiginfo_t ksi; struct itimer *it; struct ptimer *pt; proc_t *p; mutex_enter(&proc_lock); itimer_lock(); while ((pt = TAILQ_FIRST(&ptimer_queue)) != NULL) { it = &pt->pt_itimer; TAILQ_REMOVE(&ptimer_queue, pt, pt_chain); KASSERT(pt->pt_queued); pt->pt_queued = false; p = pt->pt_proc; if (p->p_timers == NULL) { /* Process is dying. */ continue; } if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) { continue; } if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo)) { it->it_overruns++; continue; } KSI_INIT(&ksi); ksi.ksi_signo = pt->pt_ev.sigev_signo; ksi.ksi_code = SI_TIMER; ksi.ksi_value = pt->pt_ev.sigev_value; pt->pt_poverruns = it->it_overruns; it->it_overruns = 0; itimer_unlock(); kpsignal(p, &ksi, NULL); itimer_lock(); } itimer_unlock(); mutex_exit(&proc_lock); } |
| 2281 5 5 4 2 3 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | /* $NetBSD: kern_clock.c,v 1.148 2022/03/19 14:34:47 riastradh Exp $ */ /*- * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.148 2022/03/19 14:34:47 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_dtrace.h" #include "opt_gprof.h" #include "opt_multiprocessor.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/callout.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/sysctl.h> #include <sys/timex.h> #include <sys/sched.h> #include <sys/time.h> #include <sys/timetc.h> #include <sys/cpu.h> #include <sys/atomic.h> #include <sys/rndsource.h> #ifdef GPROF #include <sys/gmon.h> #endif #ifdef KDTRACE_HOOKS #include <sys/dtrace_bsd.h> #include <sys/cpu.h> cyclic_clock_func_t cyclic_clock_func[MAXCPUS]; #endif static int sysctl_kern_clockrate(SYSCTLFN_PROTO); /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. The main clock, running hz times per second, is used to keep * track of real time. The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the CPU * just before its quantum expires. Otherwise, it would never accumulate * CPU ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) 
 */

int	stathz;				/* statistics clock's frequency */
int	profhz;				/* profiling clock's frequency */
int	profsrc;			/* profiling source */
int	schedhz;			/* scheduler clock's frequency */
int	profprocs;			/* number of profiled processes */
static int hardclock_ticks;		/* incremented once per hz on primary CPU */
static int hardscheddiv; /* hard => sched divider (used if schedhz == 0) */
static int psdiv;			/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

/* Per-clock entropy-sampling state for rnd(9). */
struct clockrnd {
	struct krndsource source;
	unsigned needed;
};

static struct clockrnd hardclockrnd __aligned(COHERENCY_UNIT);
static struct clockrnd statclockrnd __aligned(COHERENCY_UNIT);

/*
 * clockrnd_get:
 *
 *	rnd(9) callback requesting more entropy from a clock source.
 */
static void
clockrnd_get(size_t needed, void *cookie)
{
	struct clockrnd *C = cookie;

	/* Start sampling.  */
	atomic_store_relaxed(&C->needed, 2*NBBY*needed);
}

/*
 * clockrnd_sample:
 *
 *	Take one clock-jitter sample for the entropy pool, if any
 *	samples are currently wanted.
 */
static void
clockrnd_sample(struct clockrnd *C)
{
	struct cpu_info *ci = curcpu();

	/* If there's nothing needed right now, stop here.  */
	if (__predict_true(atomic_load_relaxed(&C->needed) == 0))
		return;

	/*
	 * If we're not the primary core of a package, we're probably
	 * driven by the same clock as the primary core, so don't
	 * bother.
	 */
	if (ci != ci->ci_package1st)
		return;

	/* Take a sample and enter it into the pool.  */
	rnd_add_uint32(&C->source, 0);

	/*
	 * On the primary CPU, count down.  Using an atomic decrement
	 * here isn't really necessary -- on every platform we care
	 * about, stores to unsigned int are atomic, and the only other
	 * memory operation that could happen here is for another CPU
	 * to store a higher value for needed.  But using an atomic
	 * decrement avoids giving the impression of data races, and is
	 * unlikely to hurt because only one CPU will ever be writing
	 * to the location.
	 */
	if (CPU_IS_PRIMARY(curcpu())) {
		unsigned needed __diagused;

		needed = atomic_dec_uint_nv(&C->needed);
		KASSERT(needed != UINT_MAX);
	}
}

static u_int get_intr_timecount(struct timecounter *);

/* Fallback timecounter driven by the clock interrupt itself. */
static struct timecounter intr_timecounter = {
	.tc_get_timecount	= get_intr_timecount,
	.tc_poll_pps		= NULL,
	.tc_counter_mask	= ~0u,
	.tc_frequency		= 0,
	.tc_name		= "clockinterrupt",
	/* quality - minimum implementation level for a clock */
	.tc_quality		= 0,
	.tc_priv		= NULL,
};

static u_int
get_intr_timecount(struct timecounter *tc)
{

	return (u_int)getticks();
}

/*
 * getticks:
 *
 *	Return the current hardclock tick count.
 */
int
getticks(void)
{
	return atomic_load_relaxed(&hardclock_ticks);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	static struct sysctllog *clog;
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;

	/*
	 * Call cpu_initclocks() before registering the default
	 * timecounter, in case it needs to adjust hz.
	 */
	const int old_hz = hz;
	cpu_initclocks();
	if (old_hz != hz) {
		/* hz changed: recompute the derived tick constants. */
		tick = 1000000 / hz;
		tickadj = (240000 / (60 * hz)) ? (240000 / (60 * hz)) : 1;
	}

	/*
	 * provide minimum default time counter
	 * will only run at interrupt resolution
	 */
	intr_timecounter.tc_frequency = hz;
	tc_init(&intr_timecounter);

	/*
	 * Compute profhz and stathz, fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;

	if (schedhz == 0) {
		/* 16Hz is best */
		hardscheddiv = hz / 16;
		if (hardscheddiv <= 0)
			panic("hardscheddiv");
	}

	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "clockrate",
	    SYSCTL_DESCR("Kernel clock rates"),
	    sysctl_kern_clockrate, 0, NULL,
	    sizeof(struct clockinfo),
	    CTL_KERN, KERN_CLOCKRATE, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_INT, "hardclock_ticks",
	    SYSCTL_DESCR("Number of hardclock ticks"),
	    NULL, 0, &hardclock_ticks, sizeof(hardclock_ticks),
	    CTL_KERN, KERN_HARDCLOCK_TICKS, CTL_EOL);

	rndsource_setcb(&hardclockrnd.source, clockrnd_get, &hardclockrnd);
	rnd_attach_source(&hardclockrnd.source, "hardclock", RND_TYPE_SKEW,
	    RND_FLAG_COLLECT_TIME|RND_FLAG_HASCB);
	if (stathz) {
		rndsource_setcb(&statclockrnd.source, clockrnd_get,
		    &statclockrnd);
		rnd_attach_source(&statclockrnd.source, "statclock",
		    RND_TYPE_SKEW, RND_FLAG_COLLECT_TIME|RND_FLAG_HASCB);
	}
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct lwp *l;
	struct cpu_info *ci;

	clockrnd_sample(&hardclockrnd);

	ci = curcpu();
	l = ci->ci_onproc;

	ptimer_tick(l, CLKF_USERMODE(frame));

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	/*
	 * If no separate schedclock is provided, call it here
	 * at about 16 Hz.
	 */
	if (schedhz == 0) {
		if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
			schedclock(l);
			ci->ci_schedstate.spc_schedticks = hardscheddiv;
		}
	}
	if ((--ci->ci_schedstate.spc_ticks) <= 0)
		sched_tick(ci);

	if (CPU_IS_PRIMARY(ci)) {
		/* Only the primary CPU advances the global tick count. */
		atomic_store_relaxed(&hardclock_ticks,
		    atomic_load_relaxed(&hardclock_ticks) + 1);
		tc_ticktock();
	}

	/*
	 * Update real-time timeout queue.
	 */
	callout_hardclock();
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	KASSERT(mutex_owned(&p->p_stmutex));

	if ((p->p_stflag & PST_PROFIL) == 0) {
		p->p_stflag |= PST_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (++profprocs == 1 && stathz != 0)
			psdiv = psratio;
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	KASSERT(mutex_owned(&p->p_stmutex));

	if (p->p_stflag & PST_PROFIL) {
		p->p_stflag &= ~PST_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (--profprocs == 0 && stathz != 0)
			psdiv = 1;
	}
}

/*
 * schedclock:
 *
 *	Periodic scheduler tick for a non-idle LWP; defers to the
 *	scheduler's own handler.
 */
void
schedclock(struct lwp *l)
{
	if ((l->l_flag & LW_IDLE) != 0)
		return;

	sched_schedclock(l);
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct proc *p;
	struct lwp *l;

	if (stathz)
		clockrnd_sample(&statclockrnd);

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
	l = ci->ci_onproc;
	if ((l->l_flag & LW_IDLE) != 0) {
		/*
		 * don't account idle lwps as swapper.
		 */
		p = NULL;
	} else {
		p = l->l_proc;
		mutex_spin_enter(&p->p_stmutex);
	}

	if (CLKF_USERMODE(frame)) {
		KASSERT(p != NULL);
		if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
			addupc_intr(l, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0) {
			mutex_spin_exit(&p->p_stmutex);
			return;
		}

		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
#if defined(MULTIPROCESSOR) && !defined(_RUMPKERNEL)
		g = curcpu()->ci_gmon;
		if (g != NULL &&
		    profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
#else
		g = &_gmonparam;
		if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
#endif
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p != NULL && profsrc == PROFSRC_CLOCK &&
		    (p->p_stflag & PST_PROFIL)) {
			addupc_intr(l, LWP_PC(l));
		}
#endif
		if (--spc->spc_pscnt > 0) {
			if (p != NULL)
				mutex_spin_exit(&p->p_stmutex);
			return;
		}
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
			if (p != NULL) {
				p->p_iticks++;
			}
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else {
			spc->spc_cp_time[CP_IDLE]++;
		}
	}
	spc->spc_pscnt = psdiv;

	if (p != NULL) {
		atomic_inc_uint(&l->l_cpticks);
		mutex_spin_exit(&p->p_stmutex);
	}

#ifdef KDTRACE_HOOKS
	cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
	if (func) {
		(*func)((struct clockframe *)frame);
	}
#endif
}

/*
 * sysctl helper routine for kern.clockrate.  Assembles a struct on
 * the fly to be returned to the caller.
 */
static int
sysctl_kern_clockrate(SYSCTLFN_ARGS)
{
	struct clockinfo clkinfo;
	struct sysctlnode node;

	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;

	node = *rnode;
	node.sysctl_data = &clkinfo;
	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
| 4 4 47 47 47 46 47 47 46 47 47 80 81 46 80 81 79 82 82 27 27 27 27 80 82 82 81 81 80 80 80 29 80 81 78 82 27 27 27 27 80 79 81 43 81 26 80 63 81 82 82 82 29 29 29 63 62 63 62 63 63 63 63 63 1 62 63 63 63 81 81 80 79 81 80 81 81 81 81 81 80 81 81 81 563 563 563 564 564 508 564 543 543 80 564 564 79 81 79 78 79 81 81 81 80 81 81 80 80 79 81 81 81 81 81 63 63 63 80 81 81 81 63 63 80 80 81 81 79 64 81 80 79 80 80 80 121 121 120 106 27 27 27 27 27 27 27 26 27 27 25 25 27 26 9 9 27 27 18 19 19 27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 
1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 
1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 | /* $NetBSD: subr_vmem.c,v 1.108 2022/05/31 08:43:16 andvar Exp $ */ /*- * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * reference: * - Magazines and Vmem: Extending the Slab Allocator * to Many CPUs and Arbitrary Resources * http://www.usenix.org/event/usenix01/bonwick.html * * locking & the boundary tag pool: * - A pool(9) is used for vmem boundary tags * - During a pool get call the global vmem_btag_refill_lock is taken, * to serialize access to the allocation reserve, but no other * vmem arena locks. * - During pool_put calls no vmem mutexes are locked. * - pool_drain doesn't hold the pool's mutex while releasing memory to * its backing therefore no interference with any vmem mutexes. * - The boundary tag pool is forced to put page headers into pool pages * (PR_PHINPAGE) and not off page to avoid pool recursion. 
* (due to sizeof(bt_t) it should be the case anyway) */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.108 2022/05/31 08:43:16 andvar Exp $"); #if defined(_KERNEL) && defined(_KERNEL_OPT) #include "opt_ddb.h" #endif /* defined(_KERNEL) && defined(_KERNEL_OPT) */ #include <sys/param.h> #include <sys/hash.h> #include <sys/queue.h> #include <sys/bitops.h> #if defined(_KERNEL) #include <sys/systm.h> #include <sys/kernel.h> /* hz */ #include <sys/callout.h> #include <sys/kmem.h> #include <sys/pool.h> #include <sys/vmem.h> #include <sys/vmem_impl.h> #include <sys/workqueue.h> #include <sys/atomic.h> #include <uvm/uvm.h> #include <uvm/uvm_extern.h> #include <uvm/uvm_km.h> #include <uvm/uvm_page.h> #include <uvm/uvm_pdaemon.h> #else /* defined(_KERNEL) */ #include <stdio.h> #include <errno.h> #include <assert.h> #include <stdlib.h> #include <string.h> #include "../sys/vmem.h" #include "../sys/vmem_impl.h" #endif /* defined(_KERNEL) */ #if defined(_KERNEL) #include <sys/evcnt.h> #define VMEM_EVCNT_DEFINE(name) \ struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ "vmem", #name); \ EVCNT_ATTACH_STATIC(vmem_evcnt_##name); #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++ #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count-- VMEM_EVCNT_DEFINE(static_bt_count) VMEM_EVCNT_DEFINE(static_bt_inuse) #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) #else /* defined(_KERNEL) */ #define VMEM_EVCNT_INCR(ev) /* nothing */ #define VMEM_EVCNT_DECR(ev) /* nothing */ #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */ #define VMEM_CONDVAR_DESTROY(vm) /* nothing */ #define VMEM_CONDVAR_WAIT(vm) /* nothing */ #define VMEM_CONDVAR_BROADCAST(vm) /* nothing */ #define UNITTEST #define KASSERT(a) assert(a) #define mutex_init(a, b, c) /* nothing */ #define 
mutex_destroy(a) /* nothing */ #define mutex_enter(a) /* nothing */ #define mutex_tryenter(a) true #define mutex_exit(a) /* nothing */ #define mutex_owned(a) /* nothing */ #define ASSERT_SLEEPABLE() /* nothing */ #define panic(...) printf(__VA_ARGS__); abort() #endif /* defined(_KERNEL) */ #if defined(VMEM_SANITY) static void vmem_check(vmem_t *); #else /* defined(VMEM_SANITY) */ #define vmem_check(vm) /* nothing */ #endif /* defined(VMEM_SANITY) */ #define VMEM_HASHSIZE_MIN 1 /* XXX */ #define VMEM_HASHSIZE_MAX 65536 /* XXX */ #define VMEM_HASHSIZE_INIT 1 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT) #if defined(_KERNEL) static bool vmem_bootstrapped = false; static kmutex_t vmem_list_lock; static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); #endif /* defined(_KERNEL) */ /* ---- misc */ #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock) #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock) #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock) #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl) #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock) #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock)) #define VMEM_ALIGNUP(addr, align) \ (-(-(addr) & -(align))) #define VMEM_CROSS_P(addr1, addr2, boundary) \ ((((addr1) ^ (addr2)) & -(boundary)) != 0) #define ORDER2SIZE(order) ((vmem_size_t)1 << (order)) #define SIZE2ORDER(size) ((int)ilog2(size)) #if !defined(_KERNEL) #define xmalloc(sz, flags) malloc(sz) #define xfree(p, sz) free(p) #define bt_alloc(vm, flags) malloc(sizeof(bt_t)) #define bt_free(vm, bt) free(bt) #else /* defined(_KERNEL) */ #define xmalloc(sz, flags) \ kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP); #define xfree(p, sz) kmem_free(p, sz); /* * BT_RESERVE calculation: * we allocate memory for boundary tags with vmem; therefore we have * to keep a reserve of bts used to allocated memory for bts. * This reserve is 4 for each arena involved in allocating vmems memory. 
* BT_MAXFREE: don't cache excessive counts of bts in arenas */ #define STATIC_BT_COUNT 200 #define BT_MINRESERVE 4 #define BT_MAXFREE 64 static struct vmem_btag static_bts[STATIC_BT_COUNT]; static int static_bt_count = STATIC_BT_COUNT; static struct vmem kmem_va_meta_arena_store; vmem_t *kmem_va_meta_arena; static struct vmem kmem_meta_arena_store; vmem_t *kmem_meta_arena = NULL; static kmutex_t vmem_btag_refill_lock; static kmutex_t vmem_btag_lock; static LIST_HEAD(, vmem_btag) vmem_btag_freelist; static size_t vmem_btag_freelist_count = 0; static struct pool vmem_btag_pool; static void vmem_xfree_bt(vmem_t *, bt_t *); static void vmem_kick_pdaemon(void) { #if defined(_KERNEL) uvm_kick_pdaemon(); #endif } /* ---- boundary tag */ static int bt_refill(vmem_t *vm); static int bt_refill_locked(vmem_t *vm); static void * pool_page_alloc_vmem_meta(struct pool *pp, int flags) { const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; vmem_addr_t va; int ret; ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va); return ret ? 
NULL : (void *)va;	/* tail of pool_page_alloc_vmem_meta(): NULL on
			 * failure, else the allocated address (head of
			 * the function is before this chunk) */
}

static void
pool_page_free_vmem_meta(struct pool *pp, void *v)
{

	vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz);
}

/* allocator for vmem-pool metadata */
struct pool_allocator pool_allocator_vmem_meta = {
	.pa_alloc = pool_page_alloc_vmem_meta,
	.pa_free = pool_page_free_vmem_meta,
	.pa_pagesz = 0
};

/*
 * bt_refill_locked: top up vm's private free-tag list to more than
 * BT_MINRESERVE entries, first from the shared static-tag freelist and
 * then from the boundary-tag pool.  Called and returns with vm locked,
 * but drops the lock transiently around pool_get().
 * Returns 0 on success, ENOMEM if the reserve could not be met.
 */
static int
bt_refill_locked(vmem_t *vm)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);

	if (vm->vm_nfreetags > BT_MINRESERVE) {
		return 0;
	}

	/* First draw from the shared static-tag freelist. */
	mutex_enter(&vmem_btag_lock);
	while (!LIST_EMPTY(&vmem_btag_freelist) &&
	    vm->vm_nfreetags <= BT_MINRESERVE) {
		bt = LIST_FIRST(&vmem_btag_freelist);
		LIST_REMOVE(bt, bt_freelist);
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
		vmem_btag_freelist_count--;
		VMEM_EVCNT_INCR(static_bt_inuse);
	}
	mutex_exit(&vmem_btag_lock);

	/*
	 * Still short: allocate from the pool.  vm's lock is dropped
	 * around pool_get() (which may itself allocate through vmem);
	 * vmem_btag_refill_lock serializes access to the pool reserve.
	 */
	while (vm->vm_nfreetags <= BT_MINRESERVE) {
		VMEM_UNLOCK(vm);
		mutex_enter(&vmem_btag_refill_lock);
		bt = pool_get(&vmem_btag_pool, PR_NOWAIT);
		mutex_exit(&vmem_btag_refill_lock);
		VMEM_LOCK(vm);
		if (bt == NULL)
			break;
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags <= BT_MINRESERVE) {
		return ENOMEM;
	}

	/* Keep the kmem meta-arenas topped up too, once they exist. */
	if (kmem_meta_arena != NULL) {
		VMEM_UNLOCK(vm);
		(void)bt_refill(kmem_arena);
		(void)bt_refill(kmem_va_meta_arena);
		(void)bt_refill(kmem_meta_arena);
		VMEM_LOCK(vm);
	}

	return 0;
}

/* bt_refill: as bt_refill_locked(), for callers not holding vm's lock. */
static int
bt_refill(vmem_t *vm)
{
	int rv;

	VMEM_LOCK(vm);
	rv = bt_refill_locked(vm);
	VMEM_UNLOCK(vm);
	return rv;
}

/*
 * bt_alloc: take one boundary tag from vm's free-tag list, refilling
 * first unless we are already populating (VM_POPULATING).  May sleep
 * on failure unless VM_NOSLEEP; returns NULL only for VM_NOSLEEP.
 * Called with vm locked.
 */
static bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	while (vm->vm_nfreetags <= BT_MINRESERVE &&
	    (flags & VM_POPULATING) == 0) {
		if (bt_refill_locked(vm)) {
			if ((flags & VM_NOSLEEP) != 0) {
				return NULL;
			}

			/*
			 * It would be nice to wait for something specific here
			 * but there are multiple ways that a retry could
			 * succeed and we can't wait for multiple things
			 * simultaneously.  So we'll just sleep for an arbitrary
			 * short period of time and retry regardless.
			 * This should be a very rare case.
			 */
			vmem_kick_pdaemon();
			kpause("btalloc", false, 1, &vm->vm_lock);
		}
	}
	bt = LIST_FIRST(&vm->vm_freetags);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;

	return bt;
}

/* bt_free: return a tag to vm's free-tag list.  Called with vm locked. */
static void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);

	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
}

/*
 * bt_freetrim: release free tags above freelimit.  Static tags go back
 * to the shared freelist; dynamic tags go back to the pool.  Drops vm's
 * lock and does NOT reacquire it, so that pool_put() runs unlocked
 * (see the locking notes at the top of this file).
 */
static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	bt_t *t;
	LIST_HEAD(, vmem_btag) tofree;

	VMEM_ASSERT_LOCKED(vm);

	LIST_INIT(&tofree);

	while (vm->vm_nfreetags > freelimit) {
		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		/*
		 * NOTE(review): range test distinguishes static tags by
		 * address; relies on a flat address space.
		 */
		if (bt >= static_bts
		    && bt < &static_bts[STATIC_BT_COUNT]) {
			mutex_enter(&vmem_btag_lock);
			LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
			vmem_btag_freelist_count++;
			mutex_exit(&vmem_btag_lock);
			VMEM_EVCNT_DECR(static_bt_inuse);
		} else {
			LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
		}
	}

	VMEM_UNLOCK(vm);
	while (!LIST_EMPTY(&tofree)) {
		t = LIST_FIRST(&tofree);
		LIST_REMOVE(t, bt_freelist);
		pool_put(&vmem_btag_pool, t);
	}
}
#endif	/* defined(_KERNEL) */

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

/*
 * bt_freehead_tofree: freelist on which to PUT a free segment of the
 * given size (indexed by floor(log2(size in quanta))).
 */
static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * for VM_INSTANTFIT, return the list in which any blocks are large enough
 * for the requested size.  otherwise, return the list which can have blocks
 * large enough for the requested size.
*/ static struct vmem_freelist * bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) { const vmem_size_t qsize = size >> vm->vm_quantum_shift; int idx = SIZE2ORDER(qsize); KASSERT(size != 0 && qsize != 0); KASSERT((size & vm->vm_quantum_mask) == 0); if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { idx++; /* check too large request? */ } KASSERT(idx >= 0); KASSERT(idx < VMEM_MAXORDER); return &vm->vm_freelist[idx]; } /* ---- boundary tag hash */ static struct vmem_hashlist * bt_hashhead(vmem_t *vm, vmem_addr_t addr) { struct vmem_hashlist *list; unsigned int hash; hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); list = &vm->vm_hashlist[hash & vm->vm_hashmask]; return list; } static bt_t * bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) { struct vmem_hashlist *list; bt_t *bt; list = bt_hashhead(vm, addr); LIST_FOREACH(bt, list, bt_hashlist) { if (bt->bt_start == addr) { break; } } return bt; } static void bt_rembusy(vmem_t *vm, bt_t *bt) { KASSERT(vm->vm_nbusytag > 0); vm->vm_inuse -= bt->bt_size; vm->vm_nbusytag--; LIST_REMOVE(bt, bt_hashlist); } static void bt_insbusy(vmem_t *vm, bt_t *bt) { struct vmem_hashlist *list; KASSERT(bt->bt_type == BT_TYPE_BUSY); list = bt_hashhead(vm, bt->bt_start); LIST_INSERT_HEAD(list, bt, bt_hashlist); if (++vm->vm_nbusytag > vm->vm_maxbusytag) { vm->vm_maxbusytag = vm->vm_nbusytag; } vm->vm_inuse += bt->bt_size; } /* ---- boundary tag list */ static void bt_remseg(vmem_t *vm, bt_t *bt) { TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); } static void bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) { TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); } static void bt_insseg_tail(vmem_t *vm, bt_t *bt) { TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); } static void bt_remfree(vmem_t *vm, bt_t *bt) { KASSERT(bt->bt_type == BT_TYPE_FREE); LIST_REMOVE(bt, bt_freelist); } static void bt_insfree(vmem_t *vm, bt_t *bt) { struct vmem_freelist *list; list = bt_freehead_tofree(vm, bt->bt_size); 
LIST_INSERT_HEAD(list, bt, bt_freelist); } /* ---- vmem internal functions */ #if defined(QCACHE) static inline vm_flag_t prf_to_vmf(int prflags) { vm_flag_t vmflags; KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); if ((prflags & PR_WAITOK) != 0) { vmflags = VM_SLEEP; } else { vmflags = VM_NOSLEEP; } return vmflags; } static inline int vmf_to_prf(vm_flag_t vmflags) { int prflags; if ((vmflags & VM_SLEEP) != 0) { prflags = PR_WAITOK; } else { prflags = PR_NOWAIT; } return prflags; } static size_t qc_poolpage_size(size_t qcache_max) { int i; for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { /* nothing */ } return ORDER2SIZE(i); } static void * qc_poolpage_alloc(struct pool *pool, int prflags) { qcache_t *qc = QC_POOL_TO_QCACHE(pool); vmem_t *vm = qc->qc_vmem; vmem_addr_t addr; if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) return NULL; return (void *)addr; } static void qc_poolpage_free(struct pool *pool, void *addr) { qcache_t *qc = QC_POOL_TO_QCACHE(pool); vmem_t *vm = qc->qc_vmem; vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); } static void qc_init(vmem_t *vm, size_t qcache_max, int ipl) { qcache_t *prevqc; struct pool_allocator *pa; int qcache_idx_max; int i; KASSERT((qcache_max & vm->vm_quantum_mask) == 0); if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; } vm->vm_qcache_max = qcache_max; pa = &vm->vm_qcache_allocator; memset(pa, 0, sizeof(*pa)); pa->pa_alloc = qc_poolpage_alloc; pa->pa_free = qc_poolpage_free; pa->pa_pagesz = qc_poolpage_size(qcache_max); qcache_idx_max = qcache_max >> vm->vm_quantum_shift; prevqc = NULL; for (i = qcache_idx_max; i > 0; i--) { qcache_t *qc = &vm->vm_qcache_store[i - 1]; size_t size = i << vm->vm_quantum_shift; pool_cache_t pc; qc->qc_vmem = vm; snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", vm->vm_name, size); pc = pool_cache_init(size, 
ORDER2SIZE(vm->vm_quantum_shift), 0, PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, qc->qc_name, pa, ipl, NULL, NULL, NULL); KASSERT(pc); qc->qc_cache = pc; KASSERT(qc->qc_cache != NULL); /* XXX */ if (prevqc != NULL && qc->qc_cache->pc_pool.pr_itemsperpage == prevqc->qc_cache->pc_pool.pr_itemsperpage) { pool_cache_destroy(qc->qc_cache); vm->vm_qcache[i - 1] = prevqc; continue; } qc->qc_cache->pc_pool.pr_qcache = qc; vm->vm_qcache[i - 1] = qc; prevqc = qc; } } static void qc_destroy(vmem_t *vm) { const qcache_t *prevqc; int i; int qcache_idx_max; qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; prevqc = NULL; for (i = 0; i < qcache_idx_max; i++) { qcache_t *qc = vm->vm_qcache[i]; if (prevqc == qc) { continue; } pool_cache_destroy(qc->qc_cache); prevqc = qc; } } #endif #if defined(_KERNEL) static void vmem_bootstrap(void) { mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM); while (static_bt_count-- > 0) { bt_t *bt = &static_bts[static_bt_count]; LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); VMEM_EVCNT_INCR(static_bt_count); vmem_btag_freelist_count++; } vmem_bootstrapped = TRUE; } void vmem_subsystem_init(vmem_t *vm) { kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va", 0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm, 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, IPL_VM); kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta", 0, 0, PAGE_SIZE, uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); pool_init(&vmem_btag_pool, sizeof(bt_t), coherency_unit, 0, PR_PHINPAGE, "vmembt", &pool_allocator_vmem_meta, IPL_VM); } #endif /* defined(_KERNEL) */ static int vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, int spanbttype) { bt_t *btspan; bt_t *btfree; VMEM_ASSERT_LOCKED(vm); KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); KASSERT((~flags & 
(VM_SLEEP|VM_NOSLEEP)) != 0); KASSERT(spanbttype == BT_TYPE_SPAN || spanbttype == BT_TYPE_SPAN_STATIC); btspan = bt_alloc(vm, flags); if (btspan == NULL) { return ENOMEM; } btfree = bt_alloc(vm, flags); if (btfree == NULL) { bt_free(vm, btspan); return ENOMEM; } btspan->bt_type = spanbttype; btspan->bt_start = addr; btspan->bt_size = size; btfree->bt_type = BT_TYPE_FREE; btfree->bt_start = addr; btfree->bt_size = size; bt_insseg_tail(vm, btspan); bt_insseg(vm, btfree, btspan); bt_insfree(vm, btfree); vm->vm_size += size; return 0; } static void vmem_destroy1(vmem_t *vm) { #if defined(QCACHE) qc_destroy(vm); #endif /* defined(QCACHE) */ VMEM_LOCK(vm); for (int i = 0; i < vm->vm_hashsize; i++) { bt_t *bt; while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); LIST_REMOVE(bt, bt_hashlist); bt_free(vm, bt); } } /* bt_freetrim() drops the lock. */ bt_freetrim(vm, 0); if (vm->vm_hashlist != &vm->vm_hash0) { xfree(vm->vm_hashlist, sizeof(struct vmem_hashlist) * vm->vm_hashsize); } VMEM_CONDVAR_DESTROY(vm); VMEM_LOCK_DESTROY(vm); xfree(vm, sizeof(*vm)); } static int vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) { vmem_addr_t addr; int rc; VMEM_ASSERT_LOCKED(vm); if (vm->vm_importfn == NULL) { return EINVAL; } if (vm->vm_flags & VM_LARGEIMPORT) { size *= 16; } VMEM_UNLOCK(vm); if (vm->vm_flags & VM_XIMPORT) { rc = __FPTRCAST(vmem_ximport_t *, vm->vm_importfn)(vm->vm_arg, size, &size, flags, &addr); } else { rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); } VMEM_LOCK(vm); if (rc) { return ENOMEM; } if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { VMEM_UNLOCK(vm); (*vm->vm_releasefn)(vm->vm_arg, addr, size); VMEM_LOCK(vm); return ENOMEM; } return 0; } static int vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) { bt_t *bt; int i; struct vmem_hashlist *newhashlist; struct vmem_hashlist *oldhashlist; size_t oldhashsize; KASSERT(newhashsize > 0); /* Round hash size up to a power of 2. 
*/
	newhashsize = 1 << (ilog2(newhashsize) + 1);

	/* Allocate the new table unlocked; may sleep per flags. */
	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	/* Decay back to a small hash slowly. */
	if (vm->vm_maxbusytag >= 2) {
		vm->vm_maxbusytag = vm->vm_maxbusytag / 2 - 1;
		if (vm->vm_nbusytag > vm->vm_maxbusytag) {
			vm->vm_maxbusytag = vm->vm_nbusytag;
		}
	} else {
		vm->vm_maxbusytag = vm->vm_nbusytag;
	}
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	vm->vm_hashmask = newhashsize - 1;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	/* Rehash every busy tag into the new table. */
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	/* The embedded initial table (vm_hash0) is never freed. */
	if (oldhashlist != &vm->vm_hash0) {
		xfree(oldhashlist,
		    sizeof(struct vmem_hashlist) * oldhashsize);
	}

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * it's a caller's responsibility to ensure the region is big enough
 * before calling us.
 */
static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross,
    vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(size > 0);
	KASSERT(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	/* Clamp the candidate range to [minaddr, maxaddr]. */
	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr) {
		end = maxaddr;
	}
	if (start > end) {
		return ENOMEM;
	}

	/* Round up to the requested alignment, preserving the phase. */
	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		/* `start - phase' wrapped; step up to the next slot. */
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		/* Skip ahead to the next nocross boundary. */
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	/* `end - start >= size - 1' avoids overflow of start + size. */
	if (start <= end && end - start >= size - 1) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(BT_END(bt) - start >= size - 1);
		*addrp = start;
		return 0;
	}
	return ENOMEM;
}

/* ---- vmem API */

/*
 * vmem_init: creates a vmem arena.
 */
vmem_t *
vmem_init(vmem_t *vm, const char *name,
    vmem_addr_t base, vmem_size_t size, vmem_size_t quantum,
    vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{
	int i;

	/* Exactly one of VM_SLEEP/VM_NOSLEEP must be set. */
	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(quantum > 0);

#if defined(_KERNEL)
	/* XXX: SMP, we get called early...
*/ if (!vmem_bootstrapped) { vmem_bootstrap(); } #endif /* defined(_KERNEL) */ if (vm == NULL) { vm = xmalloc(sizeof(*vm), flags); } if (vm == NULL) { return NULL; } VMEM_CONDVAR_INIT(vm, "vmem"); VMEM_LOCK_INIT(vm, ipl); vm->vm_flags = flags; vm->vm_nfreetags = 0; LIST_INIT(&vm->vm_freetags); strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); vm->vm_quantum_mask = quantum - 1; vm->vm_quantum_shift = SIZE2ORDER(quantum); KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); vm->vm_importfn = importfn; vm->vm_releasefn = releasefn; vm->vm_arg = arg; vm->vm_nbusytag = 0; vm->vm_maxbusytag = 0; vm->vm_size = 0; vm->vm_inuse = 0; #if defined(QCACHE) qc_init(vm, qcache_max, ipl); #endif /* defined(QCACHE) */ TAILQ_INIT(&vm->vm_seglist); for (i = 0; i < VMEM_MAXORDER; i++) { LIST_INIT(&vm->vm_freelist[i]); } memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0)); vm->vm_hashsize = 1; vm->vm_hashmask = vm->vm_hashsize - 1; vm->vm_hashlist = &vm->vm_hash0; if (size != 0) { if (vmem_add(vm, base, size, flags) != 0) { vmem_destroy1(vm); return NULL; } } #if defined(_KERNEL) if (flags & VM_BOOTSTRAP) { bt_refill(vm); } mutex_enter(&vmem_list_lock); LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); mutex_exit(&vmem_list_lock); #endif /* defined(_KERNEL) */ return vm; } /* * vmem_create: create an arena. * * => must not be called from interrupt context. */ vmem_t * vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) { KASSERT((flags & (VM_XIMPORT)) == 0); return vmem_init(NULL, name, base, size, quantum, importfn, releasefn, source, qcache_max, flags, ipl); } /* * vmem_xcreate: create an arena takes alternative import func. * * => must not be called from interrupt context. 
*/ vmem_t * vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) { KASSERT((flags & (VM_XIMPORT)) == 0); return vmem_init(NULL, name, base, size, quantum, __FPTRCAST(vmem_import_t *, importfn), releasefn, source, qcache_max, flags | VM_XIMPORT, ipl); } void vmem_destroy(vmem_t *vm) { #if defined(_KERNEL) mutex_enter(&vmem_list_lock); LIST_REMOVE(vm, vm_alllist); mutex_exit(&vmem_list_lock); #endif /* defined(_KERNEL) */ vmem_destroy1(vm); } vmem_size_t vmem_roundup_size(vmem_t *vm, vmem_size_t size) { return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; } /* * vmem_alloc: allocate resource from the arena. */ int vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) { const vm_flag_t strat __diagused = flags & VM_FITMASK; int error; KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); KASSERT(size > 0); KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); if ((flags & VM_SLEEP) != 0) { ASSERT_SLEEPABLE(); } #if defined(QCACHE) if (size <= vm->vm_qcache_max) { void *p; int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; qcache_t *qc = vm->vm_qcache[qidx - 1]; p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); if (addrp != NULL) *addrp = (vmem_addr_t)p; error = (p == NULL) ? 
ENOMEM : 0; goto out; } #endif /* defined(QCACHE) */ error = vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, addrp); out: KASSERTMSG(error || addrp == NULL || (*addrp & vm->vm_quantum_mask) == 0, "vmem %s mask=0x%jx addr=0x%jx", vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)*addrp); KASSERT(error == 0 || (flags & VM_SLEEP) == 0); return error; } int vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, const vmem_size_t phase, const vmem_size_t nocross, const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, vmem_addr_t *addrp) { struct vmem_freelist *list; struct vmem_freelist *first; struct vmem_freelist *end; bt_t *bt; bt_t *btnew; bt_t *btnew2; const vmem_size_t size = vmem_roundup_size(vm, size0); vm_flag_t strat = flags & VM_FITMASK; vmem_addr_t start; int rc; KASSERT(size0 > 0); KASSERT(size > 0); KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); if ((flags & VM_SLEEP) != 0) { ASSERT_SLEEPABLE(); } KASSERT((align & vm->vm_quantum_mask) == 0); KASSERT((align & (align - 1)) == 0); KASSERT((phase & vm->vm_quantum_mask) == 0); KASSERT((nocross & vm->vm_quantum_mask) == 0); KASSERT((nocross & (nocross - 1)) == 0); KASSERT((align == 0 && phase == 0) || phase < align); KASSERT(nocross == 0 || nocross >= size); KASSERT(minaddr <= maxaddr); KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); if (align == 0) { align = vm->vm_quantum_mask + 1; } /* * allocate boundary tags before acquiring the vmem lock. */ VMEM_LOCK(vm); btnew = bt_alloc(vm, flags); if (btnew == NULL) { VMEM_UNLOCK(vm); return ENOMEM; } btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ if (btnew2 == NULL) { bt_free(vm, btnew); VMEM_UNLOCK(vm); return ENOMEM; } /* * choose a free block from which we allocate. 
*/ retry_strat: first = bt_freehead_toalloc(vm, size, strat); end = &vm->vm_freelist[VMEM_MAXORDER]; retry: bt = NULL; vmem_check(vm); if (strat == VM_INSTANTFIT) { /* * just choose the first block which satisfies our restrictions. * * note that we don't need to check the size of the blocks * because any blocks found on these list should be larger than * the given size. */ for (list = first; list < end; list++) { bt = LIST_FIRST(list); if (bt != NULL) { rc = vmem_fit(bt, size, align, phase, nocross, minaddr, maxaddr, &start); if (rc == 0) { goto gotit; } /* * don't bother to follow the bt_freelist link * here. the list can be very long and we are * told to run fast. blocks from the later free * lists are larger and have better chances to * satisfy our restrictions. */ } } } else { /* VM_BESTFIT */ /* * we assume that, for space efficiency, it's better to * allocate from a smaller block. thus we will start searching * from the lower-order list than VM_INSTANTFIT. * however, don't bother to find the smallest block in a free * list because the list can be very long. we can revisit it * if/when it turns out to be a problem. * * note that the 'first' list can contain blocks smaller than * the requested size. thus we need to check bt_size. */ for (list = first; list < end; list++) { LIST_FOREACH(bt, list, bt_freelist) { if (bt->bt_size >= size) { rc = vmem_fit(bt, size, align, phase, nocross, minaddr, maxaddr, &start); if (rc == 0) { goto gotit; } } } } } #if 1 if (strat == VM_INSTANTFIT) { strat = VM_BESTFIT; goto retry_strat; } #endif if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { /* * XXX should try to import a region large enough to * satisfy restrictions? 
*/ goto fail; } /* XXX eeek, minaddr & maxaddr not respected */ if (vmem_import(vm, size, flags) == 0) { goto retry; } /* XXX */ if ((flags & VM_SLEEP) != 0) { vmem_kick_pdaemon(); VMEM_CONDVAR_WAIT(vm); goto retry; } fail: bt_free(vm, btnew); bt_free(vm, btnew2); VMEM_UNLOCK(vm); return ENOMEM; gotit: KASSERT(bt->bt_type == BT_TYPE_FREE); KASSERT(bt->bt_size >= size); bt_remfree(vm, bt); vmem_check(vm); if (bt->bt_start != start) { btnew2->bt_type = BT_TYPE_FREE; btnew2->bt_start = bt->bt_start; btnew2->bt_size = start - bt->bt_start; bt->bt_start = start; bt->bt_size -= btnew2->bt_size; bt_insfree(vm, btnew2); bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); btnew2 = NULL; vmem_check(vm); } KASSERT(bt->bt_start == start); if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { /* split */ btnew->bt_type = BT_TYPE_BUSY; btnew->bt_start = bt->bt_start; btnew->bt_size = size; bt->bt_start = bt->bt_start + size; bt->bt_size -= size; bt_insfree(vm, bt); bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); bt_insbusy(vm, btnew); vmem_check(vm); } else { bt->bt_type = BT_TYPE_BUSY; bt_insbusy(vm, bt); vmem_check(vm); bt_free(vm, btnew); btnew = bt; } if (btnew2 != NULL) { bt_free(vm, btnew2); } KASSERT(btnew->bt_size >= size); btnew->bt_type = BT_TYPE_BUSY; if (addrp != NULL) *addrp = btnew->bt_start; VMEM_UNLOCK(vm); KASSERTMSG(addrp == NULL || (*addrp & vm->vm_quantum_mask) == 0, "vmem %s mask=0x%jx addr=0x%jx", vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)*addrp); return 0; } /* * vmem_free: free the resource to the arena. 
*/ void vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) { KASSERT(size > 0); KASSERTMSG((addr & vm->vm_quantum_mask) == 0, "vmem %s mask=0x%jx addr=0x%jx", vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)addr); #if defined(QCACHE) if (size <= vm->vm_qcache_max) { int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; qcache_t *qc = vm->vm_qcache[qidx - 1]; pool_cache_put(qc->qc_cache, (void *)addr); return; } #endif /* defined(QCACHE) */ vmem_xfree(vm, addr, size); } void vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) { bt_t *bt; KASSERT(size > 0); KASSERTMSG((addr & vm->vm_quantum_mask) == 0, "vmem %s mask=0x%jx addr=0x%jx", vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)addr); VMEM_LOCK(vm); bt = bt_lookupbusy(vm, addr); KASSERTMSG(bt != NULL, "vmem %s addr 0x%jx size 0x%jx", vm->vm_name, (uintmax_t)addr, (uintmax_t)size); KASSERT(bt->bt_start == addr); KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); /* vmem_xfree_bt() drops the lock. */ vmem_xfree_bt(vm, bt); } void vmem_xfreeall(vmem_t *vm) { bt_t *bt; /* This can't be used if the arena has a quantum cache. */ KASSERT(vm->vm_qcache_max == 0); for (;;) { VMEM_LOCK(vm); TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { if (bt->bt_type == BT_TYPE_BUSY) break; } if (bt != NULL) { /* vmem_xfree_bt() drops the lock. 
*/
			vmem_xfree_bt(vm, bt);
		} else {
			VMEM_UNLOCK(vm);
			return;
		}
	}
}

/*
 * vmem_xfree_bt: free the busy boundary tag 'bt', coalescing it with
 * adjacent free segments, and hand a completely-free span back to the
 * arena's release callback when one exists.
 *
 * => called with the vmem lock held; the lock is dropped before return
 *    (via bt_freetrim()).
 */
static void
vmem_xfree_bt(vmem_t *vm, bt_t *bt)
{
	bt_t *t;

	VMEM_ASSERT_LOCKED(vm);

	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	/* Merge with the following segment if it is free. */
	t = TAILQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) < t->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt_free(vm, t);
	}

	/* Merge with the preceding segment if it is free. */
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) < bt->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_free(vm, t);
	}

	/* The previous tag must now be the span header or a busy tag. */
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);

	/*
	 * If the freed segment now covers its entire (non-static) span,
	 * return the whole span to the source via the release callback.
	 */
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_free(vm, bt);
		bt_remseg(vm, t);
		bt_free(vm, t);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		/* bt_freetrim() drops the lock. */
		bt_freetrim(vm, BT_MAXFREE);
		/* Release callback is invoked without the lock held. */
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		/* bt_freetrim() drops the lock. */
		bt_freetrim(vm, BT_MAXFREE);
	}
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
*/
int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{
	int rv;

	VMEM_LOCK(vm);
	/*
	 * Static spans are never handed back to a release callback
	 * (vmem_xfree_bt releases only BT_TYPE_SPAN, not SPAN_STATIC).
	 */
	rv = vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
	VMEM_UNLOCK(vm);

	return rv;
}

/*
 * vmem_size: information about arenas size
 *
 * => return free/allocated size in arena
 * => panics on any typemask other than VMEM_ALLOC, VMEM_FREE,
 *    or their combination
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		/* space currently handed out to callers */
		return vm->vm_inuse;
	case VMEM_FREE:
		/* space still available in the arena */
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		/* whole arena */
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- rehash */

#if defined(_KERNEL)
/* Periodic busy-tag hash resizing, driven by a callout + workqueue. */
static struct callout vmem_rehash_ch;
static int vmem_rehash_interval;	/* callout period, in ticks */
static struct workqueue *vmem_rehash_wq;
static struct work vmem_rehash_wk;

/*
 * vmem_rehash_all: workqueue handler.  Walk every arena and resize its
 * busy-tag hash table when the desired size (clamped vm_maxbusytag)
 * differs from the current size by more than a factor of two.
 */
static void
vmem_rehash_all(struct work *wk, void *dummy)
{
	vmem_t *vm;

	KASSERT(wk == &vmem_rehash_wk);
	mutex_enter(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		size_t desired;
		size_t current;

		/* Relaxed reads: a stale value only delays the resize. */
		desired = atomic_load_relaxed(&vm->vm_maxbusytag);
		current = atomic_load_relaxed(&vm->vm_hashsize);

		if (desired > VMEM_HASHSIZE_MAX) {
			desired = VMEM_HASHSIZE_MAX;
		} else if (desired < VMEM_HASHSIZE_MIN) {
			desired = VMEM_HASHSIZE_MIN;
		}
		if (desired > current * 2 || desired * 2 < current) {
			/* VM_NOSLEEP: best effort, retried next interval. */
			vmem_rehash(vm, desired, VM_NOSLEEP);
		}
	}
	mutex_exit(&vmem_list_lock);

	/* Re-arm for the next pass. */
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}

/* Callout handler: defer the actual rehash work to process context. */
static void
vmem_rehash_all_kick(void *dummy)
{

	workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
}

/*
 * vmem_rehash_start: create the rehash workqueue and arm the periodic
 * callout that drives it (every 10 seconds).
 */
void
vmem_rehash_start(void)
{
	int error;

	error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
	    vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
	if (error) {
		panic("%s: workqueue_create %d\n", __func__, error);
	}
	callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
	callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);

	vmem_rehash_interval = hz * 10;
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}
#endif /* defined(_KERNEL) */

/* ---- debug */

#if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)

static void
bt_dump(const bt_t *, void (*)(const char *, ...) __printflike(1, 2)); static const char * bt_type_string(int type) { static const char * const table[] = { [BT_TYPE_BUSY] = "busy", [BT_TYPE_FREE] = "free", [BT_TYPE_SPAN] = "span", [BT_TYPE_SPAN_STATIC] = "static span", }; if (type >= __arraycount(table)) { return "BOGUS"; } return table[type]; } static void bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) { (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, bt->bt_type, bt_type_string(bt->bt_type)); } static void vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...) __printflike(1, 2)) { const bt_t *bt; int i; (*pr)("vmem %p '%s'\n", vm, vm->vm_name); TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { bt_dump(bt, pr); } for (i = 0; i < VMEM_MAXORDER; i++) { const struct vmem_freelist *fl = &vm->vm_freelist[i]; if (LIST_EMPTY(fl)) { continue; } (*pr)("freelist[%d]\n", i); LIST_FOREACH(bt, fl, bt_freelist) { bt_dump(bt, pr); } } } #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ #if defined(DDB) static bt_t * vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) { bt_t *bt; TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { if (BT_ISSPAN_P(bt)) { continue; } if (bt->bt_start <= addr && addr <= BT_END(bt)) { return bt; } } return NULL; } void vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) { vmem_t *vm; LIST_FOREACH(vm, &vmem_list, vm_alllist) { bt_t *bt; bt = vmem_whatis_lookup(vm, addr); if (bt == NULL) { continue; } (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", (void *)addr, (void *)bt->bt_start, (size_t)(addr - bt->bt_start), vm->vm_name, (bt->bt_type == BT_TYPE_BUSY) ? 
"allocated" : "free"); } } void vmem_printall(const char *modif, void (*pr)(const char *, ...)) { const vmem_t *vm; LIST_FOREACH(vm, &vmem_list, vm_alllist) { vmem_dump(vm, pr); } } void vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) { const vmem_t *vm = (const void *)addr; vmem_dump(vm, pr); } #endif /* defined(DDB) */ #if defined(_KERNEL) #define vmem_printf printf #else #include <stdio.h> #include <stdarg.h> static void vmem_printf(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } #endif #if defined(VMEM_SANITY) static bool vmem_check_sanity(vmem_t *vm) { const bt_t *bt, *bt2; KASSERT(vm != NULL); TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { if (bt->bt_start > BT_END(bt)) { printf("corrupted tag\n"); bt_dump(bt, vmem_printf); return false; } } TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { if (bt == bt2) { continue; } if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { continue; } if (bt->bt_start <= BT_END(bt2) && bt2->bt_start <= BT_END(bt)) { printf("overwrapped tags\n"); bt_dump(bt, vmem_printf); bt_dump(bt2, vmem_printf); return false; } } } return true; } static void vmem_check(vmem_t *vm) { if (!vmem_check_sanity(vm)) { panic("insanity vmem %p", vm); } } #endif /* defined(VMEM_SANITY) */ #if defined(UNITTEST) int main(void) { int rc; vmem_t *vm; vmem_addr_t p; struct reg { vmem_addr_t p; vmem_size_t sz; bool x; } *reg = NULL; int nreg = 0; int nalloc = 0; int nfree = 0; vmem_size_t total = 0; #if 1 vm_flag_t strat = VM_INSTANTFIT; #else vm_flag_t strat = VM_BESTFIT; #endif vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, #ifdef _KERNEL IPL_NONE #else 0 #endif ); if (vm == NULL) { printf("vmem_create\n"); exit(EXIT_FAILURE); } vmem_dump(vm, vmem_printf); rc = vmem_add(vm, 0, 50, VM_SLEEP); assert(rc == 0); rc = vmem_add(vm, 100, 200, VM_SLEEP); assert(rc == 0); rc = vmem_add(vm, 2000, 1, VM_SLEEP); assert(rc == 0); rc = 
vmem_add(vm, 40000, 65536, VM_SLEEP); assert(rc == 0); rc = vmem_add(vm, 10000, 10000, VM_SLEEP); assert(rc == 0); rc = vmem_add(vm, 500, 1000, VM_SLEEP); assert(rc == 0); rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); assert(rc == 0); rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); assert(rc != 0); rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); assert(rc == 0 && p == 0); vmem_xfree(vm, p, 50); rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); assert(rc == 0 && p == 0); rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); assert(rc != 0); rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); assert(rc != 0); rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); assert(rc == 0); vmem_dump(vm, vmem_printf); for (;;) { struct reg *r; int t = rand() % 100; if (t > 45) { /* alloc */ vmem_size_t sz = rand() % 500 + 1; bool x; vmem_size_t align, phase, nocross; vmem_addr_t minaddr, maxaddr; if (t > 70) { x = true; /* XXX */ align = 1 << (rand() % 15); phase = rand() % 65536; nocross = 1 << (rand() % 15); if (align <= phase) { phase = 0; } if (VMEM_CROSS_P(phase, phase + sz - 1, nocross)) { nocross = 0; } do { minaddr = rand() % 50000; maxaddr = rand() % 70000; } while (minaddr > maxaddr); printf("=== xalloc %" PRIu64 " align=%" PRIu64 ", phase=%" PRIu64 ", nocross=%" PRIu64 ", min=%" PRIu64 ", max=%" PRIu64 "\n", (uint64_t)sz, (uint64_t)align, (uint64_t)phase, (uint64_t)nocross, (uint64_t)minaddr, (uint64_t)maxaddr); rc = vmem_xalloc(vm, sz, align, phase, nocross, minaddr, maxaddr, strat|VM_SLEEP, &p); } else { x = false; printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); } printf("-> %" PRIu64 "\n", (uint64_t)p); vmem_dump(vm, vmem_printf); if (rc != 0) { if (x) { continue; } break; } nreg++; reg = realloc(reg, sizeof(*reg) * nreg); r = ®[nreg - 1]; r->p = p; r->sz = sz; r->x = x; 
total += sz; nalloc++; } else if (nreg != 0) { /* free */ r = ®[rand() % nreg]; printf("=== free %" PRIu64 ", %" PRIu64 "\n", (uint64_t)r->p, (uint64_t)r->sz); if (r->x) { vmem_xfree(vm, r->p, r->sz); } else { vmem_free(vm, r->p, r->sz); } total -= r->sz; vmem_dump(vm, vmem_printf); *r = reg[nreg - 1]; nreg--; nfree++; } printf("total=%" PRIu64 "\n", (uint64_t)total); } fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", (uint64_t)total, nalloc, nfree); exit(EXIT_SUCCESS); } #endif /* defined(UNITTEST) */ |
| 1 1 8 2 2 2 1 1 7 2 2 1 3 3 3 2 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | /* $NetBSD: pci_usrreq.c,v 1.31 2021/09/05 03:47:24 mrg Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. 
may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * User -> kernel interface for PCI bus access. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: pci_usrreq.c,v 1.31 2021/09/05 03:47:24 mrg Exp $"); #ifdef _KERNEL_OPT #include "opt_pci.h" #endif #include <sys/param.h> #include <sys/conf.h> #include <sys/device.h> #include <sys/ioctl.h> #include <sys/proc.h> #include <sys/systm.h> #include <sys/errno.h> #include <sys/fcntl.h> #include <sys/kauth.h> #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> #include <dev/pci/pciio.h> static int pciopen(dev_t dev, int flags, int mode, struct lwp *l) { device_t dv; dv = device_lookup(&pci_cd, minor(dev)); if (dv == NULL) return ENXIO; return 0; } static int pciioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { struct pci_softc *sc = device_lookup_private(&pci_cd, minor(dev)); struct pci_child *child; struct pciio_bdf_cfgreg *bdfr; struct pciio_businfo *binfo; struct pciio_drvname *dname; struct pciio_drvnameonbus *dnameonbus; pcitag_t tag; switch (cmd) { case PCI_IOC_BDF_CFGREAD: case PCI_IOC_BDF_CFGWRITE: bdfr = data; if (bdfr->bus > 255 || bdfr->device >= sc->sc_maxndevs || bdfr->function > 
7 || ISSET(bdfr->cfgreg.reg, 3)) return EINVAL; tag = pci_make_tag(sc->sc_pc, bdfr->bus, bdfr->device, bdfr->function); if (cmd == PCI_IOC_BDF_CFGREAD) { bdfr->cfgreg.val = pci_conf_read(sc->sc_pc, tag, bdfr->cfgreg.reg); } else { if ((flag & FWRITE) == 0) return EBADF; pci_conf_write(sc->sc_pc, tag, bdfr->cfgreg.reg, bdfr->cfgreg.val); } return 0; case PCI_IOC_BUSINFO: binfo = data; binfo->busno = sc->sc_bus; binfo->maxdevs = sc->sc_maxndevs; return 0; case PCI_IOC_DRVNAME: dname = data; if (dname->device >= sc->sc_maxndevs || dname->function > 7) return EINVAL; child = &sc->PCI_SC_DEVICESC(dname->device, dname->function); if (!child->c_dev) return ENXIO; strlcpy(dname->name, device_xname(child->c_dev), sizeof dname->name); return 0; case PCI_IOC_DRVNAMEONBUS: dnameonbus = data; int i; for (i = 0; i < pci_cd.cd_ndevs; i++) { sc = device_lookup_private(&pci_cd, i); if (sc == NULL) continue; if (sc->sc_bus == dnameonbus->bus) break; /* found the right bus */ } if (i == pci_cd.cd_ndevs || sc == NULL) return ENXIO; if (dnameonbus->device >= sc->sc_maxndevs || dnameonbus->function > 7) return EINVAL; child = &sc->PCI_SC_DEVICESC(dnameonbus->device, dnameonbus->function); if (!child->c_dev) return ENXIO; strlcpy(dnameonbus->name, device_xname(child->c_dev), sizeof dnameonbus->name); return 0; default: return ENOTTY; } } static paddr_t pcimmap(dev_t dev, off_t offset, int prot) { struct pci_softc *sc = device_lookup_private(&pci_cd, minor(dev)); struct pci_child *c; struct pci_range *r; int flags = 0; int device, range; if (kauth_authorize_machdep(kauth_cred_get(), KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL) != 0) { return -1; } /* * Since we allow mapping of the entire bus, we * take the offset to be the address on the bus, * and pass 0 as the offset into that range. * * XXX Need a way to deal with linear/etc. 
* * XXX we rely on MD mmap() methods to enforce limits since these * are hidden in *_tag_t structs if they exist at all */ #ifdef PCI_MAGIC_IO_RANGE /* * first, check if someone's trying to map the IO range * XXX this assumes 64kB IO space even though some machines can have * significantly more than that - macppc's bandit host bridge allows * 8MB IO space and sparc64 may have the entire 4GB available. The * firmware on both tries to use the lower 64kB first though and * exausting it is pretty difficult so we should be safe */ if ((offset >= PCI_MAGIC_IO_RANGE) && (offset < (PCI_MAGIC_IO_RANGE + 0x10000))) { return bus_space_mmap(sc->sc_iot, offset - PCI_MAGIC_IO_RANGE, 0, prot, 0); } #endif /* PCI_MAGIC_IO_RANGE */ for (device = 0; device < __arraycount(sc->sc_devices); device++) { c = &sc->sc_devices[device]; if (c->c_dev == NULL) continue; for (range = 0; range < __arraycount(c->c_range); range++) { r = &c->c_range[range]; if (r->r_size == 0) break; if (offset >= r->r_offset && offset < r->r_offset + r->r_size) { flags = r->r_flags; break; } } } return bus_space_mmap(sc->sc_memt, offset, 0, prot, flags); } const struct cdevsw pci_cdevsw = { .d_open = pciopen, .d_close = nullclose, .d_read = noread, .d_write = nowrite, .d_ioctl = pciioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = pcimmap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER }; /* * pci_devioctl: * * PCI ioctls that can be performed on devices directly. */ int pci_devioctl(pci_chipset_tag_t pc, pcitag_t tag, u_long cmd, void *data, int flag, struct lwp *l) { struct pciio_cfgreg *r = (void *) data; switch (cmd) { case PCI_IOC_CFGREAD: r->val = pci_conf_read(pc, tag, r->reg); break; case PCI_IOC_CFGWRITE: if ((flag & FWRITE) == 0) return EBADF; pci_conf_write(pc, tag, r->reg, r->val); break; default: return EPASSTHROUGH; } return 0; } |
| 1 1 37 37 34 2 1 1 36 16 13 1 12 1 11 11 14 2 2 12 13 11 11 11 4 3 10 11 7 11 5 3 6 4 4 4 2 2 6 6 6 6 10 16 3 1 3 16 7 7 5 2 10 10 7 2 2 2 2 2 16 16 2 47 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 
/*	$NetBSD: udp6_usrreq.c,v 1.150 2021/02/19 14:52:00 christos Exp $	*/
/*	$KAME: udp6_usrreq.c,v 1.86 2001/05/27 17:33:00 itojun Exp $	*/
/*	$KAME: udp6_output.c,v 1.43 2001/10/15 09:19:52 itojun Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: udp6_usrreq.c,v 1.150 2021/02/19 14:52:00 christos Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_inet_csum.h" #include "opt_ipsec.h" #include "opt_net_mpsafe.h" #endif #include <sys/param.h> #include <sys/mbuf.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/syslog.h> #include <sys/domain.h> #include <sys/sysctl.h> #include <net/if.h> #include <net/if_types.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/in_systm.h> #include <netinet/in_offload.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/in_pcb.h> #include <netinet/udp.h> #include <netinet/udp_var.h> #include <netinet/udp_private.h> #include <netinet/ip6.h> #include <netinet/icmp6.h> #include <netinet6/ip6_var.h> #include <netinet6/ip6_private.h> #include <netinet6/in6_pcb.h> #include <netinet6/udp6_var.h> #include <netinet6/udp6_private.h> #include <netinet6/ip6protosw.h> #include <netinet6/scope6_var.h> #ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/esp.h> #ifdef INET6 #include <netipsec/ipsec6.h> #endif #endif #include "faith.h" #if defined(NFAITH) && NFAITH > 0 #include <net/if_faith.h> #endif /* * UDP protocol implementation. * Per RFC 768, August, 1980. 
*/ extern struct inpcbtable udbtable; percpu_t *udp6stat_percpu; /* UDP on IP6 parameters */ static int udp6_sendspace = 9216; /* really max datagram size */ static int udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6)); /* 40 1K datagrams */ static void udp6_notify(struct in6pcb *, int); static void sysctl_net_inet6_udp6_setup(struct sysctllog **); #ifdef IPSEC static int udp6_espinudp(struct mbuf **, int); #endif #ifdef UDP_CSUM_COUNTERS #include <sys/device.h> struct evcnt udp6_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum bad"); struct evcnt udp6_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum ok"); struct evcnt udp6_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum data"); struct evcnt udp6_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "swcsum"); EVCNT_ATTACH_STATIC(udp6_hwcsum_bad); EVCNT_ATTACH_STATIC(udp6_hwcsum_ok); EVCNT_ATTACH_STATIC(udp6_hwcsum_data); EVCNT_ATTACH_STATIC(udp6_swcsum); #define UDP_CSUM_COUNTER_INCR(ev) (ev)->ev_count++ #else #define UDP_CSUM_COUNTER_INCR(ev) /* nothing */ #endif void udp6_init(void) { sysctl_net_inet6_udp6_setup(NULL); udp6stat_percpu = percpu_alloc(sizeof(uint64_t) * UDP6_NSTATS); udp_init_common(); } /* * Notify a udp user of an asynchronous error; * just wake up so that he can collect error status. 
 */
static void
udp6_notify(struct in6pcb *in6p, int errno)
{
	/* Record the error on the socket and wake both directions. */
	in6p->in6p_socket->so_error = errno;
	sorwakeup(in6p->in6p_socket);
	sowwakeup(in6p->in6p_socket);
}

/*
 * udp6_ctlinput: handle an ICMPv6 control message (PRC_* command) for
 * UDPv6.  Redirects become route-change notifications; PRC_MSGSIZE
 * additionally drives path-MTU discovery via icmp6_mtudisc_update();
 * other mapped errors are delivered to matching PCBs through
 * in6_pcbnotify().  Always returns NULL.
 */
void *
udp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
{
	struct udphdr uh;
	struct ip6_hdr *ip6;
	const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
	struct mbuf *m;
	int off;
	void *cmdarg;
	struct ip6ctlparam *ip6cp = NULL;
	const struct sockaddr_in6 *sa6_src = NULL;
	void (*notify)(struct in6pcb *, int) = udp6_notify;
	/* Only the port fields of the embedded UDP header are needed. */
	struct udp_portonly {
		u_int16_t uh_sport;
		u_int16_t uh_dport;
	} *uhp;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	if (PRC_IS_REDIRECT(cmd))
		notify = in6_rtchange, d = NULL;
	else if (cmd == PRC_HOSTDEAD)
		d = NULL;
	else if (cmd == PRC_MSGSIZE) {
		/* special code is present, see below */
		notify = in6_rtchange;
	} else if (inet6ctlerrmap[cmd] == 0)
		return NULL;

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		ip6cp = (struct ip6ctlparam *)d;
		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
		cmdarg = ip6cp->ip6c_cmdarg;
		sa6_src = ip6cp->ip6c_src;
	} else {
		m = NULL;
		ip6 = NULL;
		cmdarg = NULL;
		sa6_src = &sa6_any;
		off = 0;
	}

	if (ip6) {
		/* check if we can safely examine src and dst ports */
		if (m->m_pkthdr.len < off + sizeof(*uhp)) {
			if (cmd == PRC_MSGSIZE)
				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
			return NULL;
		}

		memset(&uh, 0, sizeof(uh));
		m_copydata(m, off, sizeof(*uhp), (void *)&uh);

		if (cmd == PRC_MSGSIZE) {
			int valid = 0;

			/*
			 * Check to see if we have a valid UDP socket
			 * corresponding to the address in the ICMPv6 message
			 * payload.
			 */
			if (in6_pcblookup_connect(&udbtable, &sa6->sin6_addr,
			    uh.uh_dport,
			    (const struct in6_addr *)&sa6_src->sin6_addr,
			    uh.uh_sport, 0, 0))
				valid++;
#if 0
			/*
			 * As the use of sendto(2) is fairly popular,
			 * we may want to allow non-connected pcb too.
			 * But it could be too weak against attacks...
			 * We should at least check if the local address (= s)
			 * is really ours.
			 */
			else if (in6_pcblookup_bind(&udbtable, &sa6->sin6_addr,
			    uh.uh_dport, 0))
				valid++;
#endif

			/*
			 * Depending on the value of "valid" and routing table
			 * size (mtudisc_{hi,lo}wat), we will:
			 * - recalculate the new MTU and create the
			 *   corresponding routing entry, or
			 * - ignore the MTU change notification.
			 */
			icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);

			/*
			 * regardless of if we called
			 * icmp6_mtudisc_update(), we need to call
			 * in6_pcbnotify(), to notify path MTU change
			 * to the userland (RFC3542), because some
			 * unconnected sockets may share the same
			 * destination and want to know the path MTU.
			 */
		}

		(void)in6_pcbnotify(&udbtable, sa, uh.uh_dport,
		    sin6tocsa(sa6_src), uh.uh_sport, cmd, cmdarg, notify);
	} else {
		(void)in6_pcbnotify(&udbtable, sa, 0,
		    sin6tocsa(sa6_src), 0, cmd, cmdarg, notify);
	}
	return NULL;
}

/*
 * udp6_ctloutput: socket-option processing for UDP sockets.  Only the
 * IPPROTO_UDP level UDP_ENCAP option (ESP-in-UDP encapsulation toggle)
 * is handled here; everything else is delegated to ip_ctloutput()/
 * ip6_ctloutput() according to the socket's domain.
 */
int
udp6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
	int s;
	int error = 0;
	struct in6pcb *in6p;
	int family;
	int optval;

	family = so->so_proto->pr_domain->dom_family;

	s = splsoftnet();
	switch (family) {
#ifdef INET
	case PF_INET:
		if (sopt->sopt_level != IPPROTO_UDP) {
			error = ip_ctloutput(op, so, sopt);
			goto end;
		}
		break;
#endif
#ifdef INET6
	case PF_INET6:
		if (sopt->sopt_level != IPPROTO_UDP) {
			error = ip6_ctloutput(op, so, sopt);
			goto end;
		}
		break;
#endif
	default:
		error = EAFNOSUPPORT;
		goto end;
	}

	switch (op) {
	case PRCO_SETOPT:
		in6p = sotoin6pcb(so);

		switch (sopt->sopt_name) {
		case UDP_ENCAP:
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;

			switch(optval) {
			case 0:
				/* Disable ESP-in-UDP decapsulation. */
				in6p->in6p_flags &= ~IN6P_ESPINUDP;
				break;

			case UDP_ENCAP_ESPINUDP:
				in6p->in6p_flags |= IN6P_ESPINUDP;
				break;

			default:
				error = EINVAL;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	default:
		/* Only PRCO_SETOPT is supported at the UDP level. */
		error = EINVAL;
		break;
	}

end:
	splx(s);
	return error;
}

static void
udp6_sendup(struct mbuf *m, int off /* offset of data portion */,
    struct sockaddr *src, struct socket *so)
{
	struct mbuf *opts =
NULL; struct mbuf *n; struct in6pcb *in6p; KASSERT(so != NULL); KASSERT(so->so_proto->pr_domain->dom_family == AF_INET6); in6p = sotoin6pcb(so); KASSERT(in6p != NULL); #if defined(IPSEC) if (ipsec_used && ipsec_in_reject(m, in6p)) { if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) icmp6_error(n, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); return; } #endif if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) { if (in6p->in6p_flags & IN6P_CONTROLOPTS || SOOPT_TIMESTAMP(in6p->in6p_socket->so_options)) { struct ip6_hdr *ip6 = mtod(n, struct ip6_hdr *); ip6_savecontrol(in6p, &opts, ip6, n); } m_adj(n, off); if (sbappendaddr(&so->so_rcv, src, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); UDP6_STATINC(UDP6_STAT_FULLSOCK); soroverflow(so); } else sorwakeup(so); } } int udp6_realinput(int af, struct sockaddr_in6 *src, struct sockaddr_in6 *dst, struct mbuf **mp, int off) { u_int16_t sport, dport; int rcvcnt; struct in6_addr src6, *dst6; const struct in_addr *dst4; struct inpcb_hdr *inph; struct in6pcb *in6p; struct mbuf *m = *mp; rcvcnt = 0; off += sizeof(struct udphdr); /* now, offset of payload */ if (af != AF_INET && af != AF_INET6) goto bad; if (src->sin6_family != AF_INET6 || dst->sin6_family != AF_INET6) goto bad; src6 = src->sin6_addr; if (sa6_recoverscope(src) != 0) { /* XXX: should be impossible. */ goto bad; } sport = src->sin6_port; dport = dst->sin6_port; dst4 = (struct in_addr *)&dst->sin6_addr.s6_addr[12]; dst6 = &dst->sin6_addr; if (IN6_IS_ADDR_MULTICAST(dst6) || (af == AF_INET && IN_MULTICAST(dst4->s_addr))) { /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multi/broadcasts on the same port. 
* (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * KAME note: traditionally we dropped udpiphdr from mbuf here. * we need udpiphdr for IPsec processing so we do that later. */ /* * Locate pcb(s) for datagram. */ TAILQ_FOREACH(inph, &udbtable.inpt_queue, inph_queue) { in6p = (struct in6pcb *)inph; if (in6p->in6p_af != AF_INET6) continue; if (in6p->in6p_lport != dport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, dst6)) continue; } else { if (IN6_IS_ADDR_V4MAPPED(dst6) && (in6p->in6p_flags & IN6P_IPV6_V6ONLY)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &src6) || in6p->in6p_fport != sport) continue; } else { if (IN6_IS_ADDR_V4MAPPED(&src6) && (in6p->in6p_flags & IN6P_IPV6_V6ONLY)) continue; } udp6_sendup(m, off, sin6tosa(src), in6p->in6p_socket); rcvcnt++; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It assumes that an application will never * clear these options after setting them. */ if ((in6p->in6p_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } } else { /* * Locate pcb for datagram. 
*/ in6p = in6_pcblookup_connect(&udbtable, &src6, sport, dst6, dport, 0, 0); if (in6p == 0) { UDP_STATINC(UDP_STAT_PCBHASHMISS); in6p = in6_pcblookup_bind(&udbtable, dst6, dport, 0); if (in6p == 0) return rcvcnt; } #ifdef IPSEC /* Handle ESP over UDP */ if (in6p->in6p_flags & IN6P_ESPINUDP) { switch (udp6_espinudp(mp, off)) { case -1: /* Error, m was freed */ rcvcnt = -1; goto bad; case 1: /* ESP over UDP */ rcvcnt++; goto bad; case 0: /* plain UDP */ default: /* Unexpected */ /* * Normal UDP processing will take place, * m may have changed. */ m = *mp; break; } } #endif if (in6p->in6p_overudp_cb != NULL) { int ret; ret = in6p->in6p_overudp_cb(mp, off, in6p->in6p_socket, sin6tosa(src), in6p->in6p_overudp_arg); switch (ret) { case -1: /* Error, m was freed */ rcvcnt = -1; goto bad; case 1: /* Foo over UDP */ KASSERT(*mp == NULL); rcvcnt++; goto bad; case 0: /* plain UDP */ default: /* Unexpected */ /* * Normal UDP processing will take place, * m may have changed. */ break; } } udp6_sendup(m, off, sin6tosa(src), in6p->in6p_socket); rcvcnt++; } bad: return rcvcnt; } int udp6_input_checksum(struct mbuf *m, const struct udphdr *uh, int off, int len) { /* * XXX it's better to record and check if this mbuf is * already checked. */ if (__predict_false((m->m_flags & M_LOOP) && !udp_do_loopback_cksum)) { goto good; } if (uh->uh_sum == 0) { UDP6_STATINC(UDP6_STAT_NOSUM); goto bad; } switch (m->m_pkthdr.csum_flags & ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_UDPv6) | M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) { case M_CSUM_UDPv6|M_CSUM_TCP_UDP_BAD: UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_bad); UDP6_STATINC(UDP6_STAT_BADSUM); goto bad; #if 0 /* notyet */ case M_CSUM_UDPv6|M_CSUM_DATA: #endif case M_CSUM_UDPv6: /* Checksum was okay. */ UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_ok); break; default: /* * Need to compute it ourselves. Maybe skip checksum * on loopback interfaces. 
*/ UDP_CSUM_COUNTER_INCR(&udp6_swcsum); if (in6_cksum(m, IPPROTO_UDP, off, len) != 0) { UDP6_STATINC(UDP6_STAT_BADSUM); goto bad; } } good: return 0; bad: return -1; } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; int off = *offp; struct sockaddr_in6 src, dst; struct ip6_hdr *ip6; struct udphdr *uh; u_int32_t plen, ulen; ip6 = mtod(m, struct ip6_hdr *); #if defined(NFAITH) && 0 < NFAITH if (faithprefix(&ip6->ip6_dst)) { /* send icmp6 host unreach? */ m_freem(m); return IPPROTO_DONE; } #endif UDP6_STATINC(UDP6_STAT_IPACKETS); /* Check for jumbogram is done in ip6_input. We can trust pkthdr.len. */ plen = m->m_pkthdr.len - off; IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(struct udphdr)); if (uh == NULL) { IP6_STATINC(IP6_STAT_TOOSHORT); return IPPROTO_DONE; } /* * Enforce alignment requirements that are violated in * some cases, see kern/50766 for details. */ if (ACCESSIBLE_POINTER(uh, struct udphdr) == 0) { m = m_copyup(m, off + sizeof(struct udphdr), 0); if (m == NULL) { IP6_STATINC(IP6_STAT_TOOSHORT); return IPPROTO_DONE; } ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)(mtod(m, char *) + off); } KASSERT(ACCESSIBLE_POINTER(uh, struct udphdr)); ulen = ntohs((u_short)uh->uh_ulen); /* * RFC2675 section 4: jumbograms will have 0 in the UDP header field, * iff payload length > 0xffff. */ if (ulen == 0 && plen > 0xffff) ulen = plen; if (plen != ulen) { UDP6_STATINC(UDP6_STAT_BADLEN); goto bad; } /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto bad; /* * Checksum extended UDP header and data. Maybe skip checksum * on loopback interfaces. */ if (udp6_input_checksum(m, uh, off, ulen)) goto bad; /* * Construct source and dst sockaddrs. 
*/ memset(&src, 0, sizeof(src)); src.sin6_family = AF_INET6; src.sin6_len = sizeof(struct sockaddr_in6); src.sin6_addr = ip6->ip6_src; src.sin6_port = uh->uh_sport; memset(&dst, 0, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(struct sockaddr_in6); dst.sin6_addr = ip6->ip6_dst; dst.sin6_port = uh->uh_dport; if (udp6_realinput(AF_INET6, &src, &dst, &m, off) == 0) { if (m->m_flags & M_MCAST) { UDP6_STATINC(UDP6_STAT_NOPORTMCAST); goto bad; } UDP6_STATINC(UDP6_STAT_NOPORT); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); m = NULL; } bad: if (m) m_freem(m); return IPPROTO_DONE; } int udp6_output(struct in6pcb * const in6p, struct mbuf *m, struct sockaddr_in6 * const addr6, struct mbuf * const control, struct lwp * const l) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr _laddr, *laddr, *faddr; struct in6_addr laddr_mapped; /* XXX ugly */ struct sockaddr_in6 *sin6 = NULL; struct ifnet *oifp = NULL; int scope_ambiguous = 0; u_int16_t fport; int error = 0; struct ip6_pktopts *optp = NULL; struct ip6_pktopts opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); #ifdef INET struct ip *ip; struct udpiphdr *ui; int flags = 0; #endif struct sockaddr_in6 tmp; if (addr6) { sin6 = addr6; if (sin6->sin6_len != sizeof(*sin6)) { error = EINVAL; goto release; } if (sin6->sin6_family != AF_INET6) { error = EAFNOSUPPORT; goto release; } /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. 
*/ if (sin6->sin6_scope_id == 0 && !ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0) goto release; } if (control) { if (__predict_false(l == NULL)) { panic("%s: control but no lwp", __func__); } if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, l->l_cred, IPPROTO_UDP)) != 0) goto release; optp = &opt; } else optp = in6p->in6p_outputopts; if (sin6) { /* * Slightly different than v4 version in that we call * in6_selectsrc and in6_pcbsetport to fill in the local * address and port rather than in_pcbconnect. in_pcbconnect * sets in6p_faddr which causes EISCONN below to be hit on * subsequent sendto. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } faddr = &sin6->sin6_addr; fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. 
*/ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { struct psref psref; int bound = curlwp_bind(); error = in6_selectsrc(sin6, optp, in6p->in6p_moptions, &in6p->in6p_route, &in6p->in6p_laddr, &oifp, &psref, &_laddr); if (error) laddr = NULL; else laddr = &_laddr; if (oifp && scope_ambiguous && (error = in6_setscope(&sin6->sin6_addr, oifp, NULL))) { if_put(oifp, &psref); curlwp_bindx(bound); goto release; } if_put(oifp, &psref); curlwp_bindx(bound); } else { /* * XXX: freebsd[34] does not have in_selectsrc, but * we can omit the whole part because freebsd4 calls * udp_output() directly in this case, and thus we'll * never see this path. */ if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { struct sockaddr_in sin_dst; struct in_addr ina; struct in_ifaddr *ia4; struct psref _psref; int bound; memcpy(&ina, &faddr->s6_addr[12], sizeof(ina)); sockaddr_in_init(&sin_dst, &ina, 0); bound = curlwp_bind(); ia4 = in_selectsrc(&sin_dst, &in6p->in6p_route, in6p->in6p_socket->so_options, NULL, &error, &_psref); if (ia4 == NULL) { curlwp_bindx(bound); if (error == 0) error = EADDRNOTAVAIL; goto release; } memset(&laddr_mapped, 0, sizeof(laddr_mapped)); laddr_mapped.s6_addr16[5] = 0xffff; /* ugly */ memcpy(&laddr_mapped.s6_addr[12], &IA_SIN(ia4)->sin_addr, sizeof(IA_SIN(ia4)->sin_addr)); ia4_release(ia4, &_psref); curlwp_bindx(bound); laddr = &laddr_mapped; } else { laddr = &in6p->in6p_laddr; /* XXX */ } } if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (in6p->in6p_lport == 0) { /* * Craft a sockaddr_in6 for the local endpoint. Use the * "any" as a base, set the address, and recover the * scope. 
*/ struct sockaddr_in6 lsin6 = *((const struct sockaddr_in6 *)in6p->in6p_socket->so_proto->pr_domain->dom_sa_any); lsin6.sin6_addr = *laddr; error = sa6_recoverscope(&lsin6); if (error) goto release; error = in6_pcbsetport(&lsin6, in6p, l); if (error) { in6p->in6p_laddr = in6addr_any; goto release; } } } else { if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) { if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &in6p->in6p_laddr; faddr = &in6p->in6p_faddr; fport = in6p->in6p_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, char *) + hlen); udp6->uh_sport = in6p->in6p_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (plen <= 0xffff) udp6->uh_ulen = htons((u_int16_t)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; #if 0 /* ip6_plen will be filled in ip6_output. 
*/ ip6->ip6_plen = htons((u_int16_t)plen); #endif ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_hlim = in6_selecthlim_rt(in6p); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; udp6->uh_sum = in6_cksum_phdr(laddr, faddr, htonl(plen), htonl(IPPROTO_UDP)); m->m_pkthdr.csum_flags = M_CSUM_UDPv6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); UDP6_STATINC(UDP6_STAT_OPACKETS); error = ip6_output(m, optp, &in6p->in6p_route, 0, in6p->in6p_moptions, in6p, NULL); break; case AF_INET: #ifdef INET /* can't transmit jumbogram over IPv4 */ if (plen > 0xffff) { error = EMSGSIZE; goto release; } ip = mtod(m, struct ip *); ui = (struct udpiphdr *)ip; memset(ui->ui_x1, 0, sizeof(ui->ui_x1)); ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons(plen); memcpy(&ui->ui_src, &laddr->s6_addr[12], sizeof(ui->ui_src)); ui->ui_ulen = ui->ui_len; flags = (in6p->in6p_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); memcpy(&ui->ui_dst, &faddr->s6_addr[12], sizeof(ui->ui_dst)); udp6->uh_sum = in_cksum(m, hlen + plen); if (udp6->uh_sum == 0) udp6->uh_sum = 0xffff; ip->ip_len = htons(hlen + plen); ip->ip_ttl = in6_selecthlim(in6p, NULL); /* XXX */ ip->ip_tos = 0; /* XXX */ UDP_STATINC(UDP_STAT_OPACKETS); error = ip_output(m, NULL, &in6p->in6p_route, flags /* XXX */, in6p->in6p_v4moptions, NULL); break; #else error = EAFNOSUPPORT; goto release; #endif } goto releaseopt; release: m_freem(m); releaseopt: if (control) { if (optp == &opt) ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static int udp6_attach(struct socket *so, int proto) { struct in6pcb *in6p; int s, error; KASSERT(sotoin6pcb(so) == NULL); sosetlock(so); error = soreserve(so, udp6_sendspace, udp6_recvspace); if (error) { return error; } /* * MAPPED_ADDR implementation spec: * Always attach for IPv6, and only when necessary for IPv4. 
	s = splsoftnet();
	error = in6_pcballoc(so, &udbtable);
	splx(s);
	if (error) {
		return error;
	}
	in6p = sotoin6pcb(so);
	in6p->in6p_cksum = -1;	/* just to be sure */
	KASSERT(solocked(so));
	return 0;
}

/* udp6_detach: free the PCB attached to the socket. */
static void
udp6_detach(struct socket *so)
{
	struct in6pcb *in6p = sotoin6pcb(so);
	int s;

	KASSERT(solocked(so));
	KASSERT(in6p != NULL);

	s = splsoftnet();
	in6_pcbdetach(in6p);
	splx(s);
}

/* udp6_accept: not supported on datagram sockets. */
static int
udp6_accept(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/* udp6_bind: bind the PCB to the requested local address/port. */
static int
udp6_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct in6pcb *in6p = sotoin6pcb(so);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	int error = 0;
	int s;

	KASSERT(solocked(so));
	KASSERT(in6p != NULL);

	s = splsoftnet();
	error = in6_pcbbind(in6p, sin6, l);
	splx(s);
	return error;
}

/* udp6_listen: not supported on datagram sockets. */
static int
udp6_listen(struct socket *so, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * udp6_connect: fix the foreign address/port in the PCB; fails with
 * EISCONN if a foreign address is already set.
 */
static int
udp6_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct in6pcb *in6p = sotoin6pcb(so);
	int error = 0;
	int s;

	KASSERT(solocked(so));
	KASSERT(in6p != NULL);

	if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr))
		return EISCONN;
	s = splsoftnet();
	error = in6_pcbconnect(in6p, (struct sockaddr_in6 *)nam, l);
	splx(s);
	if (error == 0)
		soisconnected(so);

	return error;
}

/* udp6_connect2: socketpair-style connect; not supported. */
static int
udp6_connect2(struct socket *so, struct socket *so2)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * udp6_disconnect: clear the foreign and local addresses and drop the
 * PCB back to the bound state.
 */
static int
udp6_disconnect(struct socket *so)
{
	struct in6pcb *in6p = sotoin6pcb(so);
	int s;

	KASSERT(solocked(so));
	KASSERT(in6p != NULL);

	if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr))
		return ENOTCONN;

	s = splsoftnet();
	in6_pcbdisconnect(in6p);
	memset((void *)&in6p->in6p_laddr, 0, sizeof(in6p->in6p_laddr));
	splx(s);

	so->so_state &= ~SS_ISCONNECTED;	/* XXX */
	in6_pcbstate(in6p, IN6P_BOUND);		/* XXX */
	return 0;
}

/* udp6_shutdown: disallow further sends; receives remain possible. */
static int
udp6_shutdown(struct socket *so)
{
	int s;

	s = splsoftnet();
	socantsendmore(so);
	splx(s);

	return 0;
}

/* udp6_abort: mark the socket disconnected and tear down its PCB. */
static int
udp6_abort(struct socket *so)
{
	int s;

	KASSERT(solocked(so));
	KASSERT(sotoin6pcb(so) != NULL);

	s = splsoftnet();
	soisdisconnected(so);
	in6_pcbdetach(sotoin6pcb(so));
	splx(s);

	return 0;
}

static int
udp6_ioctl(struct socket *so, u_long cmd, void *addr6, struct ifnet *ifp)
{
	/*
	 * MAPPED_ADDR implementation info:
	 *  Mapped addr support for PRU_CONTROL is not necessary.
	 *  Because typical user of PRU_CONTROL is such as ifconfig,
	 *  and they don't associate any addr to their socket.  Then
	 *  socket family is only hint about the PRU_CONTROL'ed address
	 *  family, especially when getting addrs from kernel.
	 *  So AF_INET socket need to be used to control AF_INET addrs,
	 *  and AF_INET6 socket for AF_INET6 addrs.
	 */
	return in6_control(so, cmd, addr6, ifp);
}

/* udp6_stat: nothing interesting to report for a datagram socket. */
static int
udp6_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));

	/* stat: don't bother with a blocksize */
	return 0;
}

/* udp6_peeraddr: copy out the connected peer's address. */
static int
udp6_peeraddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));
	KASSERT(sotoin6pcb(so) != NULL);
	KASSERT(nam != NULL);

	in6_setpeeraddr(sotoin6pcb(so), (struct sockaddr_in6 *)nam);
	return 0;
}

/* udp6_sockaddr: copy out the socket's local address. */
static int
udp6_sockaddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));
	KASSERT(sotoin6pcb(so) != NULL);
	KASSERT(nam != NULL);

	in6_setsockaddr(sotoin6pcb(so), (struct sockaddr_in6 *)nam);
	return 0;
}

/* udp6_rcvd: receive acknowledgement; meaningless for UDP. */
static int
udp6_rcvd(struct socket *so, int flags, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/* udp6_recvoob: UDP has no out-of-band data. */
static int
udp6_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/* udp6_send: hand the datagram (and any control data) to udp6_output(). */
static int
udp6_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct lwp *l)
{
	struct in6pcb *in6p = sotoin6pcb(so);
	int error = 0;
	int s;

	KASSERT(solocked(so));
	KASSERT(in6p != NULL);
	KASSERT(m != NULL);

	s = splsoftnet();
	error = udp6_output(in6p, m, (struct sockaddr_in6 *)nam, control, l);
	splx(s);

	return error;
}

/* udp6_sendoob: UDP has no out-of-band data; free the mbufs. */
static int
udp6_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	m_freem(m);
m_freem(control); return EOPNOTSUPP; } static int udp6_purgeif(struct socket *so, struct ifnet *ifp) { mutex_enter(softnet_lock); in6_pcbpurgeif0(&udbtable, ifp); #ifdef NET_MPSAFE mutex_exit(softnet_lock); #endif in6_purgeif(ifp); #ifdef NET_MPSAFE mutex_enter(softnet_lock); #endif in6_pcbpurgeif(&udbtable, ifp); mutex_exit(softnet_lock); return 0; } static int sysctl_net_inet6_udp6_stats(SYSCTLFN_ARGS) { return (NETSTAT_SYSCTL(udp6stat_percpu, UDP6_NSTATS)); } static void sysctl_net_inet6_udp6_setup(struct sysctllog **clog) { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet6", NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET6, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "udp6", SYSCTL_DESCR("UDPv6 related settings"), NULL, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_UDP, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "sendspace", SYSCTL_DESCR("Default UDP send buffer size"), NULL, 0, &udp6_sendspace, 0, CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_SENDSPACE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "recvspace", SYSCTL_DESCR("Default UDP receive buffer size"), NULL, 0, &udp6_recvspace, 0, CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_RECVSPACE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "do_loopback_cksum", SYSCTL_DESCR("Perform UDP checksum on loopback"), NULL, 0, &udp_do_loopback_cksum, 0, CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_LOOPBACKCKSUM, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "pcblist", SYSCTL_DESCR("UDP protocol control block list"), sysctl_inpcblist, 0, &udbtable, 0, CTL_NET, PF_INET6, IPPROTO_UDP, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("UDPv6 statistics"), sysctl_net_inet6_udp6_stats, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_STATS, 
CTL_EOL); } void udp6_statinc(u_int stat) { KASSERT(stat < UDP6_NSTATS); UDP6_STATINC(stat); } #ifdef IPSEC /* * Returns: * 1 if the packet was processed * 0 if normal UDP processing should take place * -1 if an error occurred and m was freed */ static int udp6_espinudp(struct mbuf **mp, int off) { const size_t skip = sizeof(struct udphdr); size_t len; void *data; size_t minlen; int ip6hdrlen; struct ip6_hdr *ip6; struct m_tag *tag; struct udphdr *udphdr; u_int16_t sport, dport; struct mbuf *m = *mp; uint32_t *marker; /* * Collapse the mbuf chain if the first mbuf is too short * The longest case is: UDP + non ESP marker + ESP */ minlen = off + sizeof(u_int64_t) + sizeof(struct esp); if (minlen > m->m_pkthdr.len) minlen = m->m_pkthdr.len; if (m->m_len < minlen) { if ((*mp = m_pullup(m, minlen)) == NULL) { return -1; } m = *mp; } len = m->m_len - off; data = mtod(m, char *) + off; /* Ignore keepalive packets */ if ((len == 1) && (*(unsigned char *)data == 0xff)) { m_freem(m); *mp = NULL; /* avoid any further processing by caller ... */ return 1; } /* Handle Non-ESP marker (32bit). If zero, then IKE. */ marker = (uint32_t *)data; if (len <= sizeof(uint32_t)) return 0; if (marker[0] == 0) return 0; /* * Get the UDP ports. They are handled in network * order everywhere in IPSEC_NAT_T code. 
*/ udphdr = (struct udphdr *)((char *)data - skip); sport = udphdr->uh_sport; dport = udphdr->uh_dport; /* * Remove the UDP header (and possibly the non ESP marker) * IPv6 header length is ip6hdrlen * Before: * <---- off ---> * +-----+------+-----+ * | IP6 | UDP | ESP | * +-----+------+-----+ * <-skip-> * After: * +-----+-----+ * | IP6 | ESP | * +-----+-----+ * <-skip-> */ ip6hdrlen = off - sizeof(struct udphdr); memmove(mtod(m, char *) + skip, mtod(m, void *), ip6hdrlen); m_adj(m, skip); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - skip); ip6->ip6_nxt = IPPROTO_ESP; /* * We have modified the packet - it is now ESP, so we should not * return to UDP processing ... * * Add a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember * the source UDP port. This is required if we want * to select the right SPD for multiple hosts behind * same NAT */ if ((tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS, sizeof(sport) + sizeof(dport), M_DONTWAIT)) == NULL) { m_freem(m); return -1; } ((u_int16_t *)(tag + 1))[0] = sport; ((u_int16_t *)(tag + 1))[1] = dport; m_tag_prepend(m, tag); if (ipsec_used) ipsec6_common_input(&m, &ip6hdrlen, IPPROTO_ESP); else m_freem(m); /* We handled it, it shouldn't be handled by UDP */ *mp = NULL; /* avoid free by caller ... 
*/ return 1; } #endif /* IPSEC */ PR_WRAP_USRREQS(udp6) #define udp6_attach udp6_attach_wrapper #define udp6_detach udp6_detach_wrapper #define udp6_accept udp6_accept_wrapper #define udp6_bind udp6_bind_wrapper #define udp6_listen udp6_listen_wrapper #define udp6_connect udp6_connect_wrapper #define udp6_connect2 udp6_connect2_wrapper #define udp6_disconnect udp6_disconnect_wrapper #define udp6_shutdown udp6_shutdown_wrapper #define udp6_abort udp6_abort_wrapper #define udp6_ioctl udp6_ioctl_wrapper #define udp6_stat udp6_stat_wrapper #define udp6_peeraddr udp6_peeraddr_wrapper #define udp6_sockaddr udp6_sockaddr_wrapper #define udp6_rcvd udp6_rcvd_wrapper #define udp6_recvoob udp6_recvoob_wrapper #define udp6_send udp6_send_wrapper #define udp6_sendoob udp6_sendoob_wrapper #define udp6_purgeif udp6_purgeif_wrapper const struct pr_usrreqs udp6_usrreqs = { .pr_attach = udp6_attach, .pr_detach = udp6_detach, .pr_accept = udp6_accept, .pr_bind = udp6_bind, .pr_listen = udp6_listen, .pr_connect = udp6_connect, .pr_connect2 = udp6_connect2, .pr_disconnect = udp6_disconnect, .pr_shutdown = udp6_shutdown, .pr_abort = udp6_abort, .pr_ioctl = udp6_ioctl, .pr_stat = udp6_stat, .pr_peeraddr = udp6_peeraddr, .pr_sockaddr = udp6_sockaddr, .pr_rcvd = udp6_rcvd, .pr_recvoob = udp6_recvoob, .pr_send = udp6_send, .pr_sendoob = udp6_sendoob, .pr_purgeif = udp6_purgeif, }; |
/* $NetBSD: umodem.c,v 1.74 2020/04/12 01:10:54 simonb Exp $ */

/*
 * Copyright (c) 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Lennart Augustsson (lennart@augustsson.net) at
 * Carlstedt Research & Technology.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Comm Class spec:  http://www.usb.org/developers/devclass_docs/usbccs10.pdf
 *                   http://www.usb.org/developers/devclass_docs/usbcdc11.pdf
 */

/*
 * TODO:
 * - Add error recovery in various places; the big problem is what
 *   to do in a callback if there is an error.
 * - Implement a Call Device for modems without multiplexed commands.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: umodem.c,v 1.74 2020/04/12 01:10:54 simonb Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/tty.h>
#include <sys/file.h>
#include <sys/select.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/device.h>
#include <sys/poll.h>

#include <dev/usb/usb.h>
#include <dev/usb/usbcdc.h>
#include <dev/usb/usbdi.h>
#include <dev/usb/usbdi_util.h>
#include <dev/usb/usbdevs.h>
#include <dev/usb/usb_quirks.h>
#include <dev/usb/ucomvar.h>
#include <dev/usb/umodemvar.h>

/*
 * ucom(4) callbacks; the actual implementations live in the shared
 * umodem code (umodemvar.h / umodem_common.c).
 */
Static const struct ucom_methods umodem_methods = {
	.ucom_get_status = umodem_get_status,
	.ucom_set = umodem_set,
	.ucom_param = umodem_param,
	.ucom_ioctl = umodem_ioctl,
	.ucom_open = umodem_open,
	.ucom_close = umodem_close,
};

static int	umodem_match(device_t, cfdata_t, void *);
static void	umodem_attach(device_t, device_t, void *);
static int	umodem_detach(device_t, int);

CFATTACH_DECL_NEW(umodem, sizeof(struct umodem_softc), umodem_match,
    umodem_attach, umodem_detach, NULL);

/*
 * umodem_match: autoconf match.  Accept CDC Abstract Control Model
 * interfaces (no-class or AT-command protocol) for which
 * umodem_get_caps() can determine the capabilities.
 */
static int
umodem_match(device_t parent, cfdata_t match, void *aux)
{
	struct usbif_attach_arg *uiaa = aux;
	usb_interface_descriptor_t *id;
	int cm, acm;

	if (uiaa->uiaa_class != UICLASS_CDC ||
	    uiaa->uiaa_subclass != UISUBCLASS_ABSTRACT_CONTROL_MODEL ||
	    !(uiaa->uiaa_proto == UIPROTO_CDC_NOCLASS ||
	      uiaa->uiaa_proto == UIPROTO_CDC_AT))
		return UMATCH_NONE;

	id = usbd_get_interface_descriptor(uiaa->uiaa_iface);
	if (umodem_get_caps(uiaa->uiaa_device, &cm, &acm, id) == -1)
		return UMATCH_NONE;

	return UMATCH_IFACECLASS_IFACESUBCLASS_IFACEPROTO;
}

/*
 * umodem_attach: set up default ucom attach arguments, register a
 * (null) power handler, and delegate to umodem_common_attach().
 */
static void
umodem_attach(device_t parent, device_t self, void *aux)
{
	struct umodem_softc *sc = device_private(self);
	struct usbif_attach_arg *uiaa = aux;
	struct ucom_attach_args ucaa;

	memset(&ucaa, 0, sizeof(ucaa));
	ucaa.ucaa_portno = UCOM_UNK_PORTNO;
	ucaa.ucaa_methods = &umodem_methods;
	ucaa.ucaa_info = NULL;

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler");

	if (umodem_common_attach(self, sc, uiaa, &ucaa))
		return;
	return;
}

/*
 * umodem_detach: deregister the power handler and delegate to
 * umodem_common_detach().
 */
static int
umodem_detach(device_t self, int flags)
{
	struct umodem_softc *sc = device_private(self);

	pmf_device_deregister(self);

	return umodem_common_detach(sc, flags);
}
| 95 97 2 26 95 92 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 | /* $NetBSD: route.h,v 1.129 2021/08/09 20:49:10 andvar Exp $ */ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 *	@(#)route.h	8.5 (Berkeley) 2/8/95
 */

#ifndef _NET_ROUTE_H_
#define _NET_ROUTE_H_

#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <net/if.h>
#ifdef _KERNEL
#include <sys/rwlock.h>
#include <sys/condvar.h>
#include <sys/pserialize.h>
#include <sys/percpu.h>
#endif
#include <sys/psref.h>

#if !(defined(_KERNEL) || defined(_STANDALONE))
#include <stdbool.h>
#endif

/*
 * Kernel resident routing tables.
 *
 * The routing tables are initialized when interface addresses
 * are set by making entries for all directly connected interfaces.
 */

/*
 * A route consists of a destination address and a reference
 * to a routing entry.  These are often held by protocols
 * in their control blocks, e.g. inpcb.
 */
struct route {
	struct rtentry		*_ro_rt;	/* cached entry; use rt_getkey/rtcache_* accessors */
	struct sockaddr		*ro_sa;		/* cached destination */
	uint64_t		ro_rtcache_generation;
	struct psref		ro_psref;
	int			ro_bound;
};

/*
 * These numbers are used by reliable protocols for determining
 * retransmission behavior and are included in the routing structure.
 */
struct rt_metrics {
	uint64_t rmx_locks;	/* Kernel must leave these values alone */
	uint64_t rmx_mtu;	/* MTU for this path */
	uint64_t rmx_hopcount;	/* max hops expected */
	uint64_t rmx_recvpipe;	/* inbound delay-bandwidth product */
	uint64_t rmx_sendpipe;	/* outbound delay-bandwidth product */
	uint64_t rmx_ssthresh;	/* outbound gateway buffer limit */
	uint64_t rmx_rtt;	/* estimated round trip time */
	uint64_t rmx_rttvar;	/* estimated rtt variance */
	time_t	rmx_expire;	/* lifetime for route, e.g. redirect */
	time_t	rmx_pksent;	/* packets sent using this route */
};

/*
 * rmx_rtt and rmx_rttvar are stored as microseconds;
 * RTTTOPRHZ(rtt) converts to a value suitable for use
 * by a protocol slowtimo counter.
 */
#define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
#define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))

/*
 * We distinguish between routes to hosts and routes to networks,
 * preferring the former if available.  For each route we infer
 * the interface to use from the gateway address supplied when
 * the route was entered.  Routes that forward packets through
 * gateways are marked so that the output routines know to address the
 * gateway rather than the ultimate destination.
 */
#ifndef RNF_NORMAL
#include <net/radix.h>
#endif
struct rtentry {
	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
#define	rt_mask(r)	((const struct sockaddr *)((r)->rt_nodes->rn_mask))
	struct	sockaddr *rt_gateway;	/* value */
	int	rt_flags;		/* up/down?, host/net */
	int	rt_refcnt;		/* # held references */
	uint64_t rt_use;		/* raw # packets forwarded */
	struct	ifnet *rt_ifp;		/* the answer: interface to use */
	struct	ifaddr *rt_ifa;		/* the answer: interface to use */
	uint32_t rt_ifa_seqno;
	void *	rt_llinfo;		/* pointer to link level info cache */
	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
	LIST_HEAD(, rttimer) rt_timer;	/* queue of timeouts for misc funcs */
	struct	rtentry *rt_parent;	/* parent of cloned route */
	struct	sockaddr *_rt_key;	/* lookup key; read via rt_getkey() */
	struct	sockaddr *rt_tag;	/* route tagging info */
#ifdef _KERNEL
	kcondvar_t rt_cv;
	struct psref_target rt_psref;
	SLIST_ENTRY(rtentry) rt_free;	/* queue of deferred frees */
#endif
};

/* rt_getkey: accessor for the (nominally private) lookup key. */
static __inline const struct sockaddr *
rt_getkey(const struct rtentry *rt)
{

	return rt->_rt_key;
}

/*
 * Following structure necessary for 4.3 compatibility;
 * We should eventually move it to a compat file.
 */
struct ortentry {
	uint32_t rt_hash;		/* to speed lookups */
	struct	sockaddr rt_dst;	/* key */
	struct	sockaddr rt_gateway;	/* value */
	int16_t	rt_flags;		/* up/down?, host/net */
	int16_t	rt_refcnt;		/* # held references */
	uint32_t rt_use;		/* raw # packets forwarded */
	struct	ifnet *rt_ifp;		/* the answer: interface to use */
};

#define	RTF_UP		0x1		/* route usable */
#define	RTF_GATEWAY	0x2		/* destination is a gateway */
#define	RTF_HOST	0x4		/* host entry (net otherwise) */
#define	RTF_REJECT	0x8		/* host or net unreachable */
#define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
#define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
#define	RTF_DONE	0x40		/* message confirmed */
#define	RTF_MASK	0x80		/* subnet mask present */
// #define RTF_CLONING	0x100		/* generate new routes on use */
#define	RTF_CONNECTED	0x100		/* hosts on this route are neighbours */
// #define RTF_XRESOLVE	0x200		/* external daemon resolves name */
// #define RTF_LLINFO	0x400		/* generated by ARP or NDP */
#define	RTF_LLDATA	0x400		/* used by apps to add/del L2 entries */
#define	RTF_STATIC	0x800		/* manually added */
#define	RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
// #define RTF_CLONED	0x2000		/* this is a cloned route */
#define	RTF_PROTO2	0x4000		/* protocol specific routing flag */
#define	RTF_PROTO1	0x8000		/* protocol specific routing flag */
#define	RTF_SRC		0x10000		/* route has fixed source address */
#define	RTF_ANNOUNCE	0x20000		/* announce new ARP or NDP entry */
#define	RTF_LOCAL	0x40000		/* route represents a local address */
#define	RTF_BROADCAST	0x80000		/* route represents a bcast address */
#define	RTF_UPDATING	0x100000	/* route is updating */
/*
 * The flag is never set on rt_flags.  It just tells rtrequest1 to set a passed
 * ifa to rt_ifa (via rti_ifa) and not replace rt_ifa in ifa_rtrequest.
 */
#define	RTF_DONTCHANGEIFA	0x200000	/* suppress rt_ifa replacement */

/*
 * 0x400 is exposed to userland just for backward compatibility.  For that
 * purpose, it should be shown as LLINFO.
 */
#define	RTFBITS "\020\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE" \
    "\010MASK_PRESENT\011CONNECTED\012XRESOLVE\013LLINFO\014STATIC" \
    "\015BLACKHOLE\016CLONED\017PROTO2\020PROTO1\021SRC\022ANNOUNCE" \
    "\023LOCAL\024BROADCAST\025UPDATING"

/*
 * Routing statistics.
 */
struct	rtstat {
	uint64_t rts_badredirect;	/* bogus redirect calls */
	uint64_t rts_dynamic;		/* routes created by redirects */
	uint64_t rts_newgateway;	/* routes modified by redirects */
	uint64_t rts_unreach;		/* lookups which failed */
	uint64_t rts_wildcard;		/* lookups satisfied by a wildcard */
};

/*
 * Structures for routing messages.  By forcing the first member to be aligned
 * at a 64-bit boundary, we also force the size to be a multiple of 64-bits.
 */

#if !defined(_KERNEL) || !defined(COMPAT_RTSOCK)
/*
 * If we aren't being compiled for backwards compatibility, enforce 64-bit
 * alignment so any routing message is the same regardless if the kernel
 * is an ILP32 or LP64 kernel.
 */
#define	__align64	__aligned(sizeof(uint64_t))
#else
#define	__align64
#endif

struct rt_msghdr {
	u_short	rtm_msglen __align64;
				/* to skip over non-understood messages */
	u_char	rtm_version;	/* future binary compatibility */
	u_char	rtm_type;	/* message type */
	u_short	rtm_index;	/* index for associated ifp */
	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
	pid_t	rtm_pid;	/* identify sender */
	int	rtm_seq;	/* for sender to identify action */
	int	rtm_errno;	/* why failed */
	int	rtm_use;	/* from rtentry */
	int	rtm_inits;	/* which metrics we are initializing */
	struct	rt_metrics rtm_rmx __align64;
				/* metrics themselves */
};

#undef __align64

#define	RTM_VERSION	4	/* Up the ante and ignore older versions */

#define	RTM_ADD		0x1	/* Add Route */
#define	RTM_DELETE	0x2	/* Delete Route */
#define	RTM_CHANGE	0x3	/* Change Metrics or flags */
#define	RTM_GET		0x4	/* Report Metrics */
#define	RTM_LOSING	0x5	/* Kernel Suspects Partitioning */
#define	RTM_REDIRECT	0x6	/* Told to use different route */
#define	RTM_MISS	0x7	/* Lookup failed on this address */
#define	RTM_LOCK	0x8	/* fix specified metrics */
#define	RTM_OLDADD	0x9	/* caused by SIOCADDRT */
#define	RTM_OLDDEL	0xa	/* caused by SIOCDELRT */
// #define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
#define	RTM_ONEWADDR	0xc	/* Old (pre-8.0) RTM_NEWADDR message */
#define	RTM_ODELADDR	0xd	/* Old (pre-8.0) RTM_DELADDR message */
#define	RTM_OOIFINFO	0xe	/* Old (pre-1.5) RTM_IFINFO message */
#define	RTM_OIFINFO	0xf	/* Old (pre-64bit time) RTM_IFINFO message */
#define	RTM_IFANNOUNCE	0x10	/* iface arrival/departure */
#define	RTM_IEEE80211	0x11	/* IEEE80211 wireless event */
#define	RTM_SETGATE	0x12	/* set prototype gateway for clones
				 * (see example in arp_rtrequest).
				 */
#define	RTM_LLINFO_UPD	0x13	/* indication to ARP/NDP/etc. that link-layer
				 * address has changed
				 */
#define	RTM_IFINFO	0x14	/* iface/link going up/down etc. */
#define	RTM_OCHGADDR	0x15	/* Old (pre-8.0) RTM_CHGADDR message */
#define	RTM_NEWADDR	0x16	/* address being added to iface */
#define	RTM_DELADDR	0x17	/* address being removed from iface */
#define	RTM_CHGADDR	0x18	/* address properties changed */

#ifdef RTM_NAMES
/* Printable names indexed by RTM_* message type. */
static const char *rtm_names[] = {
	"*none*", "add", "delete", "change", "get",
	"losing", "redirect", "miss", "lock", "oldadd",
	"olddel", "*resolve*", "onewaddr", "odeladdr", "ooifinfo",
	"oifinfo", "ifannounce", "ieee80211", "setgate", "llinfo_upd",
	"ifinfo", "ochgaddr", "newaddr", "deladdr", "chgaddr",
};
#endif

/*
 * setsockopt defines used for the filtering.
 */
#define	RO_MSGFILTER	1	/* array of which rtm_type to send to client */
#define	RO_MISSFILTER	2	/* array of sockaddrs to match miss dst */

#define	RO_FILTSA_MAX	30	/* maximum number of sockaddrs per filter */

#define	RTV_MTU		0x1	/* init or lock _mtu */
#define	RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
#define	RTV_EXPIRE	0x4	/* init or lock _expire */
#define	RTV_RPIPE	0x8	/* init or lock _recvpipe */
#define	RTV_SPIPE	0x10	/* init or lock _sendpipe */
#define	RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
#define	RTV_RTT		0x40	/* init or lock _rtt */
#define	RTV_RTTVAR	0x80	/* init or lock _rttvar */

#define	RTVBITS "\020\1MTU\2HOPCOUNT\3EXPIRE\4RECVPIPE\5SENDPIPE" \
    "\6SSTHRESH\7RTT\010RTTVAR"

/*
 * Bitmask values for rtm_addrs.
 */
#define	RTA_DST		0x1	/* destination sockaddr present */
#define	RTA_GATEWAY	0x2	/* gateway sockaddr present */
#define	RTA_NETMASK	0x4	/* netmask sockaddr present */
#define	RTA_GENMASK	0x8	/* cloning mask sockaddr present */
#define	RTA_IFP		0x10	/* interface name sockaddr present */
#define	RTA_IFA		0x20	/* interface addr sockaddr present */
#define	RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
#define	RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
#define	RTA_TAG		0x100	/* route tag */

#define	RTABITS	"\020\1DST\2GATEWAY\3NETMASK\4GENMASK\5IFP\6IFA\7AUTHOR" \
    "\010BRD\011TAG"

/*
 * Index offsets for sockaddr array for alternate internal encoding.
 */
#define	RTAX_DST	0	/* destination sockaddr present */
#define	RTAX_GATEWAY	1	/* gateway sockaddr present */
#define	RTAX_NETMASK	2	/* netmask sockaddr present */
#define	RTAX_GENMASK	3	/* cloning mask sockaddr present */
#define	RTAX_IFP	4	/* interface name sockaddr present */
#define	RTAX_IFA	5	/* interface addr sockaddr present */
#define	RTAX_AUTHOR	6	/* sockaddr for author of redirect */
#define	RTAX_BRD	7	/* for NEWADDR, broadcast or p-p dest addr */
#define	RTAX_TAG	8	/* route tag */
#define	RTAX_MAX	9	/* size of array to allocate */

/* Round sockaddr lengths up to 64-bit boundaries when walking messages. */
#define	RT_ROUNDUP2(a, n)	((a) > 0 ? (1 + (((a) - 1U) | ((n) - 1))) : (n))
#define	RT_ROUNDUP(a)		RT_ROUNDUP2((a), sizeof(uint64_t))
#define	RT_ADVANCE(x, n)	(x += RT_ROUNDUP((n)->sa_len))

struct rt_addrinfo {
	int	rti_addrs;	/* RTA_* bitmask for rti_info[] */
	const struct	sockaddr *rti_info[RTAX_MAX];
	int	rti_flags;
	struct	ifaddr *rti_ifa;
	struct	ifnet *rti_ifp;
};

/* Per-family routing-socket listener counts. */
struct route_cb {
	int	ip_count;
	int	ip6_count;
	int	unused1;
	int	mpls_count;
	int	any_count;
};

/*
 * This structure, and the prototypes for the rt_timer_{init,remove_all,
 * add,timer} functions all used with the kind permission of BSDI.
 * These allow functions to be called for routes at specific times.
 */
struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;  /* entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;  /* multiple timers per rtentry */
	struct rttimer_queue   *rtt_queue; /* back pointer to queue */
	struct rtentry         *rtt_rt;    /* Back pointer to the route */
	void		      (*rtt_func)(struct rtentry *, struct rttimer *);
	time_t			rtt_time;  /* When this timer was registered */
};

struct rttimer_queue {
	long				rtq_timeout;
	unsigned long			rtq_count;
	TAILQ_HEAD(, rttimer)		rtq_head;
	LIST_ENTRY(rttimer_queue)	rtq_link;
};

struct rtbl;
typedef struct rtbl rtbl_t;

#ifdef _KERNEL

struct rtbl {
	struct radix_node_head t_rnh;
};

/* State carried through a sysctl dump of the routing table. */
struct rt_walkarg {
	int	w_op;
	int	w_arg;
	int	w_given;
	int	w_needed;
	void *	w_where;
	int	w_tmemsize;
	int	w_tmemneeded;
	void *	w_tmem;
};

#if 0
#define	RT_DPRINTF(__fmt, ...)	do { } while (/*CONSTCOND*/0)
#else
#define	RT_DPRINTF(__fmt, ...)	/* do nothing */
#endif

/* Callback + argument pair used when walking the route tree. */
struct rtwalk {
	int (*rw_f)(struct rtentry *, void *);
	void *rw_v;
};

/*
 * Global data specific to the routing socket.
 */
struct route_info {
	struct sockaddr ri_dst;
	struct sockaddr ri_src;
	struct route_cb ri_cb;
	int ri_maxqlen;
	struct ifqueue ri_intrq;
	void *ri_sih;
};

extern struct route_info route_info;
extern struct rtstat rtstat;

struct socket;

void	rt_init(void);

int	rt_timer_add(struct rtentry *,
	    void(*)(struct rtentry *, struct rttimer *),
	    struct rttimer_queue *);
unsigned long
	rt_timer_count(struct rttimer_queue *);
void	rt_timer_queue_change(struct rttimer_queue *, long);
struct rttimer_queue *
	rt_timer_queue_create(u_int);
void	rt_timer_queue_destroy(struct rttimer_queue *);

void	rt_free(struct rtentry *);
void	rt_unref(struct rtentry *);

int	rt_update(struct rtentry *, struct rt_addrinfo *, void *);
int	rt_update_prepare(struct rtentry *);
void	rt_update_finish(struct rtentry *);

void	rt_newmsg(const int, const struct rtentry *);
struct rtentry *
	rtalloc1(const struct sockaddr *, int);
int	rtinit(struct ifaddr *, int, int);
void	rtredirect(const struct sockaddr *, const struct sockaddr *,
	    const struct sockaddr *, int, const struct sockaddr *,
	    struct rtentry **);
int	rtrequest(int, const struct sockaddr *,
	    const struct sockaddr *, const struct sockaddr *, int,
	    struct rtentry **);
int	rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
int	rtrequest_newmsg(const int, const struct sockaddr *,
	    const struct sockaddr *, const struct sockaddr *, const int);

int	rt_ifa_addlocal(struct ifaddr *);
int	rt_ifa_remlocal(struct ifaddr *, struct ifaddr *);
struct ifaddr *
	rt_get_ifa(struct rtentry *);
void	rt_replace_ifa(struct rtentry *, struct ifaddr *);
int	rt_setgate(struct rtentry *, const struct sockaddr *);

const struct sockaddr *
	rt_settag(struct rtentry *, const struct sockaddr *);
struct sockaddr *
	rt_gettag(const struct rtentry *);

int	rt_check_reject_route(const struct rtentry *, const struct ifnet *);
void	rt_delete_matched_entries(sa_family_t,
	    int (*)(struct rtentry *, void *), void *);
int	rt_walktree(sa_family_t, int (*)(struct rtentry *, void *), void *);

/* Assert (DIAGNOSTIC kernels) that rt still holds at least one reference. */
static __inline void
rt_assert_referenced(const struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt > 0);
}

void	rtcache_copy(struct route *, struct route *);
void	rtcache_free(struct route *);
struct rtentry *
	rtcache_init(struct route *);
struct rtentry *
	rtcache_init_noclone(struct route *);
struct rtentry *
	rtcache_lookup2(struct route *, const struct sockaddr *, int, int *);
int	rtcache_setdst(struct route *, const struct sockaddr *);
struct rtentry *
	rtcache_update(struct route *, int);

/* A cache may only hold an entry (_ro_rt) if a destination (ro_sa) is set. */
static __inline void
rtcache_invariants(const struct route *ro)
{

	KASSERT(ro->ro_sa != NULL || ro->_ro_rt == NULL);
}

/* rtcache_lookup2() wrapper that discards the hit/miss indicator. */
static __inline struct rtentry *
rtcache_lookup1(struct route *ro, const struct sockaddr *dst, int clone)
{
	int hit;

	return rtcache_lookup2(ro, dst, clone, &hit);
}

/* Common case: cached lookup with clone == 1. */
static __inline struct rtentry *
rtcache_lookup(struct route *ro, const struct sockaddr *dst)
{

	return rtcache_lookup1(ro, dst, 1);
}

static __inline const struct sockaddr *
rtcache_getdst(const struct route *ro)
{

	rtcache_invariants(ro);
return ro->ro_sa; } struct rtentry * rtcache_validate(struct route *); void rtcache_unref(struct rtentry *, struct route *); percpu_t * rtcache_percpu_alloc(void); static inline struct route * rtcache_percpu_getref(percpu_t *pc) { return *(struct route **)percpu_getref(pc); } static inline void rtcache_percpu_putref(percpu_t *pc) { percpu_putref(pc); } /* rtsock */ void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, const struct rt_addrinfo *, int, int); struct mbuf * rt_msg1(int, struct rt_addrinfo *, void *, int); int rt_msg3(int, struct rt_addrinfo *, void *, struct rt_walkarg *, int *); void rt_addrmsg(int, struct ifaddr *); void rt_addrmsg_src(int, struct ifaddr *, const struct sockaddr *); void rt_addrmsg_rt(int, struct ifaddr *, int, struct rtentry *); void route_enqueue(struct mbuf *, int); struct llentry; void rt_clonedmsg(int, const struct sockaddr *, const struct sockaddr *, const uint8_t *, const struct ifnet *); void rt_setmetrics(void *, struct rtentry *); /* rtbl */ int rt_addaddr(rtbl_t *, struct rtentry *, const struct sockaddr *); void rt_assert_inactive(const struct rtentry *); struct rtentry * rt_deladdr(rtbl_t *, const struct sockaddr *, const struct sockaddr *); rtbl_t *rt_gettable(sa_family_t); int rt_inithead(rtbl_t **, int); struct rtentry * rt_lookup(rtbl_t *, const struct sockaddr *, const struct sockaddr *); struct rtentry * rt_matchaddr(rtbl_t *, const struct sockaddr *); int rt_refines(const struct sockaddr *, const struct sockaddr *); int rtbl_walktree(sa_family_t, int (*)(struct rtentry *, void *), void *); struct rtentry * rtbl_search_matched_entry(sa_family_t, int (*)(struct rtentry *, void *), void *); void rtbl_init(void); void sysctl_net_route_setup(struct sysctllog **, int, const char *); #endif /* _KERNEL */ #endif /* !_NET_ROUTE_H_ */ |
| 4314 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | /* $NetBSD: userret.h,v 1.13 2018/07/26 09:29:08 maxv Exp $ */ /* * XXXfvdl same as i386 counterpart, but should probably be independent. */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include <sys/userret.h> static __inline void userret(struct lwp *); /* * Define the code needed before returning to user mode, for * trap and syscall. */ static __inline void userret(struct lwp *l) { /* Invoke MI userret code */ mi_userret(l); } |
| 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 | /* $NetBSD: rf_dagfuncs.c,v 1.35 2021/08/07 16:19:15 thorpej Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * * Author: Mark Holland, William V. Courtright II * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * dagfuncs.c -- DAG node execution routines * * Rules: * 1. Every DAG execution function must eventually cause node->status to * get set to "good" or "bad", and "FinishNode" to be called. In the * case of nodes that complete immediately (xor, NullNodeFunc, etc), * the node execution function can do these two things directly. In * the case of nodes that have to wait for some event (a disk read to * complete, a lock to be released, etc) to occur before they can * complete, this is typically achieved by having whatever module * is doing the operation call GenericWakeupFunc upon completion. * 2. DAG execution functions should check the status in the DAG header * and NOP out their operations if the status is not "enable". However, * execution functions that release resources must be sure to release * them even when they NOP out the function that would use them. * Functions that acquire resources should go ahead and acquire them * even when they NOP, so that a downstream release node will not have * to check to find out whether or not the acquire was suppressed. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.35 2021/08/07 16:19:15 thorpej Exp $"); #include <sys/param.h> #include <sys/ioctl.h> #include "rf_archs.h" #include "rf_raid.h" #include "rf_dag.h" #include "rf_layout.h" #include "rf_etimer.h" #include "rf_acctrace.h" #include "rf_diskqueue.h" #include "rf_dagfuncs.h" #include "rf_general.h" #include "rf_engine.h" #include "rf_dagutils.h" #include "rf_kintf.h" #if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylog.h" #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ void (*rf_DiskReadFunc) (RF_DagNode_t *); void (*rf_DiskWriteFunc) (RF_DagNode_t *); void (*rf_DiskReadUndoFunc) (RF_DagNode_t *); void (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); void (*rf_RegularXorUndoFunc) (RF_DagNode_t *); void (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); void (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); /***************************************************************************** * main (only) configuration routine for this module ****************************************************************************/ int rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp) { RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); rf_DiskReadFunc = rf_DiskReadFuncForThreads; rf_DiskReadUndoFunc = rf_DiskUndoFunc; rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; rf_DiskWriteUndoFunc = rf_DiskUndoFunc; rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; return (0); } /***************************************************************************** * the execution function associated with a terminate node ****************************************************************************/ void rf_TerminateFunc(RF_DagNode_t *node) { RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); node->status = rf_good; rf_FinishNode(node, RF_THREAD_CONTEXT); } void rf_TerminateUndoFunc(RF_DagNode_t *node) { } 
/***************************************************************************** * execution functions associated with a mirror node * * parameters: * * 0 - physical disk address of data * 1 - buffer for holding read data * 2 - parity stripe ID * 3 - flags * 4 - physical disk address of mirror (parity) * ****************************************************************************/ void rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node) { /* select the mirror copy with the shortest queue and fill in node * parameters with physical disk address */ rf_SelectMirrorDiskIdle(node); rf_DiskReadFunc(node); } #if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0) void rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node) { /* select the mirror copy with the shortest queue and fill in node * parameters with physical disk address */ rf_SelectMirrorDiskPartition(node); rf_DiskReadFunc(node); } #endif void rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node) { } #if RF_INCLUDE_PARITYLOGGING > 0 /***************************************************************************** * the execution function associated with a parity log update node ****************************************************************************/ void rf_ParityLogUpdateFunc(RF_DagNode_t *node) { RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; void *bf = (void *) node->params[1].p; RF_ParityLogData_t *logData; #if RF_ACC_TRACE > 0 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; #endif if (node->dagHdr->status == rf_enable) { #if RF_ACC_TRACE > 0 RF_ETIMER_START(timer); #endif logData = rf_CreateParityLogData(RF_UPDATE, pda, bf, (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc, node, node->dagHdr->tracerec, timer); if (logData) rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); else { #if RF_ACC_TRACE > 0 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->plog_us += RF_ETIMER_VAL_US(timer); #endif (node->wakeFunc) (node, 
ENOMEM); } } } /***************************************************************************** * the execution function associated with a parity log overwrite node ****************************************************************************/ void rf_ParityLogOverwriteFunc(RF_DagNode_t *node) { RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; void *bf = (void *) node->params[1].p; RF_ParityLogData_t *logData; #if RF_ACC_TRACE > 0 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; #endif if (node->dagHdr->status == rf_enable) { #if RF_ACC_TRACE > 0 RF_ETIMER_START(timer); #endif logData = rf_CreateParityLogData(RF_OVERWRITE, pda, bf, (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc, node, node->dagHdr->tracerec, timer); if (logData) rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); else { #if RF_ACC_TRACE > 0 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->plog_us += RF_ETIMER_VAL_US(timer); #endif (node->wakeFunc) (node, ENOMEM); } } } void rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node) { } void rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node) { } #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ /***************************************************************************** * the execution function associated with a NOP node ****************************************************************************/ void rf_NullNodeFunc(RF_DagNode_t *node) { node->status = rf_good; rf_FinishNode(node, RF_THREAD_CONTEXT); } void rf_NullNodeUndoFunc(RF_DagNode_t *node) { node->status = rf_undone; rf_FinishNode(node, RF_THREAD_CONTEXT); } /***************************************************************************** * the execution function associated with a disk-read node ****************************************************************************/ void rf_DiskReadFuncForThreads(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; void *bf = (void *) node->params[1].p; 
RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, bf, parityStripeID, which_ru, node->wakeFunc, node, #if RF_ACC_TRACE > 0 node->dagHdr->tracerec, #else NULL, #endif (void *) (node->dagHdr->raidPtr), 0, node->dagHdr->bp); node->dagFuncData = (void *) req; rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); } /***************************************************************************** * the execution function associated with a disk-write node ****************************************************************************/ void rf_DiskWriteFuncForThreads(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; void *bf = (void *) node->params[1].p; RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; /* normal processing (rollaway or forward recovery) begins here */ req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, bf, parityStripeID, which_ru, node->wakeFunc, node, #if RF_ACC_TRACE > 0 node->dagHdr->tracerec, #else NULL, #endif (void *) (node->dagHdr->raidPtr), 0, node->dagHdr->bp); node->dagFuncData = (void *) req; rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); } /***************************************************************************** * the undo function for disk nodes * Note: this is not a proper undo of a write node, only locks are released. * old data is not restored to disk! ****************************************************************************/ void rf_DiskUndoFunc(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0, node->wakeFunc, node, #if RF_ACC_TRACE > 0 node->dagHdr->tracerec, #else NULL, #endif (void *) (node->dagHdr->raidPtr), 0, NULL); node->dagFuncData = (void *) req; rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY); } /***************************************************************************** * Callback routine for DiskRead and DiskWrite nodes. When the disk * op completes, the routine is called to set the node status and * inform the execution engine that the node has fired. 
****************************************************************************/ void rf_GenericWakeupFunc(void *v, int status) { RF_DagNode_t *node = v; switch (node->status) { case rf_fired: if (status) node->status = rf_bad; else node->status = rf_good; break; case rf_recover: /* probably should never reach this case */ if (status) node->status = rf_panic; else node->status = rf_undone; break; default: printf("rf_GenericWakeupFunc:"); printf("node->status is %d,", node->status); printf("status is %d \n", status); RF_PANIC(); break; } if (node->dagFuncData) rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); rf_FinishNode(node, RF_INTR_CONTEXT); } /***************************************************************************** * there are three distinct types of xor nodes: * A "regular xor" is used in the fault-free case where the access * spans a complete stripe unit. It assumes that the result buffer is * one full stripe unit in size, and uses the stripe-unit-offset * values that it computes from the PDAs to determine where within the * stripe unit to XOR each argument buffer. * * A "simple xor" is used in the fault-free case where the access * touches only a portion of one (or two, in some cases) stripe * unit(s). It assumes that all the argument buffers are of the same * size and have the same stripe unit offset. * * A "recovery xor" is used in the degraded-mode case. It's similar * to the regular xor function except that it takes the failed PDA as * an additional parameter, and uses it to determine what portions of * the argument buffers need to be xor'd into the result buffer, and * where in the result buffer they should go. ****************************************************************************/ /* xor the params together and store the result in the result field. * assume the result field points to a buffer that is the size of one * SU, and use the pda params to determine where within the buffer to * XOR the input buffers. 
*/ void rf_RegularXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; #if RF_ACC_TRACE > 0 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; #endif int i, retcode; retcode = 0; if (node->dagHdr->status == rf_enable) { /* don't do the XOR if the input is the same as the output */ #if RF_ACC_TRACE > 0 RF_ETIMER_START(timer); #endif for (i = 0; i < node->numParams - 1; i += 2) if (node->params[i + 1].p != node->results[0]) { retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, (char *) node->params[i + 1].p, (char *) node->results[0]); } #if RF_ACC_TRACE > 0 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); #endif } rf_GenericWakeupFunc(node, retcode); /* call wake func * explicitly since no * I/O in this node */ } /* xor the inputs into the result buffer, ignoring placement issues */ void rf_SimpleXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; int i, retcode = 0; #if RF_ACC_TRACE > 0 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; #endif if (node->dagHdr->status == rf_enable) { #if RF_ACC_TRACE > 0 RF_ETIMER_START(timer); #endif /* don't do the XOR if the input is the same as the output */ for (i = 0; i < node->numParams - 1; i += 2) if (node->params[i + 1].p != node->results[0]) { retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0], rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector)); } #if RF_ACC_TRACE > 0 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); #endif } rf_GenericWakeupFunc(node, retcode); /* call wake func * explicitly since no * I/O in this node */ } /* this xor is used by the degraded-mode dag functions to recover lost * data. the second-to-last parameter is the PDA for the failed * portion of the access. 
the code here looks at this PDA and assumes * that the xor target buffer is equal in size to the number of * sectors in the failed PDA. It then uses the other PDAs in the * parameter list to determine where within the target buffer the * corresponding data should be xored. */ void rf_RecoveryXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; int i, retcode = 0; RF_PhysDiskAddr_t *pda; int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); char *srcbuf, *destbuf; #if RF_ACC_TRACE > 0 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; #endif if (node->dagHdr->status == rf_enable) { #if RF_ACC_TRACE > 0 RF_ETIMER_START(timer); #endif for (i = 0; i < node->numParams - 2; i += 2) if (node->params[i + 1].p != node->results[0]) { pda = (RF_PhysDiskAddr_t *) node->params[i].p; srcbuf = (char *) node->params[i + 1].p; suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector)); } #if RF_ACC_TRACE > 0 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); #endif } rf_GenericWakeupFunc(node, retcode); } /***************************************************************************** * The next three functions are utilities used by the above * xor-execution functions. ****************************************************************************/ /* * this is just a glorified buffer xor. targbuf points to a buffer * that is one full stripe unit in size. srcbuf points to a buffer * that may be less than 1 SU, but never more. 
When the access * described by pda is one SU in size (which by implication means it's * SU-aligned), all that happens is (targbuf) <- (srcbuf ^ targbuf). * When the access is less than one SU in size the XOR occurs on only * the portion of targbuf identified in the pda. */ int rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf, char *targbuf) { char *targptr; int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; int SUOffset = pda->startSector % sectPerSU; int length, retcode = 0; RF_ASSERT(pda->numSector <= sectPerSU); targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); length = rf_RaidAddressToByte(raidPtr, pda->numSector); retcode = rf_bxor(srcbuf, targptr, length); return (retcode); } /* it really should be the case that the buffer pointers (returned by * malloc) are aligned to the natural word size of the machine, so * this is the only case we optimize for. The length should always be * a multiple of the sector size, so there should be no problem with * leftover bytes at the end. */ int rf_bxor(char *src, char *dest, int len) { unsigned mask = sizeof(long) - 1, retcode = 0; if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT); } else { RF_ASSERT(0); } return (retcode); } /* When XORing in kernel mode, we need to map each user page to kernel * space before we can access it. We don't want to assume anything * about which input buffers are in kernel/user space, nor about their * alignment, so in each loop we compute the maximum number of bytes * that we can xor without crossing any page boundaries, and do only * this many bytes before the next remap. 
*
* len - is in longwords
*/
int
rf_longword_bxor(unsigned long *src, unsigned long *dest, int len)
{
    unsigned long *end = src + len;
    unsigned long d0, d1, d2, d3, s0, s1, s2, s3;   /* temps */
    unsigned long *pg_src, *pg_dest;    /* per-page source/dest pointers */
    int longs_this_time;    /* # longwords to xor in the current iteration */

    pg_src = src;
    pg_dest = dest;
    if (!pg_src || !pg_dest)
        return (EFAULT);

    while (len >= 4) {
        /* xor only up to the nearer of the two page boundaries */
        longs_this_time = RF_MIN(len,
            RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT);
        /* note len in longwords */
        src += longs_this_time;
        dest += longs_this_time;
        len -= longs_this_time;
        /* 4-way unrolled xor within the page-safe region */
        while (longs_this_time >= 4) {
            d0 = pg_dest[0];
            d1 = pg_dest[1];
            d2 = pg_dest[2];
            d3 = pg_dest[3];
            s0 = pg_src[0];
            s1 = pg_src[1];
            s2 = pg_src[2];
            s3 = pg_src[3];
            pg_dest[0] = d0 ^ s0;
            pg_dest[1] = d1 ^ s1;
            pg_dest[2] = d2 ^ s2;
            pg_dest[3] = d3 ^ s3;
            pg_src += 4;
            pg_dest += 4;
            longs_this_time -= 4;
        }
        while (longs_this_time > 0) {   /* cannot cross any page
                                         * boundaries here */
            *pg_dest++ ^= *pg_src++;
            longs_this_time--;
        }
        /*
         * either we're done, or we've reached a page boundary on one
         * (or possibly both) of the pointers
         */
        if (len) {
            if (RF_PAGE_ALIGNED(src))
                pg_src = src;
            if (RF_PAGE_ALIGNED(dest))
                pg_dest = dest;
            if (!pg_src || !pg_dest)
                return (EFAULT);
        }
    }
    /* remaining (< 4) longwords, one at a time */
    while (src < end) {
        *pg_dest++ ^= *pg_src++;
        src++;
        dest++;
        len--;
        if (RF_PAGE_ALIGNED(src))
            pg_src = src;
        if (RF_PAGE_ALIGNED(dest))
            pg_dest = dest;
    }
    RF_ASSERT(len == 0);
    return (0);
}

#if 0
/*
 * dst = a ^ b ^ c;  a may equal dst
 * see comment above longword_bxor
 * len is length in longwords
 */
int
rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b,
    unsigned long *c, int len, void *bp)
{
    unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
    unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest
                                                 * pointers */
    int longs_this_time;    /* # longs to xor in the current iteration */
    char dst_is_a = 0;

    pg_a = a;
    pg_b = b;
    pg_c = c;
    if (a == dst) {
        pg_dst = pg_a;
        dst_is_a = 1;
    } else {
        pg_dst = dst;
    }

    /* align dest to cache line.  Can't cross a pg boundary on dst here. */
    while ((((unsigned long) pg_dst) & 0x1f)) {
        *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
        dst++;
        a++;
        b++;
        c++;
        if (RF_PAGE_ALIGNED(a)) {
            pg_a = a;
            if (!pg_a)
                return (EFAULT);
        }
        if (RF_PAGE_ALIGNED(b)) {
            /* was "pg_b = a" - copy/paste bug; the realignment
             * code later in this function correctly uses b */
            pg_b = b;
            if (!pg_b)
                return (EFAULT);
        }
        if (RF_PAGE_ALIGNED(c)) {
            /* was "pg_c = a" - same copy/paste bug */
            pg_c = c;
            if (!pg_c)
                return (EFAULT);
        }
        len--;
    }

    while (len > 4) {
        /* xor only up to the nearest page boundary of the four bufs */
        longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a),
            RF_MIN(RF_BLIP(b),
                RF_MIN(RF_BLIP(c),
                    RF_BLIP(dst)))) >> RF_LONGSHIFT);
        a += longs_this_time;
        b += longs_this_time;
        c += longs_this_time;
        dst += longs_this_time;
        len -= longs_this_time;
        while (longs_this_time >= 4) {
            a0 = pg_a[0];
            longs_this_time -= 4;
            a1 = pg_a[1];
            a2 = pg_a[2];
            a3 = pg_a[3];
            pg_a += 4;
            b0 = pg_b[0];
            b1 = pg_b[1];
            b2 = pg_b[2];
            b3 = pg_b[3];
            /* start dual issue */
            a0 ^= b0;
            b0 = pg_c[0];
            pg_b += 4;
            a1 ^= b1;
            a2 ^= b2;
            a3 ^= b3;
            b1 = pg_c[1];
            a0 ^= b0;
            b2 = pg_c[2];
            a1 ^= b1;
            b3 = pg_c[3];
            a2 ^= b2;
            pg_dst[0] = a0;
            a3 ^= b3;
            pg_dst[1] = a1;
            pg_c += 4;
            pg_dst[2] = a2;
            pg_dst[3] = a3;
            pg_dst += 4;
        }
        while (longs_this_time > 0) {   /* cannot cross any page
                                         * boundaries here */
            *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
            longs_this_time--;
        }
        if (len) {
            if (RF_PAGE_ALIGNED(a)) {
                pg_a = a;
                if (!pg_a)
                    return (EFAULT);
                if (dst_is_a)
                    pg_dst = pg_a;
            }
            if (RF_PAGE_ALIGNED(b)) {
                pg_b = b;
                if (!pg_b)
                    return (EFAULT);
            }
            if (RF_PAGE_ALIGNED(c)) {
                pg_c = c;
                if (!pg_c)
                    return (EFAULT);
            }
            if (!dst_is_a)
                if (RF_PAGE_ALIGNED(dst)) {
                    pg_dst = dst;
                    if (!pg_dst)
                        return (EFAULT);
                }
        }
    }

    /* remaining (<= 4) longwords, one at a time */
    while (len) {
        *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
        dst++;
        a++;
        b++;
        c++;
        if (RF_PAGE_ALIGNED(a)) {
            pg_a = a;
            if (!pg_a)
                return (EFAULT);
            if (dst_is_a)
                pg_dst = pg_a;
        }
        if (RF_PAGE_ALIGNED(b)) {
            pg_b = b;
            if (!pg_b)
                return (EFAULT);
        }
        if (RF_PAGE_ALIGNED(c)) {
            pg_c = c;
            if (!pg_c)
                return (EFAULT);
        }
        if (!dst_is_a)
            if (RF_PAGE_ALIGNED(dst)) {
                pg_dst = dst;
                if (!pg_dst)
                    return (EFAULT);
            }
        len--;
    }
    return (0);
}

/*
 * Byte-oriented wrapper: all pointers and len must be 8-byte aligned;
 * len is in bytes here and converted to longwords for bxor3.
 */
int
rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
    unsigned char *c, unsigned long len, void *bp)
{
    RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0);

    return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
        (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT,
        bp));
}
#endif
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 | /* $NetBSD: if_smsc.c,v 1.93 2022/08/20 14:08:59 riastradh Exp $ */ /* $OpenBSD: if_smsc.c,v 1.4 2012/09/27 12:38:11 jsg Exp $ */ /* $FreeBSD: src/sys/dev/usb/net/if_smsc.c,v 1.1 2012/08/15 04:03:55 gonzo Exp $ */ /*- * Copyright (c) 2012 * Ben Gray <bgray@freebsd.org>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * SMSC LAN9xxx devices (http://www.smsc.com/) * * The LAN9500 & LAN9500A devices are stand-alone USB to Ethernet chips that * support USB 2.0 and 10/100 Mbps Ethernet. * * The LAN951x devices are an integrated USB hub and USB to Ethernet adapter. 
* The driver only covers the Ethernet part, the standard USB hub driver
* supports the hub part.
*
* This driver is closely modelled on the Linux driver written and copyrighted
* by SMSC.
*
* H/W TCP & UDP Checksum Offloading
* ---------------------------------
* The chip supports both tx and rx offloading of UDP & TCP checksums, this
* feature can be dynamically enabled/disabled.
*
* RX checksuming is performed across bytes after the IPv4 header to the end of
* the Ethernet frame, this means if the frame is padded with non-zero values
* the H/W checksum will be incorrect, however the rx code compensates for this.
*
* TX checksuming is more complicated, the device requires a special header to
* be prefixed onto the start of the frame which indicates the start and end
* positions of the UDP or TCP frame. This requires the driver to manually
* go through the packet data and decode the headers prior to sending.
* On Linux they generally provide cues to the location of the csum and the
* area to calculate it over, on FreeBSD we seem to have to do it all
* ourselves, hence this is not as optimal and therefore h/w TX checksum is
* currently not implemented.
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_smsc.c,v 1.93 2022/08/20 14:08:59 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_usb.h"
#endif

#include <sys/param.h>

#include <dev/usb/usbnet.h>
#include <dev/usb/usbhist.h>

#include <dev/usb/if_smscreg.h>

#include "ioconf.h"

/* Per-instance driver state; the usbnet member must come first so the
 * softc can double as a struct usbnet (see KASSERT in smsc_attach). */
struct smsc_softc {
    struct usbnet   smsc_un;    /* generic usbnet state (must be first) */

    /*
     * The following stores the settings in the mac control (MAC_CSR)
     * register
     */
    uint32_t        sc_mac_csr;     /* cached copy of the MAC_CSR register */
    uint32_t        sc_rev_id;      /* chip ID/revision (read from ID_REV) */
    uint32_t        sc_coe_ctrl;    /* cached checksum-offload (COE_CTRL) value */
};

/* rx/tx transfer buffer sizes, chosen by USB speed in smsc_attach() */
#define SMSC_MIN_BUFSZ      2048
#define SMSC_MAX_BUFSZ      18944

/*
 * Various supported device vendors/products.
*/
static const struct usb_devno smsc_devs[] = {
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_LAN89530 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_LAN9530 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_LAN9730 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500A },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500A_ALT },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500A_HAL },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500A_SAL10 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500_ALT },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9500_SAL10 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9505 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9505A },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9505A_HAL },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9505A_SAL10 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9505_SAL10 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9512_14 },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9512_14_ALT },
    { USB_VENDOR_SMSC, USB_PRODUCT_SMSC_SMSC9512_14_SAL10 }
};

#ifdef USB_DEBUG
#ifndef USMSC_DEBUG
#define usmscdebug 0
#else
static int usmscdebug = 1;

/* Create the hw.usmsc.debug sysctl knob for the USBHIST debug macros. */
SYSCTL_SETUP(sysctl_hw_smsc_setup, "sysctl hw.usmsc setup")
{
    int err;
    const struct sysctlnode *rnode;
    const struct sysctlnode *cnode;

    err = sysctl_createv(clog, 0, NULL, &rnode,
        CTLFLAG_PERMANENT, CTLTYPE_NODE, "usmsc",
        SYSCTL_DESCR("usmsc global controls"),
        NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);

    if (err)
        goto fail;

    /* control debugging printfs */
    err = sysctl_createv(clog, 0, &rnode, &cnode,
        CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT,
        "debug", SYSCTL_DESCR("Enable debugging output"),
        NULL, 0, &usmscdebug, sizeof(usmscdebug), CTL_CREATE, CTL_EOL);
    if (err)
        goto fail;

    return;
fail:
    aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, err);
}
#endif /* SMSC_DEBUG */
#endif /* USB_DEBUG */

#define DPRINTF(FMT,A,B,C,D)    USBHIST_LOG(usmscdebug,FMT,A,B,C,D)
#define DPRINTFN(N,FMT,A,B,C,D) USBHIST_LOGN(usmscdebug,N,FMT,A,B,C,D)
#define USMSCHIST_FUNC()        USBHIST_FUNC()
#define USMSCHIST_CALLED()      USBHIST_CALLED(usmscdebug)

#define smsc_warn_printf(un, fmt, args...) \
    printf("%s: warning: " fmt, device_xname((un)->un_dev), ##args)

#define smsc_err_printf(un, fmt, args...) \
    printf("%s: error: " fmt, device_xname((un)->un_dev), ##args)

/* Function declarations */
static int  smsc_match(device_t, cfdata_t, void *);
static void smsc_attach(device_t, device_t, void *);

CFATTACH_DECL_NEW(usmsc, sizeof(struct smsc_softc), smsc_match, smsc_attach,
    usbnet_detach, usbnet_activate);

static int  smsc_chip_init(struct usbnet *);
static int  smsc_setmacaddress(struct usbnet *, const uint8_t *);
static int  smsc_uno_init(struct ifnet *);
static void smsc_uno_stop(struct ifnet *, int);
static void smsc_reset(struct smsc_softc *);
static void smsc_uno_miibus_statchg(struct ifnet *);
static int  smsc_readreg(struct usbnet *, uint32_t, uint32_t *);
static int  smsc_writereg(struct usbnet *, uint32_t, uint32_t);
static int  smsc_wait_for_bits(struct usbnet *, uint32_t, uint32_t);
static int  smsc_uno_miibus_readreg(struct usbnet *, int, int, uint16_t *);
static int  smsc_uno_miibus_writereg(struct usbnet *, int, int, uint16_t);
static int  smsc_uno_ioctl(struct ifnet *, u_long, void *);
static void smsc_uno_mcast(struct ifnet *);
static unsigned smsc_uno_tx_prepare(struct usbnet *, struct mbuf *,
                    struct usbnet_chain *);
static void smsc_uno_rx_loop(struct usbnet *, struct usbnet_chain *,
                    uint32_t);

/* Driver callbacks handed to the generic usbnet framework. */
static const struct usbnet_ops smsc_ops = {
    .uno_stop = smsc_uno_stop,
    .uno_ioctl = smsc_uno_ioctl,
    .uno_mcast = smsc_uno_mcast,
    .uno_read_reg = smsc_uno_miibus_readreg,
    .uno_write_reg = smsc_uno_miibus_writereg,
    .uno_statchg = smsc_uno_miibus_statchg,
    .uno_tx_prepare = smsc_uno_tx_prepare,
    .uno_rx_loop = smsc_uno_rx_loop,
    .uno_init = smsc_uno_init,
};

/*
 * Read a 32-bit device register via a vendor control request.
 * Returns 0 on success (or if the device is dying) and a non-zero
 * usbd_status on failure; *data receives the little-endian register
 * value converted to host order.
 */
static int
smsc_readreg(struct usbnet *un, uint32_t off, uint32_t *data)
{
    usb_device_request_t req;
    uint32_t buf;
    usbd_status err;

    if (usbnet_isdying(un))
        return 0;

    req.bmRequestType = UT_READ_VENDOR_DEVICE;
    req.bRequest = SMSC_UR_READ_REG;
    USETW(req.wValue, 0);
    USETW(req.wIndex, off);
    USETW(req.wLength, 4);

    err = usbd_do_request(un->un_udev, &req, &buf);
    if (err != 0)
        smsc_warn_printf(un, "Failed to read register 0x%0x\n", off);

    *data = le32toh(buf);

    return err;
}

/*
 * Write a 32-bit device register via a vendor control request.
 * Returns 0 on success (or if the device is dying), non-zero usbd_status
 * on failure.
 */
static int
smsc_writereg(struct usbnet *un, uint32_t off, uint32_t data)
{
    usb_device_request_t req;
    uint32_t buf;
    usbd_status err;

    if (usbnet_isdying(un))
        return 0;

    buf = htole32(data);

    req.bmRequestType = UT_WRITE_VENDOR_DEVICE;
    req.bRequest = SMSC_UR_WRITE_REG;
    USETW(req.wValue, 0);
    USETW(req.wIndex, off);
    USETW(req.wLength, 4);

    err = usbd_do_request(un->un_udev, &req, &buf);
    if (err != 0)
        smsc_warn_printf(un, "Failed to write register 0x%0x\n", off);

    return err;
}

/*
 * Poll (up to 100 iterations, 5us apart) until all of 'bits' clear in
 * register 'reg'.  Returns 0 when the bits cleared, ENXIO if the device
 * is dying, a read error if one occurred, or 1 on timeout.
 */
static int
smsc_wait_for_bits(struct usbnet *un, uint32_t reg, uint32_t bits)
{
    uint32_t val;
    int err, i;

    for (i = 0; i < 100; i++) {
        if (usbnet_isdying(un))
            return ENXIO;
        if ((err = smsc_readreg(un, reg, &val)) != 0)
            return err;
        if (!(val & bits))
            return 0;
        DELAY(5);
    }

    return 1;
}

/*
 * MII read callback: read PHY register 'reg' of 'phy' through the
 * chip's MII interface.  Only the single attached PHY (un_phyno) is
 * serviced.
 */
static int
smsc_uno_miibus_readreg(struct usbnet *un, int phy, int reg, uint16_t *val)
{
    uint32_t addr;
    uint32_t data = 0;

    if (un->un_phyno != phy) {
        *val = 0;
        return EINVAL;
    }

    if (smsc_wait_for_bits(un, SMSC_MII_ADDR, SMSC_MII_BUSY) != 0) {
        smsc_warn_printf(un, "MII is busy\n");
        *val = 0;
        return ETIMEDOUT;
    }

    addr = (phy << 11) | (reg << 6) | SMSC_MII_READ;
    smsc_writereg(un, SMSC_MII_ADDR, addr);

    if (smsc_wait_for_bits(un, SMSC_MII_ADDR, SMSC_MII_BUSY) != 0) {
        smsc_warn_printf(un, "MII read timeout\n");
        *val = 0;
        return ETIMEDOUT;
    }

    smsc_readreg(un, SMSC_MII_DATA, &data);

    *val = data & 0xffff;
    return 0;
}

/*
 * MII write callback: write 'val' to PHY register 'reg' of 'phy'.
 */
static int
smsc_uno_miibus_writereg(struct usbnet *un, int phy, int reg, uint16_t val)
{
    uint32_t addr;

    if (un->un_phyno != phy)
        return EINVAL;

    if (smsc_wait_for_bits(un, SMSC_MII_ADDR, SMSC_MII_BUSY) != 0) {
        smsc_warn_printf(un, "MII is busy\n");
        return ETIMEDOUT;
    }

    smsc_writereg(un, SMSC_MII_DATA, val);

    addr = (phy << 11) | (reg << 6) | SMSC_MII_WRITE;
    smsc_writereg(un, SMSC_MII_ADDR, addr);

    if (smsc_wait_for_bits(un, SMSC_MII_ADDR, SMSC_MII_BUSY) != 0) {
        smsc_warn_printf(un, "MII write timeout\n");
        return ETIMEDOUT;
    }
    return 0;
}

/*
 * Media status change callback: update link state and push the
 * duplex / flow-control configuration to MAC_CSR, FLOW and AFC_CFG.
 */
static void
smsc_uno_miibus_statchg(struct ifnet *ifp)
{
    USMSCHIST_FUNC(); USMSCHIST_CALLED();
    struct usbnet * const un = ifp->if_softc;

    if (usbnet_isdying(un))
        return;

    struct smsc_softc * const sc = usbnet_softc(un);
    struct mii_data * const mii = usbnet_mii(un);
    uint32_t flow;
    uint32_t afc_cfg;

    if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
        (IFM_ACTIVE | IFM_AVALID)) {
        switch (IFM_SUBTYPE(mii->mii_media_active)) {
        case IFM_10_T:
        case IFM_100_TX:
            usbnet_set_link(un, true);
            break;
        case IFM_1000_T:
            /* Gigabit ethernet not supported by chipset */
            break;
        default:
            break;
        }
    }

    /* Lost link, do nothing. */
    if (!usbnet_havelink(un))
        return;

    int err = smsc_readreg(un, SMSC_AFC_CFG, &afc_cfg);
    if (err) {
        smsc_warn_printf(un, "failed to read initial AFC_CFG, "
            "error %d\n", err);
        return;
    }

    /* Enable/disable full duplex operation and TX/RX pause */
    if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) {
        DPRINTF("full duplex operation", 0, 0, 0, 0);
        sc->sc_mac_csr &= ~SMSC_MAC_CSR_RCVOWN;
        sc->sc_mac_csr |= SMSC_MAC_CSR_FDPX;

        if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0)
            flow = 0xffff0002;
        else
            flow = 0;

        if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0)
            afc_cfg |= 0xf;
        else
            afc_cfg &= ~0xf;
    } else {
        DPRINTF("half duplex operation", 0, 0, 0, 0);
        sc->sc_mac_csr &= ~SMSC_MAC_CSR_FDPX;
        sc->sc_mac_csr |= SMSC_MAC_CSR_RCVOWN;

        flow = 0;
        afc_cfg |= 0xf;
    }

    err = smsc_writereg(un, SMSC_MAC_CSR, sc->sc_mac_csr);
    err += smsc_writereg(un, SMSC_FLOW, flow);
    err += smsc_writereg(un, SMSC_AFC_CFG, afc_cfg);
    if (err)
        smsc_warn_printf(un, "media change failed, error %d\n", err);
}

/* 6-bit multicast hash: top bits of the big-endian CRC32 of the address. */
static inline uint32_t
smsc_hash(uint8_t addr[ETHER_ADDR_LEN])
{
    return (ether_crc32_be(addr, ETHER_ADDR_LEN) >> 26) & 0x3f;
}

/*
 * Program the multicast filter: promiscuous/range entries fall back to
 * all-multi (MCPAS), otherwise build the 64-bit hash table and enable
 * hash-perfect filtering (HPFILT).
 */
static void
smsc_uno_mcast(struct ifnet *ifp)
{
    USMSCHIST_FUNC(); USMSCHIST_CALLED();
    struct usbnet * const un = ifp->if_softc;
    struct smsc_softc * const sc = usbnet_softc(un);
    struct ethercom *ec = usbnet_ec(un);
    struct ether_multi *enm;
    struct ether_multistep step;
    uint32_t hashtbl[2] = { 0, 0 };
    uint32_t hash;

    if (usbnet_isdying(un))
        return;

    if (usbnet_ispromisc(un)) {
        ETHER_LOCK(ec);
allmulti:
        ec->ec_flags |= ETHER_F_ALLMULTI;
        ETHER_UNLOCK(ec);
        DPRINTF("receive all multicast enabled", 0, 0, 0, 0);
        sc->sc_mac_csr |= SMSC_MAC_CSR_MCPAS;
        sc->sc_mac_csr &= ~SMSC_MAC_CSR_HPFILT;
        smsc_writereg(un, SMSC_MAC_CSR, sc->sc_mac_csr);
        return;
    } else {
        sc->sc_mac_csr |= SMSC_MAC_CSR_HPFILT;
        sc->sc_mac_csr &= ~(SMSC_MAC_CSR_PRMS | SMSC_MAC_CSR_MCPAS);
    }

    ETHER_LOCK(ec);
    ETHER_FIRST_MULTI(step, ec, enm);
    while (enm != NULL) {
        /* an address range can't be hashed - go all-multi */
        if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
            ETHER_ADDR_LEN)) {
            goto allmulti;
        }
        hash = smsc_hash(enm->enm_addrlo);
        hashtbl[hash >> 5] |= 1 << (hash & 0x1F);
        ETHER_NEXT_MULTI(step, enm);
    }
    ec->ec_flags &= ~ETHER_F_ALLMULTI;
    ETHER_UNLOCK(ec);

    /* Debug */
    if (sc->sc_mac_csr & SMSC_MAC_CSR_HPFILT) {
        DPRINTF("receive select group of macs", 0, 0, 0, 0);
    } else {
        DPRINTF("receive own packets only", 0, 0, 0, 0);
    }

    /* Write the hash table and mac control registers */

    //XXX should we be doing this?
    smsc_writereg(un, SMSC_HASHH, hashtbl[1]);
    smsc_writereg(un, SMSC_HASHL, hashtbl[0]);
    smsc_writereg(un, SMSC_MAC_CSR, sc->sc_mac_csr);
}

/*
 * Enable/disable the hardware checksum offload engine (COE_CTRL)
 * according to the interface's current if_capenable; caches the value
 * in sc_coe_ctrl for the rx path.  Caller holds the ifnet lock.
 */
static int
smsc_setoe_locked(struct usbnet *un)
{
    struct smsc_softc * const sc = usbnet_softc(un);
    struct ifnet * const ifp = usbnet_ifp(un);
    uint32_t val;
    int err;

    KASSERT(IFNET_LOCKED(ifp));

    err = smsc_readreg(un, SMSC_COE_CTRL, &val);
    if (err != 0) {
        smsc_warn_printf(un,
            "failed to read SMSC_COE_CTRL (err=%d)\n", err);
        return err;
    }

    /* Enable/disable the Rx checksum */
    if (ifp->if_capenable & (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx))
        val |= (SMSC_COE_CTRL_RX_EN | SMSC_COE_CTRL_RX_MODE);
    else
        val &= ~(SMSC_COE_CTRL_RX_EN | SMSC_COE_CTRL_RX_MODE);

    /* Enable/disable the Tx checksum (currently not supported) */
    if (ifp->if_capenable & (IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx))
        val |= SMSC_COE_CTRL_TX_EN;
    else
        val &= ~SMSC_COE_CTRL_TX_EN;

    sc->sc_coe_ctrl = val;

    err = smsc_writereg(un, SMSC_COE_CTRL, val);
    if (err != 0) {
        smsc_warn_printf(un,
            "failed to write SMSC_COE_CTRL (err=%d)\n", err);
        return err;
    }

    return 0;
}

/*
 * Program the station address into MAC_ADDRL/MAC_ADDRH
 * (little-endian byte packing).
 */
static int
smsc_setmacaddress(struct usbnet *un, const uint8_t *addr)
{
    USMSCHIST_FUNC(); USMSCHIST_CALLED();
    int err;
    uint32_t val;

    DPRINTF("setting mac address to %02jx:%02jx:%02jx:...", addr[0],
        addr[1], addr[2], 0);
    DPRINTF("... %02jx:%02jx:%02jx", addr[3], addr[4], addr[5], 0);

    val = ((uint32_t)addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) |
        addr[0];
    if ((err = smsc_writereg(un, SMSC_MAC_ADDRL, val)) != 0)
        goto done;

    val = (addr[5] << 8) | addr[4];
    err = smsc_writereg(un, SMSC_MAC_ADDRH, val);

done:
    return err;
}

/*
 * Reset the chip by re-running the full init sequence.
 */
static void
smsc_reset(struct smsc_softc *sc)
{
    struct usbnet * const un = &sc->smsc_un;

    if (usbnet_isdying(un))
        return;

    /* Wait a little while for the chip to get its brains in order. */
    DELAY(1000);

    /* Reinitialize controller to achieve full reset.
*/ smsc_chip_init(un); } static int smsc_uno_init(struct ifnet *ifp) { struct usbnet * const un = ifp->if_softc; struct smsc_softc * const sc = usbnet_softc(un); /* Reset the ethernet interface. */ smsc_reset(sc); /* TCP/UDP checksum offload engines. */ smsc_setoe_locked(un); return 0; } static void smsc_uno_stop(struct ifnet *ifp, int disable) { struct usbnet * const un = ifp->if_softc; struct smsc_softc * const sc = usbnet_softc(un); // XXXNH didn't do this before smsc_reset(sc); } static int smsc_chip_init(struct usbnet *un) { struct smsc_softc * const sc = usbnet_softc(un); uint32_t reg_val; int burst_cap; int err; /* Enter H/W config mode */ smsc_writereg(un, SMSC_HW_CFG, SMSC_HW_CFG_LRST); if ((err = smsc_wait_for_bits(un, SMSC_HW_CFG, SMSC_HW_CFG_LRST)) != 0) { smsc_warn_printf(un, "timed-out waiting for reset to " "complete\n"); goto init_failed; } /* Reset the PHY */ smsc_writereg(un, SMSC_PM_CTRL, SMSC_PM_CTRL_PHY_RST); if ((err = smsc_wait_for_bits(un, SMSC_PM_CTRL, SMSC_PM_CTRL_PHY_RST)) != 0) { smsc_warn_printf(un, "timed-out waiting for phy reset to " "complete\n"); goto init_failed; } usbd_delay_ms(un->un_udev, 40); /* Set the mac address */ struct ifnet * const ifp = usbnet_ifp(un); const char *eaddr = CLLADDR(ifp->if_sadl); if ((err = smsc_setmacaddress(un, eaddr)) != 0) { smsc_warn_printf(un, "failed to set the MAC address\n"); goto init_failed; } /* * Don't know what the HW_CFG_BIR bit is, but following the reset * sequence as used in the Linux driver. */ if ((err = smsc_readreg(un, SMSC_HW_CFG, ®_val)) != 0) { smsc_warn_printf(un, "failed to read HW_CFG: %d\n", err); goto init_failed; } reg_val |= SMSC_HW_CFG_BIR; smsc_writereg(un, SMSC_HW_CFG, reg_val); /* * There is a so called 'turbo mode' that the linux driver supports, it * seems to allow you to jam multiple frames per Rx transaction. * By default this driver supports that and therefore allows multiple * frames per USB transfer. 
* * The xfer buffer size needs to reflect this as well, therefore based * on the calculations in the Linux driver the RX bufsize is set to * 18944, * bufsz = (16 * 1024 + 5 * 512) * * Burst capability is the number of URBs that can be in a burst of * data/ethernet frames. */ if (un->un_udev->ud_speed == USB_SPEED_HIGH) burst_cap = 37; else burst_cap = 128; smsc_writereg(un, SMSC_BURST_CAP, burst_cap); /* Set the default bulk in delay (magic value from Linux driver) */ smsc_writereg(un, SMSC_BULK_IN_DLY, 0x00002000); /* * Initialise the RX interface */ if ((err = smsc_readreg(un, SMSC_HW_CFG, ®_val)) < 0) { smsc_warn_printf(un, "failed to read HW_CFG: (err = %d)\n", err); goto init_failed; } /* * The following settings are used for 'turbo mode', a.k.a multiple * frames per Rx transaction (again info taken form Linux driver). */ reg_val |= (SMSC_HW_CFG_MEF | SMSC_HW_CFG_BCE); /* * set Rx data offset to ETHER_ALIGN which will make the IP header * align on a word boundary. */ reg_val |= ETHER_ALIGN << SMSC_HW_CFG_RXDOFF_SHIFT; smsc_writereg(un, SMSC_HW_CFG, reg_val); /* Clear the status register ? 
*/ smsc_writereg(un, SMSC_INTR_STATUS, 0xffffffff); /* Read and display the revision register */ if ((err = smsc_readreg(un, SMSC_ID_REV, &sc->sc_rev_id)) < 0) { smsc_warn_printf(un, "failed to read ID_REV (err = %d)\n", err); goto init_failed; } /* GPIO/LED setup */ reg_val = SMSC_LED_GPIO_CFG_SPD_LED | SMSC_LED_GPIO_CFG_LNK_LED | SMSC_LED_GPIO_CFG_FDX_LED; smsc_writereg(un, SMSC_LED_GPIO_CFG, reg_val); /* * Initialise the TX interface */ smsc_writereg(un, SMSC_FLOW, 0); smsc_writereg(un, SMSC_AFC_CFG, AFC_CFG_DEFAULT); /* Read the current MAC configuration */ if ((err = smsc_readreg(un, SMSC_MAC_CSR, &sc->sc_mac_csr)) < 0) { smsc_warn_printf(un, "failed to read MAC_CSR (err=%d)\n", err); goto init_failed; } /* disable pad stripping, collides with checksum offload */ sc->sc_mac_csr &= ~SMSC_MAC_CSR_PADSTR; /* Vlan */ smsc_writereg(un, SMSC_VLAN1, (uint32_t)ETHERTYPE_VLAN); /* * Start TX */ sc->sc_mac_csr |= SMSC_MAC_CSR_TXEN; smsc_writereg(un, SMSC_MAC_CSR, sc->sc_mac_csr); smsc_writereg(un, SMSC_TX_CFG, SMSC_TX_CFG_ON); /* * Start RX */ sc->sc_mac_csr |= SMSC_MAC_CSR_RXEN; smsc_writereg(un, SMSC_MAC_CSR, sc->sc_mac_csr); return 0; init_failed: smsc_err_printf(un, "smsc_chip_init failed (err=%d)\n", err); return err; } static int smsc_uno_ioctl(struct ifnet *ifp, u_long cmd, void *data) { struct usbnet * const un = ifp->if_softc; switch (cmd) { case SIOCSIFCAP: smsc_setoe_locked(un); break; default: break; } return 0; } static int smsc_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; return (usb_lookup(smsc_devs, uaa->uaa_vendor, uaa->uaa_product) != NULL) ? 
UMATCH_VENDOR_PRODUCT : UMATCH_NONE;
}

/*
 * Autoconf attach: set up the usbnet glue, locate the bulk endpoints,
 * size the transfer buffers by USB speed, and determine the MAC address
 * (device property, then chip registers, else the FF:...:FF fallback).
 */
static void
smsc_attach(device_t parent, device_t self, void *aux)
{
    USBNET_MII_DECL_DEFAULT(unm);
    struct smsc_softc * const sc = device_private(self);
    struct usbnet * const un = &sc->smsc_un;
    struct usb_attach_arg *uaa = aux;
    struct usbd_device *dev = uaa->uaa_device;
    usb_interface_descriptor_t *id;
    usb_endpoint_descriptor_t *ed;
    char *devinfop;
    unsigned bufsz;
    int err, i;
    uint32_t mac_h, mac_l;

    /* softc doubles as the usbnet (smsc_un is the first member) */
    KASSERT((void *)sc == un);

    aprint_naive("\n");
    aprint_normal("\n");

    un->un_dev = self;
    un->un_udev = dev;
    un->un_sc = sc;
    un->un_ops = &smsc_ops;
    un->un_rx_xfer_flags = USBD_SHORT_XFER_OK;
    un->un_tx_xfer_flags = USBD_FORCE_SHORT_XFER;
    un->un_rx_list_cnt = SMSC_RX_LIST_CNT;
    un->un_tx_list_cnt = SMSC_TX_LIST_CNT;

    devinfop = usbd_devinfo_alloc(un->un_udev, 0);
    aprint_normal_dev(self, "%s\n", devinfop);
    usbd_devinfo_free(devinfop);

    err = usbd_set_config_no(dev, SMSC_CONFIG_INDEX, 1);
    if (err) {
        aprint_error_dev(self, "failed to set configuration"
            ", err=%s\n", usbd_errstr(err));
        return;
    }

    /* Setup the endpoints for the SMSC LAN95xx device(s) */
    err = usbd_device2interface_handle(dev, SMSC_IFACE_IDX,
        &un->un_iface);
    if (err) {
        aprint_error_dev(self, "getting interface handle failed\n");
        return;
    }

    id = usbd_get_interface_descriptor(un->un_iface);

    /* high speed gets the large 'turbo mode' buffers */
    if (dev->ud_speed >= USB_SPEED_HIGH) {
        bufsz = SMSC_MAX_BUFSZ;
    } else {
        bufsz = SMSC_MIN_BUFSZ;
    }
    un->un_rx_bufsz = bufsz;
    un->un_tx_bufsz = bufsz;

    /* Find endpoints. */
    for (i = 0; i < id->bNumEndpoints; i++) {
        ed = usbd_interface2endpoint_descriptor(un->un_iface, i);
        if (!ed) {
            aprint_error_dev(self, "couldn't get ep %d\n", i);
            return;
        }
        if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN &&
            UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
            un->un_ed[USBNET_ENDPT_RX] = ed->bEndpointAddress;
        } else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT &&
            UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
            un->un_ed[USBNET_ENDPT_TX] = ed->bEndpointAddress;
#if 0 /* not used yet */
        } else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN &&
            UE_GET_XFERTYPE(ed->bmAttributes) == UE_INTERRUPT) {
            un->un_ed[USBNET_ENDPT_INTR] = ed->bEndpointAddress;
#endif
        }
    }

    usbnet_attach(un);

#ifdef notyet
    /*
     * We can do TCPv4, and UDPv4 checksums in hardware.
     */
    struct ifnet *ifp = usbnet_ifp(un);

    ifp->if_capabilities |=
        /*IFCAP_CSUM_TCPv4_Tx |*/ IFCAP_CSUM_TCPv4_Rx |
        /*IFCAP_CSUM_UDPv4_Tx |*/ IFCAP_CSUM_UDPv4_Rx;
#endif
    struct ethercom *ec = usbnet_ec(un);
    ec->ec_capabilities = ETHERCAP_VLAN_MTU;

    /* Setup some of the basics */
    un->un_phyno = 1;

    /*
     * Attempt to get the mac address, if an EEPROM is not attached this
     * will just return FF:FF:FF:FF:FF:FF, so in such cases we invent a
     * MAC address based on urandom.
     */
    memset(un->un_eaddr, 0xff, ETHER_ADDR_LEN);

    prop_dictionary_t dict = device_properties(self);
    prop_data_t eaprop = prop_dictionary_get(dict, "mac-address");

    if (eaprop != NULL) {
        KASSERT(prop_object_type(eaprop) == PROP_TYPE_DATA);
        KASSERT(prop_data_size(eaprop) == ETHER_ADDR_LEN);
        memcpy(un->un_eaddr, prop_data_value(eaprop),
            ETHER_ADDR_LEN);
    } else {
        /* Check if there is already a MAC address in the register */
        if ((smsc_readreg(un, SMSC_MAC_ADDRL, &mac_l) == 0) &&
            (smsc_readreg(un, SMSC_MAC_ADDRH, &mac_h) == 0)) {
            un->un_eaddr[5] = (uint8_t)((mac_h >> 8) & 0xff);
            un->un_eaddr[4] = (uint8_t)((mac_h) & 0xff);
            un->un_eaddr[3] = (uint8_t)((mac_l >> 24) & 0xff);
            un->un_eaddr[2] = (uint8_t)((mac_l >> 16) & 0xff);
            un->un_eaddr[1] = (uint8_t)((mac_l >> 8) & 0xff);
            un->un_eaddr[0] = (uint8_t)((mac_l) & 0xff);
        }
    }

    usbnet_attach_ifp(un, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST,
        0, &unm);
}

/*
 * RX callback: walk the transfer buffer, which may contain several
 * frames ('turbo mode'), each prefixed by a 32-bit little-endian rx
 * status header, validating lengths and extracting the hardware
 * checksum when offload is enabled, then hand each frame to usbnet.
 */
static void
smsc_uno_rx_loop(struct usbnet *un, struct usbnet_chain *c,
    uint32_t total_len)
{
    USMSCHIST_FUNC(); USMSCHIST_CALLED();
    struct smsc_softc * const sc = usbnet_softc(un);
    struct ifnet *ifp = usbnet_ifp(un);
    uint8_t *buf = c->unc_buf;
    int count;

    count = 0;
    DPRINTF("total_len %jd/%#jx", total_len, total_len, 0, 0);
    while (total_len != 0) {
        uint32_t rxhdr;

        if (total_len < sizeof(rxhdr)) {
            DPRINTF("total_len %jd < sizeof(rxhdr) %jd",
                total_len, sizeof(rxhdr), 0, 0);
            if_statinc(ifp, if_ierrors);
            return;
        }

        memcpy(&rxhdr, buf, sizeof(rxhdr));
        rxhdr = le32toh(rxhdr);
        buf += sizeof(rxhdr);
        total_len -= sizeof(rxhdr);

        if (rxhdr & SMSC_RX_STAT_COLLISION)
            if_statinc(ifp, if_collisions);

        if (rxhdr & (SMSC_RX_STAT_ERROR
               | SMSC_RX_STAT_LENGTH_ERROR
               | SMSC_RX_STAT_MII_ERROR)) {
            DPRINTF("rx error (hdr 0x%08jx)", rxhdr, 0, 0, 0);
            if_statinc(ifp, if_ierrors);
            return;
        }

        uint16_t pktlen = (uint16_t)SMSC_RX_STAT_FRM_LENGTH(rxhdr);
        DPRINTF("total_len %jd pktlen %jd rxhdr 0x%08jx", total_len,
            pktlen, rxhdr, 0);

        if (pktlen < ETHER_HDR_LEN) {
            DPRINTF("pktlen %jd < ETHER_HDR_LEN %jd", pktlen,
                ETHER_HDR_LEN, 0, 0);
            if_statinc(ifp, if_ierrors);
            return;
        }

        pktlen += ETHER_ALIGN;

        if (pktlen > MCLBYTES) {
            DPRINTF("pktlen %jd > MCLBYTES %jd", pktlen,
                MCLBYTES, 0, 0);
            if_statinc(ifp, if_ierrors);
            return;
        }

        if (pktlen > total_len) {
            DPRINTF("pktlen %jd > total_len %jd", pktlen,
                total_len, 0, 0);
            if_statinc(ifp, if_ierrors);
            return;
        }

        uint8_t *pktbuf = buf + ETHER_ALIGN;
        size_t buflen = pktlen - ETHER_ALIGN;
        int mbuf_flags = M_HASFCS;
        int csum_flags = 0;
        uint16_t csum_data = 0;

        /* NOTE(review): the guard above only rejects pktlen >
         * MCLBYTES, so pktlen == MCLBYTES would trip this strict
         * assertion - boundary worth confirming. */
        KASSERT(pktlen < MCLBYTES);

        /* Check if RX TCP/UDP checksumming is being offloaded */
        if (sc->sc_coe_ctrl & SMSC_COE_CTRL_RX_EN) {
            DPRINTF("RX checksum offload checking", 0, 0, 0, 0);
            struct ether_header *eh =
                (struct ether_header *)pktbuf;
            const size_t cssz = sizeof(csum_data);

            /* Remove the extra 2 bytes of the csum */
            buflen -= cssz;

            /*
             * The checksum appears to be simplistically
             * calculated over the udp/tcp header and data up to
             * the end of the eth frame.  Which means if the eth
             * frame is padded the csum calculation is incorrectly
             * performed over the padding bytes as well.
             * Therefore to be safe we ignore the H/W csum on
             * frames less than or equal to 64 bytes.
             *
             * Ignore H/W csum for non-IPv4 packets.
             */
            DPRINTF("Ethertype %02jx pktlen %02jx",
                be16toh(eh->ether_type), pktlen, 0, 0);
            if (be16toh(eh->ether_type) == ETHERTYPE_IP &&
                pktlen > ETHER_MIN_LEN) {
                csum_flags |=
                    (M_CSUM_TCPv4 | M_CSUM_UDPv4 |
                     M_CSUM_DATA);

                /*
                 * Copy the TCP/UDP checksum from the last 2
                 * bytes of the transfer and put in the
                 * csum_data field.
                 */
                memcpy(&csum_data, buf + pktlen - cssz,
                    cssz);

                /*
                 * The data is copied in network order, but
                 * the csum algorithm in the kernel expects
                 * it to be in host network order.
                 */
                csum_data = ntohs(csum_data);
                DPRINTF("RX checksum offloaded (0x%04jx)",
                    csum_data, 0, 0, 0);
            }
        }

        /* round up to next longword */
        pktlen = (pktlen + 3) & ~0x3;

        /* total_len does not include the padding */
        if (pktlen > total_len)
            pktlen = total_len;

        buf += pktlen;
        total_len -= pktlen;

        /* push the packet up */
        usbnet_enqueue(un, pktbuf, buflen, csum_flags, csum_data,
            mbuf_flags);
        count++;
    }

    if (count != 0)
        rnd_add_uint32(usbnet_rndsrc(un), count);
}

/*
 * TX callback: prefix the frame with the two 32-bit little-endian TX
 * command words and copy the mbuf data in; returns the total transfer
 * length, or 0 if the packet does not fit in the tx buffer.
 */
static unsigned
smsc_uno_tx_prepare(struct usbnet *un, struct mbuf *m,
    struct usbnet_chain *c)
{
    uint32_t txhdr;
    uint32_t frm_len = 0;
    const size_t hdrsz = sizeof(txhdr) * 2;

    if ((unsigned)m->m_pkthdr.len > un->un_tx_bufsz - hdrsz)
        return 0;

    /*
     * Each frame is prefixed with two 32-bit values describing the
     * length of the packet and buffer.
     */
    txhdr = SMSC_TX_CTRL_0_BUF_SIZE(m->m_pkthdr.len) |
        SMSC_TX_CTRL_0_FIRST_SEG | SMSC_TX_CTRL_0_LAST_SEG;
    txhdr = htole32(txhdr);
    memcpy(c->unc_buf, &txhdr, sizeof(txhdr));

    txhdr = SMSC_TX_CTRL_1_PKT_LENGTH(m->m_pkthdr.len);
    txhdr = htole32(txhdr);
    memcpy(c->unc_buf + sizeof(txhdr), &txhdr, sizeof(txhdr));

    frm_len += hdrsz;

    /* Next copy in the actual packet */
    m_copydata(m, 0, m->m_pkthdr.len, c->unc_buf + frm_len);
    frm_len += m->m_pkthdr.len;

    return frm_len;
}

#ifdef _MODULE
#include "ioconf.c"
#endif

USBNET_MODULE(smsc)
| 1005 1850 1852 1850 1848 438 436 438 437 437 437 2503 2424 2500 2504 2499 2502 843 2420 957 2421 2420 2500 2500 2502 2501 1975 1973 1971 1975 1974 1139 1138 1993 1994 1994 1991 1992 1993 1992 1994 1994 1587 1588 232 1481 1480 1356 1941 1942 1942 1940 1942 1552 1550 1552 1551 1942 1939 1940 1936 1940 1940 1939 1937 476 1061 193 1360 1359 533 477 1359 193 652 651 558 558 191 191 652 652 54 54 45 20 20 20 645 558 558 556 192 651 652 649 265 334 335 335 335 333 33 33 313 13 334 1427 1429 1358 673 334 335 335 650 371 370 370 369 371 174 109 101 75 75 75 75 54 15 55 55 50 162 299 299 274 255 254 255 254 231 231 231 116 116 116 230 230 230 1080 651 1081 574 438 651 648 1081 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 
858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 | /* $NetBSD: rb.c,v 1.16 2021/09/16 21:29:41 andvar Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas <matt@3am-software.com>. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #if HAVE_NBTOOL_CONFIG_H #include "nbtool_config.h" #endif #if !defined(_KERNEL) && !defined(_STANDALONE) #include <sys/types.h> #include <stddef.h> #include <assert.h> #include <stdbool.h> #ifdef RBDEBUG #define KASSERT(s) assert(s) #define __rbt_unused #else #define KASSERT(s) do { } while (/*CONSTCOND*/ 0) #define __rbt_unused __unused #endif __RCSID("$NetBSD: rb.c,v 1.16 2021/09/16 21:29:41 andvar Exp $"); #else #include <lib/libkern/libkern.h> __KERNEL_RCSID(0, "$NetBSD: rb.c,v 1.16 2021/09/16 21:29:41 andvar Exp $"); #ifndef DIAGNOSTIC #define __rbt_unused __unused #else #define __rbt_unused #endif #endif #ifdef _LIBC __weak_alias(rb_tree_init, _rb_tree_init) __weak_alias(rb_tree_find_node, _rb_tree_find_node) __weak_alias(rb_tree_find_node_geq, _rb_tree_find_node_geq) __weak_alias(rb_tree_find_node_leq, _rb_tree_find_node_leq) __weak_alias(rb_tree_insert_node, _rb_tree_insert_node) __weak_alias(rb_tree_remove_node, _rb_tree_remove_node) __weak_alias(rb_tree_iterate, _rb_tree_iterate) #ifdef RBDEBUG __weak_alias(rb_tree_check, _rb_tree_check) __weak_alias(rb_tree_depths, _rb_tree_depths) #endif #include "namespace.h" #endif #ifdef RBTEST #include "rbtree.h" #else #include <sys/rbtree.h> #endif static void rb_tree_insert_rebalance(struct rb_tree *, struct rb_node *); static void rb_tree_removal_rebalance(struct rb_tree *, struct rb_node *, unsigned int); #ifdef RBDEBUG static const struct rb_node *rb_tree_iterate_const(const struct rb_tree *, const struct rb_node *, const unsigned int); static bool rb_tree_check_node(const struct rb_tree *, const struct rb_node *, const struct rb_node *, bool); #else #define rb_tree_check_node(a, b, c, d) true #endif #define RB_NODETOITEM(rbto, rbn) \ ((void *)((uintptr_t)(rbn) - (rbto)->rbto_node_offset)) #define RB_ITEMTONODE(rbto, rbn) \ ((rb_node_t *)((uintptr_t)(rbn) + (rbto)->rbto_node_offset)) #define RB_SENTINEL_NODE NULL void rb_tree_init(struct rb_tree *rbt, const rb_tree_ops_t *ops) { rbt->rbt_ops = ops; 
rbt->rbt_root = RB_SENTINEL_NODE; RB_TAILQ_INIT(&rbt->rbt_nodes); #ifndef RBSMALL rbt->rbt_minmax[RB_DIR_LEFT] = rbt->rbt_root; /* minimum node */ rbt->rbt_minmax[RB_DIR_RIGHT] = rbt->rbt_root; /* maximum node */ #endif #ifdef RBSTATS rbt->rbt_count = 0; rbt->rbt_insertions = 0; rbt->rbt_removals = 0; rbt->rbt_insertion_rebalance_calls = 0; rbt->rbt_insertion_rebalance_passes = 0; rbt->rbt_removal_rebalance_calls = 0; rbt->rbt_removal_rebalance_passes = 0; #endif } void * rb_tree_find_node(struct rb_tree *rbt, const void *key) { const rb_tree_ops_t *rbto = rbt->rbt_ops; rbto_compare_key_fn compare_key = rbto->rbto_compare_key; struct rb_node *parent = rbt->rbt_root; while (!RB_SENTINEL_P(parent)) { void *pobj = RB_NODETOITEM(rbto, parent); const signed int diff = (*compare_key)(rbto->rbto_context, pobj, key); if (diff == 0) return pobj; parent = parent->rb_nodes[diff < 0]; } return NULL; } void * rb_tree_find_node_geq(struct rb_tree *rbt, const void *key) { const rb_tree_ops_t *rbto = rbt->rbt_ops; rbto_compare_key_fn compare_key = rbto->rbto_compare_key; struct rb_node *parent = rbt->rbt_root, *last = NULL; while (!RB_SENTINEL_P(parent)) { void *pobj = RB_NODETOITEM(rbto, parent); const signed int diff = (*compare_key)(rbto->rbto_context, pobj, key); if (diff == 0) return pobj; if (diff > 0) last = parent; parent = parent->rb_nodes[diff < 0]; } return last == NULL ? NULL : RB_NODETOITEM(rbto, last); } void * rb_tree_find_node_leq(struct rb_tree *rbt, const void *key) { const rb_tree_ops_t *rbto = rbt->rbt_ops; rbto_compare_key_fn compare_key = rbto->rbto_compare_key; struct rb_node *parent = rbt->rbt_root, *last = NULL; while (!RB_SENTINEL_P(parent)) { void *pobj = RB_NODETOITEM(rbto, parent); const signed int diff = (*compare_key)(rbto->rbto_context, pobj, key); if (diff == 0) return pobj; if (diff < 0) last = parent; parent = parent->rb_nodes[diff < 0]; } return last == NULL ? 
NULL : RB_NODETOITEM(rbto, last); } void * rb_tree_insert_node(struct rb_tree *rbt, void *object) { const rb_tree_ops_t *rbto = rbt->rbt_ops; rbto_compare_nodes_fn compare_nodes = rbto->rbto_compare_nodes; struct rb_node *parent, *tmp, *self = RB_ITEMTONODE(rbto, object); unsigned int position; bool rebalance; RBSTAT_INC(rbt->rbt_insertions); tmp = rbt->rbt_root; /* * This is a hack. Because rbt->rbt_root is just a struct rb_node *, * just like rb_node->rb_nodes[RB_DIR_LEFT], we can use this fact to * avoid a lot of tests for root and know that even at root, * updating RB_FATHER(rb_node)->rb_nodes[RB_POSITION(rb_node)] will * update rbt->rbt_root. */ parent = (struct rb_node *)(void *)&rbt->rbt_root; position = RB_DIR_LEFT; /* * Find out where to place this new leaf. */ while (!RB_SENTINEL_P(tmp)) { void *tobj = RB_NODETOITEM(rbto, tmp); const signed int diff = (*compare_nodes)(rbto->rbto_context, tobj, object); if (__predict_false(diff == 0)) { /* * Node already exists; return it. */ return tobj; } parent = tmp; position = (diff < 0); tmp = parent->rb_nodes[position]; } #ifdef RBDEBUG { struct rb_node *prev = NULL, *next = NULL; if (position == RB_DIR_RIGHT) prev = parent; else if (tmp != rbt->rbt_root) next = parent; /* * Verify our sequential position */ KASSERT(prev == NULL || !RB_SENTINEL_P(prev)); KASSERT(next == NULL || !RB_SENTINEL_P(next)); if (prev != NULL && next == NULL) next = TAILQ_NEXT(prev, rb_link); if (prev == NULL && next != NULL) prev = TAILQ_PREV(next, rb_node_qh, rb_link); KASSERT(prev == NULL || !RB_SENTINEL_P(prev)); KASSERT(next == NULL || !RB_SENTINEL_P(next)); KASSERT(prev == NULL || (*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, prev), RB_NODETOITEM(rbto, self)) < 0); KASSERT(next == NULL || (*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, self), RB_NODETOITEM(rbto, next)) < 0); } #endif /* * Initialize the node and insert as a leaf into the tree. 
*/ RB_SET_FATHER(self, parent); RB_SET_POSITION(self, position); if (__predict_false(parent == (struct rb_node *)(void *)&rbt->rbt_root)) { RB_MARK_BLACK(self); /* root is always black */ #ifndef RBSMALL rbt->rbt_minmax[RB_DIR_LEFT] = self; rbt->rbt_minmax[RB_DIR_RIGHT] = self; #endif rebalance = false; } else { KASSERT(position == RB_DIR_LEFT || position == RB_DIR_RIGHT); #ifndef RBSMALL /* * Keep track of the minimum and maximum nodes. If our * parent is a minmax node and we on their min/max side, * we must be the new min/max node. */ if (parent == rbt->rbt_minmax[position]) rbt->rbt_minmax[position] = self; #endif /* !RBSMALL */ /* * All new nodes are colored red. We only need to rebalance * if our parent is also red. */ RB_MARK_RED(self); rebalance = RB_RED_P(parent); } KASSERT(RB_SENTINEL_P(parent->rb_nodes[position])); self->rb_left = parent->rb_nodes[position]; self->rb_right = parent->rb_nodes[position]; parent->rb_nodes[position] = self; KASSERT(RB_CHILDLESS_P(self)); /* * Insert the new node into a sorted list for easy sequential access */ RBSTAT_INC(rbt->rbt_count); #ifdef RBDEBUG if (RB_ROOT_P(rbt, self)) { RB_TAILQ_INSERT_HEAD(&rbt->rbt_nodes, self, rb_link); } else if (position == RB_DIR_LEFT) { KASSERT((*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, self), RB_NODETOITEM(rbto, RB_FATHER(self))) < 0); RB_TAILQ_INSERT_BEFORE(RB_FATHER(self), self, rb_link); } else { KASSERT((*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, RB_FATHER(self)), RB_NODETOITEM(rbto, self)) < 0); RB_TAILQ_INSERT_AFTER(&rbt->rbt_nodes, RB_FATHER(self), self, rb_link); } #endif KASSERT(rb_tree_check_node(rbt, self, NULL, !rebalance)); /* * Rebalance tree after insertion */ if (rebalance) { rb_tree_insert_rebalance(rbt, self); KASSERT(rb_tree_check_node(rbt, self, NULL, true)); } /* Successfully inserted, return our node pointer. */ return object; } /* * Swap the location and colors of 'self' and its child @ which. The child * can not be a sentinel node. 
This is our rotation function. However, * since it preserves coloring, it great simplifies both insertion and * removal since rotation almost always involves the exchanging of colors * as a separate step. */ static void rb_tree_reparent_nodes(__rbt_unused struct rb_tree *rbt, struct rb_node *old_father, const unsigned int which) { const unsigned int other = which ^ RB_DIR_OTHER; struct rb_node * const grandpa = RB_FATHER(old_father); struct rb_node * const old_child = old_father->rb_nodes[which]; struct rb_node * const new_father = old_child; struct rb_node * const new_child = old_father; KASSERT(which == RB_DIR_LEFT || which == RB_DIR_RIGHT); KASSERT(!RB_SENTINEL_P(old_child)); KASSERT(RB_FATHER(old_child) == old_father); KASSERT(rb_tree_check_node(rbt, old_father, NULL, false)); KASSERT(rb_tree_check_node(rbt, old_child, NULL, false)); KASSERT(RB_ROOT_P(rbt, old_father) || rb_tree_check_node(rbt, grandpa, NULL, false)); /* * Exchange descendant linkages. */ grandpa->rb_nodes[RB_POSITION(old_father)] = new_father; new_child->rb_nodes[which] = old_child->rb_nodes[other]; new_father->rb_nodes[other] = new_child; /* * Update ancestor linkages */ RB_SET_FATHER(new_father, grandpa); RB_SET_FATHER(new_child, new_father); /* * Exchange properties between new_father and new_child. The only * change is that new_child's position is now on the other side. */ #if 0 { struct rb_node tmp; tmp.rb_info = 0; RB_COPY_PROPERTIES(&tmp, old_child); RB_COPY_PROPERTIES(new_father, old_father); RB_COPY_PROPERTIES(new_child, &tmp); } #else RB_SWAP_PROPERTIES(new_father, new_child); #endif RB_SET_POSITION(new_child, other); /* * Make sure to reparent the new child to ourself. 
*/ if (!RB_SENTINEL_P(new_child->rb_nodes[which])) { RB_SET_FATHER(new_child->rb_nodes[which], new_child); RB_SET_POSITION(new_child->rb_nodes[which], which); } KASSERT(rb_tree_check_node(rbt, new_father, NULL, false)); KASSERT(rb_tree_check_node(rbt, new_child, NULL, false)); KASSERT(RB_ROOT_P(rbt, new_father) || rb_tree_check_node(rbt, grandpa, NULL, false)); } static void rb_tree_insert_rebalance(struct rb_tree *rbt, struct rb_node *self) { struct rb_node * father = RB_FATHER(self); struct rb_node * grandpa = RB_FATHER(father); struct rb_node * uncle; unsigned int which; unsigned int other; KASSERT(!RB_ROOT_P(rbt, self)); KASSERT(RB_RED_P(self)); KASSERT(RB_RED_P(father)); RBSTAT_INC(rbt->rbt_insertion_rebalance_calls); for (;;) { KASSERT(!RB_SENTINEL_P(self)); KASSERT(RB_RED_P(self)); KASSERT(RB_RED_P(father)); /* * We are red and our parent is red, therefore we must have a * grandfather and he must be black. */ grandpa = RB_FATHER(father); KASSERT(RB_BLACK_P(grandpa)); KASSERT(RB_DIR_RIGHT == 1 && RB_DIR_LEFT == 0); which = (father == grandpa->rb_right); other = which ^ RB_DIR_OTHER; uncle = grandpa->rb_nodes[other]; if (RB_BLACK_P(uncle)) break; RBSTAT_INC(rbt->rbt_insertion_rebalance_passes); /* * Case 1: our uncle is red * Simply invert the colors of our parent and * uncle and make our grandparent red. And * then solve the problem up at his level. */ RB_MARK_BLACK(uncle); RB_MARK_BLACK(father); if (__predict_false(RB_ROOT_P(rbt, grandpa))) { /* * If our grandpa is root, don't bother * setting him to red, just return. */ KASSERT(RB_BLACK_P(grandpa)); return; } RB_MARK_RED(grandpa); self = grandpa; father = RB_FATHER(self); KASSERT(RB_RED_P(self)); if (RB_BLACK_P(father)) { /* * If our greatgrandpa is black, we're done. */ KASSERT(RB_BLACK_P(rbt->rbt_root)); return; } } KASSERT(!RB_ROOT_P(rbt, self)); KASSERT(RB_RED_P(self)); KASSERT(RB_RED_P(father)); KASSERT(RB_BLACK_P(uncle)); KASSERT(RB_BLACK_P(grandpa)); /* * Case 2&3: our uncle is black. 
*/ if (self == father->rb_nodes[other]) { /* * Case 2: we are on the same side as our uncle * Swap ourselves with our parent so this case * becomes case 3. Basically our parent becomes our * child. */ rb_tree_reparent_nodes(rbt, father, other); KASSERT(RB_FATHER(father) == self); KASSERT(self->rb_nodes[which] == father); KASSERT(RB_FATHER(self) == grandpa); self = father; father = RB_FATHER(self); } KASSERT(RB_RED_P(self) && RB_RED_P(father)); KASSERT(grandpa->rb_nodes[which] == father); /* * Case 3: we are opposite a child of a black uncle. * Swap our parent and grandparent. Since our grandfather * is black, our father will become black and our new sibling * (former grandparent) will become red. */ rb_tree_reparent_nodes(rbt, grandpa, which); KASSERT(RB_FATHER(self) == father); KASSERT(RB_FATHER(self)->rb_nodes[RB_POSITION(self) ^ RB_DIR_OTHER] == grandpa); KASSERT(RB_RED_P(self)); KASSERT(RB_BLACK_P(father)); KASSERT(RB_RED_P(grandpa)); /* * Final step: Set the root to black. */ RB_MARK_BLACK(rbt->rbt_root); } static void rb_tree_prune_node(struct rb_tree *rbt, struct rb_node *self, bool rebalance) { const unsigned int which = RB_POSITION(self); struct rb_node *father = RB_FATHER(self); #ifndef RBSMALL const bool was_root = RB_ROOT_P(rbt, self); #endif KASSERT(rebalance || (RB_ROOT_P(rbt, self) || RB_RED_P(self))); KASSERT(!rebalance || RB_BLACK_P(self)); KASSERT(RB_CHILDLESS_P(self)); KASSERT(rb_tree_check_node(rbt, self, NULL, false)); /* * Since we are childless, we know that self->rb_left is pointing * to the sentinel node. */ father->rb_nodes[which] = self->rb_left; /* * Remove ourselves from the node list, decrement the count, * and update min/max. 
*/ RB_TAILQ_REMOVE(&rbt->rbt_nodes, self, rb_link); RBSTAT_DEC(rbt->rbt_count); #ifndef RBSMALL if (__predict_false(rbt->rbt_minmax[RB_POSITION(self)] == self)) { rbt->rbt_minmax[RB_POSITION(self)] = father; /* * When removing the root, rbt->rbt_minmax[RB_DIR_LEFT] is * updated automatically, but we also need to update * rbt->rbt_minmax[RB_DIR_RIGHT]; */ if (__predict_false(was_root)) { rbt->rbt_minmax[RB_DIR_RIGHT] = father; } } RB_SET_FATHER(self, NULL); #endif /* * Rebalance if requested. */ if (rebalance) rb_tree_removal_rebalance(rbt, father, which); KASSERT(was_root || rb_tree_check_node(rbt, father, NULL, true)); } /* * When deleting an interior node */ static void rb_tree_swap_prune_and_rebalance(struct rb_tree *rbt, struct rb_node *self, struct rb_node *standin) { const unsigned int standin_which = RB_POSITION(standin); unsigned int standin_other = standin_which ^ RB_DIR_OTHER; struct rb_node *standin_son; struct rb_node *standin_father = RB_FATHER(standin); bool rebalance = RB_BLACK_P(standin); if (standin_father == self) { /* * As a child of self, any childen would be opposite of * our parent. */ KASSERT(RB_SENTINEL_P(standin->rb_nodes[standin_other])); standin_son = standin->rb_nodes[standin_which]; } else { /* * Since we aren't a child of self, any childen would be * on the same side as our parent. */ KASSERT(RB_SENTINEL_P(standin->rb_nodes[standin_which])); standin_son = standin->rb_nodes[standin_other]; } /* * the node we are removing must have two children. */ KASSERT(RB_TWOCHILDREN_P(self)); /* * If standin has a child, it must be red. */ KASSERT(RB_SENTINEL_P(standin_son) || RB_RED_P(standin_son)); /* * Verify things are sane. */ KASSERT(rb_tree_check_node(rbt, self, NULL, false)); KASSERT(rb_tree_check_node(rbt, standin, NULL, false)); if (__predict_false(RB_RED_P(standin_son))) { /* * We know we have a red child so if we flip it to black * we don't have to rebalance. 
*/ KASSERT(rb_tree_check_node(rbt, standin_son, NULL, true)); RB_MARK_BLACK(standin_son); rebalance = false; if (standin_father == self) { KASSERT(RB_POSITION(standin_son) == standin_which); } else { KASSERT(RB_POSITION(standin_son) == standin_other); /* * Change the son's parentage to point to his grandpa. */ RB_SET_FATHER(standin_son, standin_father); RB_SET_POSITION(standin_son, standin_which); } } if (standin_father == self) { /* * If we are about to delete the standin's father, then when * we call rebalance, we need to use ourselves as our father. * Otherwise remember our original father. Also, sincef we are * our standin's father we only need to reparent the standin's * brother. * * | R --> S | * | Q S --> Q T | * | t --> | */ KASSERT(RB_SENTINEL_P(standin->rb_nodes[standin_other])); KASSERT(!RB_SENTINEL_P(self->rb_nodes[standin_other])); KASSERT(self->rb_nodes[standin_which] == standin); /* * Have our son/standin adopt his brother as his new son. */ standin_father = standin; } else { /* * | R --> S . | * | / \ | T --> / \ | / | * | ..... | S --> ..... | T | * * Sever standin's connection to his father. */ standin_father->rb_nodes[standin_which] = standin_son; /* * Adopt the far son. */ standin->rb_nodes[standin_other] = self->rb_nodes[standin_other]; RB_SET_FATHER(standin->rb_nodes[standin_other], standin); KASSERT(RB_POSITION(self->rb_nodes[standin_other]) == standin_other); /* * Use standin_other because we need to preserve standin_which * for the removal_rebalance. */ standin_other = standin_which; } /* * Move the only remaining son to our standin. If our standin is our * son, this will be the only son needed to be moved. */ KASSERT(standin->rb_nodes[standin_other] != self->rb_nodes[standin_other]); standin->rb_nodes[standin_other] = self->rb_nodes[standin_other]; RB_SET_FATHER(standin->rb_nodes[standin_other], standin); /* * Now copy the result of self to standin and then replace * self with standin in the tree. 
*/ RB_COPY_PROPERTIES(standin, self); RB_SET_FATHER(standin, RB_FATHER(self)); RB_FATHER(standin)->rb_nodes[RB_POSITION(standin)] = standin; /* * Remove ourselves from the node list, decrement the count, * and update min/max. */ RB_TAILQ_REMOVE(&rbt->rbt_nodes, self, rb_link); RBSTAT_DEC(rbt->rbt_count); #ifndef RBSMALL if (__predict_false(rbt->rbt_minmax[RB_POSITION(self)] == self)) rbt->rbt_minmax[RB_POSITION(self)] = RB_FATHER(self); RB_SET_FATHER(self, NULL); #endif KASSERT(rb_tree_check_node(rbt, standin, NULL, false)); KASSERT(RB_FATHER_SENTINEL_P(standin) || rb_tree_check_node(rbt, standin_father, NULL, false)); KASSERT(RB_LEFT_SENTINEL_P(standin) || rb_tree_check_node(rbt, standin->rb_left, NULL, false)); KASSERT(RB_RIGHT_SENTINEL_P(standin) || rb_tree_check_node(rbt, standin->rb_right, NULL, false)); if (!rebalance) return; rb_tree_removal_rebalance(rbt, standin_father, standin_which); KASSERT(rb_tree_check_node(rbt, standin, NULL, true)); } /* * We could do this by doing * rb_tree_node_swap(rbt, self, which); * rb_tree_prune_node(rbt, self, false); * * But it's more efficient to just evalate and recolor the child. */ static void rb_tree_prune_blackred_branch(struct rb_tree *rbt, struct rb_node *self, unsigned int which) { struct rb_node *father = RB_FATHER(self); struct rb_node *son = self->rb_nodes[which]; #ifndef RBSMALL const bool was_root = RB_ROOT_P(rbt, self); #endif KASSERT(which == RB_DIR_LEFT || which == RB_DIR_RIGHT); KASSERT(RB_BLACK_P(self) && RB_RED_P(son)); KASSERT(!RB_TWOCHILDREN_P(son)); KASSERT(RB_CHILDLESS_P(son)); KASSERT(rb_tree_check_node(rbt, self, NULL, false)); KASSERT(rb_tree_check_node(rbt, son, NULL, false)); /* * Remove ourselves from the tree and give our former child our * properties (position, color, root). */ RB_COPY_PROPERTIES(son, self); father->rb_nodes[RB_POSITION(son)] = son; RB_SET_FATHER(son, father); /* * Remove ourselves from the node list, decrement the count, * and update minmax. 
*/ RB_TAILQ_REMOVE(&rbt->rbt_nodes, self, rb_link); RBSTAT_DEC(rbt->rbt_count); #ifndef RBSMALL if (__predict_false(was_root)) { KASSERT(rbt->rbt_minmax[which] == son); rbt->rbt_minmax[which ^ RB_DIR_OTHER] = son; } else if (rbt->rbt_minmax[RB_POSITION(self)] == self) { rbt->rbt_minmax[RB_POSITION(self)] = son; } RB_SET_FATHER(self, NULL); #endif KASSERT(was_root || rb_tree_check_node(rbt, father, NULL, true)); KASSERT(rb_tree_check_node(rbt, son, NULL, true)); } void rb_tree_remove_node(struct rb_tree *rbt, void *object) { const rb_tree_ops_t *rbto = rbt->rbt_ops; struct rb_node *standin, *self = RB_ITEMTONODE(rbto, object); unsigned int which; KASSERT(!RB_SENTINEL_P(self)); RBSTAT_INC(rbt->rbt_removals); /* * In the following diagrams, we (the node to be removed) are S. Red * nodes are lowercase. T could be either red or black. * * Remember the major axiom of the red-black tree: the number of * black nodes from the root to each leaf is constant across all * leaves, only the number of red nodes varies. * * Thus removing a red leaf doesn't require any other changes to a * red-black tree. So if we must remove a node, attempt to rearrange * the tree so we can remove a red node. * * The simpliest case is a childless red node or a childless root node: * * | T --> T | or | R --> * | * | s --> * | */ if (RB_CHILDLESS_P(self)) { const bool rebalance = RB_BLACK_P(self) && !RB_ROOT_P(rbt, self); rb_tree_prune_node(rbt, self, rebalance); return; } KASSERT(!RB_CHILDLESS_P(self)); if (!RB_TWOCHILDREN_P(self)) { /* * The next simpliest case is the node we are deleting is * black and has one red child. * * | T --> T --> T | * | S --> R --> R | * | r --> s --> * | */ which = RB_LEFT_SENTINEL_P(self) ? 
RB_DIR_RIGHT : RB_DIR_LEFT; KASSERT(RB_BLACK_P(self)); KASSERT(RB_RED_P(self->rb_nodes[which])); KASSERT(RB_CHILDLESS_P(self->rb_nodes[which])); rb_tree_prune_blackred_branch(rbt, self, which); return; } KASSERT(RB_TWOCHILDREN_P(self)); /* * We invert these because we prefer to remove from the inside of * the tree. */ which = RB_POSITION(self) ^ RB_DIR_OTHER; /* * Let's find the node closes to us opposite of our parent * Now swap it with ourself, "prune" it, and rebalance, if needed. */ standin = RB_ITEMTONODE(rbto, rb_tree_iterate(rbt, object, which)); rb_tree_swap_prune_and_rebalance(rbt, self, standin); } static void rb_tree_removal_rebalance(struct rb_tree *rbt, struct rb_node *parent, unsigned int which) { KASSERT(!RB_SENTINEL_P(parent)); KASSERT(RB_SENTINEL_P(parent->rb_nodes[which])); KASSERT(which == RB_DIR_LEFT || which == RB_DIR_RIGHT); RBSTAT_INC(rbt->rbt_removal_rebalance_calls); while (RB_BLACK_P(parent->rb_nodes[which])) { unsigned int other = which ^ RB_DIR_OTHER; struct rb_node *brother = parent->rb_nodes[other]; RBSTAT_INC(rbt->rbt_removal_rebalance_passes); KASSERT(!RB_SENTINEL_P(brother)); /* * For cases 1, 2a, and 2b, our brother's children must * be black and our father must be black */ if (RB_BLACK_P(parent) && RB_BLACK_P(brother->rb_left) && RB_BLACK_P(brother->rb_right)) { if (RB_RED_P(brother)) { /* * Case 1: Our brother is red, swap its * position (and colors) with our parent. * This should now be case 2b (unless C or E * has a red child which is case 3; thus no * explicit branch to case 2b). * * B -> D * A d -> b E * C E -> A C */ KASSERT(RB_BLACK_P(parent)); rb_tree_reparent_nodes(rbt, parent, other); brother = parent->rb_nodes[other]; KASSERT(!RB_SENTINEL_P(brother)); KASSERT(RB_RED_P(parent)); KASSERT(RB_BLACK_P(brother)); KASSERT(rb_tree_check_node(rbt, brother, NULL, false)); KASSERT(rb_tree_check_node(rbt, parent, NULL, false)); } else { /* * Both our parent and brother are black. 
* Change our brother to red, advance up rank * and go through the loop again. * * B -> *B * *A D -> A d * C E -> C E */ RB_MARK_RED(brother); KASSERT(RB_BLACK_P(brother->rb_left)); KASSERT(RB_BLACK_P(brother->rb_right)); if (RB_ROOT_P(rbt, parent)) return; /* root == parent == black */ KASSERT(rb_tree_check_node(rbt, brother, NULL, false)); KASSERT(rb_tree_check_node(rbt, parent, NULL, false)); which = RB_POSITION(parent); parent = RB_FATHER(parent); continue; } } /* * Avoid an else here so that case 2a above can hit either * case 2b, 3, or 4. */ if (RB_RED_P(parent) && RB_BLACK_P(brother) && RB_BLACK_P(brother->rb_left) && RB_BLACK_P(brother->rb_right)) { KASSERT(RB_RED_P(parent)); KASSERT(RB_BLACK_P(brother)); KASSERT(RB_BLACK_P(brother->rb_left)); KASSERT(RB_BLACK_P(brother->rb_right)); /* * We are black, our father is red, our brother and * both nephews are black. Simply invert/exchange the * colors of our father and brother (to black and red * respectively). * * | f --> F | * | * B --> * b | * | N N --> N N | */ RB_MARK_BLACK(parent); RB_MARK_RED(brother); KASSERT(rb_tree_check_node(rbt, brother, NULL, true)); break; /* We're done! */ } else { /* * Our brother must be black and have at least one * red child (it may have two). */ KASSERT(RB_BLACK_P(brother)); KASSERT(RB_RED_P(brother->rb_nodes[which]) || RB_RED_P(brother->rb_nodes[other])); if (RB_BLACK_P(brother->rb_nodes[other])) { /* * Case 3: our brother is black, our near * nephew is red, and our far nephew is black. * Swap our brother with our near nephew. * This result in a tree that matches case 4. * (Our father could be red or black). * * | F --> F | * | x B --> x B | * | n --> n | */ KASSERT(RB_RED_P(brother->rb_nodes[which])); rb_tree_reparent_nodes(rbt, brother, which); KASSERT(RB_FATHER(brother) == parent->rb_nodes[other]); brother = parent->rb_nodes[other]; KASSERT(RB_RED_P(brother->rb_nodes[other])); } /* * Case 4: our brother is black and our far nephew * is red. 
Swap our father and brother locations and * change our far nephew to black. (these can be * done in either order so we change the color first). * The result is a valid red-black tree and is a * terminal case. (again we don't care about the * father's color) * * If the father is red, we will get a red-black-black * tree: * | f -> f --> b | * | B -> B --> F N | * | n -> N --> | * * If the father is black, we will get an all black * tree: * | F -> F --> B | * | B -> B --> F N | * | n -> N --> | * * If we had two red nephews, then after the swap, * our former father would have a red grandson. */ KASSERT(RB_BLACK_P(brother)); KASSERT(RB_RED_P(brother->rb_nodes[other])); RB_MARK_BLACK(brother->rb_nodes[other]); rb_tree_reparent_nodes(rbt, parent, other); break; /* We're done! */ } } KASSERT(rb_tree_check_node(rbt, parent, NULL, true)); } void * rb_tree_iterate(struct rb_tree *rbt, void *object, const unsigned int direction) { const rb_tree_ops_t *rbto = rbt->rbt_ops; const unsigned int other = direction ^ RB_DIR_OTHER; struct rb_node *self; KASSERT(direction == RB_DIR_LEFT || direction == RB_DIR_RIGHT); if (object == NULL) { #ifndef RBSMALL if (RB_SENTINEL_P(rbt->rbt_root)) return NULL; return RB_NODETOITEM(rbto, rbt->rbt_minmax[direction]); #else self = rbt->rbt_root; if (RB_SENTINEL_P(self)) return NULL; while (!RB_SENTINEL_P(self->rb_nodes[direction])) self = self->rb_nodes[direction]; return RB_NODETOITEM(rbto, self); #endif /* !RBSMALL */ } self = RB_ITEMTONODE(rbto, object); KASSERT(!RB_SENTINEL_P(self)); /* * We can't go any further in this direction. We proceed up in the * opposite direction until our parent is in direction we want to go. */ if (RB_SENTINEL_P(self->rb_nodes[direction])) { while (!RB_ROOT_P(rbt, self)) { if (other == RB_POSITION(self)) return RB_NODETOITEM(rbto, RB_FATHER(self)); self = RB_FATHER(self); } return NULL; } /* * Advance down one in current direction and go down as far as possible * in the opposite direction. 
*/ self = self->rb_nodes[direction]; KASSERT(!RB_SENTINEL_P(self)); while (!RB_SENTINEL_P(self->rb_nodes[other])) self = self->rb_nodes[other]; return RB_NODETOITEM(rbto, self); } #ifdef RBDEBUG static const struct rb_node * rb_tree_iterate_const(const struct rb_tree *rbt, const struct rb_node *self, const unsigned int direction) { const unsigned int other = direction ^ RB_DIR_OTHER; KASSERT(direction == RB_DIR_LEFT || direction == RB_DIR_RIGHT); if (self == NULL) { #ifndef RBSMALL if (RB_SENTINEL_P(rbt->rbt_root)) return NULL; return rbt->rbt_minmax[direction]; #else self = rbt->rbt_root; if (RB_SENTINEL_P(self)) return NULL; while (!RB_SENTINEL_P(self->rb_nodes[direction])) self = self->rb_nodes[direction]; return self; #endif /* !RBSMALL */ } KASSERT(!RB_SENTINEL_P(self)); /* * We can't go any further in this direction. We proceed up in the * opposite direction until our parent is in direction we want to go. */ if (RB_SENTINEL_P(self->rb_nodes[direction])) { while (!RB_ROOT_P(rbt, self)) { if (other == RB_POSITION(self)) return RB_FATHER(self); self = RB_FATHER(self); } return NULL; } /* * Advance down one in current direction and go down as far as possible * in the opposite direction. 
*/ self = self->rb_nodes[direction]; KASSERT(!RB_SENTINEL_P(self)); while (!RB_SENTINEL_P(self->rb_nodes[other])) self = self->rb_nodes[other]; return self; } static unsigned int rb_tree_count_black(const struct rb_node *self) { unsigned int left, right; if (RB_SENTINEL_P(self)) return 0; left = rb_tree_count_black(self->rb_left); right = rb_tree_count_black(self->rb_right); KASSERT(left == right); return left + RB_BLACK_P(self); } static bool rb_tree_check_node(const struct rb_tree *rbt, const struct rb_node *self, const struct rb_node *prev, bool red_check) { const rb_tree_ops_t *rbto = rbt->rbt_ops; rbto_compare_nodes_fn compare_nodes = rbto->rbto_compare_nodes; KASSERT(!RB_SENTINEL_P(self)); KASSERT(prev == NULL || (*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, prev), RB_NODETOITEM(rbto, self)) < 0); /* * Verify our relationship to our parent. */ if (RB_ROOT_P(rbt, self)) { KASSERT(self == rbt->rbt_root); KASSERT(RB_POSITION(self) == RB_DIR_LEFT); KASSERT(RB_FATHER(self)->rb_nodes[RB_DIR_LEFT] == self); KASSERT(RB_FATHER(self) == (const struct rb_node *) &rbt->rbt_root); } else { int diff = (*compare_nodes)(rbto->rbto_context, RB_NODETOITEM(rbto, self), RB_NODETOITEM(rbto, RB_FATHER(self))); KASSERT(self != rbt->rbt_root); KASSERT(!RB_FATHER_SENTINEL_P(self)); if (RB_POSITION(self) == RB_DIR_LEFT) { KASSERT(diff < 0); KASSERT(RB_FATHER(self)->rb_nodes[RB_DIR_LEFT] == self); } else { KASSERT(diff > 0); KASSERT(RB_FATHER(self)->rb_nodes[RB_DIR_RIGHT] == self); } } /* * Verify our position in the linked list against the tree itself. 
*/ { const struct rb_node *prev0 = rb_tree_iterate_const(rbt, self, RB_DIR_LEFT); const struct rb_node *next0 = rb_tree_iterate_const(rbt, self, RB_DIR_RIGHT); KASSERT(prev0 == TAILQ_PREV(self, rb_node_qh, rb_link)); KASSERT(next0 == TAILQ_NEXT(self, rb_link)); #ifndef RBSMALL KASSERT(prev0 != NULL || self == rbt->rbt_minmax[RB_DIR_LEFT]); KASSERT(next0 != NULL || self == rbt->rbt_minmax[RB_DIR_RIGHT]); #endif } /* * The root must be black. * There can never be two adjacent red nodes. */ if (red_check) { KASSERT(!RB_ROOT_P(rbt, self) || RB_BLACK_P(self)); (void) rb_tree_count_black(self); if (RB_RED_P(self)) { const struct rb_node *brother; KASSERT(!RB_ROOT_P(rbt, self)); brother = RB_FATHER(self)->rb_nodes[RB_POSITION(self) ^ RB_DIR_OTHER]; KASSERT(RB_BLACK_P(RB_FATHER(self))); /* * I'm red and have no children, then I must either * have no brother or my brother also be red and * also have no children. (black count == 0) */ KASSERT(!RB_CHILDLESS_P(self) || RB_SENTINEL_P(brother) || RB_RED_P(brother) || RB_CHILDLESS_P(brother)); /* * If I'm not childless, I must have two children * and they must be both be black. */ KASSERT(RB_CHILDLESS_P(self) || (RB_TWOCHILDREN_P(self) && RB_BLACK_P(self->rb_left) && RB_BLACK_P(self->rb_right))); /* * If I'm not childless, thus I have black children, * then my brother must either be black or have two * black children. */ KASSERT(RB_CHILDLESS_P(self) || RB_BLACK_P(brother) || (RB_TWOCHILDREN_P(brother) && RB_BLACK_P(brother->rb_left) && RB_BLACK_P(brother->rb_right))); } else { /* * If I'm black and have one child, that child must * be red and childless. 
*/ KASSERT(RB_CHILDLESS_P(self) || RB_TWOCHILDREN_P(self) || (!RB_LEFT_SENTINEL_P(self) && RB_RIGHT_SENTINEL_P(self) && RB_RED_P(self->rb_left) && RB_CHILDLESS_P(self->rb_left)) || (!RB_RIGHT_SENTINEL_P(self) && RB_LEFT_SENTINEL_P(self) && RB_RED_P(self->rb_right) && RB_CHILDLESS_P(self->rb_right))); /* * If I'm a childless black node and my parent is * black, my 2nd closet relative away from my parent * is either red or has a red parent or red children. */ if (!RB_ROOT_P(rbt, self) && RB_CHILDLESS_P(self) && RB_BLACK_P(RB_FATHER(self))) { const unsigned int which = RB_POSITION(self); const unsigned int other = which ^ RB_DIR_OTHER; const struct rb_node *relative0, *relative; relative0 = rb_tree_iterate_const(rbt, self, other); KASSERT(relative0 != NULL); relative = rb_tree_iterate_const(rbt, relative0, other); KASSERT(relative != NULL); KASSERT(RB_SENTINEL_P(relative->rb_nodes[which])); #if 0 KASSERT(RB_RED_P(relative) || RB_RED_P(relative->rb_left) || RB_RED_P(relative->rb_right) || RB_RED_P(RB_FATHER(relative))); #endif } } /* * A grandparent's children must be real nodes and not * sentinels. First check out grandparent. */ KASSERT(RB_ROOT_P(rbt, self) || RB_ROOT_P(rbt, RB_FATHER(self)) || RB_TWOCHILDREN_P(RB_FATHER(RB_FATHER(self)))); /* * If we are have grandchildren on our left, then * we must have a child on our right. */ KASSERT(RB_LEFT_SENTINEL_P(self) || RB_CHILDLESS_P(self->rb_left) || !RB_RIGHT_SENTINEL_P(self)); /* * If we are have grandchildren on our right, then * we must have a child on our left. */ KASSERT(RB_RIGHT_SENTINEL_P(self) || RB_CHILDLESS_P(self->rb_right) || !RB_LEFT_SENTINEL_P(self)); /* * If we have a child on the left and it doesn't have two * children make sure we don't have great-great-grandchildren on * the right. 
*/ KASSERT(RB_TWOCHILDREN_P(self->rb_left) || RB_CHILDLESS_P(self->rb_right) || RB_CHILDLESS_P(self->rb_right->rb_left) || RB_CHILDLESS_P(self->rb_right->rb_left->rb_left) || RB_CHILDLESS_P(self->rb_right->rb_left->rb_right) || RB_CHILDLESS_P(self->rb_right->rb_right) || RB_CHILDLESS_P(self->rb_right->rb_right->rb_left) || RB_CHILDLESS_P(self->rb_right->rb_right->rb_right)); /* * If we have a child on the right and it doesn't have two * children make sure we don't have great-great-grandchildren on * the left. */ KASSERT(RB_TWOCHILDREN_P(self->rb_right) || RB_CHILDLESS_P(self->rb_left) || RB_CHILDLESS_P(self->rb_left->rb_left) || RB_CHILDLESS_P(self->rb_left->rb_left->rb_left) || RB_CHILDLESS_P(self->rb_left->rb_left->rb_right) || RB_CHILDLESS_P(self->rb_left->rb_right) || RB_CHILDLESS_P(self->rb_left->rb_right->rb_left) || RB_CHILDLESS_P(self->rb_left->rb_right->rb_right)); /* * If we are fully interior node, then our predecessors and * successors must have no children in our direction. 
*/ if (RB_TWOCHILDREN_P(self)) { const struct rb_node *prev0; const struct rb_node *next0; prev0 = rb_tree_iterate_const(rbt, self, RB_DIR_LEFT); KASSERT(prev0 != NULL); KASSERT(RB_RIGHT_SENTINEL_P(prev0)); next0 = rb_tree_iterate_const(rbt, self, RB_DIR_RIGHT); KASSERT(next0 != NULL); KASSERT(RB_LEFT_SENTINEL_P(next0)); } } return true; } void rb_tree_check(const struct rb_tree *rbt, bool red_check) { const struct rb_node *self; const struct rb_node *prev; #ifdef RBSTATS unsigned int count = 0; #endif KASSERT(rbt->rbt_root != NULL); KASSERT(RB_LEFT_P(rbt->rbt_root)); #if defined(RBSTATS) && !defined(RBSMALL) KASSERT(rbt->rbt_count > 1 || rbt->rbt_minmax[RB_DIR_LEFT] == rbt->rbt_minmax[RB_DIR_RIGHT]); #endif prev = NULL; TAILQ_FOREACH(self, &rbt->rbt_nodes, rb_link) { rb_tree_check_node(rbt, self, prev, false); #ifdef RBSTATS count++; #endif } #ifdef RBSTATS KASSERT(rbt->rbt_count == count); #endif if (red_check) { KASSERT(RB_BLACK_P(rbt->rbt_root)); KASSERT(RB_SENTINEL_P(rbt->rbt_root) || rb_tree_count_black(rbt->rbt_root)); /* * The root must be black. * There can never be two adjacent red nodes. */ TAILQ_FOREACH(self, &rbt->rbt_nodes, rb_link) { rb_tree_check_node(rbt, self, NULL, true); } } } #endif /* RBDEBUG */ #ifdef RBSTATS static void rb_tree_mark_depth(const struct rb_tree *rbt, const struct rb_node *self, size_t *depths, size_t depth) { if (RB_SENTINEL_P(self)) return; if (RB_TWOCHILDREN_P(self)) { rb_tree_mark_depth(rbt, self->rb_left, depths, depth + 1); rb_tree_mark_depth(rbt, self->rb_right, depths, depth + 1); return; } depths[depth]++; if (!RB_LEFT_SENTINEL_P(self)) { rb_tree_mark_depth(rbt, self->rb_left, depths, depth + 1); } if (!RB_RIGHT_SENTINEL_P(self)) { rb_tree_mark_depth(rbt, self->rb_right, depths, depth + 1); } } void rb_tree_depths(const struct rb_tree *rbt, size_t *depths) { rb_tree_mark_depth(rbt, rbt->rbt_root, depths, 1); } #endif /* RBSTATS */ |
| 5 1 2 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | /* $NetBSD: hci_ioctl.c,v 1.15 2021/09/21 15:03:08 christos Exp $ */ /*- * Copyright (c) 2005 Iain Hibbert. * Copyright (c) 2006 Itronix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of Itronix Inc. 
may not be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: hci_ioctl.c,v 1.15 2021/09/21 15:03:08 christos Exp $"); #include <sys/param.h> #include <sys/domain.h> #include <sys/ioctl.h> #include <sys/kauth.h> #include <sys/kernel.h> #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/systm.h> #include <netbt/bluetooth.h> #include <netbt/hci.h> #include <netbt/l2cap.h> #include <netbt/rfcomm.h> #ifdef BLUETOOTH_DEBUG #define BDADDR(bd) (bd).b[5], (bd).b[4], (bd).b[3], \ (bd).b[2], (bd).b[1], (bd).b[0] static void hci_dump(void) { struct hci_unit *unit; struct hci_link *link; struct l2cap_channel *chan; struct rfcomm_session *rs; struct rfcomm_dlc *dlc; uprintf("HCI:\n"); SIMPLEQ_FOREACH(unit, &hci_unit_list, hci_next) { uprintf("UNIT %s: flags 0x%4.4x, " "num_cmd=%d, num_acl=%d, num_sco=%d\n", device_xname(unit->hci_dev), unit->hci_flags, unit->hci_num_cmd_pkts, unit->hci_num_acl_pkts, unit->hci_num_sco_pkts); TAILQ_FOREACH(link, &unit->hci_links, hl_next) { uprintf("+HANDLE #%d: %s " "raddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " "state %d, refcnt %d\n", link->hl_handle, (link->hl_type == HCI_LINK_ACL ? 
"ACL":"SCO"), BDADDR(link->hl_bdaddr), link->hl_state, link->hl_refcnt); } } uprintf("L2CAP:\n"); LIST_FOREACH(chan, &l2cap_active_list, lc_ncid) { uprintf("CID #%d state %d, psm=0x%4.4x, " "laddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " "raddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", chan->lc_lcid, chan->lc_state, chan->lc_raddr.bt_psm, BDADDR(chan->lc_laddr.bt_bdaddr), BDADDR(chan->lc_raddr.bt_bdaddr)); } LIST_FOREACH(chan, &l2cap_listen_list, lc_ncid) { uprintf("LISTEN psm=0x%4.4x, " "laddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", chan->lc_laddr.bt_psm, BDADDR(chan->lc_laddr.bt_bdaddr)); } uprintf("RFCOMM:\n"); LIST_FOREACH(rs, &rfcomm_session_active, rs_next) { chan = rs->rs_l2cap; uprintf("SESSION: state=%d, flags=0x%4.4x, psm 0x%4.4x " "laddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " "raddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", rs->rs_state, rs->rs_flags, chan->lc_raddr.bt_psm, BDADDR(chan->lc_laddr.bt_bdaddr), BDADDR(chan->lc_raddr.bt_bdaddr)); LIST_FOREACH(dlc, &rs->rs_dlcs, rd_next) { uprintf("+DLC channel=%d, dlci=%d, " "state=%d, flags=0x%4.4x, rxcred=%d, rxsize=%ld, " "txcred=%d, pending=%d, txqlen=%d\n", dlc->rd_raddr.bt_channel, dlc->rd_dlci, dlc->rd_state, dlc->rd_flags, dlc->rd_rxcred, (unsigned long)dlc->rd_rxsize, dlc->rd_txcred, dlc->rd_pending, (dlc->rd_txbuf ? 
dlc->rd_txbuf->m_pkthdr.len : 0)); } } LIST_FOREACH(rs, &rfcomm_session_listen, rs_next) { chan = rs->rs_l2cap; uprintf("LISTEN: psm 0x%4.4x, " "laddr=%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", chan->lc_laddr.bt_psm, BDADDR(chan->lc_laddr.bt_bdaddr)); LIST_FOREACH(dlc, &rs->rs_dlcs, rd_next) uprintf("+DLC channel=%d\n", dlc->rd_laddr.bt_channel); } } #undef BDADDR #endif int hci_ioctl_pcb(unsigned long cmd, void *data) { struct btreq *btr = data; struct hci_unit *unit; int err = 0; DPRINTFN(1, "cmd %#lx\n", cmd); switch(cmd) { #ifdef BLUETOOTH_DEBUG case SIOCBTDUMP: hci_dump(); return 0; #endif /* * Get unit info based on address rather than name */ case SIOCGBTINFOA: unit = hci_unit_lookup(&btr->btr_bdaddr); if (unit == NULL) return ENXIO; break; /* * The remaining ioctl's all use the same btreq structure and * index on the name of the device, so we look that up first. */ case SIOCNBTINFO: /* empty name means give the first unit */ if (btr->btr_name[0] == '\0') { unit = NULL; break; } /* else fall through and look it up */ /* FALLTHROUGH */ case SIOCGBTINFO: case SIOCSBTFLAGS: case SIOCSBTPOLICY: case SIOCSBTPTYPE: case SIOCGBTSTATS: case SIOCZBTSTATS: case SIOCSBTSCOMTU: case SIOCGBTFEAT: SIMPLEQ_FOREACH(unit, &hci_unit_list, hci_next) { if (strncmp(device_xname(unit->hci_dev), btr->btr_name, HCI_DEVNAME_SIZE) == 0) break; } if (unit == NULL) return ENXIO; break; default: /* not one of mine */ return EPASSTHROUGH; } switch(cmd) { case SIOCNBTINFO: /* get next info */ if (unit) unit = SIMPLEQ_NEXT(unit, hci_next); else unit = SIMPLEQ_FIRST(&hci_unit_list); if (unit == NULL) { err = ENXIO; break; } /* FALLTHROUGH */ case SIOCGBTINFO: /* get unit info */ /* FALLTHROUGH */ case SIOCGBTINFOA: /* get info by address */ memset(btr, 0, sizeof(struct btreq)); strlcpy(btr->btr_name, device_xname(unit->hci_dev), HCI_DEVNAME_SIZE); bdaddr_copy(&btr->btr_bdaddr, &unit->hci_bdaddr); btr->btr_flags = unit->hci_flags; btr->btr_num_cmd = unit->hci_num_cmd_pkts; btr->btr_num_acl = 
unit->hci_num_acl_pkts; btr->btr_num_sco = unit->hci_num_sco_pkts; btr->btr_acl_mtu = unit->hci_max_acl_size; btr->btr_sco_mtu = unit->hci_max_sco_size; btr->btr_max_acl = unit->hci_max_acl_pkts; btr->btr_max_sco = unit->hci_max_sco_pkts; btr->btr_packet_type = unit->hci_packet_type; btr->btr_link_policy = unit->hci_link_policy; break; case SIOCSBTFLAGS: /* set unit flags (privileged) */ err = kauth_authorize_device(kauth_cred_get(), KAUTH_DEVICE_BLUETOOTH_SETPRIV, unit, KAUTH_ARG(cmd), btr, NULL); if (err) break; if ((unit->hci_flags & BTF_UP) && (btr->btr_flags & BTF_UP) == 0) { hci_disable(unit); unit->hci_flags &= ~BTF_UP; } unit->hci_flags &= ~BTF_MASTER; unit->hci_flags |= (btr->btr_flags & (BTF_INIT | BTF_MASTER)); if ((unit->hci_flags & BTF_UP) == 0 && (btr->btr_flags & BTF_UP)) { err = hci_enable(unit); if (err) break; unit->hci_flags |= BTF_UP; } btr->btr_flags = unit->hci_flags; break; case SIOCSBTPOLICY: /* set unit link policy (privileged) */ err = kauth_authorize_device(kauth_cred_get(), KAUTH_DEVICE_BLUETOOTH_SETPRIV, unit, KAUTH_ARG(cmd), btr, NULL); if (err) break; unit->hci_link_policy = btr->btr_link_policy; unit->hci_link_policy &= unit->hci_lmp_mask; btr->btr_link_policy = unit->hci_link_policy; break; case SIOCSBTPTYPE: /* set unit packet types (privileged) */ err = kauth_authorize_device(kauth_cred_get(), KAUTH_DEVICE_BLUETOOTH_SETPRIV, unit, KAUTH_ARG(cmd), btr, NULL); if (err) break; unit->hci_packet_type = btr->btr_packet_type; unit->hci_packet_type &= unit->hci_acl_mask; btr->btr_packet_type = unit->hci_packet_type; break; case SIOCGBTSTATS: /* get unit statistics */ (*unit->hci_if->get_stats)(unit->hci_dev, &btr->btr_stats, 0); break; case SIOCZBTSTATS: /* get & reset unit statistics */ err = kauth_authorize_device(kauth_cred_get(), KAUTH_DEVICE_BLUETOOTH_SETPRIV, unit, KAUTH_ARG(cmd), btr, NULL); if (err) break; (*unit->hci_if->get_stats)(unit->hci_dev, &btr->btr_stats, 1); break; case SIOCSBTSCOMTU: /* set sco_mtu value for unit */ /* 
* This is a temporary ioctl and may not be supported * in the future. The need is that if SCO packets are * sent to USB bluetooth controllers that are not an * integer number of frame sizes, the USB bus locks up. */ err = kauth_authorize_device(kauth_cred_get(), KAUTH_DEVICE_BLUETOOTH_SETPRIV, unit, KAUTH_ARG(cmd), btr, NULL); if (err) break; unit->hci_max_sco_size = btr->btr_sco_mtu; break; case SIOCGBTFEAT: /* get unit features */ memset(btr, 0, sizeof(struct btreq)); strlcpy(btr->btr_name, device_xname(unit->hci_dev), HCI_DEVNAME_SIZE); memcpy(btr->btr_features0, unit->hci_feat0, HCI_FEATURES_SIZE); memcpy(btr->btr_features1, unit->hci_feat1, HCI_FEATURES_SIZE); memcpy(btr->btr_features2, unit->hci_feat2, HCI_FEATURES_SIZE); break; default: err = EFAULT; break; } return err; } |
| 3 3 3 3 3 1 1 1 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 
1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 
1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 | /* $NetBSD: if_ppp.c,v 1.169 2022/07/06 08:07:23 riastradh Exp $ */ /* Id: if_ppp.c,v 1.6 1997/03/04 03:33:00 paulus Exp */ /* * if_ppp.c - Point-to-Point Protocol (PPP) Asynchronous driver. * * Copyright (c) 1984-2000 Carnegie Mellon University. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The name "Carnegie Mellon University" must not be used to * endorse or promote products derived from this software without * prior written permission. For permission or any legal * details, please contact * Office of Technology Transfer * Carnegie Mellon University * 5000 Forbes Avenue * Pittsburgh, PA 15213-3890 * (412) 268-4387, fax: (412) 268-7395 * tech-transfer@andrew.cmu.edu * * 4. 
Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by Computing Services * at Carnegie Mellon University (http://www.cmu.edu/computing/)." * * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * Based on: * @(#)if_sl.c 7.6.1.2 (Berkeley) 2/15/89 * * Copyright (c) 1987 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Serial Line interface * * Rick Adams * Center for Seismic Studies * 1300 N 17th Street, Suite 1450 * Arlington, Virginia 22209 * (703)276-7900 * rick@seismo.ARPA * seismo!rick * * Pounded on heavily by Chris Torek (chris@mimsy.umd.edu, umcp-cs!chris). * Converted to 4.3BSD Beta by Chris Torek. * Other changes made at Berkeley, based in part on code by Kirk Smith. 
* * Converted to 4.3BSD+ 386BSD by Brad Parker (brad@cayman.com) * Added VJ tcp header compression; more unified ioctls * * Extensively modified by Paul Mackerras (paulus@cs.anu.edu.au). * Cleaned up a lot of the mbuf-related code to fix bugs that * caused system crashes and packet corruption. Changed pppstart * so that it doesn't just give up with a collision if the whole * packet doesn't fit in the output ring buffer. * * Added priority queueing for interactive IP packets, following * the model of if_sl.c, plus hooks for bpf. * Paul Mackerras (paulus@cs.anu.edu.au). */ /* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */ /* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */ /* * XXX IMP ME HARDER * * This is an explanation of that comment. This code used to use * splimp() to block both network and tty interrupts. However, * that call is deprecated. So, we have replaced the uses of * splimp() with splhigh() in order to applomplish what it needs * to accomplish, and added that happy little comment. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: if_ppp.c,v 1.169 2022/07/06 08:07:23 riastradh Exp $"); #ifdef _KERNEL_OPT #include "ppp.h" #include "opt_inet.h" #include "opt_gateway.h" #include "opt_ppp.h" #endif #ifdef INET #define VJC #endif #define PPP_COMPRESS #include <sys/param.h> #include <sys/proc.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/kernel.h> #include <sys/systm.h> #include <sys/time.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> #include <sys/once.h> #include <sys/conf.h> #include <sys/kauth.h> #include <sys/intr.h> #include <sys/socketvar.h> #include <sys/device.h> #include <sys/module.h> #include <net/if.h> #include <net/if_types.h> #include <net/netisr.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> #ifdef INET #include <netinet/ip.h> #endif #include <net/bpf.h> #include <net/slip.h> #ifdef VJC #include <net/slcompress.h> #endif #include <net/ppp_defs.h> #include <net/if_ppp.h> #include <net/if_pppvar.h> #include <sys/cpu.h> #ifdef PPP_COMPRESS #define PACKETPTR struct mbuf * #include <net/ppp-comp.h> #endif #include "ioconf.h" static int pppsioctl(struct ifnet *, u_long, void *); static void ppp_requeue(struct ppp_softc *); static void ppp_ccp(struct ppp_softc *, struct mbuf *m, int rcvd); static void ppp_ccp_closed(struct ppp_softc *); static void ppp_inproc(struct ppp_softc *, struct mbuf *); static void pppdumpm(struct mbuf *m0); #ifdef ALTQ static void ppp_ifstart(struct ifnet *ifp); #endif static void pppintr(void *); extern struct linesw ppp_disc; /* * We define two link layer specific mbuf flags, to mark high-priority * packets for output, and received packets following lost/corrupted * packets. 
*/ #define M_HIGHPRI M_LINK0 /* output packet for sc_fastq */ #define M_ERRMARK M_LINK1 /* rx packet following lost/corrupted pkt */ static int ppp_clone_create(struct if_clone *, int); static int ppp_clone_destroy(struct ifnet *); static struct ppp_softc *ppp_create(const char *, int); static struct { LIST_HEAD(ppp_sclist, ppp_softc) list; kmutex_t lock; } ppp_softcs __cacheline_aligned; struct if_clone ppp_cloner = IF_CLONE_INITIALIZER("ppp", ppp_clone_create, ppp_clone_destroy); #ifdef PPP_COMPRESS static LIST_HEAD(, compressor) ppp_compressors = { NULL }; static kmutex_t ppp_compressors_mtx; static int ppp_compressor_init(void); static int ppp_compressor_destroy(void); static struct compressor *ppp_get_compressor(uint8_t); static void ppp_compressor_rele(struct compressor *); #endif /* PPP_COMPRESS */ /* * Called from boot code to establish ppp interfaces. */ void pppattach(int n __unused) { /* * Nothing to do here, initialization is handled by the * module initialization code in pppinit() below). 
 */
}

/*
 * Module initialization: bring up the compressor registry, attach the
 * PPP tty line discipline, initialize the softc list and its lock, and
 * register the "ppp" interface cloner.
 */
static void
pppinit(void)
{
	/* Init the compressor sub-sub-system */
	ppp_compressor_init();

	if (ttyldisc_attach(&ppp_disc) != 0)
		panic("%s", __func__);

	mutex_init(&ppp_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
	LIST_INIT(&ppp_softcs.list);
	if_clone_attach(&ppp_cloner);
}

/*
 * Module teardown.  Refuses with EBUSY while any ppp interface still
 * exists; otherwise detaches the line discipline, the cloner, and the
 * compressor registry.  Returns 0 on success or an errno value.
 */
static int
pppdetach(void)
{
	int error = 0;

	if (!LIST_EMPTY(&ppp_softcs.list))
		error = EBUSY;

	if (error == 0)
		error = ttyldisc_detach(&ppp_disc);

	if (error == 0) {
		mutex_destroy(&ppp_softcs.lock);
		if_clone_detach(&ppp_cloner);
		ppp_compressor_destroy();
	}

	return error;
}

/*
 * Allocate and attach a new ppp interface.
 *
 * name is the interface name prefix ("ppp"); unit is the requested unit
 * number, or -1 to pick the lowest unused one.  The softc list is kept
 * sorted by unit number, which is what makes the first-gap scan below
 * work.  Returns the new softc, or NULL if the requested unit already
 * exists.  May sleep (M_WAIT allocation).
 */
static struct ppp_softc *
ppp_create(const char *name, int unit)
{
	struct ppp_softc *sc, *sci, *scl = NULL;

	/* Allocate before taking the lock; M_WAIT means this cannot fail. */
	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAIT|M_ZERO);

	mutex_enter(&ppp_softcs.lock);
	if (unit == -1) {
		/*
		 * Caller wants any unit: walk the sorted list and take the
		 * first gap in the unit numbering (or one past the end).
		 */
		int i = 0;
		LIST_FOREACH(sci, &ppp_softcs.list, sc_iflist) {
			scl = sci;
			if (i < sci->sc_unit) {
				unit = i;
				break;
			} else {
#ifdef DIAGNOSTIC
				/* Sorted + no gap so far implies equality. */
				KASSERT(i == sci->sc_unit);
#endif
				i++;
			}
		}
		if (unit == -1)
			unit = i;
	} else {
		/*
		 * Specific unit requested: find the insertion point, or
		 * bail out if that unit is already taken.
		 */
		LIST_FOREACH(sci, &ppp_softcs.list, sc_iflist) {
			scl = sci;
			if (unit < sci->sc_unit)
				break;
			else if (unit == sci->sc_unit) {
				free(sc, M_DEVBUF);
				mutex_exit(&ppp_softcs.lock);
				return NULL;
			}
		}
	}

	/*
	 * Insert in sorted position: before the first larger entry (sci),
	 * else after the last entry seen (scl), else at the head.
	 */
	if (sci != NULL)
		LIST_INSERT_BEFORE(sci, sc, sc_iflist);
	else if (scl != NULL)
		LIST_INSERT_AFTER(scl, sc, sc_iflist);
	else
		LIST_INSERT_HEAD(&ppp_softcs.list, sc, sc_iflist);

	mutex_exit(&ppp_softcs.lock);

	/* Initialize the ifnet and attach it to the network stack. */
	if_initname(&sc->sc_if, name, sc->sc_unit = unit);
	callout_init(&sc->sc_timo_ch, 0);
	sc->sc_if.if_softc = sc;
	sc->sc_if.if_mtu = PPP_MTU;
	sc->sc_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
	sc->sc_if.if_type = IFT_PPP;
	sc->sc_if.if_hdrlen = PPP_HDRLEN;
	sc->sc_if.if_dlt = DLT_NULL;
	sc->sc_if.if_ioctl = pppsioctl;
	sc->sc_if.if_output = pppoutput;
#ifdef ALTQ
	sc->sc_if.if_start = ppp_ifstart;
#endif
	IFQ_SET_MAXLEN(&sc->sc_if.if_snd, IFQ_MAXLEN);
	sc->sc_inq.ifq_maxlen = IFQ_MAXLEN;
	sc->sc_fastq.ifq_maxlen = IFQ_MAXLEN;
	sc->sc_rawq.ifq_maxlen = IFQ_MAXLEN;
	/* Ratio of 1:2 packets between the regular and the fast queue */
	sc->sc_maxfastq = 2;

	IFQ_SET_READY(&sc->sc_if.if_snd);
	if_attach(&sc->sc_if);
	if_alloc_sadl(&sc->sc_if);
	bpf_attach(&sc->sc_if, DLT_NULL, 0);
	return sc;
}

/*
 * Cloner create hook: EEXIST if the requested unit is already in use.
 */
static int
ppp_clone_create(struct if_clone *ifc, int unit)
{
	return ppp_create(ifc->ifc_name, unit) == NULL ? EEXIST : 0;
}

/*
 * Cloner destroy hook.  Refuses (EBUSY) while the interface is still
 * bound to a device; otherwise unlinks the softc, detaches the ifnet
 * and bpf, and frees the softc.
 */
static int
ppp_clone_destroy(struct ifnet *ifp)
{
	struct ppp_softc *sc = (struct ppp_softc *)ifp->if_softc;

	if (sc->sc_devp != NULL)
		return EBUSY; /* Not removing it */

	mutex_enter(&ppp_softcs.lock);
	LIST_REMOVE(sc, sc_iflist);
	mutex_exit(&ppp_softcs.lock);
	bpf_detach(ifp);
	if_detach(ifp);
	free(sc, M_DEVBUF);
	return 0;
}

/*
 * Allocate a ppp interface unit and initialize it.
 *
 * First honour a pending unit transfer to this pid (sc_xfer), then
 * reuse any softc not currently bound to a device, and finally create
 * a fresh interface.  The chosen unit is (re)initialized to defaults:
 * MRU, VJ compressor state, NP modes (all NPMODE_ERROR until pppd
 * enables them), and the NP hold queue.  Returns NULL if the soft
 * interrupt handler cannot be established.
 */
struct ppp_softc *
pppalloc(pid_t pid)
{
	struct ppp_softc *sc = NULL, *scf;
	int i;

	mutex_enter(&ppp_softcs.lock);
	LIST_FOREACH(scf, &ppp_softcs.list, sc_iflist) {
		if (scf->sc_xfer == pid) {
			/* A unit was handed off to this process; claim it. */
			scf->sc_xfer = 0;
			mutex_exit(&ppp_softcs.lock);
			return scf;
		}
		/* Remember the first unattached unit as a fallback. */
		if (scf->sc_devp == NULL && sc == NULL)
			sc = scf;
	}
	mutex_exit(&ppp_softcs.lock);

	/*
	 * None free: create one.  With unit -1 ppp_create always finds a
	 * unit, so sc is non-NULL here (M_WAIT allocation cannot fail).
	 */
	if (sc == NULL)
		sc = ppp_create(ppp_cloner.ifc_name, -1);

	sc->sc_si = softint_establish(SOFTINT_NET, pppintr, sc);
	if (sc->sc_si == NULL) {
		/*
		 * NOTE(review): on this failure path the softc stays on the
		 * list unclaimed; it can be picked up by a later pppalloc().
		 */
		printf("%s: unable to establish softintr\n",
		    sc->sc_if.if_xname);
		return NULL;
	}
	sc->sc_flags = 0;
	sc->sc_mru = PPP_MRU;
	sc->sc_relinq = NULL;
	(void)memset(&sc->sc_stats, 0, sizeof(sc->sc_stats));
#ifdef VJC
	/* VJ state is best-effort: M_NOWAIT may fail and sc_comp stays NULL. */
	sc->sc_comp = malloc(sizeof(struct slcompress), M_DEVBUF, M_NOWAIT);
	if (sc->sc_comp)
		sl_compress_init(sc->sc_comp);
#endif
#ifdef PPP_COMPRESS
	sc->sc_xc_state = NULL;
	sc->sc_rc_state = NULL;
#endif /* PPP_COMPRESS */
	/* All network protocols are errors until pppd switches their mode. */
	for (i = 0; i < NUM_NP; ++i)
		sc->sc_npmode[i] = NPMODE_ERROR;
	sc->sc_npqueue = NULL;
	sc->sc_npqtail = &sc->sc_npqueue;
	sc->sc_last_sent = sc->sc_last_recv = time_second;

	return sc;
}

/*
 * Deallocate a ppp unit.  Must be called at splsoftnet or higher.
 */
void
pppdealloc(struct ppp_softc *sc)
{
	struct mbuf *m;

	/* Stop the soft interrupt first so no new packets are processed. */
	softint_disestablish(sc->sc_si);
	if_down(&sc->sc_if);
	sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
	sc->sc_devp = NULL;
	sc->sc_xfer = 0;
	/* Drain and free every queued mbuf: raw input queue, ... */
	for (;;) {
		IF_DEQUEUE(&sc->sc_rawq, m);
		if (m == NULL)
			break;
		m_freem(m);
	}
	/* ... decoded input queue, ... */
	for (;;) {
		IF_DEQUEUE(&sc->sc_inq, m);
		if (m == NULL)
			break;
		m_freem(m);
	}
	/* ... fast (interactive) output queue, ... */
	for (;;) {
		IF_DEQUEUE(&sc->sc_fastq, m);
		if (m == NULL)
			break;
		m_freem(m);
	}
	/* ... the NPmode hold queue (singly linked via m_nextpkt), ... */
	while ((m = sc->sc_npqueue) != NULL) {
		sc->sc_npqueue = m->m_nextpkt;
		m_freem(m);
	}
	/* ... and any packet staged for transmission. */
	if (sc->sc_togo != NULL) {
		m_freem(sc->sc_togo);
		sc->sc_togo = NULL;
	}
#ifdef PPP_COMPRESS
	/* Release (de)compressor state and drop the compressor refs. */
	ppp_ccp_closed(sc);
	sc->sc_xc_state = NULL;
	sc->sc_rc_state = NULL;
#endif /* PPP_COMPRESS */
#ifdef PPP_FILTER
	/* Free all four installed BPF filter programs, if any. */
	if (sc->sc_pass_filt_in.bf_insns != 0) {
		free(sc->sc_pass_filt_in.bf_insns, M_DEVBUF);
		sc->sc_pass_filt_in.bf_insns = 0;
		sc->sc_pass_filt_in.bf_len = 0;
	}
	if (sc->sc_pass_filt_out.bf_insns != 0) {
		free(sc->sc_pass_filt_out.bf_insns, M_DEVBUF);
		sc->sc_pass_filt_out.bf_insns = 0;
		sc->sc_pass_filt_out.bf_len = 0;
	}
	if (sc->sc_active_filt_in.bf_insns != 0) {
		free(sc->sc_active_filt_in.bf_insns, M_DEVBUF);
		sc->sc_active_filt_in.bf_insns = 0;
		sc->sc_active_filt_in.bf_len = 0;
	}
	if (sc->sc_active_filt_out.bf_insns != 0) {
		free(sc->sc_active_filt_out.bf_insns, M_DEVBUF);
		sc->sc_active_filt_out.bf_insns = 0;
		sc->sc_active_filt_out.bf_len = 0;
	}
#endif /* PPP_FILTER */
#ifdef VJC
	/* Free VJ compression state allocated in pppalloc(). */
	if (sc->sc_comp != 0) {
		free(sc->sc_comp, M_DEVBUF);
		sc->sc_comp = 0;
	}
#endif
	/*
	 * Finally tear down the interface itself; sc_devp was cleared
	 * above, so ppp_clone_destroy() will not return EBUSY.
	 */
	(void)ppp_clone_destroy(&sc->sc_if);
}

/*
 * Ioctl routine for generic ppp devices.
*/ int pppioctl(struct ppp_softc *sc, u_long cmd, void *data, int flag, struct lwp *l) { int s, error, flags, mru, npx; u_int nb; struct ppp_option_data *odp; struct compressor *cp; struct npioctl *npi; time_t t; #ifdef PPP_FILTER struct bpf_program *bp, *nbp; struct bpf_insn *newcode, *oldcode; int newcodelen; #endif /* PPP_FILTER */ #ifdef PPP_COMPRESS u_char ccp_option[CCP_MAX_OPTION_LENGTH]; #endif switch (cmd) { case PPPIOCSFLAGS: case PPPIOCSMRU: case PPPIOCSMAXCID: case PPPIOCSCOMPRESS: case PPPIOCSNPMODE: if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, &sc->sc_if, KAUTH_ARG(cmd), NULL) != 0) return EPERM; break; case PPPIOCXFERUNIT: /* XXX: Why is this privileged?! */ if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, &sc->sc_if, KAUTH_ARG(cmd), NULL) != 0) return EPERM; break; default: break; } switch (cmd) { case FIONREAD: *(int *)data = sc->sc_inq.ifq_len; break; case PPPIOCGUNIT: *(int *)data = sc->sc_unit; break; case PPPIOCGFLAGS: *(u_int *)data = sc->sc_flags; break; case PPPIOCGRAWIN: { struct ppp_rawin *rwin = (struct ppp_rawin *)data; u_char c, q = 0; for (c = sc->sc_rawin_start; c < sizeof(sc->sc_rawin.buf);) rwin->buf[q++] = sc->sc_rawin.buf[c++]; for (c = 0; c < sc->sc_rawin_start;) rwin->buf[q++] = sc->sc_rawin.buf[c++]; rwin->count = sc->sc_rawin.count; } break; case PPPIOCSFLAGS: flags = *(int *)data & SC_MASK; s = splsoftnet(); #ifdef PPP_COMPRESS if (sc->sc_flags & SC_CCP_OPEN && !(flags & SC_CCP_OPEN)) ppp_ccp_closed(sc); #endif splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags = (sc->sc_flags & ~SC_MASK) | flags; splx(s); break; case PPPIOCSMRU: mru = *(int *)data; if (mru >= PPP_MINMRU && mru <= PPP_MAXMRU) sc->sc_mru = mru; break; case PPPIOCGMRU: *(int *)data = sc->sc_mru; break; #ifdef VJC case PPPIOCSMAXCID: if (sc->sc_comp) { s = splsoftnet(); sl_compress_setup(sc->sc_comp, *(int *)data); splx(s); } break; #endif case 
PPPIOCXFERUNIT: sc->sc_xfer = l->l_proc->p_pid; break; #ifdef PPP_COMPRESS case PPPIOCSCOMPRESS: odp = (struct ppp_option_data *)data; nb = odp->length; if (nb > sizeof(ccp_option)) nb = sizeof(ccp_option); if (nb < 3) return EINVAL; if ((error = copyin(odp->ptr, ccp_option, nb)) != 0) return error; /* preliminary check on the length byte */ if (ccp_option[1] < 2) return EINVAL; cp = ppp_get_compressor(ccp_option[0]); if (cp == NULL) { if (sc->sc_flags & SC_DEBUG) printf("%s: no compressor for [%x %x %x], %x\n", sc->sc_if.if_xname, ccp_option[0], ccp_option[1], ccp_option[2], nb); return EINVAL; /* no handler found */ } /* * Found a handler for the protocol - try to allocate * a compressor or decompressor. */ error = 0; if (odp->transmit) { s = splsoftnet(); if (sc->sc_xc_state != NULL) { (*sc->sc_xcomp->comp_free)(sc->sc_xc_state); ppp_compressor_rele(sc->sc_xcomp); } sc->sc_xcomp = cp; sc->sc_xc_state = cp->comp_alloc(ccp_option, nb); if (sc->sc_xc_state == NULL) { if (sc->sc_flags & SC_DEBUG) printf("%s: comp_alloc failed\n", sc->sc_if.if_xname); error = ENOBUFS; } splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~SC_COMP_RUN; splx(s); } else { s = splsoftnet(); if (sc->sc_rc_state != NULL) { (*sc->sc_rcomp->decomp_free)(sc->sc_rc_state); ppp_compressor_rele(sc->sc_rcomp); } sc->sc_rcomp = cp; sc->sc_rc_state = cp->decomp_alloc(ccp_option, nb); if (sc->sc_rc_state == NULL) { if (sc->sc_flags & SC_DEBUG) printf("%s: decomp_alloc failed\n", sc->sc_if.if_xname); error = ENOBUFS; } splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~SC_DECOMP_RUN; splx(s); } return error; #endif /* PPP_COMPRESS */ case PPPIOCGNPMODE: case PPPIOCSNPMODE: npi = (struct npioctl *)data; switch (npi->protocol) { case PPP_IP: npx = NP_IP; break; case PPP_IPV6: npx = NP_IPV6; break; default: return EINVAL; } if (cmd == PPPIOCGNPMODE) { npi->mode = sc->sc_npmode[npx]; } else { if (npi->mode != sc->sc_npmode[npx]) { s = splnet(); sc->sc_npmode[npx] = npi->mode; if (npi->mode != 
NPMODE_QUEUE) { ppp_requeue(sc); ppp_restart(sc); } splx(s); } } break; case PPPIOCGIDLE: s = splsoftnet(); t = time_second; ((struct ppp_idle *)data)->xmit_idle = t - sc->sc_last_sent; ((struct ppp_idle *)data)->recv_idle = t - sc->sc_last_recv; splx(s); break; #ifdef PPP_FILTER case PPPIOCSPASS: case PPPIOCSACTIVE: /* These are no longer supported. */ return EOPNOTSUPP; case PPPIOCSIPASS: case PPPIOCSOPASS: case PPPIOCSIACTIVE: case PPPIOCSOACTIVE: nbp = (struct bpf_program *)data; if ((unsigned) nbp->bf_len > BPF_MAXINSNS) return EINVAL; newcodelen = nbp->bf_len * sizeof(struct bpf_insn); if (newcodelen != 0) { newcode = malloc(newcodelen, M_DEVBUF, M_WAITOK); /* WAITOK -- malloc() never fails. */ if ((error = copyin((void *)nbp->bf_insns, (void *)newcode, newcodelen)) != 0) { free(newcode, M_DEVBUF); return error; } if (!bpf_validate(newcode, nbp->bf_len)) { free(newcode, M_DEVBUF); return EINVAL; } } else newcode = 0; switch (cmd) { case PPPIOCSIPASS: bp = &sc->sc_pass_filt_in; break; case PPPIOCSOPASS: bp = &sc->sc_pass_filt_out; break; case PPPIOCSIACTIVE: bp = &sc->sc_active_filt_in; break; case PPPIOCSOACTIVE: bp = &sc->sc_active_filt_out; break; default: free(newcode, M_DEVBUF); return EPASSTHROUGH; } oldcode = bp->bf_insns; s = splnet(); bp->bf_len = nbp->bf_len; bp->bf_insns = newcode; splx(s); if (oldcode != 0) free(oldcode, M_DEVBUF); break; #endif /* PPP_FILTER */ default: return EPASSTHROUGH; } return 0; } /* * Process an ioctl request to the ppp network interface. 
*/ static int pppsioctl(struct ifnet *ifp, u_long cmd, void *data) { struct ppp_softc *sc = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *)data; struct ppp_stats *psp; #ifdef PPP_COMPRESS struct ppp_comp_stats *pcp; #endif int s = splnet(), error = 0; switch (cmd) { case SIOCSIFFLAGS: if ((error = ifioctl_common(ifp, cmd, data)) != 0) break; if ((ifp->if_flags & IFF_RUNNING) == 0) ifp->if_flags &= ~IFF_UP; break; case SIOCINITIFADDR: switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: printf("%s: af%d not supported\n", ifp->if_xname, ifa->ifa_addr->sa_family); error = EAFNOSUPPORT; break; } ifa->ifa_rtrequest = p2p_rtrequest; break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) { error = EAFNOSUPPORT; break; } switch (ifreq_getaddr(cmd, ifr)->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCGPPPSTATS: psp = &((struct ifpppstatsreq *)data)->stats; memset(psp, 0, sizeof(*psp)); psp->p = sc->sc_stats; #if defined(VJC) && !defined(SL_NO_STATS) if (sc->sc_comp) { psp->vj.vjs_packets = sc->sc_comp->sls_packets; psp->vj.vjs_compressed = sc->sc_comp->sls_compressed; psp->vj.vjs_searches = sc->sc_comp->sls_searches; psp->vj.vjs_misses = sc->sc_comp->sls_misses; psp->vj.vjs_uncompressedin = sc->sc_comp->sls_uncompressedin; psp->vj.vjs_compressedin = sc->sc_comp->sls_compressedin; psp->vj.vjs_errorin = sc->sc_comp->sls_errorin; psp->vj.vjs_tossed = sc->sc_comp->sls_tossed; } #endif /* VJC */ break; #ifdef PPP_COMPRESS case SIOCGPPPCSTATS: pcp = &((struct ifpppcstatsreq *)data)->stats; memset(pcp, 0, sizeof(*pcp)); if (sc->sc_xc_state != NULL) (*sc->sc_xcomp->comp_stat)(sc->sc_xc_state, &pcp->c); if (sc->sc_rc_state != NULL) (*sc->sc_rcomp->decomp_stat)(sc->sc_rc_state, &pcp->d); break; #endif /* PPP_COMPRESS */ default: if ((error = 
ifioctl_common(&sc->sc_if, cmd, data)) == ENETRESET) error = 0; break; } splx(s); return error; } /* * Queue a packet. Start transmission if not active. * Packet is placed in Information field of PPP frame. */ int pppoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, const struct rtentry *rtp) { struct ppp_softc *sc = ifp->if_softc; int protocol, address, control; u_char *cp; int s, error; #ifdef INET struct ip *ip; #endif struct ifqueue *ifq; enum NPmode mode; int len; if (sc->sc_devp == NULL || (ifp->if_flags & IFF_RUNNING) == 0 || ((ifp->if_flags & IFF_UP) == 0 && dst->sa_family != AF_UNSPEC)) { error = ENETDOWN; /* sort of */ goto bad; } IFQ_CLASSIFY(&ifp->if_snd, m0, dst->sa_family); /* * Compute PPP header. */ m0->m_flags &= ~M_HIGHPRI; switch (dst->sa_family) { #ifdef INET case AF_INET: address = PPP_ALLSTATIONS; control = PPP_UI; protocol = PPP_IP; mode = sc->sc_npmode[NP_IP]; /* * If this packet has the "low delay" bit set in the IP header, * put it on the fastq instead. */ ip = mtod(m0, struct ip *); if (ip->ip_tos & IPTOS_LOWDELAY) m0->m_flags |= M_HIGHPRI; break; #endif #ifdef INET6 case AF_INET6: address = PPP_ALLSTATIONS; /*XXX*/ control = PPP_UI; /*XXX*/ protocol = PPP_IPV6; mode = sc->sc_npmode[NP_IPV6]; #if 0 /* XXX flowinfo/traffic class, maybe? */ /* * If this packet has the "low delay" bit set in the IP header, * put it on the fastq instead. */ ip = mtod(m0, struct ip *); if (ip->ip_tos & IPTOS_LOWDELAY) m0->m_flags |= M_HIGHPRI; #endif break; #endif case AF_UNSPEC: address = PPP_ADDRESS(dst->sa_data); control = PPP_CONTROL(dst->sa_data); protocol = PPP_PROTOCOL(dst->sa_data); mode = NPMODE_PASS; break; default: printf("%s: af%d not supported\n", ifp->if_xname, dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Drop this packet, or return an error, if necessary. */ if (mode == NPMODE_ERROR) { error = ENETDOWN; goto bad; } if (mode == NPMODE_DROP) { error = 0; goto bad; } /* * Add PPP header. 
*/ M_PREPEND(m0, PPP_HDRLEN, M_DONTWAIT); if (m0 == NULL) { error = ENOBUFS; goto bad; } cp = mtod(m0, u_char *); *cp++ = address; *cp++ = control; *cp++ = protocol >> 8; *cp++ = protocol & 0xff; len = m_length(m0); if (sc->sc_flags & SC_LOG_OUTPKT) { printf("%s output: ", ifp->if_xname); pppdumpm(m0); } if ((protocol & 0x8000) == 0) { #ifdef PPP_FILTER /* * Apply the pass and active filters to the packet, * but only if it is a data packet. */ if (sc->sc_pass_filt_out.bf_insns != 0 && bpf_filter(sc->sc_pass_filt_out.bf_insns, (u_char *)m0, len, 0) == 0) { error = 0; /* drop this packet */ goto bad; } /* * Update the time we sent the most recent packet. */ if (sc->sc_active_filt_out.bf_insns == 0 || bpf_filter(sc->sc_active_filt_out.bf_insns, (u_char *)m0, len, 0)) sc->sc_last_sent = time_second; #else /* * Update the time we sent the most recent packet. */ sc->sc_last_sent = time_second; #endif /* PPP_FILTER */ } /* * See if bpf wants to look at the packet. */ bpf_mtap(&sc->sc_if, m0, BPF_D_OUT); /* * Put the packet on the appropriate queue. */ s = splnet(); if (mode == NPMODE_QUEUE) { /* XXX we should limit the number of packets on this queue */ *sc->sc_npqtail = m0; m0->m_nextpkt = NULL; sc->sc_npqtail = &m0->m_nextpkt; } else { ifq = (m0->m_flags & M_HIGHPRI) ? &sc->sc_fastq : NULL; if ((error = ifq_enqueue2(&sc->sc_if, ifq, m0)) != 0) { splx(s); if_statinc(&sc->sc_if, if_oerrors); sc->sc_stats.ppp_oerrors++; return error; } ppp_restart(sc); } if_statadd2(ifp, if_opackets, 1, if_obytes, len); splx(s); return 0; bad: m_freem(m0); return error; } /* * After a change in the NPmode for some NP, move packets from the * npqueue to the send queue or the fast queue as appropriate. * Should be called at splnet, since we muck with the queues. 
*/ static void ppp_requeue(struct ppp_softc *sc) { struct mbuf *m, **mpp; struct ifqueue *ifq; enum NPmode mode; int error; for (mpp = &sc->sc_npqueue; (m = *mpp) != NULL; ) { switch (PPP_PROTOCOL(mtod(m, u_char *))) { case PPP_IP: mode = sc->sc_npmode[NP_IP]; break; case PPP_IPV6: mode = sc->sc_npmode[NP_IPV6]; break; default: mode = NPMODE_PASS; } switch (mode) { case NPMODE_PASS: /* * This packet can now go on one of the queues to * be sent. */ *mpp = m->m_nextpkt; m->m_nextpkt = NULL; ifq = (m->m_flags & M_HIGHPRI) ? &sc->sc_fastq : NULL; if ((error = ifq_enqueue2(&sc->sc_if, ifq, m)) != 0) { if_statinc(&sc->sc_if, if_oerrors); sc->sc_stats.ppp_oerrors++; } break; case NPMODE_DROP: case NPMODE_ERROR: *mpp = m->m_nextpkt; m_freem(m); break; case NPMODE_QUEUE: mpp = &m->m_nextpkt; break; } } sc->sc_npqtail = mpp; } /* * Transmitter has finished outputting some stuff; * remember to call sc->sc_start later at splsoftnet. */ void ppp_restart(struct ppp_softc *sc) { int s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~SC_TBUSY; softint_schedule(sc->sc_si); splx(s); } /* * Get a packet to send. This procedure is intended to be called at * splsoftnet, since it may involve time-consuming operations such as * applying VJ compression, packet compression, address/control and/or * protocol field compression to the packet. */ struct mbuf * ppp_dequeue(struct ppp_softc *sc) { struct mbuf *m, *mp; u_char *cp; int address, control, protocol; int s; /* * Grab a packet to send: first try the fast queue, then the * normal queue. */ s = splnet(); if (sc->sc_nfastq < sc->sc_maxfastq) { IF_DEQUEUE(&sc->sc_fastq, m); if (m != NULL) sc->sc_nfastq++; else IFQ_DEQUEUE(&sc->sc_if.if_snd, m); } else { sc->sc_nfastq = 0; IFQ_DEQUEUE(&sc->sc_if.if_snd, m); if (m == NULL) { IF_DEQUEUE(&sc->sc_fastq, m); if (m != NULL) sc->sc_nfastq++; } } splx(s); if (m == NULL) return NULL; ++sc->sc_stats.ppp_opackets; /* * Extract the ppp header of the new packet. 
* The ppp header will be in one mbuf. */ cp = mtod(m, u_char *); address = PPP_ADDRESS(cp); control = PPP_CONTROL(cp); protocol = PPP_PROTOCOL(cp); switch (protocol) { case PPP_IP: #ifdef VJC /* * If the packet is a TCP/IP packet, see if we can compress it. */ if ((sc->sc_flags & SC_COMP_TCP) && sc->sc_comp != NULL) { struct ip *ip; int type; mp = m; ip = (struct ip *)(cp + PPP_HDRLEN); if (mp->m_len <= PPP_HDRLEN) { mp = mp->m_next; if (mp == NULL) break; ip = mtod(mp, struct ip *); } /* * This code assumes the IP/TCP header is in one * non-shared mbuf */ if (ip->ip_p == IPPROTO_TCP) { type = sl_compress_tcp(mp, ip, sc->sc_comp, !(sc->sc_flags & SC_NO_TCP_CCID)); switch (type) { case TYPE_UNCOMPRESSED_TCP: protocol = PPP_VJC_UNCOMP; break; case TYPE_COMPRESSED_TCP: protocol = PPP_VJC_COMP; cp = mtod(m, u_char *); cp[0] = address; /* Header has moved */ cp[1] = control; cp[2] = 0; break; } /* Update protocol in PPP header */ cp[3] = protocol; } } #endif /* VJC */ break; #ifdef PPP_COMPRESS case PPP_CCP: ppp_ccp(sc, m, 0); break; #endif /* PPP_COMPRESS */ } #ifdef PPP_COMPRESS if (protocol != PPP_LCP && protocol != PPP_CCP && sc->sc_xc_state && (sc->sc_flags & SC_COMP_RUN)) { struct mbuf *mcomp = NULL; int slen; slen = 0; for (mp = m; mp != NULL; mp = mp->m_next) slen += mp->m_len; (*sc->sc_xcomp->compress) (sc->sc_xc_state, &mcomp, m, slen, sc->sc_if.if_mtu + PPP_HDRLEN); if (mcomp != NULL) { if (sc->sc_flags & SC_CCP_UP) { /* * Send the compressed packet instead of the * original. */ m_freem(m); m = mcomp; cp = mtod(m, u_char *); protocol = cp[3]; } else { /* * Can't transmit compressed packets until CCP * is up. */ m_freem(mcomp); } } } #endif /* PPP_COMPRESS */ /* * Compress the address/control and protocol, if possible. 
*/ if (sc->sc_flags & SC_COMP_AC && address == PPP_ALLSTATIONS && control == PPP_UI && protocol != PPP_ALLSTATIONS && protocol != PPP_LCP) { /* can compress address/control */ m->m_data += 2; m->m_len -= 2; } if (sc->sc_flags & SC_COMP_PROT && protocol < 0xFF) { /* can compress protocol */ if (mtod(m, u_char *) == cp) { cp[2] = cp[1]; /* move address/control up */ cp[1] = cp[0]; } ++m->m_data; --m->m_len; } return m; } /* * Software interrupt routine, called at splsoftnet. */ static void pppintr(void *arg) { struct ppp_softc *sc = arg; struct mbuf *m; int s; mutex_enter(softnet_lock); if (!(sc->sc_flags & SC_TBUSY) && (IFQ_IS_EMPTY(&sc->sc_if.if_snd) == 0 || sc->sc_fastq.ifq_head || sc->sc_outm)) { s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags |= SC_TBUSY; splx(s); (*sc->sc_start)(sc); } for (;;) { s = splnet(); IF_DEQUEUE(&sc->sc_rawq, m); splx(s); if (m == NULL) break; ppp_inproc(sc, m); } mutex_exit(softnet_lock); } #ifdef PPP_COMPRESS /* * Handle a CCP packet. `rcvd' is 1 if the packet was received, * 0 if it is about to be transmitted. */ static void ppp_ccp(struct ppp_softc *sc, struct mbuf *m, int rcvd) { u_char *dp, *ep; struct mbuf *mp; int slen, s; /* * Get a pointer to the data after the PPP header. 
*/ if (m->m_len <= PPP_HDRLEN) { mp = m->m_next; if (mp == NULL) return; dp = mtod(mp, u_char *); } else { mp = m; dp = mtod(mp, u_char *) + PPP_HDRLEN; } ep = mtod(mp, u_char *) + mp->m_len; if (dp + CCP_HDRLEN > ep) return; slen = CCP_LENGTH(dp); if (dp + slen > ep) { if (sc->sc_flags & SC_DEBUG) printf("if_ppp/ccp: not enough data in mbuf (%p+%x > %p+%x)\n", dp, slen, mtod(mp, u_char *), mp->m_len); return; } switch (CCP_CODE(dp)) { case CCP_CONFREQ: case CCP_TERMREQ: case CCP_TERMACK: /* CCP must be going down - disable compression */ if (sc->sc_flags & SC_CCP_UP) { s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~(SC_CCP_UP | SC_COMP_RUN | SC_DECOMP_RUN); splx(s); } break; case CCP_CONFACK: if (sc->sc_flags & SC_CCP_OPEN && !(sc->sc_flags & SC_CCP_UP) && slen >= CCP_HDRLEN + CCP_OPT_MINLEN && slen >= CCP_OPT_LENGTH(dp + CCP_HDRLEN) + CCP_HDRLEN) { if (!rcvd) { /* We're agreeing to send compressed packets. */ if (sc->sc_xc_state != NULL && (*sc->sc_xcomp->comp_init)(sc->sc_xc_state, dp + CCP_HDRLEN, slen - CCP_HDRLEN, sc->sc_unit, 0, sc->sc_flags & SC_DEBUG)) { s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags |= SC_COMP_RUN; splx(s); } } else { /* * Peer is agreeing to send compressed * packets. */ if (sc->sc_rc_state != NULL && (*sc->sc_rcomp->decomp_init)( sc->sc_rc_state, dp + CCP_HDRLEN, slen - CCP_HDRLEN, sc->sc_unit, 0, sc->sc_mru, sc->sc_flags & SC_DEBUG)) { s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags |= SC_DECOMP_RUN; sc->sc_flags &= ~(SC_DC_ERROR | SC_DC_FERROR); splx(s); } } } break; case CCP_RESETACK: if (sc->sc_flags & SC_CCP_UP) { if (!rcvd) { if (sc->sc_xc_state && (sc->sc_flags & SC_COMP_RUN)) (*sc->sc_xcomp->comp_reset)(sc->sc_xc_state); } else { if (sc->sc_rc_state && (sc->sc_flags & SC_DECOMP_RUN)) { (*sc->sc_rcomp->decomp_reset)(sc->sc_rc_state); s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~SC_DC_ERROR; splx(s); } } } break; } } /* * CCP is down; free (de)compressor state if necessary. 
*/ static void ppp_ccp_closed(struct ppp_softc *sc) { if (sc->sc_xc_state) { (*sc->sc_xcomp->comp_free)(sc->sc_xc_state); ppp_compressor_rele(sc->sc_xcomp); sc->sc_xc_state = NULL; } if (sc->sc_rc_state) { (*sc->sc_rcomp->decomp_free)(sc->sc_rc_state); ppp_compressor_rele(sc->sc_rcomp); sc->sc_rc_state = NULL; } } #endif /* PPP_COMPRESS */ /* * PPP packet input routine. * The caller has checked and removed the FCS and has inserted * the address/control bytes and the protocol high byte if they * were omitted. */ void ppppktin(struct ppp_softc *sc, struct mbuf *m, int lost) { int s = splhigh(); /* XXX IMP ME HARDER */ if (lost) m->m_flags |= M_ERRMARK; IF_ENQUEUE(&sc->sc_rawq, m); softint_schedule(sc->sc_si); splx(s); } /* * Process a received PPP packet, doing decompression as necessary. * Should be called at splsoftnet. */ #define COMPTYPE(proto) \ ((proto) == PPP_VJC_COMP ? TYPE_COMPRESSED_TCP \ : TYPE_UNCOMPRESSED_TCP) static void ppp_inproc(struct ppp_softc *sc, struct mbuf *m) { struct ifnet *ifp = &sc->sc_if; pktqueue_t *pktq = NULL; struct ifqueue *inq = NULL; int s, ilen, proto, rv; u_char *cp, adrs, ctrl; struct mbuf *mp, *dmp = NULL; #ifdef VJC int xlen; u_char *iphdr; u_int hlen; #endif sc->sc_stats.ppp_ipackets++; if (sc->sc_flags & SC_LOG_INPKT) { ilen = 0; for (mp = m; mp != NULL; mp = mp->m_next) ilen += mp->m_len; printf("%s: got %d bytes\n", ifp->if_xname, ilen); pppdumpm(m); } cp = mtod(m, u_char *); adrs = PPP_ADDRESS(cp); ctrl = PPP_CONTROL(cp); proto = PPP_PROTOCOL(cp); if (m->m_flags & M_ERRMARK) { m->m_flags &= ~M_ERRMARK; s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags |= SC_VJ_RESET; splx(s); } #ifdef PPP_COMPRESS /* * Decompress this packet if necessary, update the receiver's * dictionary, or take appropriate action on a CCP packet. 
*/ if (proto == PPP_COMP && sc->sc_rc_state && (sc->sc_flags & SC_DECOMP_RUN) && !(sc->sc_flags & SC_DC_ERROR) && !(sc->sc_flags & SC_DC_FERROR)) { /* Decompress this packet */ rv = (*sc->sc_rcomp->decompress)(sc->sc_rc_state, m, &dmp); if (rv == DECOMP_OK) { m_freem(m); if (dmp == NULL) { /* * No error, but no decompressed packet * produced */ return; } m = dmp; cp = mtod(m, u_char *); proto = PPP_PROTOCOL(cp); } else { /* * An error has occurred in decompression. * Pass the compressed packet up to pppd, which may * take CCP down or issue a Reset-Req. */ if (sc->sc_flags & SC_DEBUG) printf("%s: decompress failed %d\n", ifp->if_xname, rv); s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags |= SC_VJ_RESET; if (rv == DECOMP_ERROR) sc->sc_flags |= SC_DC_ERROR; else sc->sc_flags |= SC_DC_FERROR; splx(s); } } else { if (sc->sc_rc_state && (sc->sc_flags & SC_DECOMP_RUN)) (*sc->sc_rcomp->incomp)(sc->sc_rc_state, m); if (proto == PPP_CCP) ppp_ccp(sc, m, 1); } #endif ilen = 0; for (mp = m; mp != NULL; mp = mp->m_next) ilen += mp->m_len; #ifdef VJC if (sc->sc_flags & SC_VJ_RESET) { /* * If we've missed a packet, we must toss subsequent compressed * packets which don't have an explicit connection ID. */ if (sc->sc_comp) sl_uncompress_tcp(NULL, 0, TYPE_ERROR, sc->sc_comp); s = splhigh(); /* XXX IMP ME HARDER */ sc->sc_flags &= ~SC_VJ_RESET; splx(s); } /* * See if we have a VJ-compressed packet to uncompress. */ if (proto == PPP_VJC_COMP) { if ((sc->sc_flags & SC_REJ_COMP_TCP) || sc->sc_comp == 0) goto bad; xlen = sl_uncompress_tcp_core(cp + PPP_HDRLEN, m->m_len - PPP_HDRLEN, ilen - PPP_HDRLEN, TYPE_COMPRESSED_TCP, sc->sc_comp, &iphdr, &hlen); if (xlen <= 0) { if (sc->sc_flags & SC_DEBUG) { printf("%s: VJ uncompress failed" " on type comp\n", ifp->if_xname); } goto bad; } /* Copy the PPP and IP headers into a new mbuf. 
*/ MGETHDR(mp, M_DONTWAIT, MT_DATA); if (mp == NULL) goto bad; mp->m_len = 0; mp->m_next = NULL; if (hlen + PPP_HDRLEN > MHLEN) { MCLGET(mp, M_DONTWAIT); if (M_TRAILINGSPACE(mp) < hlen + PPP_HDRLEN) { /* Lose if big headers and no clusters */ m_freem(mp); goto bad; } } cp = mtod(mp, u_char *); cp[0] = adrs; cp[1] = ctrl; cp[2] = 0; cp[3] = PPP_IP; proto = PPP_IP; bcopy(iphdr, cp + PPP_HDRLEN, hlen); mp->m_len = hlen + PPP_HDRLEN; /* * Trim the PPP and VJ headers off the old mbuf * and stick the new and old mbufs together. */ m->m_data += PPP_HDRLEN + xlen; m->m_len -= PPP_HDRLEN + xlen; if (m->m_len <= M_TRAILINGSPACE(mp)) { bcopy(mtod(m, u_char *), mtod(mp, u_char *) + mp->m_len, m->m_len); mp->m_len += m->m_len; mp->m_next = m_free(m); } else mp->m_next = m; m = mp; ilen += hlen - xlen; } else if (proto == PPP_VJC_UNCOMP) { if ((sc->sc_flags & SC_REJ_COMP_TCP) || sc->sc_comp == 0) goto bad; xlen = sl_uncompress_tcp_core(cp + PPP_HDRLEN, m->m_len - PPP_HDRLEN, ilen - PPP_HDRLEN, TYPE_UNCOMPRESSED_TCP, sc->sc_comp, &iphdr, &hlen); if (xlen < 0) { if (sc->sc_flags & SC_DEBUG) { printf("%s: VJ uncompress failed" " on type uncomp\n", ifp->if_xname); } goto bad; } proto = PPP_IP; cp[3] = PPP_IP; } #endif /* VJC */ /* * If the packet will fit in a header mbuf, don't waste a * whole cluster on it. */ if (ilen <= MHLEN && (m->m_flags & M_EXT)) { MGETHDR(mp, M_DONTWAIT, MT_DATA); if (mp != NULL) { m_copydata(m, 0, ilen, mtod(mp, void *)); m_freem(m); m = mp; m->m_len = ilen; } } m->m_pkthdr.len = ilen; m_set_rcvif(m, ifp); if ((proto & 0x8000) == 0) { #ifdef PPP_FILTER /* * See whether we want to pass this packet, and * if it counts as link activity. 
*/ if (sc->sc_pass_filt_in.bf_insns != 0 && bpf_filter(sc->sc_pass_filt_in.bf_insns, (u_char *)m, ilen, 0) == 0) { /* drop this packet */ m_freem(m); return; } if (sc->sc_active_filt_in.bf_insns == 0 || bpf_filter(sc->sc_active_filt_in.bf_insns, (u_char *)m, ilen, 0)) sc->sc_last_recv = time_second; #else /* * Record the time that we received this packet. */ sc->sc_last_recv = time_second; #endif /* PPP_FILTER */ } /* See if bpf wants to look at the packet. */ bpf_mtap(&sc->sc_if, m, BPF_D_IN); switch (proto) { #ifdef INET case PPP_IP: /* * IP packet - take off the ppp header and pass it up to IP. */ if ((ifp->if_flags & IFF_UP) == 0 || sc->sc_npmode[NP_IP] != NPMODE_PASS) { /* Interface is down - drop the packet. */ m_freem(m); return; } m->m_pkthdr.len -= PPP_HDRLEN; m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; #ifdef GATEWAY if (ipflow_fastforward(m)) return; #endif pktq = ip_pktq; break; #endif #ifdef INET6 case PPP_IPV6: /* * IPv6 packet - take off the ppp header and pass it up to * IPv6. */ if ((ifp->if_flags & IFF_UP) == 0 || sc->sc_npmode[NP_IPV6] != NPMODE_PASS) { /* interface is down - drop the packet. */ m_freem(m); return; } m->m_pkthdr.len -= PPP_HDRLEN; m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; #ifdef GATEWAY if (ip6flow_fastforward(&m)) return; #endif pktq = ip6_pktq; break; #endif default: /* * Some other protocol - place on input queue for read(). */ inq = &sc->sc_inq; pktq = NULL; break; } /* * Put the packet on the appropriate input queue. 
*/ s = splnet(); /* pktq: inet or inet6 cases */ if (__predict_true(pktq)) { if (__predict_false(!pktq_enqueue(pktq, m, 0))) { splx(s); if_statinc(ifp, if_iqdrops); goto bad; } if_statadd2(ifp, if_ipackets, 1, if_ibytes, ilen); splx(s); return; } /* ifq: other protocol cases */ if (!inq) { splx(s); goto bad; } if (IF_QFULL(inq)) { IF_DROP(inq); splx(s); if (sc->sc_flags & SC_DEBUG) printf("%s: input queue full\n", ifp->if_xname); if_statinc(ifp, if_iqdrops); goto bad; } IF_ENQUEUE(inq, m); splx(s); if_statadd2(ifp, if_ipackets, 1, if_ibytes, ilen); (*sc->sc_ctlp)(sc); return; bad: m_freem(m); if_statinc(&sc->sc_if, if_ierrors); sc->sc_stats.ppp_ierrors++; } #define MAX_DUMP_BYTES 128 static void pppdumpm(struct mbuf *m0) { char buf[3*MAX_DUMP_BYTES+4]; char *bp = buf; struct mbuf *m; for (m = m0; m; m = m->m_next) { int l = m->m_len; u_char *rptr = (u_char *)m->m_data; while (l--) { if (bp > buf + sizeof(buf) - 4) goto done; /* Convert byte to ascii hex */ *bp++ = hexdigits[*rptr >> 4]; *bp++ = hexdigits[*rptr++ & 0xf]; } if (m->m_next) { if (bp > buf + sizeof(buf) - 3) goto done; *bp++ = '|'; } else *bp++ = ' '; } done: if (m) *bp++ = '>'; *bp = 0; printf("%s\n", buf); } #ifdef ALTQ /* * A wrapper to transmit a packet from if_start since ALTQ uses * if_start to send a packet. 
 */
static void
ppp_ifstart(struct ifnet *ifp)
{
	struct ppp_softc *sc;

	sc = ifp->if_softc;
	(*sc->sc_start)(sc);
}
#endif

/*
 * Table mapping CCP compressor codes to the kernel modules that
 * implement them, used for on-demand autoloading.  Terminated by
 * a { 0, NULL } sentinel.
 */
static const struct ppp_known_compressor {
	uint8_t code;
	const char *module;
} ppp_known_compressors[] = {
	{ CI_DEFLATE, "ppp_deflate" },
	{ CI_DEFLATE_DRAFT, "ppp_deflate" },
	{ CI_BSD_COMPRESS, "ppp_bsdcomp" },
	{ CI_MPPE, "ppp_mppe" },
	{ 0, NULL }
};

/* Initialize the lock protecting the compressor registry. */
static int
ppp_compressor_init(void)
{
	mutex_init(&ppp_compressors_mtx, MUTEX_DEFAULT, IPL_NONE);
	return 0;
}

/* Tear down the compressor registry lock. */
static int
ppp_compressor_destroy(void)
{
	mutex_destroy(&ppp_compressors_mtx);
	return 0;
}

/* Drop a reference taken by ppp_get_compressor(). */
static void
ppp_compressor_rele(struct compressor *cp)
{
	mutex_enter(&ppp_compressors_mtx);
	--cp->comp_refcnt;
	mutex_exit(&ppp_compressors_mtx);
}

/*
 * Look up a registered compressor by CCP protocol code without
 * attempting module autoload.  If hold is true, take a reference
 * on the result.  Caller must hold ppp_compressors_mtx.
 */
static struct compressor *
ppp_get_compressor_noload(uint8_t ci, bool hold)
{
	struct compressor *cp;

	KASSERT(mutex_owned(&ppp_compressors_mtx));
	LIST_FOREACH(cp, &ppp_compressors, comp_list) {
		if (cp->compress_proto == ci) {
			if (hold)
				++cp->comp_refcnt;
			return cp;
		}
	}

	return NULL;
}

/*
 * Look up a compressor by CCP protocol code, autoloading the
 * implementing module if necessary.  Returns a referenced compressor
 * (release with ppp_compressor_rele()) or NULL if unavailable.
 */
static struct compressor *
ppp_get_compressor(uint8_t ci)
{
	struct compressor *cp = NULL;
	const struct ppp_known_compressor *pkc;

	/* Fast path: already registered. */
	mutex_enter(&ppp_compressors_mtx);
	cp = ppp_get_compressor_noload(ci, true);
	mutex_exit(&ppp_compressors_mtx);
	if (cp != NULL)
		return cp;

	/*
	 * Slow path: serialize against concurrent module load/unload,
	 * then re-check before attempting an autoload.
	 */
	kernconfig_lock();
	mutex_enter(&ppp_compressors_mtx);
	cp = ppp_get_compressor_noload(ci, true);
	mutex_exit(&ppp_compressors_mtx);
	if (cp == NULL) {
		/* Not found, so try to autoload a module */
		for (pkc = ppp_known_compressors; pkc->module != NULL; pkc++) {
			if (pkc->code == ci) {
				if (module_autoload(pkc->module,
				    MODULE_CLASS_MISC) != 0)
					break;
				/*
				 * The module's init hook registered the
				 * compressor; look it up again.
				 */
				mutex_enter(&ppp_compressors_mtx);
				cp = ppp_get_compressor_noload(ci, true);
				mutex_exit(&ppp_compressors_mtx);
				break;
			}
		}
	}
	kernconfig_unlock();

	return cp;
}

/*
 * Register an array of ncomp compressors.  All-or-nothing: returns
 * EEXIST (registering none) if any protocol code is already taken.
 */
int
ppp_register_compressor(struct compressor *pc, size_t ncomp)
{
	int error = 0;
	size_t i;

	mutex_enter(&ppp_compressors_mtx);
	/* First pass: check for duplicates before inserting anything. */
	for (i = 0; i < ncomp; i++) {
		if (ppp_get_compressor_noload(pc[i].compress_proto,
		    false) != NULL)
			error = EEXIST;
	}
	if (!error) {
		for (i = 0; i < ncomp; i++) {
			pc[i].comp_refcnt = 0;
			LIST_INSERT_HEAD(&ppp_compressors, &pc[i], comp_list);
		}
	}
	mutex_exit(&ppp_compressors_mtx);

	return error;
}

/*
 * Unregister an array of ncomp compressors.  All-or-nothing: returns
 * ENOENT if an entry is not registered (or registered by someone else),
 * or EBUSY if any entry still has references outstanding.
 */
int
ppp_unregister_compressor(struct compressor *pc, size_t ncomp)
{
	int error = 0;
	size_t i;

	mutex_enter(&ppp_compressors_mtx);
	/* First pass: verify ownership and that nothing is in use. */
	for (i = 0; i < ncomp; i++) {
		if (ppp_get_compressor_noload(pc[i].compress_proto,
		    false) != &pc[i])
			error = ENOENT;
		else if (pc[i].comp_refcnt != 0)
			error = EBUSY;
	}
	if (!error) {
		for (i = 0; i < ncomp; i++) {
			LIST_REMOVE(&pc[i], comp_list);
		}
	}
	mutex_exit(&ppp_compressors_mtx);

	return error;
}

/*
 * Module infrastructure
 */
#include "if_module.h"

#ifdef PPP_FILTER
#define PPP_DEP	"bpf_filter,"
#else
#define PPP_DEP
#endif

IF_MODULE(MODULE_CLASS_DRIVER, ppp, PPP_DEP "slcompress")
| 438 439 439 437 439 438 439 461 434 459 461 22 21 458 461 461 461 460 460 24 24 24 24 24 24 4 4 23 23 24 24 461 462 462 463 463 463 463 463 463 462 438 438 439 437 439 437 463 438 439 24 24 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 
979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 
1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 | /* $NetBSD: bus_dma.c,v 1.89 2022/08/20 23:48:51 riastradh Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2007, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. 
Hannum and by Jason R. Thorpe of the Numerical Aerospace * Simulation Facility NASA Ames Research Center, and by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.89 2022/08/20 23:48:51 riastradh Exp $"); /* * The following is included because _bus_dma_uiomove is derived from * uiomove() in kern_subr.c. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "ioapic.h" #include "isa.h" #include "opt_mpbios.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/asan.h> #include <sys/msan.h> #include <sys/bus.h> #include <machine/bus_private.h> #if NIOAPIC > 0 #include <machine/i82093var.h> #endif #ifdef MPBIOS #include <machine/mpbiosvar.h> #endif #include <machine/pmap_private.h> #if NISA > 0 #include <dev/isa/isareg.h> #include <dev/isa/isavar.h> #endif #include <uvm/uvm.h> extern paddr_t avail_end; #define IDTVEC(name) __CONCAT(X,name) typedef void (vector)(void); extern vector *IDTVEC(intr)[]; #define BUSDMA_BOUNCESTATS #ifdef BUSDMA_BOUNCESTATS #define BUSDMA_EVCNT_DECL(name) \ static struct evcnt bus_dma_ev_##name = \ EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "bus_dma", #name); \ EVCNT_ATTACH_STATIC(bus_dma_ev_##name) #define STAT_INCR(name) \ bus_dma_ev_##name.ev_count++ #define STAT_DECR(name) \ bus_dma_ev_##name.ev_count-- BUSDMA_EVCNT_DECL(nbouncebufs); BUSDMA_EVCNT_DECL(loads); BUSDMA_EVCNT_DECL(bounces); #else #define STAT_INCR(x) #define STAT_DECR(x) #endif static int _bus_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t, bus_size_t, int, bus_dmamap_t *); static void _bus_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t); static int _bus_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t, struct proc *, int); static int _bus_dmamap_load_mbuf(bus_dma_tag_t, 
bus_dmamap_t, struct mbuf *, int); static int _bus_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int); static int _bus_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t, bus_dma_segment_t *, int, bus_size_t, int); static void _bus_dmamap_unload(bus_dma_tag_t, bus_dmamap_t); static void _bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t, bus_size_t, int); static int _bus_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs, int flags); static void _bus_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs); static int _bus_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs, size_t size, void **kvap, int flags); static void _bus_dmamem_unmap(bus_dma_tag_t tag, void *kva, size_t size); static paddr_t _bus_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs, off_t off, int prot, int flags); static int _bus_dmatag_subregion(bus_dma_tag_t tag, bus_addr_t min_addr, bus_addr_t max_addr, bus_dma_tag_t *newtag, int flags); static void _bus_dmatag_destroy(bus_dma_tag_t tag); static int _bus_dma_uiomove(void *, struct uio *, size_t, int); static int _bus_dma_alloc_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map, bus_size_t size, int flags); static void _bus_dma_free_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map); static int _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf, bus_size_t buflen, struct vmspace *vm, int flags); static int _bus_dmamap_load_busaddr(bus_dma_tag_t, bus_dmamap_t, bus_addr_t, bus_size_t); #ifndef _BUS_DMAMEM_ALLOC_RANGE static int _bus_dmamem_alloc_range(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs, int flags, bus_addr_t low, bus_addr_t high); #define _BUS_DMAMEM_ALLOC_RANGE _bus_dmamem_alloc_range /* * Allocate physical memory from the given physical address range. * Called by DMA-safe memory allocation methods. 
 */
static int
_bus_dmamem_alloc_range(bus_dma_tag_t t, bus_size_t size,
    bus_size_t alignment, bus_size_t boundary,
    bus_dma_segment_t *segs, int nsegs, int *rsegs, int flags,
    bus_addr_t low, bus_addr_t high)
{
	paddr_t curaddr, lastaddr;
	struct vm_page *m;
	struct pglist mlist;
	int curseg, error;
	bus_size_t uboundary;

	/* Always round the size. */
	size = round_page(size);

	KASSERT(boundary >= PAGE_SIZE || boundary == 0);

	/*
	 * Allocate pages from the VM system.
	 * We accept boundaries < size, splitting in multiple segments
	 * if needed. uvm_pglistalloc does not, so compute an appropriate
	 * boundary: next power of 2 >= size
	 */
	if (boundary == 0)
		uboundary = 0;
	else {
		uboundary = boundary;
		while (uboundary < size)
			uboundary = uboundary << 1;
	}
	/* Sleep for memory unless the caller asked for BUS_DMA_NOWAIT. */
	error = uvm_pglistalloc(size, low, high, alignment, uboundary,
	    &mlist, nsegs, (flags & BUS_DMA_NOWAIT) == 0);
	if (error)
		return (error);

	/*
	 * Compute the location, size, and number of segments actually
	 * returned by the VM code.
	 */
	m = TAILQ_FIRST(&mlist);
	curseg = 0;
	lastaddr = segs[curseg].ds_addr = VM_PAGE_TO_PHYS(m);
	segs[curseg].ds_len = PAGE_SIZE;
	m = m->pageq.queue.tqe_next;

	/*
	 * Walk the remaining pages, merging physically contiguous pages
	 * into the current segment as long as no boundary is crossed;
	 * otherwise start a new segment.
	 */
	for (; m != NULL; m = m->pageq.queue.tqe_next) {
		curaddr = VM_PAGE_TO_PHYS(m);
		KASSERTMSG(curaddr >= low, "curaddr=%#"PRIxPADDR
		    " low=%#"PRIxBUSADDR" high=%#"PRIxBUSADDR,
		    curaddr, low, high);
		KASSERTMSG(curaddr < high, "curaddr=%#"PRIxPADDR
		    " low=%#"PRIxBUSADDR" high=%#"PRIxBUSADDR,
		    curaddr, low, high);
		if (curaddr == (lastaddr + PAGE_SIZE) &&
		    (lastaddr & boundary) == (curaddr & boundary)) {
			segs[curseg].ds_len += PAGE_SIZE;
		} else {
			curseg++;
			KASSERTMSG(curseg < nsegs, "curseg %d size %llx",
			    curseg, (long long)size);
			segs[curseg].ds_addr = curaddr;
			segs[curseg].ds_len = PAGE_SIZE;
		}
		lastaddr = curaddr;
	}

	*rsegs = curseg + 1;

	return (0);
}
#endif /* _BUS_DMAMEM_ALLOC_RANGE */

/*
 * Create a DMA map.
 */
static int
_bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
{
	struct x86_bus_dma_cookie *cookie;
	bus_dmamap_t map;
	int error, cookieflags;
	void *cookiestore, *mapstore;
	size_t cookiesize, mapsize;

	/*
	 * Allocate and initialize the DMA map.  The end of the map
	 * is a variable-sized array of segments, so we allocate enough
	 * room for them in one shot.
	 *
	 * Note we don't preserve the WAITOK or NOWAIT flags.  Preservation
	 * of ALLOCNOW notifies others that we've reserved these resources,
	 * and they are not to be freed.
	 *
	 * The bus_dmamap_t includes one bus_dma_segment_t, hence
	 * the (nsegments - 1).
	 */
	error = 0;
	mapsize = sizeof(struct x86_bus_dmamap) +
	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
	if ((mapstore = malloc(mapsize, M_DMAMAP, M_ZERO |
	    ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK))) == NULL)
		return (ENOMEM);

	map = (struct x86_bus_dmamap *)mapstore;
	map->_dm_size = size;
	map->_dm_segcnt = nsegments;
	map->_dm_maxmaxsegsz = maxsegsz;
	map->_dm_boundary = boundary;
	map->_dm_bounce_thresh = t->_bounce_thresh;
	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
	map->dm_maxsegsz = maxsegsz;
	map->dm_mapsize = 0;		/* no valid mappings */
	map->dm_nsegs = 0;

	/*
	 * If all physical memory lies below the bounce threshold,
	 * bouncing can never be required; disable it for this map.
	 */
	if (t->_bounce_thresh == 0 || _BUS_AVAIL_END <= t->_bounce_thresh - 1)
		map->_dm_bounce_thresh = 0;
	cookieflags = 0;

	/* Give the tag's hook a chance to demand (or veto) bouncing. */
	if (t->_may_bounce != NULL) {
		error = t->_may_bounce(t, map, flags, &cookieflags);
		if (error != 0)
			goto out;
	}

	if (map->_dm_bounce_thresh != 0)
		cookieflags |= X86_DMA_MIGHT_NEED_BOUNCE;

	/* No bouncing possible: no cookie needed, we're done. */
	if ((cookieflags & X86_DMA_MIGHT_NEED_BOUNCE) == 0) {
		*dmamp = map;
		return 0;
	}

	cookiesize = sizeof(struct x86_bus_dma_cookie) +
	    (sizeof(bus_dma_segment_t) * map->_dm_segcnt);

	/*
	 * Allocate our cookie.
	 */
	if ((cookiestore = malloc(cookiesize, M_DMAMAP, M_ZERO |
	    ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK))) == NULL) {
		error = ENOMEM;
		goto out;
	}
	cookie = (struct x86_bus_dma_cookie *)cookiestore;
	cookie->id_flags = cookieflags;
	map->_dm_cookie = cookie;

	/* Reserve the bounce pages up front. */
	error = _bus_dma_alloc_bouncebuf(t, map, size, flags);
out:
	if (error)
		_bus_dmamap_destroy(t, map);
	else
		*dmamp = map;

	return (error);
}

/*
 * Destroy a DMA map.
 */
static void
_bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;

	/*
	 * Free any bounce pages this map might hold.
	 */
	if (cookie != NULL) {
		if (cookie->id_flags & X86_DMA_HAS_BOUNCE)
			_bus_dma_free_bouncebuf(t, map);
		free(cookie, M_DMAMAP);
	}

	free(map, M_DMAMAP);
}

/*
 * Load a DMA map with a linear buffer.
 */
static int
_bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;
	int error;
	struct vmspace *vm;

	STAT_INCR(loads);

	/*
	 * Make sure that on error condition we return "no valid mappings."
	 */
	map->dm_mapsize = 0;
	map->dm_nsegs = 0;
	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);

	if (buflen > map->_dm_size)
		return EINVAL;

	/* p == NULL means a kernel-space buffer. */
	if (p != NULL) {
		vm = p->p_vmspace;
	} else {
		vm = vmspace_kernel();
	}
	error = _bus_dmamap_load_buffer(t, map, buf, buflen, vm, flags);
	if (error == 0) {
		if (cookie != NULL)
			cookie->id_flags &= ~X86_DMA_IS_BOUNCING;
		map->dm_mapsize = buflen;
		return 0;
	}

	if (cookie == NULL ||
	    (cookie->id_flags & X86_DMA_MIGHT_NEED_BOUNCE) == 0)
		return error;

	/*
	 * First attempt failed; bounce it.
	 */

	STAT_INCR(bounces);

	/*
	 * Allocate bounce pages, if necessary.
	 */
	if ((cookie->id_flags & X86_DMA_HAS_BOUNCE) == 0) {
		error = _bus_dma_alloc_bouncebuf(t, map, buflen, flags);
		if (error)
			return (error);
	}

	/*
	 * Cache a pointer to the caller's buffer and load the DMA map
	 * with the bounce buffer.
	 */
	cookie->id_origbuf = buf;
	cookie->id_origbuflen = buflen;
	cookie->id_buftype = X86_DMA_BUFTYPE_LINEAR;
	map->dm_nsegs = 0;
	error = bus_dmamap_load(t, map, cookie->id_bouncebuf, buflen,
	    p, flags);
	if (error)
		return (error);

	/* ...so _bus_dmamap_sync() knows we're bouncing */
	cookie->id_flags |= X86_DMA_IS_BOUNCING;
	return (0);
}

/*
 * Append the bus-address range [addr, addr + size) to the map's segment
 * list, splitting at boundary crossings and coalescing with the previous
 * segment when contiguous.  Returns EFBIG if the map runs out of
 * segments, or EINVAL if the range rises above the bounce threshold.
 */
static int
_bus_dmamap_load_busaddr(bus_dma_tag_t t, bus_dmamap_t map,
    bus_addr_t addr, bus_size_t size)
{
	bus_dma_segment_t * const segs = map->dm_segs;
	int nseg = map->dm_nsegs;
	bus_addr_t bmask = ~(map->_dm_boundary - 1);
	bus_addr_t lastaddr = 0xdead; /* XXX gcc */
	bus_size_t sgsize;

	if (nseg > 0)
		lastaddr = segs[nseg-1].ds_addr + segs[nseg-1].ds_len;
again:
	sgsize = size;
	/*
	 * Make sure we don't cross any boundaries.
	 */
	if (map->_dm_boundary > 0) {
		bus_addr_t baddr; /* next boundary address */

		baddr = (addr + map->_dm_boundary) & bmask;
		if (sgsize > (baddr - addr))
			sgsize = (baddr - addr);
	}

	/*
	 * Insert chunk into a segment, coalescing with
	 * previous segment if possible.
	 */
	if (nseg > 0 && addr == lastaddr &&
	    segs[nseg-1].ds_len + sgsize <= map->dm_maxsegsz &&
	    (map->_dm_boundary == 0 ||
	     (segs[nseg-1].ds_addr & bmask) == (addr & bmask))) {
		/* coalesce */
		segs[nseg-1].ds_len += sgsize;
	} else if (nseg >= map->_dm_segcnt) {
		return EFBIG;
	} else {
		/* new segment */
		segs[nseg].ds_addr = addr;
		segs[nseg].ds_len = sgsize;
		nseg++;
	}

	lastaddr = addr + sgsize;
	if (map->_dm_bounce_thresh != 0 && lastaddr > map->_dm_bounce_thresh)
		return EINVAL;

	addr += sgsize;
	size -= sgsize;
	if (size > 0)
		goto again;

	map->dm_nsegs = nseg;
	return 0;
}

/*
 * Like _bus_dmamap_load(), but for mbufs.
 */
static int
_bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
    struct mbuf *m0, int flags)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;
	int error;
	struct mbuf *m;

	/*
	 * Make sure on error condition we return "no valid mappings."
	 */
	map->dm_mapsize = 0;
	map->dm_nsegs = 0;
	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);

	KASSERT(m0->m_flags & M_PKTHDR);

	if (m0->m_pkthdr.len > map->_dm_size)
		return (EINVAL);

	error = 0;
	/*
	 * Walk the chain, choosing the cheapest translation available
	 * for each mbuf's storage class.
	 */
	for (m = m0; m != NULL && error == 0; m = m->m_next) {
		int offset;
		int remainbytes;
		const struct vm_page * const *pgs;
		paddr_t paddr;
		int size;

		if (m->m_len == 0)
			continue;
		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER|M_EXT_PAGES)) {
		case M_EXT|M_EXT_CLUSTER:
			/* cluster with a cached physical address */
			/* XXX KDASSERT */
			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
			paddr = m->m_ext.ext_paddr +
			    (m->m_data - m->m_ext.ext_buf);
			size = m->m_len;
			error = _bus_dmamap_load_busaddr(t, map,
			    _BUS_PHYS_TO_BUS(paddr), size);
			break;

		case M_EXT|M_EXT_PAGES:
			/* external storage backed by a vm_page array */
			KASSERT(m->m_ext.ext_buf <= m->m_data);
			KASSERT(m->m_data <=
			    m->m_ext.ext_buf + m->m_ext.ext_size);

			offset = (vaddr_t)m->m_data -
			    trunc_page((vaddr_t)m->m_ext.ext_buf);
			remainbytes = m->m_len;

			/* skip uninteresting pages */
			pgs = (const struct vm_page * const *)
			    m->m_ext.ext_pgs + (offset >> PAGE_SHIFT);

			offset &= PAGE_MASK; /* offset in the first page */

			/* load each pages */
			while (remainbytes > 0) {
				const struct vm_page *pg;
				bus_addr_t busaddr;

				size = MIN(remainbytes, PAGE_SIZE - offset);

				pg = *pgs++;
				KASSERT(pg);
				busaddr = _BUS_VM_PAGE_TO_BUS(pg) + offset;

				error = _bus_dmamap_load_busaddr(t, map,
				    busaddr, size);
				if (error)
					break;
				offset = 0;
				remainbytes -= size;
			}
			break;

		case 0:
			/* internal mbuf storage; physical address known */
			paddr = m->m_paddr + M_BUFOFFSET(m) +
			    (m->m_data - M_BUFADDR(m));
			size = m->m_len;
			error = _bus_dmamap_load_busaddr(t, map,
			    _BUS_PHYS_TO_BUS(paddr), size);
			break;

		default:
			/* fall back to a virtual-address translation */
			error = _bus_dmamap_load_buffer(t, map, m->m_data,
			    m->m_len, vmspace_kernel(), flags);
		}
	}
	if (error == 0) {
		map->dm_mapsize = m0->m_pkthdr.len;
		return 0;
	}

	map->dm_nsegs = 0;

	if (cookie == NULL ||
	    (cookie->id_flags & X86_DMA_MIGHT_NEED_BOUNCE) == 0)
		return error;

	/*
	 * First attempt failed; bounce it.
	 */

	STAT_INCR(bounces);

	/*
	 * Allocate bounce pages, if necessary.
	 */
	if ((cookie->id_flags & X86_DMA_HAS_BOUNCE) == 0) {
		error = _bus_dma_alloc_bouncebuf(t, map, m0->m_pkthdr.len,
		    flags);
		if (error)
			return (error);
	}

	/*
	 * Cache a pointer to the caller's buffer and load the DMA map
	 * with the bounce buffer.
	 */
	cookie->id_origbuf = m0;
	cookie->id_origbuflen = m0->m_pkthdr.len;	/* not really used */
	cookie->id_buftype = X86_DMA_BUFTYPE_MBUF;
	error = bus_dmamap_load(t, map, cookie->id_bouncebuf,
	    m0->m_pkthdr.len, NULL, flags);
	if (error)
		return (error);

	/* ...so _bus_dmamap_sync() knows we're bouncing */
	cookie->id_flags |= X86_DMA_IS_BOUNCING;
	return (0);
}

/*
 * Like _bus_dmamap_load(), but for uios.
 */
static int
_bus_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
    struct uio *uio, int flags)
{
	int i, error;
	bus_size_t minlen, resid;
	struct vmspace *vm;
	struct iovec *iov;
	void *addr;
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;

	/*
	 * Make sure that on error condition we return "no valid mappings."
	 */
	map->dm_mapsize = 0;
	map->dm_nsegs = 0;
	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);

	resid = uio->uio_resid;
	iov = uio->uio_iov;

	vm = uio->uio_vmspace;

	error = 0;
	for (i = 0; i < uio->uio_iovcnt && resid != 0 && error == 0; i++) {
		/*
		 * Now at the first iovec to load.  Load each iovec
		 * until we have exhausted the residual count.
		 */
		minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len;
		addr = (void *)iov[i].iov_base;

		error = _bus_dmamap_load_buffer(t, map, addr, minlen,
		    vm, flags);

		resid -= minlen;
	}
	if (error == 0) {
		map->dm_mapsize = uio->uio_resid;
		return 0;
	}

	map->dm_nsegs = 0;

	if (cookie == NULL ||
	    (cookie->id_flags & X86_DMA_MIGHT_NEED_BOUNCE) == 0)
		return error;

	STAT_INCR(bounces);

	/*
	 * Allocate bounce pages, if necessary.
	 */
	if ((cookie->id_flags & X86_DMA_HAS_BOUNCE) == 0) {
		error = _bus_dma_alloc_bouncebuf(t, map, uio->uio_resid,
		    flags);
		if (error)
			return (error);
	}

	/*
	 * Cache a pointer to the caller's buffer and load the DMA map
	 * with the bounce buffer.
	 */
	cookie->id_origbuf = uio;
	cookie->id_origbuflen = uio->uio_resid;
	cookie->id_buftype = X86_DMA_BUFTYPE_UIO;
	error = bus_dmamap_load(t, map, cookie->id_bouncebuf,
	    uio->uio_resid, NULL, flags);
	if (error)
		return (error);

	/* ...so _bus_dmamap_sync() knows we're bouncing */
	cookie->id_flags |= X86_DMA_IS_BOUNCING;
	return (0);
}

/*
 * Like _bus_dmamap_load(), but for raw memory allocated with
 * bus_dmamem_alloc().
 */
static int
_bus_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, bus_size_t size0, int flags)
{
	bus_size_t size;
	int i, error = 0;

	/*
	 * Make sure that on error condition we return "no valid mappings."
	 */
	map->dm_mapsize = 0;
	map->dm_nsegs = 0;
	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);

	if (size0 > map->_dm_size)
		return EINVAL;

	/* Feed each source segment into the map, up to size0 bytes. */
	for (i = 0, size = size0; i < nsegs && size > 0; i++) {
		bus_dma_segment_t *ds = &segs[i];
		bus_size_t sgsize;

		sgsize = MIN(ds->ds_len, size);
		if (sgsize == 0)
			continue;
		error = _bus_dmamap_load_busaddr(t, map, ds->ds_addr,
		    sgsize);
		if (error != 0)
			break;
		size -= sgsize;
	}

	if (error != 0) {
		map->dm_mapsize = 0;
		map->dm_nsegs = 0;
		return error;
	}

	/* XXX TBD bounce */
	map->dm_mapsize = size0;
	return 0;
}

/*
 * Unload a DMA map.
 */
static void
_bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;

	/*
	 * If we have bounce pages, free them, unless they're
	 * reserved for our exclusive use.
	 */
	if (cookie != NULL) {
		cookie->id_flags &= ~X86_DMA_IS_BOUNCING;
		cookie->id_buftype = X86_DMA_BUFTYPE_INVALID;
	}

	map->dm_maxsegsz = map->_dm_maxmaxsegsz;
	map->dm_mapsize = 0;
	map->dm_nsegs = 0;
}

/*
 * Synchronize a DMA map.
 *
 * Reference:
 *
 *	AMD64 Architecture Programmer's Manual, Volume 2: System
 *	Programming, 24593--Rev. 3.38--November 2021, Sec. 7.4.2 Memory
 *	Barrier Interaction with Memory Types, Table 7-3, p. 196.
 *	https://web.archive.org/web/20220625040004/https://www.amd.com/system/files/TechDocs/24593.pdf#page=256
 */
static void
_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
    bus_size_t len, int ops)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;

	/*
	 * Mixing PRE and POST operations is not allowed.
	 */
	if ((ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE)) != 0 &&
	    (ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE)) != 0)
		panic("%s: mix PRE and POST", __func__);

	/* Bounds-check the requested range against the loaded map. */
	if ((ops & (BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTREAD)) != 0) {
		KASSERTMSG(offset < map->dm_mapsize,
		    "bad offset 0x%"PRIxBUSADDR" >= 0x%"PRIxBUSSIZE,
		    offset, map->dm_mapsize);
		KASSERTMSG(len <= map->dm_mapsize - offset,
		    "bad length 0x%"PRIxBUSADDR" + %"PRIxBUSSIZE
		    " > %"PRIxBUSSIZE, offset, len, map->dm_mapsize);
	}

	/*
	 * BUS_DMASYNC_POSTREAD: The caller has been alerted to DMA
	 * completion by reading a register or DMA descriptor, and the
	 * caller is about to read out of the DMA memory buffer that
	 * the device just filled.
	 *
	 * => LFENCE ensures that these happen in order so that the
	 *    caller, or the bounce buffer logic here, doesn't proceed
	 *    to read any stale data from cache or speculation.  x86
	 *    never reorders loads from wp/wt/wb or uc memory, but it
	 *    may execute loads from wc/wc+ memory early, e.g. with
	 *    BUS_SPACE_MAP_PREFETCHABLE.
	 */
	if (ops & BUS_DMASYNC_POSTREAD)
		x86_lfence();

	/*
	 * If we're not bouncing, just return; nothing to do.
	 */
	if (len == 0 || cookie == NULL ||
	    (cookie->id_flags & X86_DMA_IS_BOUNCING) == 0)
		goto end;

	switch (cookie->id_buftype) {
	case X86_DMA_BUFTYPE_LINEAR:
		/*
		 * Nothing to do for pre-read.
		 */

		if (ops & BUS_DMASYNC_PREWRITE) {
			/*
			 * Copy the caller's buffer to the bounce buffer.
			 */
			memcpy((char *)cookie->id_bouncebuf + offset,
			    (char *)cookie->id_origbuf + offset, len);
		}

		if (ops & BUS_DMASYNC_POSTREAD) {
			/*
			 * Copy the bounce buffer to the caller's buffer.
			 */
			memcpy((char *)cookie->id_origbuf + offset,
			    (char *)cookie->id_bouncebuf + offset, len);
		}

		/*
		 * Nothing to do for post-write.
		 */
		break;

	case X86_DMA_BUFTYPE_MBUF:
	    {
		struct mbuf *m, *m0 = cookie->id_origbuf;
		bus_size_t minlen, moff;

		/*
		 * Nothing to do for pre-read.
		 */

		if (ops & BUS_DMASYNC_PREWRITE) {
			/*
			 * Copy the caller's buffer to the bounce buffer.
			 */
			m_copydata(m0, offset, len,
			    (char *)cookie->id_bouncebuf + offset);
		}

		if (ops & BUS_DMASYNC_POSTREAD) {
			/*
			 * Copy the bounce buffer to the caller's buffer.
			 */
			for (moff = offset, m = m0; m != NULL && len != 0;
			     m = m->m_next) {
				/* Find the beginning mbuf. */
				if (moff >= m->m_len) {
					moff -= m->m_len;
					continue;
				}

				/*
				 * Now at the first mbuf to sync; nail
				 * each one until we have exhausted the
				 * length.
				 */
				minlen = len < m->m_len - moff ?
				    len : m->m_len - moff;

				memcpy(mtod(m, char *) + moff,
				    (char *)cookie->id_bouncebuf + offset,
				    minlen);

				moff = 0;
				len -= minlen;
				offset += minlen;
			}
		}

		/*
		 * Nothing to do for post-write.
		 */
		break;
	    }
	case X86_DMA_BUFTYPE_UIO:
	    {
		struct uio *uio;

		uio = (struct uio *)cookie->id_origbuf;

		/*
		 * Nothing to do for pre-read.
		 */

		if (ops & BUS_DMASYNC_PREWRITE) {
			/*
			 * Copy the caller's buffer to the bounce buffer.
			 */
			_bus_dma_uiomove((char *)cookie->id_bouncebuf + offset,
			    uio, len, UIO_WRITE);
		}

		if (ops & BUS_DMASYNC_POSTREAD) {
			/*
			 * Copy the bounce buffer to the caller's buffer.
			 */
			_bus_dma_uiomove((char *)cookie->id_bouncebuf + offset,
			    uio, len, UIO_READ);
		}

		/*
		 * Nothing to do for post-write.
		 */
		break;
	    }
	case X86_DMA_BUFTYPE_RAW:
		panic("%s: X86_DMA_BUFTYPE_RAW", __func__);
		break;

	case X86_DMA_BUFTYPE_INVALID:
		panic("%s: X86_DMA_BUFTYPE_INVALID", __func__);
		break;

	default:
		panic("%s: unknown buffer type %d", __func__,
		    cookie->id_buftype);
		break;
	}
end:
	/*
	 * BUS_DMASYNC_PREREAD: The caller may have previously been
	 * using a DMA memory buffer, with loads and stores, and is
	 * about to trigger DMA by writing to a register or DMA
	 * descriptor.
	 *
	 * => SFENCE ensures that the stores happen in order, in case
	 *    the latter one is non-temporal or to wc/wc+ memory and
	 *    thus may be executed early.  x86 never reorders
	 *    load;store to store;load for any memory type, so no
	 *    barrier is needed for prior loads.
	 *
	 * BUS_DMASYNC_PREWRITE: The caller has just written to a DMA
	 * memory buffer, or we just wrote to the bounce buffer,
	 * data that the device needs to use, and the caller is about
	 * to trigger DMA by writing to a register or DMA descriptor.
	 *
	 * => SFENCE ensures that these happen in order so that any
	 *    buffered stores are visible to the device before the DMA
	 *    is triggered.  x86 never reorders (non-temporal) stores
	 *    to wp/wt/wb or uc memory, but it may reorder two stores
	 *    if one is to wc/wc+ memory, e.g. if the DMA descriptor is
	 *    mapped with BUS_SPACE_MAP_PREFETCHABLE.
	 */
	if (ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE))
		x86_sfence();

	/*
	 * BUS_DMASYNC_POSTWRITE: The caller has been alerted to DMA
	 * completion by reading a register or DMA descriptor, and the
	 * caller may proceed to reuse the DMA memory buffer, with
	 * loads and stores.
	 *
	 * => No barrier is needed.  Since the DMA memory buffer is not
	 *    changing (we're sending data to the device, not receiving
	 *    data from the device), prefetched loads are safe.  x86
	 *    never reorders load;store to store;load for any memory
	 *    type, so early execution of stores prior to witnessing
	 *    the DMA completion is not possible.
	 */
}

/*
 * Allocate memory safe for DMA.
*/
static int
_bus_dmamem_alloc(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
    int flags)
{
	bus_addr_t high;

	/* Clamp the upper allocation bound to the tag's limit, if any. */
	if (t->_bounce_alloc_hi != 0 &&
	    _BUS_AVAIL_END > t->_bounce_alloc_hi - 1)
		high = t->_bounce_alloc_hi - 1;
	else
		high = _BUS_AVAIL_END;

	return (_BUS_DMAMEM_ALLOC_RANGE(t, size, alignment, boundary,
	    segs, nsegs, rsegs, flags, t->_bounce_alloc_lo, high));
}

/*
 * _bus_dma_alloc_bouncebuf --
 *	Allocate and map a bounce buffer for the given map, recording
 *	it in the map's cookie.  On failure the cookie is left with no
 *	bounce state.
 */
static int
_bus_dma_alloc_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map,
    bus_size_t size, int flags)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;
	int error = 0;

	KASSERT(cookie != NULL);

	/* Round up so the buffer can be mapped page by page. */
	cookie->id_bouncebuflen = round_page(size);
	error = _bus_dmamem_alloc(t, cookie->id_bouncebuflen,
	    PAGE_SIZE, map->_dm_boundary, cookie->id_bouncesegs,
	    map->_dm_segcnt, &cookie->id_nbouncesegs, flags);
	if (error) {
		cookie->id_bouncebuflen = 0;
		cookie->id_nbouncesegs = 0;
		return error;
	}

	error = _bus_dmamem_map(t, cookie->id_bouncesegs,
	    cookie->id_nbouncesegs, cookie->id_bouncebuflen,
	    (void **)&cookie->id_bouncebuf, flags);

	if (error) {
		/* Mapping failed; release the raw pages again. */
		_bus_dmamem_free(t, cookie->id_bouncesegs,
		    cookie->id_nbouncesegs);
		cookie->id_bouncebuflen = 0;
		cookie->id_nbouncesegs = 0;
	} else {
		cookie->id_flags |= X86_DMA_HAS_BOUNCE;
		STAT_INCR(nbouncebufs);
	}

	return (error);
}

/*
 * _bus_dma_free_bouncebuf --
 *	Unmap and free the bounce buffer recorded in the map's cookie.
 */
static void
_bus_dma_free_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct x86_bus_dma_cookie *cookie = map->_dm_cookie;

	KASSERT(cookie != NULL);

	STAT_DECR(nbouncebufs);

	_bus_dmamem_unmap(t, cookie->id_bouncebuf,
	    cookie->id_bouncebuflen);
	_bus_dmamem_free(t, cookie->id_bouncesegs,
	    cookie->id_nbouncesegs);
	cookie->id_bouncebuflen = 0;
	cookie->id_nbouncesegs = 0;
	cookie->id_flags &= ~X86_DMA_HAS_BOUNCE;
}

/*
 * This function does the same as uiomove, but takes an explicit
 * direction, and does not update the uio structure.
*/
static int
_bus_dma_uiomove(void *buf, struct uio *uio, size_t n, int direction)
{
	struct iovec *iov;
	int error;
	struct vmspace *vm;
	char *cp;
	size_t resid, cnt;
	int i;

	/* NOTE(review): this initial assignment is dead -- iov is
	 * reassigned at the top of the loop below. */
	iov = uio->uio_iov;
	vm = uio->uio_vmspace;
	cp = buf;
	resid = n;

	for (i = 0; i < uio->uio_iovcnt && resid > 0; i++) {
		iov = &uio->uio_iov[i];
		if (iov->iov_len == 0)
			continue;
		cnt = MIN(resid, iov->iov_len);

		/* Copies into a user vmspace may sleep; allow preemption. */
		if (!VMSPACE_IS_KERNEL_P(vm)) {
			preempt_point();
		}
		if (direction == UIO_READ) {
			error = copyout_vmspace(vm, cp, iov->iov_base, cnt);
		} else {
			error = copyin_vmspace(vm, iov->iov_base, cp, cnt);
		}
		if (error)
			return (error);
		cp += cnt;
		resid -= cnt;
	}
	return (0);
}

/*
 * Common function for freeing DMA-safe memory.  May be called by
 * bus-specific DMA memory free functions.
 */
static void
_bus_dmamem_free(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs)
{
	struct vm_page *m;
	bus_addr_t addr;
	struct pglist mlist;
	int curseg;

	/*
	 * Build a list of pages to free back to the VM system.
	 */
	TAILQ_INIT(&mlist);
	for (curseg = 0; curseg < nsegs; curseg++) {
		for (addr = segs[curseg].ds_addr;
		    addr < (segs[curseg].ds_addr + segs[curseg].ds_len);
		    addr += PAGE_SIZE) {
			m = _BUS_BUS_TO_VM_PAGE(addr);
			TAILQ_INSERT_TAIL(&mlist, m, pageq.queue);
		}
	}

	uvm_pglistfree(&mlist);
}

/*
 * Common function for mapping DMA-safe memory.  May be called by
 * bus-specific DMA memory map functions.
 * This supports BUS_DMA_NOCACHE.
 */
static int
_bus_dmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
    size_t size, void **kvap, int flags)
{
	vaddr_t va;
	bus_addr_t addr;
	int curseg;
	const uvm_flag_t kmflags =
	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;
	u_int pmapflags = PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE;

	size = round_page(size);
	if (flags & BUS_DMA_NOCACHE)
		pmapflags |= PMAP_NOCACHE;

	/* Reserve VA only; the pages are entered explicitly below. */
	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY | kmflags);

	if (va == 0)
		return ENOMEM;

	*kvap = (void *)va;

	for (curseg = 0; curseg < nsegs; curseg++) {
		for (addr = segs[curseg].ds_addr;
		    addr < (segs[curseg].ds_addr + segs[curseg].ds_len);
		    addr += PAGE_SIZE, va += PAGE_SIZE, size -= PAGE_SIZE) {
			if (size == 0)
				panic("_bus_dmamem_map: size botch");
			_BUS_PMAP_ENTER(pmap_kernel(), va, addr,
			    VM_PROT_READ | VM_PROT_WRITE,
			    pmapflags);
		}
	}
	pmap_update(pmap_kernel());

	return 0;
}

/*
 * Common function for unmapping DMA-safe memory.  May be called by
 * bus-specific DMA memory unmapping functions.
 */
static void
_bus_dmamem_unmap(bus_dma_tag_t t, void *kva, size_t size)
{
	pt_entry_t *pte, opte;
	vaddr_t va, sva, eva;

	KASSERTMSG(((uintptr_t)kva & PGOFSET) == 0, "kva=%p", kva);

	size = round_page(size);
	sva = (vaddr_t)kva;
	eva = sva + size;

	/*
	 * mark pages cacheable again.
	 */
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pte = kvtopte(va);
		opte = *pte;
		if ((opte & PTE_PCD) != 0)
			pmap_pte_clearbits(pte, PTE_PCD);
	}

	pmap_remove(pmap_kernel(), (vaddr_t)kva, (vaddr_t)kva + size);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
}

/*
 * Common function for mmap(2)'ing DMA-safe memory.  May be called by
 * bus-specific DMA mmap(2)'ing functions.
*/
static paddr_t
_bus_dmamem_mmap(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
    off_t off, int prot, int flags)
{
	int i;

	/* Walk the segments until the one containing off is found. */
	for (i = 0; i < nsegs; i++) {
		KASSERTMSG((off & PGOFSET) == 0, "off=0x%jx",
		    (uintmax_t)off);
		KASSERTMSG((segs[i].ds_addr & PGOFSET) == 0,
		    "segs[%u].ds_addr=%"PRIxBUSADDR, i, segs[i].ds_addr);
		KASSERTMSG((segs[i].ds_len & PGOFSET) == 0,
		    "segs[%u].ds_len=%"PRIxBUSSIZE, i, segs[i].ds_len);
		if (off >= segs[i].ds_len) {
			off -= segs[i].ds_len;
			continue;
		}

		return (x86_btop(_BUS_BUS_TO_PHYS(segs[i].ds_addr + off)));
	}

	/* Page not found. */
	return (-1);
}

/**********************************************************************
 * DMA utility functions
 **********************************************************************/

/*
 * Utility function to load a linear buffer.
 */
static int
_bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct vmspace *vm, int flags)
{
	bus_size_t sgsize;
	bus_addr_t curaddr;
	vaddr_t vaddr = (vaddr_t)buf;
	pmap_t pmap;

	/* User buffers translate through their own pmap. */
	if (vm != NULL)
		pmap = vm_map_pmap(&vm->vm_map);
	else
		pmap = pmap_kernel();

	while (buflen > 0) {
		int error;

		/*
		 * Get the bus address for this segment.
		 */
		curaddr = _BUS_VIRT_TO_BUS(pmap, vaddr);

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
		if (buflen < sgsize)
			sgsize = buflen;

		/*
		 * If we're beyond the bounce threshold, notify
		 * the caller.
		 */
		if (map->_dm_bounce_thresh != 0 &&
		    curaddr + sgsize >= map->_dm_bounce_thresh)
			return (EINVAL);

		error = _bus_dmamap_load_busaddr(t, map, curaddr, sgsize);
		if (error)
			return error;

		vaddr += sgsize;
		buflen -= sgsize;
	}

	return (0);
}

/*
 * _bus_dmatag_subregion --
 *	Produce a tag restricted to [min_addr, max_addr].  Reuses the
 *	existing tag (with a new reference) when it already fits,
 *	otherwise clones it with tightened bounce/alloc limits.
 */
static int
_bus_dmatag_subregion(bus_dma_tag_t tag, bus_addr_t min_addr,
    bus_addr_t max_addr, bus_dma_tag_t *newtag, int flags)
{

	if ((tag->_bounce_thresh != 0 &&
	    max_addr >= tag->_bounce_thresh - 1) &&
	    (tag->_bounce_alloc_hi != 0 &&
	    max_addr >= tag->_bounce_alloc_hi - 1) &&
	    (min_addr <= tag->_bounce_alloc_lo)) {
		*newtag = tag;
		/* if the tag must be freed, add a reference */
		if (tag->_tag_needs_free)
			(tag->_tag_needs_free)++;
		return 0;
	}

	if ((*newtag = malloc(sizeof(struct x86_bus_dma_tag), M_DMAMAP,
	    (flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK)) == NULL)
		return ENOMEM;

	**newtag = *tag;
	(*newtag)->_tag_needs_free = 1;

	/* Tighten each limit only where the request is stricter. */
	if (tag->_bounce_thresh == 0 || max_addr < tag->_bounce_thresh)
		(*newtag)->_bounce_thresh = max_addr;
	if (tag->_bounce_alloc_hi == 0 || max_addr < tag->_bounce_alloc_hi)
		(*newtag)->_bounce_alloc_hi = max_addr;
	if (min_addr > tag->_bounce_alloc_lo)
		(*newtag)->_bounce_alloc_lo = min_addr;

	return 0;
}

/*
 * _bus_dmatag_destroy --
 *	Drop a reference on a subregion tag, freeing it on last use.
 */
static void
_bus_dmatag_destroy(bus_dma_tag_t tag)
{

	switch (tag->_tag_needs_free) {
	case 0:
		break;				/* not allocated with malloc */
	case 1:
		free(tag, M_DMAMAP);		/* last reference to tag */
		break;
	default:
		(tag->_tag_needs_free)--;	/* one less reference */
	}
}

/*
 * bus_dmamap_sync --
 *	Public entry point: dispatch to the innermost override that
 *	implements sync, else fall back to the MD implementation.
 */
void
bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t p, bus_addr_t o, bus_size_t l,
    int ops)
{
	bus_dma_tag_t it;

	kasan_dma_sync(p, o, l, ops);
	kmsan_dma_sync(p, o, l, ops);

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_SYNC) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_SYNC) == 0)
			continue;
		(*it->bdt_ov->ov_dmamap_sync)(it->bdt_ctx, t, p, o, l,
		    ops);
		return;
	}

	_bus_dmamap_sync(t, p, o, l, ops);
}

/*
 * bus_dmamap_create --
 *	Public entry point; see the override-dispatch pattern above.
 */
int
bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t
boundary, int flags, bus_dmamap_t *dmamp)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_CREATE) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_CREATE) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamap_create)(it->bdt_ctx, t, size,
		    nsegments, maxsegsz, boundary, flags, dmamp);
	}

	return _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary,
	    flags, dmamp);
}

/* Public entry point; dispatches to the innermost override, if any. */
void
bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t dmam)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_DESTROY) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_DESTROY) == 0)
			continue;
		(*it->bdt_ov->ov_dmamap_destroy)(it->bdt_ctx, t, dmam);
		return;
	}

	_bus_dmamap_destroy(t, dmam);
}

/* Public entry point; sanitizer hooks run before dispatch. */
int
bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t dmam, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	bus_dma_tag_t it;

	kasan_dma_load(dmam, buf, buflen, KASAN_DMA_LINEAR);
	kmsan_dma_load(dmam, buf, buflen, KMSAN_DMA_LINEAR);

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_LOAD) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_LOAD) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamap_load)(it->bdt_ctx, t, dmam,
		    buf, buflen, p, flags);
	}

	return _bus_dmamap_load(t, dmam, buf, buflen, p, flags);
}

/* Public entry point; sanitizer hooks run before dispatch. */
int
bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t dmam,
    struct mbuf *chain, int flags)
{
	bus_dma_tag_t it;

	kasan_dma_load(dmam, chain, 0, KASAN_DMA_MBUF);
	kmsan_dma_load(dmam, chain, 0, KMSAN_DMA_MBUF);

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_LOAD_MBUF) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_LOAD_MBUF) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamap_load_mbuf)(it->bdt_ctx, t,
		    dmam, chain, flags);
	}

	return _bus_dmamap_load_mbuf(t, dmam, chain, flags);
}

/* Public entry point; sanitizer hooks run before dispatch. */
int
bus_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t dmam,
    struct uio *uio, int flags)
{
	bus_dma_tag_t it;

	kasan_dma_load(dmam, uio, 0, KASAN_DMA_UIO);
	kmsan_dma_load(dmam, uio, 0, KMSAN_DMA_UIO);

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_LOAD_UIO) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_LOAD_UIO) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamap_load_uio)(it->bdt_ctx, t,
		    dmam, uio, flags);
	}

	return _bus_dmamap_load_uio(t, dmam, uio, flags);
}

/* Public entry point; sanitizer hooks run before dispatch. */
int
bus_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t dmam,
    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
{
	bus_dma_tag_t it;

	kasan_dma_load(dmam, NULL, 0, KASAN_DMA_RAW);
	kmsan_dma_load(dmam, NULL, 0, KMSAN_DMA_RAW);

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_LOAD_RAW) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_LOAD_RAW) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamap_load_raw)(it->bdt_ctx, t,
		    dmam, segs, nsegs, size, flags);
	}

	return _bus_dmamap_load_raw(t, dmam, segs, nsegs, size, flags);
}

/* Public entry point; dispatches to the innermost override, if any. */
void
bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t dmam)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMAP_OVERRIDE_UNLOAD) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMAP_OVERRIDE_UNLOAD) == 0)
			continue;
		(*it->bdt_ov->ov_dmamap_unload)(it->bdt_ctx, t, dmam);
		return;
	}

	_bus_dmamap_unload(t, dmam);
}

/* Public entry point; dispatches to the innermost override, if any. */
int
bus_dmamem_alloc(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
    int flags)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMEM_OVERRIDE_ALLOC) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMEM_OVERRIDE_ALLOC) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamem_alloc)(it->bdt_ctx, t, size,
		    alignment, boundary, segs, nsegs, rsegs, flags);
	}

	return _bus_dmamem_alloc(t, size, alignment, boundary, segs,
	    nsegs, rsegs, flags);
}

/* Public entry point; dispatches to the innermost override, if any. */
void
bus_dmamem_free(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMEM_OVERRIDE_FREE) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMEM_OVERRIDE_FREE) == 0)
			continue;
		(*it->bdt_ov->ov_dmamem_free)(it->bdt_ctx, t, segs, nsegs);
		return;
	}

	_bus_dmamem_free(t, segs, nsegs);
}

/* Public entry point; dispatches to the innermost override, if any. */
int
bus_dmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
    size_t size, void **kvap, int flags)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMEM_OVERRIDE_MAP) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMEM_OVERRIDE_MAP) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamem_map)(it->bdt_ctx, t, segs,
		    nsegs, size, kvap, flags);
	}

	return _bus_dmamem_map(t, segs, nsegs, size, kvap, flags);
}

/* Public entry point; dispatches to the innermost override, if any. */
void
bus_dmamem_unmap(bus_dma_tag_t t, void *kva, size_t size)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMEM_OVERRIDE_UNMAP) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMEM_OVERRIDE_UNMAP) == 0)
			continue;
		(*it->bdt_ov->ov_dmamem_unmap)(it->bdt_ctx, t, kva, size);
		return;
	}

	_bus_dmamem_unmap(t, kva, size);
}

/* Public entry point; dispatches to the innermost override, if any. */
paddr_t
bus_dmamem_mmap(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
    off_t off, int prot, int flags)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMAMEM_OVERRIDE_MMAP) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMAMEM_OVERRIDE_MMAP) == 0)
			continue;
		return (*it->bdt_ov->ov_dmamem_mmap)(it->bdt_ctx, t, segs,
		    nsegs, off, prot, flags);
	}

	return _bus_dmamem_mmap(t, segs, nsegs, off, prot, flags);
}

/* Public entry point; dispatches to the innermost override, if any. */
int
bus_dmatag_subregion(bus_dma_tag_t t, bus_addr_t min_addr,
    bus_addr_t max_addr, bus_dma_tag_t *newtag, int flags)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMATAG_OVERRIDE_SUBREGION) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMATAG_OVERRIDE_SUBREGION) == 0)
			continue;
		return (*it->bdt_ov->ov_dmatag_subregion)(it->bdt_ctx, t,
		    min_addr, max_addr, newtag, flags);
	}

	return _bus_dmatag_subregion(t, min_addr, max_addr, newtag,
	    flags);
}

/* Public entry point; dispatches to the innermost override, if any. */
void
bus_dmatag_destroy(bus_dma_tag_t t)
{
	bus_dma_tag_t it;

	if ((t->bdt_exists & BUS_DMATAG_OVERRIDE_DESTROY) == 0)
		;	/* skip override */
	else for (it = t; it != NULL; it = it->bdt_super) {
		if ((it->bdt_present & BUS_DMATAG_OVERRIDE_DESTROY) == 0)
			continue;
		(*it->bdt_ov->ov_dmatag_destroy)(it->bdt_ctx, t);
		return;
	}

	_bus_dmatag_destroy(t);
}

/*
 * bit_to_function_pointer --
 *	Map an override bit to the corresponding member of the
 *	overrides table; NULL for an unknown bit.  Used to validate
 *	that every advertised override is actually implemented.
 */
static const void *
bit_to_function_pointer(const struct bus_dma_overrides *ov, uint64_t bit)
{
	switch (bit) {
	case BUS_DMAMAP_OVERRIDE_CREATE:
		return ov->ov_dmamap_create;
	case BUS_DMAMAP_OVERRIDE_DESTROY:
		return ov->ov_dmamap_destroy;
	case BUS_DMAMAP_OVERRIDE_LOAD:
		return ov->ov_dmamap_load;
	case BUS_DMAMAP_OVERRIDE_LOAD_MBUF:
		return ov->ov_dmamap_load_mbuf;
	case BUS_DMAMAP_OVERRIDE_LOAD_UIO:
		return ov->ov_dmamap_load_uio;
	case BUS_DMAMAP_OVERRIDE_LOAD_RAW:
		return ov->ov_dmamap_load_raw;
	case BUS_DMAMAP_OVERRIDE_UNLOAD:
		return ov->ov_dmamap_unload;
	case BUS_DMAMAP_OVERRIDE_SYNC:
		return ov->ov_dmamap_sync;
	case BUS_DMAMEM_OVERRIDE_ALLOC:
		return ov->ov_dmamem_alloc;
	case BUS_DMAMEM_OVERRIDE_FREE:
		return ov->ov_dmamem_free;
	case BUS_DMAMEM_OVERRIDE_MAP:
		return ov->ov_dmamem_map;
	case BUS_DMAMEM_OVERRIDE_UNMAP:
		return ov->ov_dmamem_unmap;
	case BUS_DMAMEM_OVERRIDE_MMAP:
		return ov->ov_dmamem_mmap;
	case BUS_DMATAG_OVERRIDE_SUBREGION:
		return ov->ov_dmatag_subregion;
	case BUS_DMATAG_OVERRIDE_DESTROY:
		return ov->ov_dmatag_destroy;
	default:
		return NULL;
	}
}

/*
 * bus_dma_tag_destroy --
 *	Free a tag created by bus_dma_tag_create(), dropping the
 *	reference it holds on its parent tag.
 */
void
bus_dma_tag_destroy(bus_dma_tag_t bdt)
{
	if (bdt->bdt_super != NULL)
		bus_dmatag_destroy(bdt->bdt_super);
	kmem_free(bdt, sizeof(struct x86_bus_dma_tag));
}

/*
 * bus_dma_tag_create --
 *	Clone obdt, layering the overrides named by `present' (a bit
 *	mask) on top of it.  Every bit in `present' must have a
 *	non-NULL function pointer in `ov'; otherwise EINVAL.
 */
int
bus_dma_tag_create(bus_dma_tag_t obdt, const uint64_t present,
    const struct bus_dma_overrides *ov, void *ctx, bus_dma_tag_t *bdtp)
{
	uint64_t bit, bits, nbits;
	bus_dma_tag_t bdt;
	const void *fp;

	if (ov == NULL || present == 0)
		return EINVAL;

	bdt = kmem_alloc(sizeof(struct x86_bus_dma_tag), KM_SLEEP);

	*bdt = *obdt;
	/* don't let bus_dmatag_destroy free these */
	bdt->_tag_needs_free = 0;

	bdt->bdt_super = obdt;

	/* Peel off the lowest set bit each iteration and validate it. */
	for (bits = present; bits != 0; bits = nbits) {
		nbits = bits & (bits - 1);
		bit = nbits ^ bits;
		if ((fp = bit_to_function_pointer(ov, bit)) == NULL) {
#ifdef DEBUG
			printf("%s: missing bit %" PRIx64 "\n", __func__,
			    bit);
#endif
			goto einval;
		}
	}

	bdt->bdt_ov = ov;
	bdt->bdt_exists = obdt->bdt_exists | present;
	bdt->bdt_present = present;
	bdt->bdt_ctx = ctx;

	*bdtp = bdt;
	/* Hold a reference on the parent for the lifetime of bdt. */
	if (obdt->_tag_needs_free)
		obdt->_tag_needs_free++;

	return 0;

einval:
	kmem_free(bdt, sizeof(struct x86_bus_dma_tag));
	return EINVAL;
}
| 2 2 1 2 3 3 1 1 1 1 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 | /* $NetBSD: prop_kern.c,v 1.25 2022/08/03 21:13:46 riastradh Exp $ */ /*- * Copyright (c) 2006, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#if defined(__NetBSD__)

#include <sys/types.h>
#include <sys/ioctl.h>

#include <prop/proplib.h>

#if !defined(_KERNEL) && !defined(_STANDALONE)

#include <sys/mman.h>

#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#ifdef RUMP_ACTION
#include <rump/rump_syscalls.h>
#define	ioctl(a,b,c) rump_sys_ioctl(a,b,c)
#endif

/*
 * _prop_object_externalize_to_pref --
 *	Externalize obj (a dictionary or an array) into a malloc'd
 *	XML plist and fill in the plistref.  On success the caller
 *	owns *bufp (== pref->pref_plist) and must free() it once the
 *	pref has been consumed.
 */
static int
_prop_object_externalize_to_pref(prop_object_t obj, struct plistref *pref,
    char **bufp)
{
	char *buf;

	switch (prop_object_type(obj)) {
	case PROP_TYPE_DICTIONARY:
		buf = prop_dictionary_externalize(obj);
		break;
	case PROP_TYPE_ARRAY:
		buf = prop_array_externalize(obj);
		break;
	default:
		/* Only dictionaries and arrays can cross the boundary. */
		return (ENOTSUP);
	}
	if (buf == NULL) {
		/* Assume we ran out of memory. */
		return (ENOMEM);
	}
	pref->pref_plist = buf;
	pref->pref_len = strlen(buf) + 1;	/* include the NUL */

	*bufp = buf;

	return (0);
}

/*
 * prop_array_externalize_to_pref --
 *	Externalize an array into a plistref for sending to the kernel.
 *	Returns true on success; on failure the error is left in errno.
 */
bool
prop_array_externalize_to_pref(prop_array_t array, struct plistref *prefp)
{
	char *buf;
	int rv;

	rv = _prop_object_externalize_to_pref(array, prefp, &buf);
	if (rv != 0)
		errno = rv;	/* pass up error value in errno */

	return (rv == 0);
}

/*
 * prop_array_send_syscall --
 *	Errno-returning wrapper around prop_array_externalize_to_pref().
 */
int
prop_array_send_syscall(prop_array_t array, struct plistref *prefp)
{
	if (prop_array_externalize_to_pref(array, prefp))
		return 0;
	else
		return errno;
}

/*
 * prop_dictionary_externalize_to_pref --
 *	Externalize a dictionary into a plistref for sending to the
 *	kernel.  Returns true on success; error left in errno.
 */
bool
prop_dictionary_externalize_to_pref(prop_dictionary_t dict,
    struct plistref *prefp)
{
	char *buf;
	int rv;

	rv = _prop_object_externalize_to_pref(dict, prefp, &buf);
	if (rv != 0)
		errno = rv;	/* pass up error value in errno */

	return (rv == 0);
}

/*
 * prop_dictionary_send_syscall --
 *	Errno-returning wrapper around
 *	prop_dictionary_externalize_to_pref().
*/
int
prop_dictionary_send_syscall(prop_dictionary_t dict,
    struct plistref *prefp)
{
	if (prop_dictionary_externalize_to_pref(dict, prefp))
		return 0;
	else
		return errno;
}

/*
 * _prop_object_send_ioctl --
 *	Externalize obj and hand it to the kernel via the given ioctl.
 *	Returns 0 or an errno value.
 */
static int
_prop_object_send_ioctl(prop_object_t obj, int fd, unsigned long cmd)
{
	struct plistref pref;
	char *buf;
	int error;

	error = _prop_object_externalize_to_pref(obj, &pref, &buf);
	if (error)
		return (error);

	if (ioctl(fd, cmd, &pref) == -1)
		error = errno;
	else
		error = 0;

	/* The kernel copied the plist in; our externalized copy goes. */
	free(buf);

	return (error);
}

/*
 * prop_array_send_ioctl --
 *	Send an array to the kernel using the specified ioctl.
 */
int
prop_array_send_ioctl(prop_array_t array, int fd, unsigned long cmd)
{
	int rv;

	rv = _prop_object_send_ioctl(array, fd, cmd);
	if (rv != 0) {
		errno = rv;	/* pass up error value in errno */
		return rv;
	} else
		return 0;
}

/*
 * prop_dictionary_send_ioctl --
 *	Send a dictionary to the kernel using the specified ioctl.
 */
int
prop_dictionary_send_ioctl(prop_dictionary_t dict, int fd,
    unsigned long cmd)
{
	int rv;

	rv = _prop_object_send_ioctl(dict, fd, cmd);
	if (rv != 0) {
		errno = rv;	/* pass up error value in errno */
		return rv;
	} else
		return 0;
}

/*
 * _prop_object_internalize_from_pref --
 *	Internalize the kernel-provided plistref (mmap'd into our
 *	address space by the kernel) into a proplib object of the
 *	requested type, then unmap the buffer.
 */
static int
_prop_object_internalize_from_pref(const struct plistref *pref,
    prop_type_t type, prop_object_t *objp)
{
	prop_object_t obj = NULL;
	char *buf;
	int error = 0;

	if (pref->pref_len == 0) {
		/*
		 * This should never happen; we should always get the XML
		 * for an empty dictionary if it's really empty.
		 */
		error = EIO;
		goto out;
	} else {
		buf = pref->pref_plist;
		buf[pref->pref_len - 1] = '\0';	/* extra insurance */
		switch (type) {
		case PROP_TYPE_DICTIONARY:
			obj = prop_dictionary_internalize(buf);
			break;
		case PROP_TYPE_ARRAY:
			obj = prop_array_internalize(buf);
			break;
		default:
			error = ENOTSUP;
		}
		/* Unmap the kernel-provided pages regardless of outcome. */
		(void) munmap(buf, pref->pref_len);
		if (obj == NULL && error == 0)
			error = EIO;
	}

 out:
	if (error == 0)
		*objp = obj;
	return (error);
}

/*
 * prop_array_internalize_from_pref --
 *	Internalize a pref into a prop_array_t object.
*/ bool prop_array_internalize_from_pref(const struct plistref *prefp, prop_array_t *arrayp) { int rv; rv = _prop_object_internalize_from_pref(prefp, PROP_TYPE_ARRAY, (prop_object_t *)arrayp); if (rv != 0) errno = rv; /* pass up error value in errno */ return (rv == 0); } /* * prop_array_recv_syscall -- * Internalize an array received from the kernel as pref. */ int prop_array_recv_syscall(const struct plistref *prefp, prop_array_t *arrayp) { if (prop_array_internalize_from_pref(prefp, arrayp)) return 0; else return errno; } /* * prop_dictionary_internalize_from_pref -- * Internalize a pref into a prop_dictionary_t object. */ bool prop_dictionary_internalize_from_pref(const struct plistref *prefp, prop_dictionary_t *dictp) { int rv; rv = _prop_object_internalize_from_pref(prefp, PROP_TYPE_DICTIONARY, (prop_object_t *)dictp); if (rv != 0) errno = rv; /* pass up error value in errno */ return (rv == 0); } /* * prop_dictionary_recv_syscall -- * Internalize a dictionary received from the kernel as pref. */ int prop_dictionary_recv_syscall(const struct plistref *prefp, prop_dictionary_t *dictp) { if (prop_dictionary_internalize_from_pref(prefp, dictp)) return 0; else return errno; } /* * prop_array_recv_ioctl -- * Receive an array from the kernel using the specified ioctl. */ int prop_array_recv_ioctl(int fd, unsigned long cmd, prop_array_t *arrayp) { int rv; struct plistref pref; rv = ioctl(fd, cmd, &pref); if (rv == -1) return errno; rv = _prop_object_internalize_from_pref(&pref, PROP_TYPE_ARRAY, (prop_object_t *)arrayp); if (rv != 0) { errno = rv; /* pass up error value in errno */ return rv; } else return 0; } /* * prop_dictionary_recv_ioctl -- * Receive a dictionary from the kernel using the specified ioctl. 
*/
int
prop_dictionary_recv_ioctl(int fd, unsigned long cmd,
    prop_dictionary_t *dictp)
{
	int rv;
	struct plistref pref;

	rv = ioctl(fd, cmd, &pref);
	if (rv == -1)
		return errno;

	rv = _prop_object_internalize_from_pref(&pref,
	    PROP_TYPE_DICTIONARY, (prop_object_t *)dictp);
	if (rv != 0) {
		errno = rv;	/* pass up error value in errno */
		return rv;
	} else
		return 0;
}

/*
 * prop_dictionary_sendrecv_ioctl --
 *	Combination send/receive a dictionary to/from the kernel using
 *	the specified ioctl.
 */
int
prop_dictionary_sendrecv_ioctl(prop_dictionary_t dict, int fd,
    unsigned long cmd, prop_dictionary_t *dictp)
{
	struct plistref pref;
	char *buf;
	int error;

	error = _prop_object_externalize_to_pref(dict, &pref, &buf);
	if (error != 0) {
		errno = error;
		return error;
	}

	if (ioctl(fd, cmd, &pref) == -1)
		error = errno;
	else
		error = 0;

	free(buf);

	if (error != 0)
		return error;

	/* The kernel rewrote the pref with its reply; internalize it. */
	error = _prop_object_internalize_from_pref(&pref,
	    PROP_TYPE_DICTIONARY, (prop_object_t *)dictp);
	if (error != 0) {
		errno = error;	/* pass up error value in errno */
		return error;
	} else
		return 0;
}

#endif /* !_KERNEL && !_STANDALONE */

#if defined(_KERNEL)

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>

#include "prop_object_impl.h"

/* Arbitrary limit ioctl input to 128KB */
unsigned int prop_object_copyin_limit = 128 * 1024;

/* initialize proplib for use in the kernel */
void
prop_kern_init(void)
{
	__link_set_decl(prop_linkpools, struct prop_pool_init);
	struct prop_pool_init * const *pi;

	/* Initialize every pool proplib registered via the link set. */
	__link_set_foreach(pi, prop_linkpools)
		pool_init((*pi)->pp, (*pi)->size, 0, 0, 0, (*pi)->wchan,
		    &pool_allocator_nointr, IPL_NONE);
}

/*
 * _prop_object_copyin --
 *	Copy a user-supplied XML plist into the kernel and internalize
 *	it as an object of the requested type.  Input is capped at lim
 *	bytes to bound untrusted allocations.
 */
static int
_prop_object_copyin(const struct plistref *pref, const prop_type_t type,
    prop_object_t *objp, size_t lim)
{
	prop_object_t obj = NULL;
	char *buf;
	int error;

	if (pref->pref_len >= lim)
		return E2BIG;

	/*
	 * Allocate an extra byte so we can guarantee NUL-termination.
	 */
	buf = malloc(pref->pref_len + 1, M_TEMP, M_WAITOK);
	if (buf == NULL)
		return (ENOMEM);
	error = copyin(pref->pref_plist, buf, pref->pref_len);
	if (error) {
		free(buf, M_TEMP);
		return (error);
	}
	buf[pref->pref_len] = '\0';

	switch (type) {
	case PROP_TYPE_ARRAY:
		obj = prop_array_internalize(buf);
		break;
	case PROP_TYPE_DICTIONARY:
		obj = prop_dictionary_internalize(buf);
		break;
	default:
		error = ENOTSUP;
	}

	free(buf, M_TEMP);
	if (obj == NULL) {
		if (error == 0)
			error = EIO;
	} else {
		*objp = obj;
	}
	return (error);
}

/*
 * _prop_object_copyin_ioctl --
 *	Like _prop_object_copyin(), but rejects ioctls that do not
 *	declare an input direction.
 */
static int
_prop_object_copyin_ioctl(const struct plistref *pref,
    const prop_type_t type, const u_long cmd, prop_object_t *objp,
    size_t lim)
{
	if ((cmd & IOC_IN) == 0)
		return (EFAULT);

	return _prop_object_copyin(pref, type, objp, lim);
}

/*
 * prop_array_copyin --
 *	Copy in an array passed as a syscall arg.
 */
int
prop_array_copyin_size(const struct plistref *pref,
    prop_array_t *arrayp, size_t lim)
{
	return _prop_object_copyin(pref, PROP_TYPE_ARRAY,
	    (prop_object_t *)arrayp, lim);
}

/* Same, with the default size limit. */
int
prop_array_copyin(const struct plistref *pref, prop_array_t *arrayp)
{
	return prop_array_copyin_size(pref, arrayp,
	    prop_object_copyin_limit);
}

/*
 * prop_dictionary_copyin --
 *	Copy in a dictionary passed as a syscall arg.
 */
int
prop_dictionary_copyin_size(const struct plistref *pref,
    prop_dictionary_t *dictp, size_t lim)
{
	return _prop_object_copyin(pref, PROP_TYPE_DICTIONARY,
	    (prop_object_t *)dictp, lim);
}

/* Same, with the default size limit. */
int
prop_dictionary_copyin(const struct plistref *pref,
    prop_dictionary_t *dictp)
{
	return prop_dictionary_copyin_size(pref, dictp,
	    prop_object_copyin_limit);
}

/*
 * prop_array_copyin_ioctl --
 *	Copy in an array sent with an ioctl.
*/ int prop_array_copyin_ioctl_size(const struct plistref *pref, const u_long cmd, prop_array_t *arrayp, size_t lim) { return _prop_object_copyin_ioctl(pref, PROP_TYPE_ARRAY, cmd, (prop_object_t *)arrayp, lim); } int prop_array_copyin_ioctl(const struct plistref *pref, const u_long cmd, prop_array_t *arrayp) { return prop_array_copyin_ioctl_size(pref, cmd, arrayp, prop_object_copyin_limit); } /* * prop_dictionary_copyin_ioctl -- * Copy in a dictionary sent with an ioctl. */ int prop_dictionary_copyin_ioctl_size(const struct plistref *pref, const u_long cmd, prop_dictionary_t *dictp, size_t lim) { return _prop_object_copyin_ioctl(pref, PROP_TYPE_DICTIONARY, cmd, (prop_object_t *)dictp, lim); } int prop_dictionary_copyin_ioctl(const struct plistref *pref, const u_long cmd, prop_dictionary_t *dictp) { return prop_dictionary_copyin_ioctl_size(pref, cmd, dictp, prop_object_copyin_limit); } static int _prop_object_copyout(struct plistref *pref, prop_object_t obj) { struct lwp *l = curlwp; /* XXX */ struct proc *p = l->l_proc; char *buf; void *uaddr; size_t len, rlen; int error = 0; switch (prop_object_type(obj)) { case PROP_TYPE_ARRAY: buf = prop_array_externalize(obj); break; case PROP_TYPE_DICTIONARY: buf = prop_dictionary_externalize(obj); break; default: return (ENOTSUP); } if (buf == NULL) return (ENOMEM); len = strlen(buf) + 1; rlen = round_page(len); uaddr = NULL; error = uvm_mmap_anon(p, &uaddr, rlen); if (error == 0) { error = copyout(buf, uaddr, len); if (error == 0) { pref->pref_plist = uaddr; pref->pref_len = len; } } free(buf, M_TEMP); return (error); } /* * prop_array_copyout -- * Copy out an array to a syscall arg. */ int prop_array_copyout(struct plistref *pref, prop_array_t array) { return (_prop_object_copyout(pref, array)); } /* * prop_dictionary_copyout -- * Copy out a dictionary to a syscall arg. 
*/ int prop_dictionary_copyout(struct plistref *pref, prop_dictionary_t dict) { return (_prop_object_copyout(pref, dict)); } static int _prop_object_copyout_ioctl(struct plistref *pref, const u_long cmd, prop_object_t obj) { if ((cmd & IOC_OUT) == 0) return (EFAULT); return _prop_object_copyout(pref, obj); } /* * prop_array_copyout_ioctl -- * Copy out an array being received with an ioctl. */ int prop_array_copyout_ioctl(struct plistref *pref, const u_long cmd, prop_array_t array) { return (_prop_object_copyout_ioctl(pref, cmd, array)); } /* * prop_dictionary_copyout_ioctl -- * Copy out a dictionary being received with an ioctl. */ int prop_dictionary_copyout_ioctl(struct plistref *pref, const u_long cmd, prop_dictionary_t dict) { return ( _prop_object_copyout_ioctl(pref, cmd, dict)); } #endif /* _KERNEL */ #endif /* __NetBSD__ */ |
| 4 4 4 4 4 4 4 2 3 3 3 1 1 1 1 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 | /* $NetBSD: sco_upper.c,v 1.16 2014/08/05 07:55:32 rtr Exp $ */ /*- * Copyright (c) 2006 Itronix Inc. * All rights reserved. * * Written by Iain Hibbert for Itronix Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of Itronix Inc. may not be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sco_upper.c,v 1.16 2014/08/05 07:55:32 rtr Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/systm.h>

#include <netbt/bluetooth.h>
#include <netbt/hci.h>
#include <netbt/sco.h>

/****************************************************************************
 *
 *	SCO - Upper Protocol API
 */

struct sco_pcb_list sco_pcb = LIST_HEAD_INITIALIZER(sco_pcb);

/*
 * sco_attach_pcb(handle, proto, upper)
 *
 *	Attach a new instance of SCO pcb to handle
 */
int
sco_attach_pcb(struct sco_pcb **handle,
    const struct btproto *proto, void *upper)
{
	struct sco_pcb *pcb;

	KASSERT(handle != NULL);
	KASSERT(proto != NULL);
	KASSERT(upper != NULL);

	pcb = malloc(sizeof(struct sco_pcb), M_BLUETOOTH,
	    M_NOWAIT | M_ZERO);
	if (pcb == NULL)
		return ENOMEM;

	pcb->sp_proto = proto;
	pcb->sp_upper = upper;

	LIST_INSERT_HEAD(&sco_pcb, pcb, sp_next);

	*handle = pcb;
	return 0;
}

/*
 * sco_bind_pcb(pcb, sockaddr)
 *
 *	Bind SCO pcb to local address
 */
int
sco_bind_pcb(struct sco_pcb *pcb, struct sockaddr_bt *addr)
{

	/* cannot rebind a connected or listening pcb */
	if (pcb->sp_link != NULL || pcb->sp_flags & SP_LISTENING)
		return EINVAL;

	bdaddr_copy(&pcb->sp_laddr, &addr->bt_bdaddr);
	return 0;
}

/*
 * sco_sockaddr_pcb(pcb, sockaddr)
 *
 *	Copy local address of PCB to sockaddr
 */
int
sco_sockaddr_pcb(struct sco_pcb *pcb, struct sockaddr_bt *addr)
{

	memset(addr, 0, sizeof(struct sockaddr_bt));
	addr->bt_len = sizeof(struct sockaddr_bt);
	addr->bt_family = AF_BLUETOOTH;

	bdaddr_copy(&addr->bt_bdaddr, &pcb->sp_laddr);
	return 0;
}

/*
 * sco_connect_pcb(pcb, sockaddr)
 *
 *	Initiate a SCO connection to the destination address.
 */
int
sco_connect_pcb(struct sco_pcb *pcb, struct sockaddr_bt *dest)
{
	hci_add_sco_con_cp cp;
	struct hci_unit *unit;
	struct hci_link *acl, *sco;
	int err;

	if (pcb->sp_flags & SP_LISTENING)
		return EINVAL;

	bdaddr_copy(&pcb->sp_raddr, &dest->bt_bdaddr);

	if (bdaddr_any(&pcb->sp_raddr))
		return EDESTADDRREQ;

	/* if no local address bound yet, pick a route to the remote */
	if (bdaddr_any(&pcb->sp_laddr)) {
		err = hci_route_lookup(&pcb->sp_laddr, &pcb->sp_raddr);
		if (err)
			return err;
	}

	unit = hci_unit_lookup(&pcb->sp_laddr);
	if (unit == NULL)
		return ENETDOWN;

	/*
	 * We must have an already open ACL connection before we open the SCO
	 * connection, and since SCO connections dont happen on their own we
	 * will not open one, the application wanting this should have opened
	 * it previously.
	 */
	acl = hci_link_lookup_bdaddr(unit, &pcb->sp_raddr, HCI_LINK_ACL);
	if (acl == NULL || acl->hl_state != HCI_LINK_OPEN)
		return EHOSTUNREACH;

	sco = hci_link_alloc(unit, &pcb->sp_raddr, HCI_LINK_SCO);
	if (sco == NULL)
		return ENOMEM;

	/* takes a reference on the underlying ACL link */
	sco->hl_link = hci_acl_open(unit, &pcb->sp_raddr);
	KASSERT(sco->hl_link == acl);

	cp.con_handle = htole16(acl->hl_handle);
	cp.pkt_type = htole16(0x00e0);		/* HV1, HV2, HV3 */
	err = hci_send_cmd(unit, HCI_CMD_ADD_SCO_CON, &cp, sizeof(cp));
	if (err) {
		hci_link_free(sco, err);
		return err;
	}

	sco->hl_sco = pcb;
	pcb->sp_link = sco;

	pcb->sp_mtu = unit->hci_max_sco_size;
	return 0;
}

/*
 * sco_peeraddr_pcb(pcb, sockaddr)
 *
 *	Copy remote address of SCO pcb to sockaddr
 */
int
sco_peeraddr_pcb(struct sco_pcb *pcb, struct sockaddr_bt *addr)
{

	memset(addr, 0, sizeof(struct sockaddr_bt));
	addr->bt_len = sizeof(struct sockaddr_bt);
	addr->bt_family = AF_BLUETOOTH;

	bdaddr_copy(&addr->bt_bdaddr, &pcb->sp_raddr);
	return 0;
}

/*
 * sco_disconnect_pcb(pcb, linger)
 *
 *	Initiate disconnection of connected SCO pcb
 */
int
sco_disconnect_pcb(struct sco_pcb *pcb, int linger)
{
	hci_discon_cp cp;
	struct hci_link *sco;
	int err;

	sco = pcb->sp_link;
	if (sco == NULL)
		return EINVAL;

	cp.con_handle = htole16(sco->hl_handle);
	cp.reason = 0x13;	/* "Remote User Terminated Connection" */

	err = hci_send_cmd(sco->hl_unit, HCI_CMD_DISCONNECT, &cp, sizeof(cp));
	/* free the link immediately unless lingering for the disconnect */
	if (err || linger == 0) {
		sco->hl_sco = NULL;
		pcb->sp_link = NULL;
		hci_link_free(sco, err);
	}

	return err;
}

/*
 * sco_detach_pcb(handle)
 *
 *	Detach SCO pcb from handle and clear up
 */
void
sco_detach_pcb(struct sco_pcb **handle)
{
	struct sco_pcb *pcb;

	KASSERT(handle != NULL);
	pcb = *handle;
	*handle = NULL;

	if (pcb->sp_link != NULL) {
		sco_disconnect_pcb(pcb, 0);
		pcb->sp_link = NULL;
	}

	LIST_REMOVE(pcb, sp_next);
	free(pcb, M_BLUETOOTH);
}

/*
 * sco_listen_pcb(pcb)
 *
 *	Mark pcb as a listener.
 */
int
sco_listen_pcb(struct sco_pcb *pcb)
{

	if (pcb->sp_link != NULL)
		return EINVAL;

	pcb->sp_flags |= SP_LISTENING;
	return 0;
}

/*
 * sco_send_pcb(pcb, mbuf)
 *
 *	Send data on SCO pcb.
 *
 * Gross hackage, we just output the packet directly onto the unit queue.
 * This will work fine for one channel per unit, but for more channels it
 * really needs fixing. We set the context so that when the packet is sent,
 * we can drop a record from the socket buffer.
 */
int
sco_send_pcb(struct sco_pcb *pcb, struct mbuf *m)
{
	hci_scodata_hdr_t *hdr;
	int plen;

	if (pcb->sp_link == NULL) {
		m_freem(m);
		return EINVAL;
	}

	plen = m->m_pkthdr.len;
	DPRINTFN(10, "%d bytes\n", plen);

	/*
	 * This is a temporary limitation, as USB devices cannot
	 * handle SCO packet sizes that are not an integer number
	 * of Isochronous frames. See ubt(4)
	 */
	if (plen != pcb->sp_mtu) {
		m_freem(m);
		return EMSGSIZE;
	}

	M_PREPEND(m, sizeof(hci_scodata_hdr_t), M_DONTWAIT);
	if (m == NULL)
		return ENOMEM;

	hdr = mtod(m, hci_scodata_hdr_t *);
	hdr->type = HCI_SCO_DATA_PKT;
	hdr->con_handle = htole16(pcb->sp_link->hl_handle);
	hdr->length = plen;

	pcb->sp_pending++;
	M_SETCTX(m, pcb->sp_link);
	hci_output_sco(pcb->sp_link->hl_unit, m);

	return 0;
}

/*
 * sco_setopt(pcb, sopt)
 *
 *	Set SCO pcb options
 */
int
sco_setopt(struct sco_pcb *pcb, const struct sockopt *sopt)
{
	int err = 0;

	switch (sopt->sopt_name) {
	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}

/*
 * sco_getopt(pcb, sopt)
 *
 *	Get SCO pcb options
 */
int
sco_getopt(struct sco_pcb *pcb, struct sockopt *sopt)
{

	switch (sopt->sopt_name) {
	case SO_SCO_MTU:
		return sockopt_set(sopt, &pcb->sp_mtu, sizeof(uint16_t));

	case SO_SCO_HANDLE:
		if (pcb->sp_link)
			return sockopt_set(sopt,
			    &pcb->sp_link->hl_handle, sizeof(uint16_t));

		return ENOTCONN;

	default:
		break;
	}

	return ENOPROTOOPT;
}
|
| 5 4 9 5 5 4 4 4 5 5 5 5 4 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 | /* $NetBSD: pckbport.c,v 1.20 2021/08/07 16:19:15 thorpej Exp $ */ /* * Copyright (c) 2004 Ben Harris * Copyright (c) 1998 * Matthias Drochner. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pckbport.c,v 1.20 2021/08/07 16:19:15 thorpej Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/queue.h>

#include <dev/pckbport/pckbdreg.h>
#include <dev/pckbport/pckbportvar.h>

#include "locators.h"

#include "pckbd.h"
#if (NPCKBD > 0)
#include <dev/pckbport/pckbdvar.h>
#endif

/* descriptor for one device command */
struct pckbport_devcmd {
	TAILQ_ENTRY(pckbport_devcmd) next;
	int flags;
#define KBC_CMDFLAG_SYNC 1 /* give descriptor back to caller */
#define KBC_CMDFLAG_SLOW 2
	u_char cmd[4];
	int cmdlen, cmdidx, retries;
	u_char response[4];
	int status, responselen, responseidx;
};

/* data per slave device */
struct pckbport_slotdata {
	int polling;	/* don't process data in interrupt handler */
	TAILQ_HEAD(, pckbport_devcmd) cmdqueue; /* active commands */
	TAILQ_HEAD(, pckbport_devcmd) freequeue; /* free commands */
#define NCMD 5
	struct pckbport_devcmd cmds[NCMD];
};

#define CMD_IN_QUEUE(q) (TAILQ_FIRST(&(q)->cmdqueue) != NULL)

static void pckbport_init_slotdata(struct pckbport_slotdata *);
static int pckbportprint(void *, const char *);

static struct pckbport_slotdata pckbport_cons_slotdata;

static int pckbport_poll_data1(pckbport_tag_t, pckbport_slot_t);
static int pckbport_send_devcmd(struct pckbport_tag *, pckbport_slot_t,
    u_char);
static void pckbport_poll_cmd1(struct pckbport_tag *, pckbport_slot_t,
    struct pckbport_devcmd *);

static void pckbport_cleanqueue(struct pckbport_slotdata *);
static void pckbport_cleanup(void *);
static int pckbport_cmdresponse(struct pckbport_tag *, pckbport_slot_t,
    u_char);
static void pckbport_start(struct pckbport_tag *, pckbport_slot_t);

static const char * const pckbport_slot_names[] = { "kbd", "aux" };

static struct pckbport_tag pckbport_cntag;

#define	KBD_DELAY	DELAY(8)

#ifdef PCKBPORTDEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)
#endif

/* read one byte from the slot via the controller ops; -1 on timeout */
static int
pckbport_poll_data1(pckbport_tag_t t, pckbport_slot_t slot)
{
	return t->t_ops->t_poll_data1(t->t_cookie, slot);
}

/* write one command byte to the device via the controller ops */
static int
pckbport_send_devcmd(struct pckbport_tag *t, pckbport_slot_t slot, u_char val)
{
	return t->t_ops->t_send_devcmd(t->t_cookie, slot, val);
}

/*
 * Allocate and initialize a port tag for the given controller cookie/ops,
 * reusing the static console tag if it matches.
 */
pckbport_tag_t
pckbport_attach(void *cookie, struct pckbport_accessops const *ops)
{
	pckbport_tag_t t;

	if (cookie == pckbport_cntag.t_cookie && ops == pckbport_cntag.t_ops)
		return &pckbport_cntag;

	t = malloc(sizeof(struct pckbport_tag), M_DEVBUF, M_WAITOK | M_ZERO);
	callout_init(&t->t_cleanup, 0);
	t->t_cookie = cookie;
	t->t_ops = ops;

	return t;
}

/*
 * Attach the child device for one slot; slot data is allocated on first
 * use and released again if no child attaches.
 */
device_t
pckbport_attach_slot(device_t dev, pckbport_tag_t t, pckbport_slot_t slot)
{
	struct pckbport_attach_args pa;
	void *sdata;
	device_t found;
	int alloced = 0;
	int locs[PCKBPORTCF_NLOCS];

	pa.pa_tag = t;
	pa.pa_slot = slot;

	if (t->t_slotdata[slot] == NULL) {
		sdata = malloc(sizeof(struct pckbport_slotdata),
		    M_DEVBUF, M_WAITOK);
		t->t_slotdata[slot] = sdata;
		pckbport_init_slotdata(t->t_slotdata[slot]);
		alloced++;
	}

	locs[PCKBPORTCF_SLOT] = slot;

	found = config_found(dev, &pa, pckbportprint,
	    CFARGS(.submatch = config_stdsubmatch,
		   .iattr = "pckbport",
		   .locators = locs));

	if (found == NULL && alloced) {
		free(t->t_slotdata[slot], M_DEVBUF);
		t->t_slotdata[slot] = NULL;
	}

	return found;
}

/* autoconf print function: identify the slot a child attaches at */
int
pckbportprint(void *aux, const char *pnp)
{
	struct pckbport_attach_args *pa = aux;

	if (!pnp)
		aprint_normal(" (%s slot)", pckbport_slot_names[pa->pa_slot]);
	return QUIET;
}

/* set up the command and free queues for one slot */
void
pckbport_init_slotdata(struct pckbport_slotdata *q)
{
	int i;

	TAILQ_INIT(&q->cmdqueue);
	TAILQ_INIT(&q->freequeue);

	for (i = 0; i < NCMD; i++)
		TAILQ_INSERT_TAIL(&q->freequeue, &(q->cmds[i]), next);

	q->polling = 0;
}

/* drain any pending byte from the slot */
void
pckbport_flush(pckbport_tag_t t, pckbport_slot_t slot)
{
	(void)pckbport_poll_data1(t, slot);
}

/*
 * Poll one byte of data; if a command is in flight, the byte may be a
 * command response and is routed through pckbport_cmdresponse() instead.
 */
int
pckbport_poll_data(pckbport_tag_t t, pckbport_slot_t slot)
{
	struct pckbport_slotdata *q = t->t_slotdata[slot];
	int c;

	c = pckbport_poll_data1(t, slot);
	if (c != -1 && q && CMD_IN_QUEUE(q))
		/*
		 * we jumped into a running command - try to deliver
		 * the response
		 */
		if (pckbport_cmdresponse(t, slot, c))
			return -1;

	return c;
}

/*
 * switch scancode translation on / off
 * return nonzero on success
 */
int
pckbport_xt_translation(pckbport_tag_t t, pckbport_slot_t slot, int on)
{
	return t->t_ops->t_xt_translation(t->t_cookie, slot, on);
}

void
pckbport_slot_enable(pckbport_tag_t t, pckbport_slot_t slot, int on)
{
	t->t_ops->t_slot_enable(t->t_cookie, slot, on);
}

void
pckbport_set_poll(pckbport_tag_t t, pckbport_slot_t slot, int on)
{
	t->t_slotdata[slot]->polling = on;
	t->t_ops->t_set_poll(t->t_cookie, slot, on);
}

/*
 * Pass command to device, poll for ACK and data.
 * to be called at spltty()
 */
static void
pckbport_poll_cmd1(struct pckbport_tag *t, pckbport_slot_t slot,
    struct pckbport_devcmd *cmd)
{
	int i, c = 0;

	/* send each command byte, waiting for an ACK after each */
	while (cmd->cmdidx < cmd->cmdlen) {
		if (!pckbport_send_devcmd(t, slot, cmd->cmd[cmd->cmdidx])) {
			printf("pckbport_cmd: send error\n");
			cmd->status = EIO;
			return;
		}
		for (i = 10; i; i--) { /* 1s ??? */
			c = pckbport_poll_data1(t, slot);
			if (c != -1)
				break;
		}

		switch (c) {
		case KBR_ACK:
			cmd->cmdidx++;
			continue;
		case KBR_BAT_DONE:
		case KBR_BAT_FAIL:
		case KBR_RESEND:
			DPRINTF(("%s: %s\n", __func__, c == KBR_RESEND ?
			    "RESEND" : (c == KBR_BAT_DONE ?
			    "BAT_DONE" : "BAT_FAIL")));
			if (cmd->retries++ < 5)
				continue;
			else {
				DPRINTF(("%s: cmd failed\n", __func__));
				cmd->status = EIO;
				return;
			}
		case -1:
			DPRINTF(("%s: timeout\n", __func__));
			cmd->status = EIO;
			return;
		}
		DPRINTF(("%s: lost 0x%x\n", __func__, c));
	}

	/* collect the expected response bytes */
	while (cmd->responseidx < cmd->responselen) {
		if (cmd->flags & KBC_CMDFLAG_SLOW)
			i = 100; /* 10s ??? */
		else
			i = 10; /* 1s ??? */
		while (i--) {
			c = pckbport_poll_data1(t, slot);
			if (c != -1)
				break;
		}
		if (c == -1) {
			DPRINTF(("%s: no data\n", __func__));
			cmd->status = ETIMEDOUT;
			return;
		} else
			cmd->response[cmd->responseidx++] = c;
	}
}

/* for use in autoconfiguration */
int
pckbport_poll_cmd(pckbport_tag_t t, pckbport_slot_t slot, const u_char *cmd,
    int len, int responselen, u_char *respbuf, int slow)
{
	struct pckbport_devcmd nc;

	if ((len > 4) || (responselen > 4))
		return (EINVAL);

	memset(&nc, 0, sizeof(nc));
	memcpy(nc.cmd, cmd, len);
	nc.cmdlen = len;
	nc.responselen = responselen;
	nc.flags = (slow ? KBC_CMDFLAG_SLOW : 0);

	pckbport_poll_cmd1(t, slot, &nc);

	if (nc.status == 0 && respbuf)
		memcpy(respbuf, nc.response, responselen);

	return nc.status;
}

/*
 * Clean up a command queue, throw away everything.
 */
void
pckbport_cleanqueue(struct pckbport_slotdata *q)
{
	struct pckbport_devcmd *cmd;

	while ((cmd = TAILQ_FIRST(&q->cmdqueue))) {
		TAILQ_REMOVE(&q->cmdqueue, cmd, next);
#ifdef PCKBPORTDEBUG
		printf("%s: removing", __func__);
		for (int i = 0; i < cmd->cmdlen; i++)
			printf(" %02x", cmd->cmd[i]);
		printf("\n");
#endif
		TAILQ_INSERT_TAIL(&q->freequeue, cmd, next);
	}
}

/*
 * Timeout error handler: clean queues and data port.
 * XXX could be less invasive.
 */
void
pckbport_cleanup(void *self)
{
	struct pckbport_tag *t = self;
	int s;
	u_char cmd[1], resp[2];

	printf("pckbport: command timeout\n");

	s = spltty();

	if (t->t_slotdata[PCKBPORT_KBD_SLOT])
		pckbport_cleanqueue(t->t_slotdata[PCKBPORT_KBD_SLOT]);
	if (t->t_slotdata[PCKBPORT_AUX_SLOT])
		pckbport_cleanqueue(t->t_slotdata[PCKBPORT_AUX_SLOT]);

#if 0 /* XXXBJH Move to controller driver? */
	while (bus_space_read_1(t->t_iot, t->t_ioh_c, 0) & KBS_DIB) {
		KBD_DELAY;
		(void) bus_space_read_1(t->t_iot, t->t_ioh_d, 0);
	}
#endif

	cmd[0] = KBC_RESET;
	(void)pckbport_poll_cmd(t, PCKBPORT_KBD_SLOT, cmd, 1, 2, resp, 1);
	pckbport_flush(t, PCKBPORT_KBD_SLOT);

	splx(s);
}

/*
 * Pass command to device during normal operation.
 * to be called at spltty()
 */
void
pckbport_start(struct pckbport_tag *t, pckbport_slot_t slot)
{
	struct pckbport_slotdata *q = t->t_slotdata[slot];
	struct pckbport_devcmd *cmd = TAILQ_FIRST(&q->cmdqueue);

	KASSERT(cmd != NULL);
	/* in polling mode, run every queued command to completion now */
	if (q->polling) {
		do {
			pckbport_poll_cmd1(t, slot, cmd);
			if (cmd->status)
				printf("pckbport_start: command error\n");

			TAILQ_REMOVE(&q->cmdqueue, cmd, next);
			if (cmd->flags & KBC_CMDFLAG_SYNC)
				wakeup(cmd);
			else {
				callout_stop(&t->t_cleanup);
				TAILQ_INSERT_TAIL(&q->freequeue, cmd, next);
			}
			cmd = TAILQ_FIRST(&q->cmdqueue);
		} while (cmd);
		return;
	}

	/* interrupt mode: send the first byte, rest is driven by responses */
	if (!pckbport_send_devcmd(t, slot, cmd->cmd[cmd->cmdidx])) {
		printf("pckbport_start: send error\n");
		/* XXX what now? */
		return;
	}
}

/*
 * Handle command responses coming in asynchronously,
 * return nonzero if valid response.
 * to be called at spltty()
 */
int
pckbport_cmdresponse(struct pckbport_tag *t, pckbport_slot_t slot, u_char data)
{
	struct pckbport_slotdata *q = t->t_slotdata[slot];
	struct pckbport_devcmd *cmd = TAILQ_FIRST(&q->cmdqueue);

	KASSERT(cmd != NULL);
	if (cmd->cmdidx < cmd->cmdlen) {
		/* still sending command bytes: only ACK/RESEND are valid */
		if (data != KBR_ACK && data != KBR_RESEND)
			return 0;

		if (data == KBR_RESEND) {
			if (cmd->retries++ < 5)
				/* try again last command */
				goto restart;
			else {
				DPRINTF(("%s: cmd failed\n", __func__));
				cmd->status = EIO;
				/* dequeue */
			}
		} else {
			if (++cmd->cmdidx < cmd->cmdlen)
				goto restart;
			if (cmd->responselen)
				return 1;
			/* else dequeue */
		}
	} else if (cmd->responseidx < cmd->responselen) {
		/* collecting response bytes */
		cmd->response[cmd->responseidx++] = data;
		if (cmd->responseidx < cmd->responselen)
			return 1;
		/* else dequeue */
	} else
		return 0;

	/* dequeue: */
	TAILQ_REMOVE(&q->cmdqueue, cmd, next);
	if (cmd->flags & KBC_CMDFLAG_SYNC)
		wakeup(cmd);
	else {
		callout_stop(&t->t_cleanup);
		TAILQ_INSERT_TAIL(&q->freequeue, cmd, next);
	}

	if (!CMD_IN_QUEUE(q))
		return 1;
restart:
	pckbport_start(t, slot);
	return 1;
}

/*
 * Put command into the device's command queue, return zero or errno.
 */
int
pckbport_enqueue_cmd(pckbport_tag_t t, pckbport_slot_t slot, const u_char *cmd,
    int len, int responselen, int sync, u_char *respbuf)
{
	struct pckbport_slotdata *q = t->t_slotdata[slot];
	struct pckbport_devcmd *nc;
	int s, isactive, res = 0;

	if ((len > 4) || (responselen > 4))
		return EINVAL;
	s = spltty();
	nc = TAILQ_FIRST(&q->freequeue);
	if (nc)
		TAILQ_REMOVE(&q->freequeue, nc, next);
	splx(s);
	if (!nc)
		return ENOMEM;

	memset(nc, 0, sizeof(*nc));
	memcpy(nc->cmd, cmd, len);
	nc->cmdlen = len;
	nc->responselen = responselen;
	nc->flags = (sync ? KBC_CMDFLAG_SYNC : 0);

	s = spltty();

	if (q->polling && sync)
		/*
		 * XXX We should poll until the queue is empty.
		 * But we don't come here normally, so make
		 * it simple and throw away everything.
		 */
		pckbport_cleanqueue(q);

	isactive = CMD_IN_QUEUE(q);
	TAILQ_INSERT_TAIL(&q->cmdqueue, nc, next);
	if (!isactive)
		pckbport_start(t, slot);

	if (q->polling)
		res = (sync ? nc->status : 0);
	else if (sync) {
		if ((res = tsleep(nc, 0, "kbccmd", 1*hz))) {
			TAILQ_REMOVE(&q->cmdqueue, nc, next);
			pckbport_cleanup(t);
		} else
			res = nc->status;
	} else
		callout_reset(&t->t_cleanup, hz, pckbport_cleanup, t);

	if (sync) {
		if (respbuf)
			memcpy(respbuf, nc->response, responselen);
		TAILQ_INSERT_TAIL(&q->freequeue, nc, next);
	}

	splx(s);

	return res;
}

/* register the interrupt input callback for a slot and enable its intr */
void
pckbport_set_inputhandler(pckbport_tag_t t, pckbport_slot_t slot,
    pckbport_inputfcn func, void *arg, const char *name)
{
	if (slot >= PCKBPORT_NSLOTS)
		panic("pckbport_set_inputhandler: bad slot %d", slot);

	t->t_ops->t_intr_establish(t->t_cookie, slot);

	t->t_inputhandler[slot] = func;
	t->t_inputarg[slot] = arg;
	t->t_subname[slot] = name;
}

/*
 * Interrupt entry point: route the byte either to an in-flight command
 * or to the slot's registered input handler.
 */
void
pckbportintr(pckbport_tag_t t, pckbport_slot_t slot, int data)
{
	struct pckbport_slotdata *q;

	q = t->t_slotdata[slot];

	if (!q) {
		/* XXX do something for live insertion? */
		printf("pckbportintr: no dev for slot %d\n", slot);
		return;
	}

	if (CMD_IN_QUEUE(q) && pckbport_cmdresponse(t, slot, data))
		return;

	if (t->t_inputhandler[slot]) {
		(*t->t_inputhandler[slot])(t->t_inputarg[slot], data);
		return;
	}
	DPRINTF(("%s: slot %d lost %d\n", __func__, slot, data));
}

/* console attachment: uses the static console tag and slot data */
int
pckbport_cnattach(void *cookie, struct pckbport_accessops const *ops,
    pckbport_slot_t slot)
{
	int res = 0;
	pckbport_tag_t t = &pckbport_cntag;

	callout_init(&t->t_cleanup, 0);
	t->t_cookie = cookie;
	t->t_ops = ops;

	/* flush */
	pckbport_flush(t, slot);

#if (NPCKBD > 0)
	res = pckbd_cnattach(t, slot);
#elif (NPCKBPORT_MACHDEP_CNATTACH > 0)
	res = pckbport_machdep_cnattach(t, slot);
#else
	res = ENXIO;
#endif /* NPCKBPORT_MACHDEP_CNATTACH > 0 */

	if (res == 0) {
		t->t_slotdata[slot] = &pckbport_cons_slotdata;
		pckbport_init_slotdata(&pckbport_cons_slotdata);
	}

	return res;
}
|
| 27 27 27 27 1 26 12 14 1 12 1 23 1 24 1 24 23 24 24 24 24 23 24 24 25 10 9 9 1 1 7 7 7 7 7 7 5 5 5 5 5 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 
1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 
1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 | /* $NetBSD: if_ethersubr.c,v 1.315 2022/06/20 12:22:00 martin Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ethersubr.c 8.2 (Berkeley) 4/4/96 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.315 2022/06/20 12:22:00 martin Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_atalk.h" #include "opt_mbuftrace.h" #include "opt_mpls.h" #include "opt_gateway.h" #include "opt_pppoe.h" #include "opt_net_mpsafe.h" #endif #include "vlan.h" #include "pppoe.h" #include "bridge.h" #include "arp.h" #include "agr.h" #include <sys/sysctl.h> #include <sys/mbuf.h> #include <sys/mutex.h> #include <sys/ioctl.h> #include <sys/errno.h> #include <sys/device.h> #include <sys/entropy.h> #include <sys/rndsource.h> #include <sys/cpu.h> #include <sys/kmem.h> #include <sys/hook.h> #include <net/if.h> #include <net/netisr.h> #include <net/route.h> #include <net/if_llc.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/pktqueue.h> #include <net/if_media.h> #include <dev/mii/mii.h> #include <dev/mii/miivar.h> #if NARP == 0 /* * XXX there should really be a way to issue this warning from within config(8) */ #error You have included NETATALK or a pseudo-device in your configuration that depends on the presence of ethernet interfaces, but have no such interfaces configured. Check if you really need pseudo-device bridge, pppoe, vlan or options NETATALK. 
#endif #include <net/bpf.h> #include <net/if_ether.h> #include <net/if_vlanvar.h> #if NPPPOE > 0 #include <net/if_pppoe.h> #endif #if NAGR > 0 #include <net/ether_slowprotocols.h> #include <net/agr/ieee8023ad.h> #include <net/agr/if_agrvar.h> #endif #if NBRIDGE > 0 #include <net/if_bridgevar.h> #endif #include <netinet/in.h> #ifdef INET #include <netinet/in_var.h> #endif #include <netinet/if_inarp.h> #ifdef INET6 #ifndef INET #include <netinet/in.h> #endif #include <netinet6/in6_var.h> #include <netinet6/nd6.h> #endif #include "carp.h" #if NCARP > 0 #include <netinet/ip_carp.h> #endif #ifdef NETATALK #include <netatalk/at.h> #include <netatalk/at_var.h> #include <netatalk/at_extern.h> #define llc_snap_org_code llc_un.type_snap.org_code #define llc_snap_ether_type llc_un.type_snap.ether_type extern u_char at_org_code[3]; extern u_char aarp_org_code[3]; #endif /* NETATALK */ #ifdef MPLS #include <netmpls/mpls.h> #include <netmpls/mpls_var.h> #endif CTASSERT(sizeof(struct ether_addr) == 6); CTASSERT(sizeof(struct ether_header) == 14); #ifdef DIAGNOSTIC static struct timeval bigpktppslim_last; static int bigpktppslim = 2; /* XXX */ static int bigpktpps_count; static kmutex_t bigpktpps_lock __cacheline_aligned; #endif const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; #define senderr(e) { error = (e); goto bad;} static pktq_rps_hash_func_t ether_pktq_rps_hash_p; static int ether_output(struct ifnet *, struct mbuf *, const struct sockaddr *, const struct rtentry *); /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Assumes that ifp is actually pointer to ethercom structure. 
 */
static int
ether_output(struct ifnet * const ifp0, struct mbuf * const m0,
    const struct sockaddr * const dst, const struct rtentry *rt)
{
	uint8_t esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
	uint16_t etype = 0;	/* 0 means "not yet known"; see 802.2 case below */
	int error = 0, hdrcmplt = 0;
	struct mbuf *m = m0;
	struct mbuf *mcopy = NULL;	/* loopback copy for simplex broadcast */
	struct ether_header *eh;
	struct ifnet *ifp = ifp0;
#ifdef INET
	struct arphdr *ah;
#endif
#ifdef NETATALK
	struct at_ifaddr *aa;
#endif

#ifdef MBUFTRACE
	m_claimm(m, ifp->if_mowner);
#endif

#if NCARP > 0
	if (ifp->if_type == IFT_CARP) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		/* loop back if this is going to the carp interface */
		if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP &&
		    (ifa = ifa_ifwithaddr(dst)) != NULL) {
			if (ifa->ifa_ifp == ifp0) {
				pserialize_read_exit(s);
				return looutput(ifp0, m, dst, rt);
			}
		}
		pserialize_read_exit(s);

		/* From here on, transmit on the underlying device. */
		ifp = ifp->if_carpdev;
		/* ac = (struct arpcom *)ifp; */

		if ((ifp0->if_flags & (IFF_UP | IFF_RUNNING)) !=
		    (IFF_UP | IFF_RUNNING))
			senderr(ENETDOWN);
	}
#endif

	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
		senderr(ENETDOWN);

	/*
	 * Resolve dst into a link-level destination (edst) and pick the
	 * Ethernet type, per address family.
	 */
	switch (dst->sa_family) {

#ifdef INET
	case AF_INET:
		if (m->m_flags & M_BCAST) {
			memcpy(edst, etherbroadcastaddr, sizeof(edst));
		} else if (m->m_flags & M_MCAST) {
			ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst);
		} else {
			error = arpresolve(ifp0, rt, m, dst, edst, sizeof(edst));
			if (error)
				/*
				 * EWOULDBLOCK means the mbuf was queued
				 * awaiting ARP completion; not an error.
				 */
				return (error == EWOULDBLOCK) ? 0 : error;
		}
		/* If broadcasting on a simplex interface, loopback a copy */
		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
			mcopy = m_copypacket(m, M_DONTWAIT);
		etype = htons(ETHERTYPE_IP);
		break;

	case AF_ARP:
		ah = mtod(m, struct arphdr *);
		if (m->m_flags & M_BCAST) {
			memcpy(edst, etherbroadcastaddr, sizeof(edst));
		} else {
			void *tha = ar_tha(ah);

			if (tha == NULL) {
				/* fake with ARPHRD_IEEE1394 */
				m_freem(m);
				return 0;
			}
			memcpy(edst, tha, sizeof(edst));
		}

		ah->ar_hrd = htons(ARPHRD_ETHER);
		switch (ntohs(ah->ar_op)) {
		case ARPOP_REVREQUEST:
		case ARPOP_REVREPLY:
			etype = htons(ETHERTYPE_REVARP);
			break;
		case ARPOP_REQUEST:
		case ARPOP_REPLY:
		default:
			etype = htons(ETHERTYPE_ARP);
		}
		break;
#endif

#ifdef INET6
	case AF_INET6:
		if (m->m_flags & M_BCAST) {
			memcpy(edst, etherbroadcastaddr, sizeof(edst));
		} else if (m->m_flags & M_MCAST) {
			ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
			    edst);
		} else {
			error = nd6_resolve(ifp0, rt, m, dst, edst,
			    sizeof(edst));
			if (error)
				/* Queued pending ND resolution. */
				return (error == EWOULDBLOCK) ? 0 : error;
		}
		etype = htons(ETHERTYPE_IPV6);
		break;
#endif

#ifdef NETATALK
	case AF_APPLETALK: {
		struct ifaddr *ifa;
		int s;

		KERNEL_LOCK(1, NULL);

		if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst,
		    edst)) {
			KERNEL_UNLOCK_ONE(NULL);
			return 0;
		}

		/*
		 * ifaddr is the first thing in at_ifaddr
		 */
		s = pserialize_read_enter();
		ifa = at_ifawithnet((const struct sockaddr_at *)dst, ifp);
		if (ifa == NULL) {
			pserialize_read_exit(s);
			KERNEL_UNLOCK_ONE(NULL);
			senderr(EADDRNOTAVAIL);
		}
		aa = (struct at_ifaddr *)ifa;

		/*
		 * In the phase 2 case, we need to prepend an mbuf for the
		 * llc header.
		 */
		if (aa->aa_flags & AFA_PHASE2) {
			struct llc llc;

			M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
			if (m == NULL) {
				pserialize_read_exit(s);
				KERNEL_UNLOCK_ONE(NULL);
				senderr(ENOBUFS);
			}

			llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
			llc.llc_control = LLC_UI;
			memcpy(llc.llc_snap_org_code, at_org_code,
			    sizeof(llc.llc_snap_org_code));
			llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
			memcpy(mtod(m, void *), &llc, sizeof(struct llc));
		} else {
			/* Phase 1: etype stays 0 only if not set here. */
			etype = htons(ETHERTYPE_ATALK);
		}
		pserialize_read_exit(s);
		KERNEL_UNLOCK_ONE(NULL);
		break;
	}
#endif /* NETATALK */

	case pseudo_AF_HDRCMPLT:
		/* Caller supplies the complete header, including source. */
		hdrcmplt = 1;
		memcpy(esrc,
		    ((const struct ether_header *)dst->sa_data)->ether_shost,
		    sizeof(esrc));
		/* FALLTHROUGH */

	case AF_UNSPEC:
		memcpy(edst,
		    ((const struct ether_header *)dst->sa_data)->ether_dhost,
		    sizeof(edst));
		/* AF_UNSPEC doesn't swap the byte order of the ether_type. */
		etype = ((const struct ether_header *)dst->sa_data)->ether_type;
		break;

	default:
		printf("%s: can't handle af%d\n", ifp->if_xname,
		    dst->sa_family);
		senderr(EAFNOSUPPORT);
	}

#ifdef MPLS
	{
		struct m_tag *mtag;
		mtag = m_tag_find(m, PACKET_TAG_MPLS);
		if (mtag != NULL) {
			/* Having the tag itself indicates it's MPLS */
			etype = htons(ETHERTYPE_MPLS);
			m_tag_delete(m, mtag);
		}
	}
#endif

	if (mcopy)
		(void)looutput(ifp, mcopy, dst, rt);

	KASSERT((m->m_flags & M_PKTHDR) != 0);

	/*
	 * If no ether type is set, this must be a 802.2 formatted packet.
	 */
	if (etype == 0)
		etype = htons(m->m_pkthdr.len);

	/*
	 * Add local net header. If no space in first mbuf, allocate another.
	 */
	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
	if (m == NULL)
		senderr(ENOBUFS);

	eh = mtod(m, struct ether_header *);
	/* Note: etype is already in network byte order. */
	memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
	memcpy(eh->ether_dhost, edst, sizeof(edst));
	if (hdrcmplt) {
		memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
	} else {
		memcpy(eh->ether_shost, CLLADDR(ifp->if_sadl),
		    sizeof(eh->ether_shost));
	}

#if NCARP > 0
	/* When sending via a carp interface, use its MAC as source. */
	if (ifp0 != ifp && ifp0->if_type == IFT_CARP) {
		memcpy(eh->ether_shost, CLLADDR(ifp0->if_sadl),
		    sizeof(eh->ether_shost));
	}
#endif

	if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
		return error;
	if (m == NULL)
		return 0;

#if NBRIDGE > 0
	/*
	 * Bridges require special output handling.
	 */
	if (ifp->if_bridge)
		return bridge_output(ifp, m, NULL, NULL);
#endif

#if NCARP > 0
	if (ifp != ifp0)
		if_statadd(ifp0, if_obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
#endif

#ifdef ALTQ
	KERNEL_LOCK(1, NULL);
	/*
	 * If ALTQ is enabled on the parent interface, do
	 * classification; the queueing discipline might not
	 * require classification, but might require the
	 * address family/header pointer in the pktattr.
	 */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_etherclassify(&ifp->if_snd, m);
	KERNEL_UNLOCK_ONE(NULL);
#endif

	return ifq_enqueue(ifp, m);

bad:
	/* Error path used by senderr(): count and free the packet. */
	if_statinc(ifp, if_oerrors);
	if (m)
		m_freem(m);
	return error;
}

#ifdef ALTQ
/*
 * This routine is a slight hack to allow a packet to be classified
 * if the Ethernet headers are present. It will go away when ALTQ's
 * classification engine understands link headers.
 *
 * XXX: We may need to do m_pullups here. First to ensure struct ether_header
 * is indeed contiguous, then to read the LLC and so on.
 */
void
altq_etherclassify(struct ifaltq *ifq, struct mbuf *m)
{
	struct ether_header *eh;
	struct mbuf *mtop = m;	/* keep head of chain for pkthdr updates */
	uint16_t ether_type;
	int hlen, af, hdrsize;
	void *hdr;

	KASSERT((mtop->m_flags & M_PKTHDR) != 0);

	hlen = ETHER_HDR_LEN;
	eh = mtod(m, struct ether_header *);
	ether_type = htons(eh->ether_type);

	if (ether_type < ETHERMTU) {
		/* LLC/SNAP: a "type" below the MTU is an 802.3 length. */
		struct llc *llc = (struct llc *)(eh + 1);
		hlen += 8;	/* LLC + SNAP header bytes */

		if (m->m_len < hlen ||
		    llc->llc_dsap != LLC_SNAP_LSAP ||
		    llc->llc_ssap != LLC_SNAP_LSAP ||
		    llc->llc_control != LLC_UI) {
			/* Not SNAP. */
			goto bad;
		}

		ether_type = htons(llc->llc_un.type_snap.ether_type);
	}

	switch (ether_type) {
	case ETHERTYPE_IP:
		af = AF_INET;
		hdrsize = 20;	/* sizeof(struct ip) */
		break;

	case ETHERTYPE_IPV6:
		af = AF_INET6;
		hdrsize = 40;	/* sizeof(struct ip6_hdr) */
		break;

	default:
		af = AF_UNSPEC;
		hdrsize = 0;
		break;
	}

	/* Walk the chain until the link header has been skipped. */
	while (m->m_len <= hlen) {
		hlen -= m->m_len;
		m = m->m_next;
		if (m == NULL)
			goto bad;
	}

	if (m->m_len < (hlen + hdrsize)) {
		/*
		 * protocol header not in a single mbuf.
		 * We can't cope with this situation right
		 * now (but it shouldn't ever happen, really, anyhow).
		 */
#ifdef DEBUG
		printf("altq_etherclassify: headers span multiple mbufs: "
		    "%d < %d\n", m->m_len, (hlen + hdrsize));
#endif
		goto bad;
	}

	/* Temporarily expose the protocol header for the classifier. */
	m->m_data += hlen;
	m->m_len -= hlen;

	hdr = mtod(m, void *);
	if (ALTQ_NEEDS_CLASSIFY(ifq)) {
		mtop->m_pkthdr.pattr_class =
		    (*ifq->altq_classify)(ifq->altq_clfier, m, af);
	}
	mtop->m_pkthdr.pattr_af = af;
	mtop->m_pkthdr.pattr_hdr = hdr;

	/* Restore the original mbuf layout. */
	m->m_data -= hlen;
	m->m_len += hlen;

	return;

bad:
	mtop->m_pkthdr.pattr_class = NULL;
	mtop->m_pkthdr.pattr_hdr = NULL;
	mtop->m_pkthdr.pattr_af = AF_UNSPEC;
}
#endif /* ALTQ */

#if defined (LLC) || defined (NETATALK)
/*
 * Dispatch an 802.2 LLC (SNAP) frame; currently only the AppleTalk
 * and AARP SNAP encapsulations are recognized.  Consumes the mbuf.
 */
static void
ether_input_llc(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh)
{
	struct ifqueue *inq = NULL;
	int isr = 0;
	struct llc *l;

	if (m->m_len < sizeof(*eh) + sizeof(struct llc))
		goto error;

	l = (struct llc *)(eh+1);
	switch (l->llc_dsap) {
#ifdef NETATALK
	case LLC_SNAP_LSAP:
		switch (l->llc_control) {
		case LLC_UI:
			if (l->llc_ssap != LLC_SNAP_LSAP)
				goto error;

			if (memcmp(&(l->llc_snap_org_code)[0],
			    at_org_code, sizeof(at_org_code)) == 0 &&
			    ntohs(l->llc_snap_ether_type) == ETHERTYPE_ATALK) {
				/* AppleTalk phase 2 data packet. */
				inq = &atintrq2;
				m_adj(m, sizeof(struct ether_header)
				    + sizeof(struct llc));
				isr = NETISR_ATALK;
				break;
			}

			if (memcmp(&(l->llc_snap_org_code)[0],
			    aarp_org_code, sizeof(aarp_org_code)) == 0 &&
			    ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
				m_adj(m, sizeof(struct ether_header)
				    + sizeof(struct llc));
				aarpinput(ifp, m); /* XXX queue? */
				return;
			}

		default:
			goto error;
		}
		break;
#endif
	default:
		goto noproto;
	}

	KASSERT(inq != NULL);
	IFQ_ENQUEUE_ISR(inq, m, isr);
	return;

noproto:
	m_freem(m);
	if_statinc(ifp, if_noproto);
	return;
error:
	m_freem(m);
	if_statinc(ifp, if_ierrors);
	return;
}
#endif /* defined (LLC) || defined (NETATALK) */

/*
 * Process a received Ethernet packet;
 * the packet is in the mbuf chain m with
 * the ether header.
 */
void
ether_input(struct ifnet *ifp, struct mbuf *m)
{
#if NVLAN > 0 || defined(MBUFTRACE)
	struct ethercom *ec = (struct ethercom *) ifp;
#endif
	pktqueue_t *pktq = NULL;
	struct ifqueue *inq = NULL;
	uint16_t etype;
	struct ether_header *eh;
	size_t ehlen;
	static int earlypkts;	/* boot-time entropy contribution counter */
	int isr = 0;

	KASSERT(!cpu_intr_p());
	KASSERT((m->m_flags & M_PKTHDR) != 0);

	if ((ifp->if_flags & IFF_UP) == 0)
		goto drop;

#ifdef MBUFTRACE
	m_claimm(m, &ec->ec_rx_mowner);
#endif

	/* Make sure the Ethernet header is in the first mbuf. */
	if (__predict_false(m->m_len < sizeof(*eh))) {
		if ((m = m_pullup(m, sizeof(*eh))) == NULL) {
			if_statinc(ifp, if_ierrors);
			return;
		}
	}

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	ehlen = sizeof(*eh);

	/*
	 * Feed early packets' headers to the entropy pool while the
	 * pool has not yet reached full entropy.
	 */
	if (__predict_false(earlypkts < 100 ||
		entropy_epoch() == (unsigned)-1)) {
		rnd_add_data(NULL, eh, ehlen, 0);
		earlypkts++;
	}

	/*
	 * Determine if the packet is within its size limits. For MPLS the
	 * header length is variable, so we skip the check.
	 */
	if (etype != ETHERTYPE_MPLS && m->m_pkthdr.len >
	    ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
#ifdef DIAGNOSTIC
		mutex_enter(&bigpktpps_lock);
		if (ppsratecheck(&bigpktppslim_last, &bigpktpps_count,
		    bigpktppslim)) {
			printf("%s: discarding oversize frame (len=%d)\n",
			    ifp->if_xname, m->m_pkthdr.len);
		}
		mutex_exit(&bigpktpps_lock);
#endif
		goto error;
	}

	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		/*
		 * If this is not a simplex interface, drop the packet
		 * if it came from us.
		 */
		if ((ifp->if_flags & IFF_SIMPLEX) == 0 &&
		    memcmp(CLLADDR(ifp->if_sadl), eh->ether_shost,
		    ETHER_ADDR_LEN) == 0) {
			goto drop;
		}

		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
		if_statinc(ifp, if_imcasts);
	}

	/* If the CRC is still on the packet, trim it off.
	 */
	if (m->m_flags & M_HASFCS) {
		m_adj(m, -ETHER_CRC_LEN);
		m->m_flags &= ~M_HASFCS;
	}

	if_statadd(ifp, if_ibytes, m->m_pkthdr.len);

	/* Move an in-band 802.1Q tag into the mbuf tag, if present. */
	if (!vlan_has_tag(m) && etype == ETHERTYPE_VLAN) {
		m = ether_strip_vlantag(m);
		if (m == NULL) {
			if_statinc(ifp, if_ierrors);
			return;
		}
		eh = mtod(m, struct ether_header *);
		etype = ntohs(eh->ether_type);
		ehlen = sizeof(*eh);
	}

	/* Frame seen only because the interface is promiscuous? */
	if ((m->m_flags & (M_BCAST | M_MCAST | M_PROMISC)) == 0 &&
	    (ifp->if_flags & IFF_PROMISC) != 0 &&
	    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
		   ETHER_ADDR_LEN) != 0) {
		m->m_flags |= M_PROMISC;
	}

	if ((m->m_flags & M_PROMISC) == 0) {
		if (pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN) != 0)
			return;
		if (m == NULL)
			return;

		/* pfil hooks may have replaced the mbuf; re-read header. */
		eh = mtod(m, struct ether_header *);
		etype = ntohs(eh->ether_type);
	}

	/*
	 * Processing a logical interfaces that are able
	 * to configure vlan(4).
	 */
#if NAGR > 0
	if (ifp->if_lagg != NULL &&
	    __predict_true(etype != ETHERTYPE_SLOWPROTOCOLS)) {
		m->m_flags &= ~M_PROMISC;
		agr_input(ifp, m);
		return;
	}
#endif

	/*
	 * VLAN processing.
	 *
	 * VLAN provides service delimiting so the frames are
	 * processed before other handlings. If a VLAN interface
	 * does not exist to take those frames, they're returned
	 * to ether_input().
	 */
	if (vlan_has_tag(m)) {
		if (EVL_VLANOFTAG(vlan_get_tag(m)) == 0) {
			/* Priority-tagged (VID 0) frame. */
			if (etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ)
				goto drop;

			/* XXX we should actually use the prio value? */
			m->m_flags &= ~M_VLANTAG;
		} else {
#if NVLAN > 0
			if (ec->ec_nvlans > 0) {
				m = vlan_input(ifp, m);

				/* vlan_input() called ether_input() recursively */
				if (m == NULL)
					return;
			}
#endif
			/* drop VLAN frames not for this port. */
			goto noproto;
		}
	}

#if NCARP > 0
	if (__predict_false(ifp->if_carp && ifp->if_type != IFT_CARP)) {
		/*
		 * Clear M_PROMISC, in case the packet comes from a
		 * vlan.
		 */
		m->m_flags &= ~M_PROMISC;
		if (carp_input(m, (uint8_t *)&eh->ether_shost,
		    (uint8_t *)&eh->ether_dhost, eh->ether_type) == 0)
			return;
	}
#endif

	/*
	 * Handle protocols that expect to have the Ethernet header
	 * (and possibly FCS) intact.
	 */
	switch (etype) {
#if NPPPOE > 0
	case ETHERTYPE_PPPOEDISC:
		pppoedisc_input(ifp, m);
		return;

	case ETHERTYPE_PPPOE:
		pppoe_input(ifp, m);
		return;
#endif

	case ETHERTYPE_SLOWPROTOCOLS: {
		uint8_t subtype;

		if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype))
			goto error;

		m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
		switch (subtype) {
#if NAGR > 0
		case SLOWPROTOCOLS_SUBTYPE_LACP:
			if (ifp->if_lagg != NULL) {
				ieee8023ad_lacp_input(ifp, m);
				return;
			}
			break;

		case SLOWPROTOCOLS_SUBTYPE_MARKER:
			if (ifp->if_lagg != NULL) {
				ieee8023ad_marker_input(ifp, m);
				return;
			}
			break;
#endif

		default:
			if (subtype == 0 || subtype > 10) {
				/* illegal value */
				goto error;
			}
			/* unknown subtype */
			break;
		}
	}
	/* FALLTHROUGH */
	default:
		if (m->m_flags & M_PROMISC)
			goto drop;
	}

	/* If the CRC is still on the packet, trim it off. */
	if (m->m_flags & M_HASFCS) {
		m_adj(m, -ETHER_CRC_LEN);
		m->m_flags &= ~M_HASFCS;
	}

	/* etype represents the size of the payload in this case */
	if (etype <= ETHERMTU + sizeof(struct ether_header)) {
		KASSERT(ehlen == sizeof(*eh));
#if defined (LLC) || defined (NETATALK)
		ether_input_llc(ifp, m, eh);
		return;
#else
		/* ethertype of 0-1500 is regarded as noproto */
		goto noproto;
#endif
	}

	/* Strip off the Ethernet header. */
	m_adj(m, ehlen);

	/* Dispatch by ethertype to the protocol input queue. */
	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
#ifdef GATEWAY
		if (ipflow_fastforward(m))
			return;
#endif
		pktq = ip_pktq;
		break;

	case ETHERTYPE_ARP:
		isr = NETISR_ARP;
		inq = &arpintrq;
		break;

	case ETHERTYPE_REVARP:
		revarpinput(m); /* XXX queue? */
		return;
#endif

#ifdef INET6
	case ETHERTYPE_IPV6:
		if (__predict_false(!in6_present))
			goto noproto;
#ifdef GATEWAY
		if (ip6flow_fastforward(&m))
			return;
#endif
		pktq = ip6_pktq;
		break;
#endif

#ifdef NETATALK
	case ETHERTYPE_ATALK:
		isr = NETISR_ATALK;
		inq = &atintrq1;
		break;

	case ETHERTYPE_AARP:
		aarpinput(ifp, m); /* XXX queue?
 */
		return;
#endif

#ifdef MPLS
	case ETHERTYPE_MPLS:
		isr = NETISR_MPLS;
		inq = &mplsintrq;
		break;
#endif

	default:
		goto noproto;
	}

	if (__predict_true(pktq)) {
		/* RPS hash spreads flows over the per-CPU packet queues. */
		const uint32_t h = pktq_rps_hash(&ether_pktq_rps_hash_p, m);
		if (__predict_false(!pktq_enqueue(pktq, m, h))) {
			m_freem(m);
		}
		return;
	}

	if (__predict_false(!inq)) {
		/* Should not happen. */
		goto error;
	}

	IFQ_ENQUEUE_ISR(inq, m, isr);
	return;

drop:
	m_freem(m);
	if_statinc(ifp, if_iqdrops);
	return;
noproto:
	m_freem(m);
	if_statinc(ifp, if_noproto);
	return;
error:
	m_freem(m);
	if_statinc(ifp, if_ierrors);
	return;
}

/*
 * Tap a frame to BPF; if the mbuf carries a VLAN tag, rebuild the
 * 802.1Q encapsulation in a temporary header so BPF sees the frame
 * as it appeared on the wire.
 */
static void
ether_bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
{
	struct ether_vlan_header evl;
	struct m_hdr mh, md;	/* fake mbufs on the stack; not freed */

	KASSERT(bp != NULL);

	if (!vlan_has_tag(m)) {
		bpf_mtap3(bp, m, direction);
		return;
	}

	memcpy(&evl, mtod(m, char *), ETHER_HDR_LEN);
	evl.evl_proto = evl.evl_encap_proto;
	evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl.evl_tag = htons(vlan_get_tag(m));

	/* md covers the payload after the original Ethernet header. */
	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	/* mh covers the reconstructed VLAN header, chained to md. */
	mh.mh_flags = 0;
	mh.mh_data = (char *)&evl;
	mh.mh_len = sizeof(evl);
	mh.mh_next = (struct mbuf *)&md;

	bpf_mtap3(bp, (struct mbuf *)&mh, direction);
}

/*
 * Convert Ethernet address to printable (loggable) representation.
 */
char *
ether_sprintf(const u_char *ap)
{
	/* NB: static buffer -- not reentrant; use ether_snprintf for that. */
	static char etherbuf[3 * ETHER_ADDR_LEN];

	return ether_snprintf(etherbuf, sizeof(etherbuf), ap);
}

/*
 * Format as many octets of ap as fit in buf (3 chars per octet,
 * colon-separated), always NUL-terminating the result.
 */
char *
ether_snprintf(char *buf, size_t len, const u_char *ap)
{
	char *cp = buf;
	size_t i;

	for (i = 0; i < len / 3; i++) {
		*cp++ = hexdigits[*ap >> 4];
		*cp++ = hexdigits[*ap++ & 0xf];
		*cp++ = ':';
	}
	/* Overwrite the trailing ':' with the terminator. */
	*--cp = '\0';
	return buf;
}

/*
 * Perform common duties while attaching to interface list
 */
void
ether_ifattach(struct ifnet *ifp, const uint8_t *lla)
{
	struct ethercom *ec = (struct ethercom *)ifp;
	char xnamebuf[HOOKNAMSIZ];

	ifp->if_type = IFT_ETHER;
	ifp->if_hdrlen = ETHER_HDR_LEN;
	ifp->if_dlt = DLT_EN10MB;
	ifp->if_mtu = ETHERMTU;
	ifp->if_output = ether_output;
	ifp->_if_input = ether_input;
	ifp->if_bpf_mtap = ether_bpf_mtap;
	if (ifp->if_baudrate == 0)
		ifp->if_baudrate = IF_Mbps(10);		/* just a default */

	if (lla != NULL)
		if_set_sadl(ifp, lla, ETHER_ADDR_LEN, !ETHER_IS_LOCAL(lla));

	LIST_INIT(&ec->ec_multiaddrs);
	SIMPLEQ_INIT(&ec->ec_vids);
	ec->ec_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	ec->ec_flags = 0;
	ifp->if_broadcastaddr = etherbroadcastaddr;
	bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	snprintf(xnamebuf, sizeof(xnamebuf),
	    "%s-ether_ifdetachhooks", ifp->if_xname);
	ec->ec_ifdetach_hooks = simplehook_create(IPL_NET, xnamebuf);
#ifdef MBUFTRACE
	mowner_init_owner(&ec->ec_tx_mowner, ifp->if_xname, "tx");
	mowner_init_owner(&ec->ec_rx_mowner, ifp->if_xname, "rx");
	MOWNER_ATTACH(&ec->ec_tx_mowner);
	MOWNER_ATTACH(&ec->ec_rx_mowner);
	ifp->if_mowner = &ec->ec_tx_mowner;
#endif
}

/*
 * Undo ether_ifattach: run and destroy the detach hooks, tear down
 * BPF, and release the multicast list and ethercom lock.
 */
void
ether_ifdetach(struct ifnet *ifp)
{
	struct ethercom *ec = (void *) ifp;
	struct ether_multi *enm;

	IFNET_ASSERT_UNLOCKED(ifp);

	/*
	 * Prevent further calls to ioctl (for example turning off
	 * promiscuous mode from the bridge code), which eventually can
	 * call if_init() which can cause panics because the interface
	 * is in the process of being detached. Return device not configured
	 * instead.
	 */
	ifp->if_ioctl =
	    __FPTRCAST(int (*)(struct ifnet *, u_long, void *), enxio);

	simplehook_dohooks(ec->ec_ifdetach_hooks);
	KASSERT(!simplehook_has_hooks(ec->ec_ifdetach_hooks));
	simplehook_destroy(ec->ec_ifdetach_hooks);

	bpf_detach(ifp);

	ETHER_LOCK(ec);
	KASSERT(ec->ec_nvlans == 0);
	while ((enm = LIST_FIRST(&ec->ec_multiaddrs)) != NULL) {
		LIST_REMOVE(enm, enm_list);
		kmem_free(enm, sizeof(*enm));
		ec->ec_multicnt--;
	}
	ETHER_UNLOCK(ec);

	mutex_obj_free(ec->ec_lock);
	ec->ec_lock = NULL;

	ifp->if_mowner = NULL;
	MOWNER_DETACH(&ec->ec_rx_mowner);
	MOWNER_DETACH(&ec->ec_tx_mowner);
}

/*
 * Register a hook to run when the given Ethernet interface detaches.
 * Returns an opaque handle, or NULL if ifp is not an Ethernet interface.
 */
void *
ether_ifdetachhook_establish(struct ifnet *ifp,
    void (*fn)(void *), void *arg)
{
	struct ethercom *ec;
	khook_t *hk;

	if (ifp->if_type != IFT_ETHER)
		return NULL;

	ec = (struct ethercom *)ifp;
	hk = simplehook_establish(ec->ec_ifdetach_hooks,
	    fn, arg);

	return (void *)hk;
}

/*
 * Remove a previously established detach hook; a NULL handle is a no-op.
 */
void
ether_ifdetachhook_disestablish(struct ifnet *ifp,
    void *vhook, kmutex_t *lock)
{
	struct ethercom *ec;

	if (vhook == NULL)
		return;

	ec = (struct ethercom *)ifp;
	simplehook_disestablish(ec->ec_ifdetach_hooks,
	    vhook, lock);
}

#if 0
/*
 * This is for reference. We have a table-driven version
 * of the little-endian crc32 generator, which is faster
 * than the double-loop.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	uint32_t c, crc, carry;
	size_t i, j;

	crc = 0xffffffffU;	/* initial value */

	for (i = 0; i < len; i++) {
		c = buf[i];
		for (j = 0; j < 8; j++) {
			carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
			crc >>= 1;
			c >>= 1;
			if (carry)
				crc = (crc ^ ETHER_CRC_POLY_LE);
		}
	}

	return (crc);
}
#else
/*
 * Table-driven little-endian CRC32, processing 4 bits per step.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	static const uint32_t crctab[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	uint32_t crc;
	size_t i;

	crc = 0xffffffffU;	/* initial value */

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		/* Two nibble lookups per input byte. */
		crc = (crc >> 4) ^ crctab[crc & 0xf];
		crc = (crc >> 4) ^ crctab[crc & 0xf];
	}

	return (crc);
}
#endif

/*
 * Bitwise big-endian CRC32 (MSB-first polynomial).
 */
uint32_t
ether_crc32_be(const uint8_t *buf, size_t len)
{
	uint32_t c, crc, carry;
	size_t i, j;

	crc = 0xffffffffU;	/* initial value */

	for (i = 0; i < len; i++) {
		c = buf[i];
		for (j = 0; j < 8; j++) {
			carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
			crc <<= 1;
			c >>= 1;
			if (carry)
				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
		}
	}

	return (crc);
}

#ifdef INET
/* Bounds of the Ethernet multicast range mapped from IPv4 multicast. */
const uint8_t ether_ipmulticast_min[ETHER_ADDR_LEN] =
    { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
const uint8_t ether_ipmulticast_max[ETHER_ADDR_LEN] =
    { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
#endif
#ifdef INET6
/* Bounds of the Ethernet multicast range mapped from IPv6 multicast. */
const uint8_t ether_ip6multicast_min[ETHER_ADDR_LEN] =
    { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
const uint8_t ether_ip6multicast_max[ETHER_ADDR_LEN] =
    { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
#endif

/*
 * ether_aton implementation, not using a static buffer.
 */
int
ether_aton_r(u_char *dest, size_t len, const char *str)
{
	const u_char *cp = (const void *)str;
	u_char *ep;

#define atox(c)	(((c) <= '9') ? ((c) - '0') : ((toupper(c) - 'A') + 10))

	if (len < ETHER_ADDR_LEN)
		return ENOSPC;

	ep = dest + ETHER_ADDR_LEN;

	while (*cp) {
		if (!isxdigit(*cp))
			return EINVAL;

		/* One or two hex digits per octet. */
		*dest = atox(*cp);
		cp++;
		if (isxdigit(*cp)) {
			*dest = (*dest << 4) | atox(*cp);
			cp++;
		}
		dest++;

		if (dest == ep)
			return (*cp == '\0') ? 0 : ENAMETOOLONG;
		/* Skip an optional separator between octets. */
		switch (*cp) {
		case ':':
		case '-':
		case '.':
			cp++;
			break;
		}
	}
	/* Ran out of input before filling all six octets. */
	return ENOBUFS;
}

/*
 * Convert a sockaddr into an Ethernet address or range of Ethernet
 * addresses.
 */
int
ether_multiaddr(const struct sockaddr *sa, uint8_t addrlo[ETHER_ADDR_LEN],
    uint8_t addrhi[ETHER_ADDR_LEN])
{
#ifdef INET
	const struct sockaddr_in *sin;
#endif
#ifdef INET6
	const struct sockaddr_in6 *sin6;
#endif

	switch (sa->sa_family) {

	case AF_UNSPEC:
		memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
		memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
		break;

#ifdef INET
	case AF_INET:
		sin = satocsin(sa);
		if (sin->sin_addr.s_addr == INADDR_ANY) {
			/*
			 * An IP address of INADDR_ANY means listen to
			 * or stop listening to all of the Ethernet
			 * multicast addresses used for IP.
			 * (This is for the sake of IP multicast routers.)
			 */
			memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
			memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
		} else {
			ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		sin6 = satocsin6(sa);
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			/*
			 * An IP6 address of 0 means listen to or stop
			 * listening to all of the Ethernet multicast
			 * address used for IP6.
			 * (This is used for multicast routers.)
			 */
			memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
			memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
		} else {
			ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
		}
		break;
#endif

	default:
		return EAFNOSUPPORT;
	}
	return 0;
}

/*
 * Add an Ethernet multicast address or range of addresses to the list for a
 * given interface.
*/
int
ether_addmulti(const struct sockaddr *sa, struct ethercom *ec)
{
	struct ether_multi *enm, *_enm;
	u_char addrlo[ETHER_ADDR_LEN];
	u_char addrhi[ETHER_ADDR_LEN];
	int error = 0;

	/* Allocate out of lock */
	enm = kmem_alloc(sizeof(*enm), KM_SLEEP);

	ETHER_LOCK(ec);
	error = ether_multiaddr(sa, addrlo, addrhi);
	if (error != 0)
		goto out;

	/*
	 * Verify that we have valid Ethernet multicast addresses.
	 */
	if (!ETHER_IS_MULTICAST(addrlo) || !ETHER_IS_MULTICAST(addrhi)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * See if the address range is already in the list.
	 */
	_enm = ether_lookup_multi(addrlo, addrhi, ec);
	if (_enm != NULL) {
		/*
		 * Found it; just increment the reference count.
		 */
		++_enm->enm_refcount;
		error = 0;
		goto out;
	}

	/*
	 * Link a new multicast record into the interface's multicast list.
	 */
	memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
	memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
	enm->enm_refcount = 1;
	LIST_INSERT_HEAD(&ec->ec_multiaddrs, enm, enm_list);
	ec->ec_multicnt++;

	/*
	 * Return ENETRESET to inform the driver that the list has changed
	 * and its reception filter should be adjusted accordingly.
	 */
	error = ENETRESET;
	enm = NULL;	/* ownership transferred to the list; don't free */

out:
	ETHER_UNLOCK(ec);
	if (enm != NULL)
		kmem_free(enm, sizeof(*enm));
	return error;
}

/*
 * Delete a multicast address record.
 */
int
ether_delmulti(const struct sockaddr *sa, struct ethercom *ec)
{
	struct ether_multi *enm;
	u_char addrlo[ETHER_ADDR_LEN];
	u_char addrhi[ETHER_ADDR_LEN];
	int error;

	ETHER_LOCK(ec);
	error = ether_multiaddr(sa, addrlo, addrhi);
	if (error != 0)
		goto error;

	/*
	 * Look up the address in our list.
	 */
	enm = ether_lookup_multi(addrlo, addrhi, ec);
	if (enm == NULL) {
		error = ENXIO;
		goto error;
	}
	if (--enm->enm_refcount != 0) {
		/*
		 * Still some claims to this record.
		 */
		error = 0;
		goto error;
	}

	/*
	 * No remaining claims to this record; unlink and free it.
*/
	LIST_REMOVE(enm, enm_list);
	ec->ec_multicnt--;
	ETHER_UNLOCK(ec);
	kmem_free(enm, sizeof(*enm));

	/*
	 * Return ENETRESET to inform the driver that the list has changed
	 * and its reception filter should be adjusted accordingly.
	 */
	return ENETRESET;

error:
	ETHER_UNLOCK(ec);
	return error;
}

/* Register the callback invoked on interface flag changes. */
void
ether_set_ifflags_cb(struct ethercom *ec, ether_cb_t cb)
{

	ec->ec_ifflags_cb = cb;
}

/* Register the callback invoked when a VLAN id is added or deleted. */
void
ether_set_vlan_cb(struct ethercom *ec, ether_vlancb_t cb)
{

	ec->ec_vlan_cb = cb;
}

/*
 * Re-initialize, stop or start the interface according to the current
 * IFF_UP/IFF_RUNNING combination; used after flag/capability changes.
 */
static int
ether_ioctl_reinit(struct ethercom *ec)
{
	struct ifnet *ifp = &ec->ec_if;
	int error;

	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);

	switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
	case IFF_RUNNING:
		/*
		 * If interface is marked down and it is running,
		 * then stop and disable it.
		 */
		if_stop(ifp, 1);
		break;
	case IFF_UP:
		/*
		 * If interface is marked up and it is stopped, then
		 * start it.
		 */
		return if_init(ifp);
	case IFF_UP | IFF_RUNNING:
		error = 0;
		if (ec->ec_ifflags_cb != NULL) {
			error = (*ec->ec_ifflags_cb)(ec);
			if (error == ENETRESET) {
				/*
				 * Reset the interface to pick up
				 * changes in any other flags that
				 * affect the hardware state.
				 */
				return if_init(ifp);
			}
		} else
			error = if_init(ifp);
		return error;
	case 0:
		break;
	}

	return 0;
}

/*
 * Common ioctls for Ethernet interfaces.  Note, we must be
 * called at splnet().
*/
int
ether_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct ethercom *ec = (void *)ifp;
	struct eccapreq *eccr;
	struct ifreq *ifr = (struct ifreq *)data;
	struct if_laddrreq *iflr = data;
	const struct sockaddr_dl *sdl;
	static const uint8_t zero[ETHER_ADDR_LEN];
	int error;

	switch (cmd) {
	case SIOCINITIFADDR:
	    {
		struct ifaddr *ifa = (struct ifaddr *)data;

		/* bring the interface up for non-link addresses */
		if (ifa->ifa_addr->sa_family != AF_LINK &&
		    (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
		    (IFF_UP | IFF_RUNNING)) {
			ifp->if_flags |= IFF_UP;
			if ((error = if_init(ifp)) != 0)
				return error;
		}
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			arp_ifinit(ifp, ifa);
#endif
		return 0;
	    }

	case SIOCSIFMTU:
	    {
		int maxmtu;

		if (ec->ec_capabilities & ETHERCAP_JUMBO_MTU)
			maxmtu = ETHERMTU_JUMBO;
		else
			maxmtu = ETHERMTU;

		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu)
			return EINVAL;
		else if ((error = ifioctl_common(ifp, cmd, data)) !=
		    ENETRESET)
			return error;
		else if (ifp->if_flags & IFF_UP) {
			/* Make sure the device notices the MTU change. */
			return if_init(ifp);
		} else
			return 0;
	    }

	case SIOCSIFFLAGS:
		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
			return error;
		return ether_ioctl_reinit(ec);

	case SIOCGIFFLAGS:
		error = ifioctl_common(ifp, cmd, data);
		if (error == 0) {
			/* Set IFF_ALLMULTI for backcompat */
			ifr->ifr_flags |= (ec->ec_flags & ETHER_F_ALLMULTI) ?
IFF_ALLMULTI : 0;
		}
		return error;

	case SIOCGETHERCAP:
		eccr = (struct eccapreq *)data;
		eccr->eccr_capabilities = ec->ec_capabilities;
		eccr->eccr_capenable = ec->ec_capenable;
		return 0;

	case SIOCSETHERCAP:
		eccr = (struct eccapreq *)data;
		/* refuse to enable capabilities the hardware lacks */
		if ((eccr->eccr_capenable & ~ec->ec_capabilities) != 0)
			return EINVAL;
		if (eccr->eccr_capenable == ec->ec_capenable)
			return 0;
#if 0 /* notyet */
		ec->ec_capenable = (ec->ec_capenable & ETHERCAP_CANTCHANGE)
		    | (eccr->eccr_capenable & ~ETHERCAP_CANTCHANGE);
#else
		ec->ec_capenable = eccr->eccr_capenable;
#endif
		return ether_ioctl_reinit(ec);

	case SIOCADDMULTI:
		return ether_addmulti(ifreq_getaddr(cmd, ifr), ec);

	case SIOCDELMULTI:
		return ether_delmulti(ifreq_getaddr(cmd, ifr), ec);

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		if (ec->ec_mii != NULL)
			return ifmedia_ioctl(ifp, ifr, &ec->ec_mii->mii_media,
			    cmd);
		else if (ec->ec_ifmedia != NULL)
			return ifmedia_ioctl(ifp, ifr, ec->ec_ifmedia, cmd);
		else
			return ENOTTY;
		break;

	case SIOCALIFADDR:
		/* reject multicast or all-zero link-level addresses */
		sdl = satocsdl(sstocsa(&iflr->addr));
		if (sdl->sdl_family != AF_LINK)
			;
		else if (ETHER_IS_MULTICAST(CLLADDR(sdl)))
			return EINVAL;
		else if (memcmp(zero, CLLADDR(sdl), sizeof(zero)) == 0)
			return EINVAL;
		/*FALLTHROUGH*/
	default:
		return ifioctl_common(ifp, cmd, data);
	}

	return 0;
}

/*
 * Enable/disable passing VLAN packets if the parent interface supports it.
 * Return:
 *	0: Ok
 *	-1: Parent interface does not support vlans
 *	>0: Error
 */
int
ether_enable_vlan_mtu(struct ifnet *ifp)
{
	int error;
	struct ethercom *ec = (void *)ifp;

	/* Parent does not support VLAN's */
	if ((ec->ec_capabilities & ETHERCAP_VLAN_MTU) == 0)
		return -1;

	/*
	 * Parent supports the VLAN_MTU capability,
	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames;
	 * enable it.
*/
	ec->ec_capenable |= ETHERCAP_VLAN_MTU;

	/* Interface is down, defer for later */
	if ((ifp->if_flags & IFF_UP) == 0)
		return 0;

	if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
		return 0;

	/* enabling failed; roll the capability back */
	ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
	return error;
}

int
ether_disable_vlan_mtu(struct ifnet *ifp)
{
	int error;
	struct ethercom *ec = (void *)ifp;

	/* We still have VLAN's, defer for later */
	if (ec->ec_nvlans != 0)
		return 0;

	/* Parent does not support VLAN's, nothing to do. */
	if ((ec->ec_capenable & ETHERCAP_VLAN_MTU) == 0)
		return -1;

	/*
	 * Disable Tx/Rx of VLAN-sized frames.
	 */
	ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;

	/* Interface is down, defer for later */
	if ((ifp->if_flags & IFF_UP) == 0)
		return 0;

	if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
		return 0;

	/* disabling failed; restore the capability */
	ec->ec_capenable |= ETHERCAP_VLAN_MTU;
	return error;
}

/*
 * Add and delete VLAN TAG
 */
int
ether_add_vlantag(struct ifnet *ifp, uint16_t vtag, bool *vlanmtu_status)
{
	struct ethercom *ec = (void *)ifp;
	struct vlanid_list *vidp;
	bool vlanmtu_enabled;
	uint16_t vid = EVL_VLANOFTAG(vtag);
	int error;

	vlanmtu_enabled = false;

	/* Add a vid to the list */
	vidp = kmem_alloc(sizeof(*vidp), KM_SLEEP);
	vidp->vid = vid;

	ETHER_LOCK(ec);
	ec->ec_nvlans++;
	SIMPLEQ_INSERT_TAIL(&ec->ec_vids, vidp, vid_list);
	ETHER_UNLOCK(ec);

	/* first VLAN: try to enable VLAN-sized frames on the parent */
	if (ec->ec_nvlans == 1) {
		IFNET_LOCK(ifp);
		error = ether_enable_vlan_mtu(ifp);
		IFNET_UNLOCK(ifp);
		if (error == 0) {
			vlanmtu_enabled = true;
		} else if (error != -1) {
			/* -1 means "unsupported", which is not fatal */
			goto fail;
		}
	}

	if (ec->ec_vlan_cb != NULL) {
		error = (*ec->ec_vlan_cb)(ec, vid, true);
		if (error != 0)
			goto fail;
	}

	if (vlanmtu_status != NULL)
		*vlanmtu_status = vlanmtu_enabled;

	return 0;

fail:
	/* undo the list insertion and any MTU enablement */
	ETHER_LOCK(ec);
	ec->ec_nvlans--;
	SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list, vid_list);
	ETHER_UNLOCK(ec);

	if (vlanmtu_enabled) {
		IFNET_LOCK(ifp);
		(void)ether_disable_vlan_mtu(ifp);
		IFNET_UNLOCK(ifp);
	}

	kmem_free(vidp, sizeof(*vidp));
	return error;
}

int
ether_del_vlantag(struct ifnet *ifp, uint16_t vtag)
{
	struct ethercom *ec = (void *)ifp;
	struct vlanid_list *vidp;
	uint16_t vid = EVL_VLANOFTAG(vtag);

	ETHER_LOCK(ec);
	SIMPLEQ_FOREACH(vidp, &ec->ec_vids, vid_list) {
		if (vidp->vid == vid) {
			SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list,
			    vid_list);
			ec->ec_nvlans--;
			break;
		}
	}
	ETHER_UNLOCK(ec);

	if (vidp == NULL)
		return ENOENT;

	if (ec->ec_vlan_cb != NULL) {
		(void)(*ec->ec_vlan_cb)(ec, vidp->vid, false);
	}

	/* last VLAN gone: stop accepting VLAN-sized frames */
	if (ec->ec_nvlans == 0) {
		IFNET_LOCK(ifp);
		(void)ether_disable_vlan_mtu(ifp);
		IFNET_UNLOCK(ifp);
	}

	kmem_free(vidp, sizeof(*vidp));
	return 0;
}

/*
 * Insert an 802.1Q encapsulation header carrying (etype, tag) into the
 * frame in *mp.  Returns 0 on success or ENOBUFS on allocation failure.
 */
int
ether_inject_vlantag(struct mbuf **mp, uint16_t etype, uint16_t tag)
{
	static const size_t min_data_len =
	    ETHER_MIN_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
	/* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
	static const char vlan_zero_pad_buff[ETHER_MIN_LEN] = { 0 };
	struct ether_vlan_header *evl;
	struct mbuf *m = *mp;
	int error;

	error = 0;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto out;
	}

	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (m == NULL) {
			error = ENOBUFS;
			goto out;
		}
	}

	/*
	 * Transform the Ethernet header into an
	 * Ethernet header with 802.1Q encapsulation.
	 */
	memmove(mtod(m, void *),
	    mtod(m, char *) + ETHER_VLAN_ENCAP_LEN,
	    sizeof(struct ether_header));
	evl = mtod(m, struct ether_vlan_header *);
	evl->evl_proto = evl->evl_encap_proto;
	evl->evl_encap_proto = htons(etype);
	evl->evl_tag = htons(tag);

	/*
	 * To cater for VLAN-aware layer 2 ethernet
	 * switches which may need to strip the tag
	 * before forwarding the packet, make sure
	 * the packet+tag is at least 68 bytes long.
	 * This is necessary because our parent will
	 * only pad to 64 bytes (ETHER_MIN_LEN) and
	 * some switches will not pad by themselves
	 * after deleting a tag.
*/ if (m->m_pkthdr.len < min_data_len) { m_copyback(m, m->m_pkthdr.len, min_data_len - m->m_pkthdr.len, vlan_zero_pad_buff); } m->m_flags &= ~M_VLANTAG; out: *mp = m; return error; } struct mbuf * ether_strip_vlantag(struct mbuf *m) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { return NULL; } if (m_makewritable(&m, 0, sizeof(*evl), M_DONTWAIT)) { m_freem(m); return NULL; } evl = mtod(m, struct ether_vlan_header *); KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN); vlan_set_tag(m, ntohs(evl->evl_tag)); /* * Restore the original ethertype. We'll remove * the encapsulation after we've found the vlan * interface corresponding to the tag. */ evl->evl_encap_proto = evl->evl_proto; /* * Remove the encapsulation header and append tag. * The original header has already been fixed up above. */ vlan_set_tag(m, ntohs(evl->evl_tag)); memmove((char *)evl + ETHER_VLAN_ENCAP_LEN, evl, offsetof(struct ether_vlan_header, evl_encap_proto)); m_adj(m, ETHER_VLAN_ENCAP_LEN); return m; } static int ether_multicast_sysctl(SYSCTLFN_ARGS) { struct ether_multi *enm; struct ifnet *ifp; struct ethercom *ec; int error = 0; size_t written; struct psref psref; int bound; unsigned int multicnt; struct ether_multi_sysctl *addrs; int i; if (namelen != 1) return EINVAL; bound = curlwp_bind(); ifp = if_get_byindex(name[0], &psref); if (ifp == NULL) { error = ENODEV; goto out; } if (ifp->if_type != IFT_ETHER) { if_put(ifp, &psref); *oldlenp = 0; goto out; } ec = (struct ethercom *)ifp; if (oldp == NULL) { if_put(ifp, &psref); *oldlenp = ec->ec_multicnt * sizeof(*addrs); goto out; } /* * ec->ec_lock is a spin mutex so we cannot call sysctl_copyout, which * is sleepable, while holding it. Copy data to a local buffer first * with the lock taken and then call sysctl_copyout without holding it. 
*/ retry: multicnt = ec->ec_multicnt; if (multicnt == 0) { if_put(ifp, &psref); *oldlenp = 0; goto out; } addrs = kmem_zalloc(sizeof(*addrs) * multicnt, KM_SLEEP); ETHER_LOCK(ec); if (multicnt != ec->ec_multicnt) { /* The number of multicast addresses has changed */ ETHER_UNLOCK(ec); kmem_free(addrs, sizeof(*addrs) * multicnt); goto retry; } i = 0; LIST_FOREACH(enm, &ec->ec_multiaddrs, enm_list) { struct ether_multi_sysctl *addr = &addrs[i]; addr->enm_refcount = enm->enm_refcount; memcpy(addr->enm_addrlo, enm->enm_addrlo, ETHER_ADDR_LEN); memcpy(addr->enm_addrhi, enm->enm_addrhi, ETHER_ADDR_LEN); i++; } ETHER_UNLOCK(ec); error = 0; written = 0; for (i = 0; i < multicnt; i++) { struct ether_multi_sysctl *addr = &addrs[i]; if (written + sizeof(*addr) > *oldlenp) break; error = sysctl_copyout(l, addr, oldp, sizeof(*addr)); if (error) break; written += sizeof(*addr); oldp = (char *)oldp + sizeof(*addr); } kmem_free(addrs, sizeof(*addrs) * multicnt); if_put(ifp, &psref); *oldlenp = written; out: curlwp_bindx(bound); return error; } static void ether_sysctl_setup(struct sysctllog **clog) { const struct sysctlnode *rnode = NULL; sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "ether", SYSCTL_DESCR("Ethernet-specific information"), NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &rnode, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "multicast", SYSCTL_DESCR("multicast addresses"), ether_multicast_sysctl, 0, NULL, 0, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &rnode, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_STRING, "rps_hash", SYSCTL_DESCR("Interface rps hash function control"), sysctl_pktq_rps_hash_handler, 0, (void *)ðer_pktq_rps_hash_p, PKTQ_RPS_HASH_NAME_LEN, CTL_CREATE, CTL_EOL); } void etherinit(void) { #ifdef DIAGNOSTIC mutex_init(&bigpktpps_lock, MUTEX_DEFAULT, IPL_NET); #endif ether_pktq_rps_hash_p = pktq_rps_hash_default; ether_sysctl_setup(NULL); } |
| 9 9 9 9 9 6 8 8 8 8 8 8 2 2 2 6 6 5 6 6 6 2 8 6 6 6 6 6 6 6 6 6 6 6 6 5 5 6 4 67 62 67 2 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 
1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 
2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 
3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 | /* $NetBSD: icmp6.c,v 1.251 2022/08/22 09:25:55 knakahara Exp $ */ /* $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.251 2022/08/22 09:25:55 knakahara Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" #include "opt_inet.h" #include "opt_ipsec.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kmem.h> #include <sys/mbuf.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/time.h> #include <sys/kernel.h> #include <sys/syslog.h> #include <sys/domain.h> #include <sys/sysctl.h> #include <net/if.h> #include <net/route.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/nd.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip6.h> #include <netinet/wqinput.h> #include <netinet6/ip6_var.h> #include <netinet6/ip6_private.h> #include <netinet/icmp6.h> #include <netinet6/icmp6_private.h> #include <netinet6/mld6_var.h> #include <netinet6/in6_pcb.h> #include <netinet6/in6_ifattach.h> #include <netinet6/ip6protosw.h> #include <netinet6/nd6.h> #include <netinet6/scope6_var.h> #ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/ipsec6.h> #include <netipsec/key.h> #endif #include "faith.h" #if defined(NFAITH) && 0 < NFAITH #include <net/if_faith.h> #endif /* Ensure that non packed structures are the desired size. 
*/ __CTASSERT(sizeof(struct icmp6_hdr) == 8); __CTASSERT(sizeof(struct icmp6_nodeinfo) == 16); __CTASSERT(sizeof(struct icmp6_namelookup) == 20); __CTASSERT(sizeof(struct icmp6_router_renum) == 16); __CTASSERT(sizeof(struct nd_router_solicit) == 8); __CTASSERT(sizeof(struct nd_router_advert) == 16); __CTASSERT(sizeof(struct nd_neighbor_solicit) == 24); __CTASSERT(sizeof(struct nd_neighbor_advert) == 24); __CTASSERT(sizeof(struct nd_redirect) == 40); __CTASSERT(sizeof(struct nd_opt_hdr) == 2); __CTASSERT(sizeof(struct nd_opt_route_info) == 8); __CTASSERT(sizeof(struct nd_opt_prefix_info) == 32); __CTASSERT(sizeof(struct nd_opt_rd_hdr) == 8); __CTASSERT(sizeof(struct nd_opt_mtu) == 8); __CTASSERT(sizeof(struct nd_opt_nonce) == 2 + ND_OPT_NONCE_LEN); __CTASSERT(sizeof(struct nd_opt_rdnss) == 8); __CTASSERT(sizeof(struct nd_opt_dnssl) == 8); __CTASSERT(sizeof(struct mld_hdr) == 24); __CTASSERT(sizeof(struct ni_reply_fqdn) == 8); __CTASSERT(sizeof(struct rr_pco_match) == 24); __CTASSERT(sizeof(struct rr_pco_use) == 32); __CTASSERT(sizeof(struct rr_result) == 24); extern struct domain inet6domain; percpu_t *icmp6stat_percpu; extern struct inpcbtable raw6cbtable; extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; /* * List of callbacks to notify when Path MTU changes are made. */ struct icmp6_mtudisc_callback { LIST_ENTRY(icmp6_mtudisc_callback) mc_list; void (*mc_func)(struct in6_addr *); }; LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks = LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks); static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL; extern int pmtu_expire; /* XXX do these values make any sense? */ static int icmp6_mtudisc_hiwat = 1280; static int icmp6_mtudisc_lowat = 256; /* * keep track of # of redirect routes. 
 */
static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;

/* XXX experimental, turned off */
static int icmp6_redirect_hiwat = -1;
static int icmp6_redirect_lowat = -1;

/* Protect mtudisc and redirect stuffs */
static kmutex_t icmp6_mtx __cacheline_aligned;

static bool icmp6_reflect_pmtu = false;

/* Forward declarations for the file-local helpers defined below. */
static void icmp6_errcount(u_int, int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static void icmp6_reflect(struct mbuf *, size_t);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag(char *, size_t, struct in6_addr *,
	struct in6_addr *, struct in6_addr *);
static void icmp6_redirect_input(struct mbuf *, int);
static struct mbuf *ni6_input(struct mbuf *, int);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
static int ni6_addrs(struct icmp6_nodeinfo *, struct ifnet **, char *,
	struct psref *);
static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
	struct ifnet *, int);
static int icmp6_notify_error(struct mbuf *, int, int, int);
static struct rtentry *icmp6_mtudisc_clone(struct sockaddr *);
static void icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
static void icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
static void sysctl_net_inet6_icmp6_setup(struct sysctllog **);

/* workqueue-based pr_input */
static struct wqinput *icmp6_wqinput;
static void _icmp6_input(struct mbuf *m, int off, int proto);

/*
 * Initialize the ICMPv6 subsystem: set up the sysctl tree, MLD, the
 * mutex protecting the mtudisc/redirect state, both route-timer
 * queues, the per-cpu statistics block, and the workqueue-based
 * input path.
 */
void
icmp6_init(void)
{
	sysctl_net_inet6_icmp6_setup(NULL);
	mld_init();

	mutex_init(&icmp6_mtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&icmp6_mtx);
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
	mutex_exit(&icmp6_mtx);

	icmp6stat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP6_NSTATS);

	icmp6_wqinput = wqinput_create("icmp6", _icmp6_input);
}

/*
 * Bump the error statistic for the given ICMPv6 type/code in the
 * histogram starting at "base" (input or output error history).
 * Type/code combinations without a dedicated bucket are counted
 * under ICMP6_ERRSTAT_UNKNOWN.
 */
static void
icmp6_errcount(u_int base, int type, int code)
{
	switch (type) {
	case ICMP6_DST_UNREACH:
		switch (code) {
		case ICMP6_DST_UNREACH_NOROUTE:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOROUTE);
			return;
		case ICMP6_DST_UNREACH_ADMIN:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADMIN);
			return;
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
			ICMP6_STATINC(base +
			    ICMP6_ERRSTAT_DST_UNREACH_BEYONDSCOPE);
			return;
		case ICMP6_DST_UNREACH_ADDR:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADDR);
			return;
		case ICMP6_DST_UNREACH_NOPORT:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOPORT);
			return;
		}
		break;
	case ICMP6_PACKET_TOO_BIG:
		ICMP6_STATINC(base + ICMP6_ERRSTAT_PACKET_TOO_BIG);
		return;
	case ICMP6_TIME_EXCEEDED:
		switch (code) {
		case ICMP6_TIME_EXCEED_TRANSIT:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_TIME_EXCEED_TRANSIT);
			return;
		case ICMP6_TIME_EXCEED_REASSEMBLY:
			ICMP6_STATINC(base +
			    ICMP6_ERRSTAT_TIME_EXCEED_REASSEMBLY);
			return;
		}
		break;
	case ICMP6_PARAM_PROB:
		switch (code) {
		case ICMP6_PARAMPROB_HEADER:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_HEADER);
			return;
		case ICMP6_PARAMPROB_NEXTHEADER:
			ICMP6_STATINC(base +
			    ICMP6_ERRSTAT_PARAMPROB_NEXTHEADER);
			return;
		case ICMP6_PARAMPROB_OPTION:
			ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_OPTION);
			return;
		}
		break;
	case ND_REDIRECT:
		ICMP6_STATINC(base + ICMP6_ERRSTAT_REDIRECT);
		return;
	}
	/* No dedicated counter for this type/code pair. */
	ICMP6_STATINC(base + ICMP6_ERRSTAT_UNKNOWN);
}

/*
 * Register a Path MTU Discovery callback.
 * Registering the same function twice is a no-op (the duplicate
 * entry is freed).
 */
void
icmp6_mtudisc_callback_register(void (*func)(struct in6_addr *))
{
	struct icmp6_mtudisc_callback *mc, *new;

	/*
	 * Allocate before taking the lock: KM_SLEEP may sleep, and we
	 * must not do that while holding icmp6_mtx.
	 */
	new = kmem_alloc(sizeof(*mc), KM_SLEEP);

	mutex_enter(&icmp6_mtx);
	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
	     mc = LIST_NEXT(mc, mc_list)) {
		if (mc->mc_func == func) {
			/* Already registered; discard the new entry. */
			mutex_exit(&icmp6_mtx);
			kmem_free(new, sizeof(*mc));
			return;
		}
	}

	new->mc_func = func;
	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, new, mc_list);
	mutex_exit(&icmp6_mtx);
}

/*
 * A wrapper function for icmp6_error() necessary when the erroneous packet
 * may not contain enough scope zone information.
 */
void
icmp6_error2(struct mbuf *m, int type, int code, int param,
    struct ifnet *ifp, struct in6_addr *src)
{
	struct ip6_hdr *ip6;

	KASSERT(ifp != NULL);

	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL)
			return;
	}

	ip6 = mtod(m, struct ip6_hdr *);

	/* Embed ifp's zone IDs into both addresses before erroring. */
	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
		goto out;
	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
		goto out;

	/* Report the scope-qualified source address back to the caller. */
	*src = ip6->ip6_src;

	icmp6_error(m, type, code, param);
	return;

out:
	m_freem(m);
}

/*
 * Generate an error packet of type error in response to bad IP6 packet.
 * Consumes m on every path (either handed to icmp6_reflect() or freed).
 */
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
	struct ip6_hdr *oip6, *nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	ICMP6_STATINC(ICMP6_STAT_ERROR);

	/* count per-type-code statistics */
	icmp6_errcount(ICMP6_STAT_OUTERRHIST, type, code);

	/* Never generate errors in response to decrypted (IPsec) input. */
	if (m->m_flags & M_DECRYPTED) {
		ICMP6_STATINC(ICMP6_STAT_CANTERROR);
		goto freeit;
	}

	if (M_UNWRITABLE(m, sizeof(struct ip6_hdr)) &&
	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL)
		return;
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If the destination address of the erroneous packet is a multicast
	 * address, or the packet was sent using link-layer multicast,
	 * we should basically suppress sending an error (RFC 2463, Section
	 * 2.4).
	 * We have two exceptions (the item e.2 in that section):
	 * - the Packet Too Big message can be sent for path MTU discovery.
	 * - the Parameter Problem Message that can be allowed an icmp6 error
	 *   in the option type field.  This check has been done in
	 *   ip6_unknown_opt(), so we can just check the type and code.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	    (type != ICMP6_PACKET_TOO_BIG &&
	    (type != ICMP6_PARAM_PROB ||
	    code != ICMP6_PARAMPROB_OPTION)))
		goto freeit;

	/*
	 * RFC 2463, 2.4 (e.5): source address check.
	 * XXX: the case of anycast source?
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
		goto freeit;

	/*
	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
	 * don't do it.
	 */
	nxt = -1;
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icp;

		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
		    sizeof(*icp));
		if (icp == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			return;
		}
		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
		    icp->icmp6_type == ND_REDIRECT) {
			/*
			 * ICMPv6 error
			 * Special case: for redirect (which is
			 * informational) we must not send icmp6 error.
			 */
			ICMP6_STATINC(ICMP6_STAT_CANTERROR);
			goto freeit;
		} else {
			/* ICMPv6 informational - send the error */
		}
	} else {
		/* non-ICMPv6 - send the error */
	}

	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
		ICMP6_STATINC(ICMP6_STAT_TOOFREQ);
		goto freeit;
	}

	/*
	 * OK, ICMP6 can be generated.
	 */

	/* Quote at most ICMPV6_PLD_MAXLEN bytes of the offending packet. */
	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_DONTWAIT);
	if (m && M_UNWRITABLE(m, preplen))
		m = m_pullup(m, preplen);
	if (m == NULL) {
		nd6log(LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__);
		return;
	}

	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src = oip6->ip6_src;
	nip6->ip6_dst = oip6->ip6_dst;

	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	/*
	 * icmp6_reflect() is designed to be in the input path.
	 * icmp6_error() can be called from both input and output path,
	 * and if we are in output path rcvif could contain bogus value.
	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
	 * information in ip header (nip6).
	 */
	m_reset_rcvif(m);

	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);

	/* header order: IPv6 - ICMPv6 */
	icmp6_reflect(m, sizeof(struct ip6_hdr));

	return;

freeit:
	/*
	 * If we can't tell whether or not we can generate ICMP6, free it.
	 */
	m_freem(m);
}

/*
 * Process a received ICMP6 message.
 * Runs from the "icmp6" workqueue (see icmp6_input() below).
 */
static void
_icmp6_input(struct mbuf *m, int off, int proto)
{
	struct mbuf *n;
	struct ip6_hdr *ip6, *nip6;
	struct icmp6_hdr *icmp6, *nicmp6;
	int icmp6len = m->m_pkthdr.len - off;
	int code, sum;
	struct ifnet *rcvif;
	struct psref psref;
	char ip6buf[INET6_ADDRSTRLEN], ip6buf2[INET6_ADDRSTRLEN];

	rcvif = m_get_rcvif_psref(m, &psref);
	if (__predict_false(rcvif == NULL))
		goto freeit;

#define ICMP6_MAXLEN (sizeof(*nip6) + sizeof(*nicmp6) + 4)
	KASSERT(ICMP6_MAXLEN < MCLBYTES);

	icmp6_ifstat_inc(rcvif, ifs6_in_msg);

	/*
	 * Locate icmp6 structure in mbuf, and check
	 * that not corrupted and of at least minimum length
	 */
	if (icmp6len < sizeof(struct icmp6_hdr)) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			icmp6_ifstat_inc(rcvif, ifs6_in_error);
			goto freeit;
		}
	}
	ip6 = mtod(m, struct ip6_hdr *);
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

	/*
	 * Enforce alignment requirements that are violated in
	 * some cases, see kern/50766 for details.
 */
	if (ACCESSIBLE_POINTER(icmp6, struct ip6_hdr) == 0) {
		/* Re-copy the headers into aligned storage. */
		m = m_copyup(m, off + sizeof(struct icmp6_hdr), 0);
		if (m == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			icmp6_ifstat_inc(rcvif, ifs6_in_error);
			goto freeit;
		}
		ip6 = mtod(m, struct ip6_hdr *);
		icmp6 = (struct icmp6_hdr *)(mtod(m, char *) + off);
	}
	KASSERT(ACCESSIBLE_POINTER(icmp6, struct ip6_hdr));

	/*
	 * calculate the checksum
	 */
	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
		nd6log(LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n",
		    icmp6->icmp6_type, sum, IN6_PRINT(ip6buf, &ip6->ip6_src));
		ICMP6_STATINC(ICMP6_STAT_CHECKSUM);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

#if defined(NFAITH) && 0 < NFAITH
	if (faithprefix(&ip6->ip6_dst)) {
		/*
		 * Deliver very specific ICMP6 type only.
		 * This is important to deliver TOOBIG.  Otherwise PMTUD
		 * will not work.
		 */
		switch (icmp6->icmp6_type) {
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
			break;
		default:
			goto freeit;
		}
	}
#endif

	code = icmp6->icmp6_code;
	ICMP6_STATINC(ICMP6_STAT_INHIST + icmp6->icmp6_type);

	/*
	 * Dispatch on message type.  Error types are mapped to a PRC_*
	 * code and funnelled through "deliver" to icmp6_notify_error();
	 * informational types are handled (or copied to helpers) inline.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		icmp6_ifstat_inc(rcvif, ifs6_in_dstunreach);
		switch (code) {
		case ICMP6_DST_UNREACH_NOROUTE:
			code = PRC_UNREACH_NET;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
			icmp6_ifstat_inc(rcvif, ifs6_in_adminprohib);
			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
			break;
		case ICMP6_DST_UNREACH_ADDR:
			code = PRC_HOSTDEAD;
			break;
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
			/* I mean "source address was incorrect." */
			code = PRC_UNREACH_NET;
			break;
		case ICMP6_DST_UNREACH_NOPORT:
			code = PRC_UNREACH_PORT;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_PACKET_TOO_BIG:
		icmp6_ifstat_inc(rcvif, ifs6_in_pkttoobig);

		/*
		 * MTU is checked in icmp6_mtudisc.
		 */
		code = PRC_MSGSIZE;

		/*
		 * Updating the path MTU will be done after examining
		 * intermediate extension headers.
		 */
		goto deliver;

	case ICMP6_TIME_EXCEEDED:
		icmp6_ifstat_inc(rcvif, ifs6_in_timeexceed);
		switch (code) {
		case ICMP6_TIME_EXCEED_TRANSIT:
			code = PRC_TIMXCEED_INTRANS;
			break;
		case ICMP6_TIME_EXCEED_REASSEMBLY:
			code = PRC_TIMXCEED_REASS;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_PARAM_PROB:
		icmp6_ifstat_inc(rcvif, ifs6_in_paramprob);
		switch (code) {
		case ICMP6_PARAMPROB_NEXTHEADER:
			code = PRC_UNREACH_PROTOCOL;
			break;
		case ICMP6_PARAMPROB_HEADER:
		case ICMP6_PARAMPROB_OPTION:
			code = PRC_PARAMPROB;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_ECHO_REQUEST:
		icmp6_ifstat_inc(rcvif, ifs6_in_echo);
		if (code != 0)
			goto badcode;
		/*
		 * Copy mbuf to send to two data paths: userland socket(s),
		 * and to the querier (echo reply).
		 * m: a copy for socket, n: a copy for querier
		 *
		 * If the first mbuf is shared, or the first mbuf is too short,
		 * copy the first part of the data into a fresh mbuf.
		 * Otherwise, we will wrongly overwrite both copies.
		 */
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* Give up local */
			n = m;
			m = NULL;
		} else if (M_UNWRITABLE(n, off + sizeof(struct icmp6_hdr))) {
			struct mbuf *n0 = n;

			/*
			 * Prepare an internal mbuf.  m_pullup() doesn't
			 * always copy the length we specified.
			 */
			if ((n = m_dup(n0, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
				/* Give up local */
				n = m;
				m = NULL;
			}
			m_freem(n0);
		}
		IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
		    sizeof(*nicmp6));
		if (nicmp6 == NULL)
			goto freeit;
		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
		nicmp6->icmp6_code = 0;
		if (n) {
			uint64_t *icmp6s = ICMP6_STAT_GETREF();
			icmp6s[ICMP6_STAT_REFLECT]++;
			icmp6s[ICMP6_STAT_OUTHIST + ICMP6_ECHO_REPLY]++;
			ICMP6_STAT_PUTREF();
			icmp6_reflect(n, off);
		}
		if (!m)
			goto freeit;
		break;

	case ICMP6_ECHO_REPLY:
		icmp6_ifstat_inc(rcvif, ifs6_in_echoreply);
		if (code != 0)
			goto badcode;
		break;

	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
		if (icmp6len < sizeof(struct mld_hdr))
			goto badlen;
		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
			icmp6_ifstat_inc(rcvif, ifs6_in_mldquery);
		else
			icmp6_ifstat_inc(rcvif, ifs6_in_mldreport);
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			mld_input(m, off);
			m = NULL;
			goto freeit;
		}
		mld_input(n, off);
		/* m stays. */
		break;

	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(rcvif, ifs6_in_mlddone);
		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
			goto badlen;
		break;		/* nothing to be done in kernel */

	case MLD_MTRACE_RESP:
	case MLD_MTRACE:
		/* XXX: these two are experimental.  not officially defined. */
		/* XXX: per-interface statistics? */
		break;		/* just pass it to applications */

	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
	    {
		enum { WRU, FQDN } mode;

		if (!icmp6_nodeinfo)
			break;

		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
			mode = WRU;
		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
			mode = FQDN;
		else
			goto badlen;

		if (mode == FQDN) {
			n = m_copypacket(m, M_DONTWAIT);
			if (n)
				n = ni6_input(n, off);
		} else {
			u_char *p;
			int maxhlen;

			/* WRU replies require both bits 0 and 2 enabled. */
			if ((icmp6_nodeinfo & 5) != 5)
				break;

			if (code != 0)
				goto badcode;
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n && ICMP6_MAXLEN > MHLEN) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL) {
				/* Give up remote */
				break;
			}
			m_reset_rcvif(n);
			n->m_len = 0;
			maxhlen = M_TRAILINGSPACE(n) - ICMP6_MAXLEN;
			if (maxhlen < 0) {
				m_free(n);
				break;
			}
			if (maxhlen > hostnamelen)
				maxhlen = hostnamelen;

			/*
			 * Copy IPv6 and ICMPv6 only.
			 */
			nip6 = mtod(n, struct ip6_hdr *);
			memcpy(nip6, ip6, sizeof(struct ip6_hdr));
			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
			memcpy(nicmp6, icmp6, sizeof(struct icmp6_hdr));
			p = (u_char *)(nicmp6 + 1);
			memset(p, 0, 4);

			/* meaningless TTL */
			memcpy(p + 4, hostname, maxhlen);

			m_copy_pkthdr(n, m);
			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
			    sizeof(struct icmp6_hdr) + 4 + maxhlen;
			nicmp6->icmp6_type = ICMP6_WRUREPLY;
			nicmp6->icmp6_code = 0;
		}
		if (n) {
			uint64_t *icmp6s = ICMP6_STAT_GETREF();
			icmp6s[ICMP6_STAT_REFLECT]++;
			icmp6s[ICMP6_STAT_OUTHIST + ICMP6_WRUREPLY]++;
			ICMP6_STAT_PUTREF();
			icmp6_reflect(n, sizeof(struct ip6_hdr));
		}
		break;
	    }

	case ICMP6_WRUREPLY:
		if (code != 0)
			goto badcode;
		break;

	case ND_ROUTER_SOLICIT:
		icmp6_ifstat_inc(rcvif, ifs6_in_routersolicit);
		/* FALLTHROUGH */
	case ND_ROUTER_ADVERT:
		if (icmp6->icmp6_type == ND_ROUTER_ADVERT)
			icmp6_ifstat_inc(rcvif, ifs6_in_routeradvert);
		if (code != 0)
			goto badcode;
		if ((icmp6->icmp6_type == ND_ROUTER_SOLICIT &&
		    icmp6len < sizeof(struct nd_router_solicit)) ||
		    (icmp6->icmp6_type == ND_ROUTER_ADVERT &&
		    icmp6len < sizeof(struct nd_router_advert)))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_rtr_cache(m, off, icmp6len, icmp6->icmp6_type);
			m = NULL;
			goto freeit;
		}
		nd6_rtr_cache(n, off, icmp6len, icmp6->icmp6_type);
		/* m stays. */
		break;

	case ND_NEIGHBOR_SOLICIT:
		icmp6_ifstat_inc(rcvif, ifs6_in_neighborsolicit);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_solicit))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_ns_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_ns_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_NEIGHBOR_ADVERT:
		icmp6_ifstat_inc(rcvif, ifs6_in_neighboradvert);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_advert))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_na_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_na_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_REDIRECT:
		icmp6_ifstat_inc(rcvif, ifs6_in_redirect);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_redirect))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			icmp6_redirect_input(m, off);
			m = NULL;
			goto freeit;
		}
		icmp6_redirect_input(n, off);
		/* m stays. */
		break;

	case ICMP6_ROUTER_RENUMBERING:
		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
			goto badcode;
		if (icmp6len < sizeof(struct icmp6_router_renum))
			goto badlen;
		break;

	default:
		nd6log(LOG_DEBUG,
		    "unknown type %d(src=%s, dst=%s, ifid=%d)\n",
		    icmp6->icmp6_type,
		    IN6_PRINT(ip6buf, &ip6->ip6_src),
		    IN6_PRINT(ip6buf2, &ip6->ip6_dst),
		    rcvif ? rcvif->if_index : 0);
		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
			/* ICMPv6 error: MUST deliver it by spec... */
			code = PRC_NCMDS;
			/* deliver */
		} else {
			/* ICMPv6 informational: MUST not deliver */
			break;
		}
	deliver:
		if (icmp6_notify_error(m, off, icmp6len, code)) {
			/* In this case, m should've been freed. */
			m_put_rcvif_psref(rcvif, &psref);
			return;
		}
		break;

	badcode:
		ICMP6_STATINC(ICMP6_STAT_BADCODE);
		break;

	badlen:
		ICMP6_STATINC(ICMP6_STAT_BADLEN);
		break;
	}

	m_put_rcvif_psref(rcvif, &psref);

	/* deliver the packet to appropriate sockets */
	icmp6_rip6_input(&m, off);

	return;

freeit:
	m_put_rcvif_psref(rcvif, &psref);
	m_freem(m);
	return;
}

/*
 * pr_input entry point: defer the real work to the per-cpu "icmp6"
 * workqueue (see _icmp6_input()).
 */
int
icmp6_input(struct mbuf **mp, int *offp, int proto)
{

	wqinput_input(icmp6_wqinput, *mp, *offp, proto);

	return IPPROTO_DONE;
}

/*
 * Walk the extension-header chain of the packet quoted inside an
 * ICMPv6 error, locate the final protocol, and hand the error to that
 * protocol's pr_ctlinput.  Returns 0 on success; returns -1 after
 * freeing (or with) a bad mbuf.
 */
static int
icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
{
	struct icmp6_hdr *icmp6;
	struct ip6_hdr *eip6;
	u_int32_t notifymtu;
	struct sockaddr_in6 icmp6src, icmp6dst;

	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		goto freeit;
	}
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
	    sizeof(*icmp6) + sizeof(struct ip6_hdr));
	if (icmp6 == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		return (-1);
	}
	eip6 = (struct ip6_hdr *)(icmp6 + 1);

	/* Detect the upper level protocol */
	{
		void *(*ctlfunc)(int, const struct sockaddr *, void *);
		u_int8_t nxt = eip6->ip6_nxt;
		int eoff = off + sizeof(struct icmp6_hdr) +
		    sizeof(struct ip6_hdr);
		struct ip6ctlparam ip6cp;
		struct in6_addr *finaldst = NULL;
		int icmp6type = icmp6->icmp6_type;
		struct ip6_frag *fh;
		struct ip6_rthdr *rth;
		struct ifnet *rcvif;
		int s;

		while (1) { /* XXX: should avoid infinite loop explicitly? */
			struct ip6_ext *eh;

			switch (nxt) {
			case IPPROTO_HOPOPTS:
			case IPPROTO_DSTOPTS:
			case IPPROTO_AH:
				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
				    eoff, sizeof(*eh));
				if (eh == NULL) {
					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
					return (-1);
				}

				/* AH length is in 4-byte units, others in 8. */
				if (nxt == IPPROTO_AH)
					eoff += (eh->ip6e_len + 2) << 2;
				else
					eoff += (eh->ip6e_len + 1) << 3;
				nxt = eh->ip6e_nxt;
				break;

			case IPPROTO_ROUTING:
				/*
				 * Ignore the option.
				 */
				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
				    eoff, sizeof(*rth));
				if (rth == NULL) {
					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
					return (-1);
				}
				eoff += (rth->ip6r_len + 1) << 3;
				nxt = rth->ip6r_nxt;
				break;

			case IPPROTO_FRAGMENT:
				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
				    eoff, sizeof(*fh));
				if (fh == NULL) {
					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
					return (-1);
				}
				/*
				 * Data after a fragment header is meaningless
				 * unless it is the first fragment, but
				 * we'll go to the notify label for path MTU
				 * discovery.
				 */
				if (fh->ip6f_offlg & IP6F_OFF_MASK)
					goto notify;

				eoff += sizeof(struct ip6_frag);
				nxt = fh->ip6f_nxt;
				break;

			default:
				/*
				 * This case includes ESP and the No Next
				 * Header.  In such cases going to the notify
				 * label does not have any meaning
				 * (i.e. ctlfunc will be NULL), but we go
				 * anyway since we might have to update
				 * path MTU information.
				 */
				goto notify;
			}
		}

	notify:
		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
		if (icmp6 == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			return (-1);
		}

		/*
		 * retrieve parameters from the inner IPv6 header, and convert
		 * them into sockaddr structures.
		 * XXX: there is no guarantee that the source or destination
		 * addresses of the inner packet are in the same scope zone as
		 * the addresses of the icmp packet.  But there is no other
		 * way to determine the zone.
		 */
		eip6 = (struct ip6_hdr *)(icmp6 + 1);

		rcvif = m_get_rcvif(m, &s);
		if (__predict_false(rcvif == NULL))
			goto freeit;
		sockaddr_in6_init(&icmp6dst,
		    (finaldst == NULL) ?
		    &eip6->ip6_dst : finaldst, 0, 0, 0);
		if (in6_setscope(&icmp6dst.sin6_addr, rcvif, NULL)) {
			m_put_rcvif(rcvif, &s);
			goto freeit;
		}
		sockaddr_in6_init(&icmp6src, &eip6->ip6_src, 0, 0, 0);
		if (in6_setscope(&icmp6src.sin6_addr, rcvif, NULL)) {
			m_put_rcvif(rcvif, &s);
			goto freeit;
		}
		m_put_rcvif(rcvif, &s);
		icmp6src.sin6_flowinfo =
		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);

		if (finaldst == NULL)
			finaldst = &eip6->ip6_dst;
		ip6cp.ip6c_m = m;
		ip6cp.ip6c_icmp6 = icmp6;
		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
		ip6cp.ip6c_off = eoff;
		ip6cp.ip6c_finaldst = finaldst;
		ip6cp.ip6c_src = &icmp6src;
		ip6cp.ip6c_nxt = nxt;

		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
			notifymtu = ntohl(icmp6->icmp6_mtu);
			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
		}

		ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
		if (ctlfunc) {
			(void)(*ctlfunc)(code, sin6tosa(&icmp6dst), &ip6cp);
		}
	}
	return (0);

freeit:
	m_freem(m);
	return (-1);
}

/*
 * Record a Path MTU reported by an ICMPv6 Packet Too Big message:
 * clone a host route for the destination, lower its MTU, and run the
 * registered mtudisc callbacks.  "validated" relaxes/tightens the
 * high/low-water admission checks on the mtudisc route-timer queue.
 */
void
icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
{
	unsigned long rtcount;
	struct icmp6_mtudisc_callback *mc;
	struct in6_addr *dst = ip6cp->ip6c_finaldst;
	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
	u_int mtu = ntohl(icmp6->icmp6_mtu);
	struct rtentry *rt = NULL;
	struct sockaddr_in6 sin6;
	struct ifnet *rcvif;
	int s;

	/*
	 * The MTU should not be less than the minimal IPv6 MTU except for the
	 * hack in ip6_output/ip6_setpmtu where we always include a frag header.
	 * In that one case, the MTU might be less than 1280.
	 */
	if (__predict_false(mtu < IPV6_MMTU - sizeof(struct ip6_frag))) {
		/* is the mtu even sane? */
		if (mtu < sizeof(struct ip6_hdr) +
		    sizeof(struct ip6_frag) + 8)
			return;
		if (!validated)
			return;
		mtu = IPV6_MMTU - sizeof(struct ip6_frag);
	}

	/*
	 * allow non-validated cases if memory is plenty, to make traffic
	 * from non-connected pcb happy.
	 */
	mutex_enter(&icmp6_mtx);
	rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
	if (validated) {
		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat) {
			mutex_exit(&icmp6_mtx);
			return;
		} else if (0 <= icmp6_mtudisc_lowat &&
		    rtcount > icmp6_mtudisc_lowat) {
			/*
			 * XXX nuke a victim, install the new one.
			 */
		}
	} else {
		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat) {
			mutex_exit(&icmp6_mtx);
			return;
		}
	}
	mutex_exit(&icmp6_mtx);

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = PF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_addr = *dst;

	rcvif = m_get_rcvif(m, &s);
	if (__predict_false(rcvif == NULL))
		return;
	if (in6_setscope(&sin6.sin6_addr, rcvif, NULL)) {
		m_put_rcvif(rcvif, &s);
		return;
	}
	m_put_rcvif(rcvif, &s);

	rt = icmp6_mtudisc_clone(sin6tosa(&sin6));

	/* Shrink the route's MTU only (never grow it, never below rt lock). */
	if (rt && (rt->rt_flags & RTF_HOST) &&
	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
		if (mtu < rt->rt_ifp->if_mtu) {
			ICMP6_STATINC(ICMP6_STAT_PMTUCHG);
			rt->rt_rmx.rmx_mtu = mtu;
		}
	}
	if (rt) {
		rt_unref(rt);
	}

	/*
	 * Notify protocols that the MTU for this destination
	 * has changed.
	 */
	mutex_enter(&icmp6_mtx);
	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
	     mc = LIST_NEXT(mc, mc_list))
		(*mc->mc_func)(&sin6.sin6_addr);
	mutex_exit(&icmp6_mtx);
}

/*
 * Process a Node Information Query packet, based on
 * draft-ietf-ipngwg-icmp-name-lookups-07.
 *
 * Spec incompatibilities:
 * - IPv6 Subject address handling
 * - IPv4 Subject address handling support missing
 * - Proxy reply (answer even if it's not for me)
 * - joins NI group address at in6_ifattach() time only, does not cope
 *   with hostname changes by sethostname(3)
 */
static struct mbuf *
ni6_input(struct mbuf *m, int off)
{
	struct icmp6_nodeinfo *ni6, *nni6;
	struct mbuf *n = NULL;
	u_int16_t qtype;
	int subjlen;
	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
	struct ni_reply_fqdn *fqdn;
	int addrs;		/* for NI_QTYPE_NODEADDR */
	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
	struct sockaddr_in6 sin6; /* ip6_dst */
	struct in6_addr in6_subj; /* subject address */
	struct ip6_hdr *ip6;
	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
	char *subj = NULL;
	struct ifnet *rcvif;
	int s, ss;
	struct ifaddr *ifa;
	struct psref psref;

	ip6 = mtod(m, struct ip6_hdr *);
	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
	if (ni6 == NULL) {
		/* m is already reclaimed */
		return NULL;
	}
	KASSERT((m->m_flags & M_PKTHDR) != 0);

	/*
	 * Validate IPv6 destination address.
	 *
	 * The Responder must discard the Query without further processing
	 * unless it is one of the Responder's unicast or anycast addresses, or
	 * a link-local scope multicast address which the Responder has joined.
	 * [icmp-name-lookups-07, Section 4.]
	 */
	sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);
	/* XXX scopeid */
	ss = pserialize_read_enter();
	ifa = ifa_ifwithaddr(sin6tosa(&sin6));
	if (ifa != NULL) {
		; /* unicast/anycast, fine */
	} else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) {
		; /* link-local multicast, fine */
	} else {
		pserialize_read_exit(ss);
		goto bad;
	}
	pserialize_read_exit(ss);

	/* validate query Subject field. */
	qtype = ntohs(ni6->ni_qtype);
	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
	switch (qtype) {
	case NI_QTYPE_NOOP:
	case NI_QTYPE_SUPTYPES:
		/* 07 draft */
		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
			break;
		/* FALLTHROUGH */
	case NI_QTYPE_FQDN:
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
		switch (ni6->ni_code) {
		case ICMP6_NI_SUBJ_IPV6:
#if ICMP6_NI_SUBJ_IPV6 != 0
		case 0:
#endif
			/*
			 * backward compatibility - try to accept 03 draft
			 * format, where no Subject is present.
			 */
			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
			    subjlen == 0) {
				oldfqdn++;
				break;
			}
#if ICMP6_NI_SUBJ_IPV6 != 0
			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
				goto bad;
#endif

			if (subjlen != sizeof(sin6.sin6_addr))
				goto bad;

			/*
			 * Validate Subject address.
			 *
			 * Not sure what exactly "address belongs to the node"
			 * means in the spec, is it just unicast, or what?
			 *
			 * At this moment we consider Subject address as
			 * "belong to the node" if the Subject address equals
			 * to the IPv6 destination address; validation for
			 * IPv6 destination address should have done enough
			 * check for us.
			 *
			 * We do not do proxy at this moment.
			 */
			/* m_pulldown instead of copy? */
			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
			    subjlen, (void *)&in6_subj);
			rcvif = m_get_rcvif(m, &s);
			if (__predict_false(rcvif == NULL))
				goto bad;
			if (in6_setscope(&in6_subj, rcvif, NULL)) {
				m_put_rcvif(rcvif, &s);
				goto bad;
			}
			m_put_rcvif(rcvif, &s);

			subj = (char *)&in6_subj;
			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
				break;

			/*
			 * XXX if we are to allow other cases, we should really
			 * be careful about scope here.
			 * basically, we should disallow queries toward IPv6
			 * destination X with subject Y, if scope(X) > scope(Y).
			 * if we allow scope(X) > scope(Y), it will result in
			 * information leakage across scope boundary.
			 */
			goto bad;

		case ICMP6_NI_SUBJ_FQDN:
			/*
			 * Validate Subject name with gethostname(3).
			 *
			 * The behavior may need some debate, since:
			 * - we are not sure if the node has FQDN as
			 *   hostname (returned by gethostname(3)).
			 * - the code does wildcard match for truncated names.
			 *   however, we are not sure if we want to perform
			 *   wildcard match, if gethostname(3) side has
			 *   truncated hostname.
			 */
			n = ni6_nametodns(hostname, hostnamelen, 0);
			if (!n || n->m_next || n->m_len == 0)
				goto bad;
			IP6_EXTHDR_GET(subj, char *, m,
			    off + sizeof(struct icmp6_nodeinfo), subjlen);
			if (subj == NULL)
				goto bad;
			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
			    n->m_len)) {
				goto bad;
			}
			m_freem(n);
			n = NULL;
			break;

		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
		default:
			goto bad;
		}
		break;
	}

	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
	switch (qtype) {
	case NI_QTYPE_FQDN:
		if ((icmp6_nodeinfo & 1) == 0)
			goto bad;
		break;
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
		if ((icmp6_nodeinfo & 2) == 0)
			goto bad;
		break;
	}

	/* guess reply length */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		break;		/* no reply data */
	case NI_QTYPE_SUPTYPES:
		replylen += sizeof(u_int32_t);
		break;
	case NI_QTYPE_FQDN:
		/* will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		break;
	case NI_QTYPE_NODEADDR:
		addrs = ni6_addrs(ni6, &ifp, subj, &psref);
		replylen += addrs * (sizeof(struct in6_addr) +
		    sizeof(u_int32_t));
		if (replylen > MCLBYTES)
			replylen = MCLBYTES; /* XXX: will truncate pkt later */
		break;
	case NI_QTYPE_IPV4ADDR:
		/* unsupported - should respond with unknown Qtype? */
		goto bad;
	default:
		/*
		 * XXX: We must return a reply with the ICMP6 code
		 * `unknown Qtype' in this case.  However we regard the case
		 * as an FQDN query for backward compatibility.
		 * Older versions set a random value to this field,
		 * so it rarely varies in the defined qtypes.
		 * But the mechanism is not reliable...
		 * maybe we should obsolete older versions.
		 */
		qtype = NI_QTYPE_FQDN;
		/* will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		oldfqdn++;
		break;
	}

	/* allocate an mbuf to reply. */
	MGETHDR(n, M_DONTWAIT, m->m_type);
	if (n == NULL) {
		goto bad;
	}
	m_move_pkthdr(n, m);
	if (replylen > MHLEN) {
		if (replylen > MCLBYTES) {
			/*
			 * XXX: should we try to allocate more? But MCLBYTES
			 * is probably much larger than IPV6_MMTU...
			 */
			goto bad;
		}
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			goto bad;
		}
	}
	n->m_pkthdr.len = n->m_len = replylen;

	/* copy mbuf header and IPv6 + Node Information base headers */
	bcopy(mtod(m, void *), mtod(n, void *), sizeof(struct ip6_hdr));
	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
	bcopy((void *)ni6, (void *)nni6, sizeof(struct icmp6_nodeinfo));

	/* qtype dependent procedure */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = 0;
		break;
	case NI_QTYPE_SUPTYPES:
	    {
		u_int32_t v;

		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
		v = (u_int32_t)htonl(0x0000000f);
		memcpy(nni6 + 1, &v, sizeof(u_int32_t));
		break;
	    }
	case NI_QTYPE_FQDN:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		fqdn = (struct ni_reply_fqdn *)(mtod(n, char *) +
		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
		nni6->ni_flags = 0; /* XXX: meaningless TTL */
		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
		/*
		 * XXX do we really have FQDN in variable "hostname"?
		 */
		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
		if (n->m_next == NULL)
			goto bad;
		/* XXX we assume that n->m_next is not a chain */
		if (n->m_next->m_next != NULL)
			goto bad;
		n->m_pkthdr.len += n->m_next->m_len;
		break;
	case NI_QTYPE_NODEADDR:
	    {
		int lenlim, copied;

		nni6->ni_code = ICMP6_NI_SUCCESS;
		n->m_pkthdr.len = n->m_len =
		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
		lenlim = M_TRAILINGSPACE(n);
		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
		if_put(ifp, &psref);
		ifp = NULL;
		/* update mbuf length */
		n->m_pkthdr.len = n->m_len =
		    sizeof(struct ip6_hdr) +
		    sizeof(struct icmp6_nodeinfo) + copied;
		break;
	    }
	default:
		panic("%s: impossible", __func__);
		break;
	}

	nni6->ni_type = ICMP6_NI_REPLY;
	m_freem(m);
	return n;

bad:
	if_put(ifp, &psref);
	m_freem(m);
	if (n)
		m_freem(n);
	return NULL;
}

/*
 * Local ASCII-only ctype helpers; avoid locale-dependent <ctype.h>
 * semantics inside the kernel.
 */
#define isupper(x) ('A' <= (x) && (x) <= 'Z')
#define isalpha(x) (('A' <= (x) && (x) <= 'Z') || ('a' <= (x) && (x) <= 'z'))
#define isalnum(x) (isalpha(x) || ('0' <= (x) && (x) <= '9'))
#define tolower(x) (isupper(x) ? (x) + 'a' - 'A' : (x))

/*
 * make a mbuf with DNS-encoded string.  no compression support.
 *
 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
 * treated as truncated name (two \0 at the end).  this is a wild guess.
 *
 * old - return pascal string if non-zero
 */
static struct mbuf *
ni6_nametodns(const char *name, int namelen, int old)
{
	struct mbuf *m;
	char *cp, *ep;	/* write cursor and end of usable space in m */
	const char *p, *q;
	int i, len, nterm;

	/*
	 * "old" selects the pre-RFC pascal-string encoding: one length
	 * byte followed by the raw name.  Otherwise DNS wire encoding
	 * (length-prefixed labels) is produced.
	 */
	if (old)
		len = namelen + 1;
	else
		len = MCLBYTES;

	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
	MGET(m, M_DONTWAIT, MT_DATA);
	if (m && len > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0)
			goto fail;
	}
	if (!m)
		goto fail;
	m->m_next = NULL;

	if (old) {
		/* pascal string: length byte + name, no termination */
		m->m_len = len;
		*mtod(m, char *) = namelen;
		memcpy(mtod(m, char *) + 1, name, namelen);
		return m;
	} else {
		m->m_len = 0;
		cp = mtod(m, char *);
		ep = mtod(m, char *) + M_TRAILINGSPACE(m);

		/* if not certain about my name, return empty buffer */
		if (namelen == 0)
			return m;

		/*
		 * guess if it looks like shortened hostname, or FQDN.
		 * shortened hostname needs two trailing "\0".
		 */
		i = 0;
		for (p = name; p < name + namelen; p++) {
			if (*p == '.')
				i++;
		}
		if (i < 2)
			nterm = 2;
		else
			nterm = 1;

		/* encode each dot-separated component as one DNS label */
		p = name;
		while (cp < ep && p < name + namelen) {
			i = 0;
			for (q = p; q < name + namelen && *q && *q != '.'; q++)
				i++;
			/* result does not fit into mbuf */
			if (cp + i + 1 >= ep)
				goto fail;
			/*
			 * DNS label length restriction, RFC1035 page 8.
			 * "i == 0" case is included here to avoid returning
			 * 0-length label on "foo..bar".
			 */
			if (i <= 0 || i >= 64)
				goto fail;
			*cp++ = i;
			/*
			 * label must start with a letter and end with an
			 * alphanumeric (classic RFC1035 hostname syntax)
			 */
			if (!isalpha(p[0]) || !isalnum(p[i - 1]))
				goto fail;
			while (i > 0) {
				if (!isalnum(*p) && *p != '-')
					goto fail;
				if (isupper(*p)) {
					/* canonicalize to lowercase */
					*cp++ = tolower(*p);
					p++;
				} else
					*cp++ = *p++;
				i--;
			}
			p = q;
			if (p < name + namelen && *p == '.')
				p++;
		}
		/* termination: 1 NUL for FQDN, 2 NULs for truncated name */
		if (cp + nterm >= ep)
			goto fail;
		while (nterm-- > 0)
			*cp++ = '\0';
		m->m_len = cp - mtod(m, char *);
		return m;
	}

	panic("should not reach here");
	/* NOTREACHED */

 fail:
	if (m)
		m_freem(m);
	return NULL;
}

/*
 * check if two DNS-encoded string matches.  takes care of truncated
 * form (with \0\0 at the end).  no compression support.
* XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && memcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (memcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct ifnet **ifpp, char *subj, struct psref *psref) { struct ifnet *ifp; struct in6_ifaddr *ia6; struct ifaddr *ifa; struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */ int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; int s; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... */ return 0; subj_ip6 = (struct sockaddr_in6 *)subj; break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. 
*/ return 0; } } s = pserialize_read_enter(); IFNET_READER_FOREACH(ifp) { addrsofif = 0; IFADDR_READER_FOREACH(ifa, ifp) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr, &ia6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ addrsofif++; /* count the address */ } if (iffound) { if_acquire(ifp, psref); pserialize_read_exit(s); *ifpp = ifp; return addrsofif; } addrs += addrsofif; } pserialize_read_exit(s); return addrs; } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ia6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; int s; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return 0; /* needless to copy */ s = pserialize_read_enter(); ifp = ifp0 ? 
ifp0 : IFNET_READER_FIRST(); again: for (; ifp; ifp = IFNET_READER_NEXT(ifp)) { IFADDR_READER_FOREACH(ifa, ifp) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia6 = (struct in6_ifaddr *)ifa; if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; goto out; } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. * * TTL must be 2^31 > TTL >= 0. 
*/ if (ia6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ia6->ia6_lifetime.ia6t_expire > time_uptime) ltime = ia6->ia6_lifetime.ia6t_expire - time_uptime; else ltime = 0; } if (ltime > 0x7fffffff) ltime = 0x7fffffff; ltime = htonl(ltime); memcpy(cp, <ime, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ia6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } out: pserialize_read_exit(s); return copied; } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb_hdr *inph; struct in6pcb *in6p; struct in6pcb *last = NULL; struct sockaddr_in6 rip6src; struct icmp6_hdr *icmp6; struct mbuf *n, *opts = NULL; IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return IPPROTO_DONE; } /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. 
*/ sockaddr_in6_init(&rip6src, &ip6->ip6_src, 0, 0, 0); if (sa6_recoverscope(&rip6src)) { m_freem(m); return IPPROTO_DONE; } TAILQ_FOREACH(inph, &raw6cbtable.inpt_queue, inph_queue) { in6p = (struct in6pcb *)inph; if (in6p->in6p_af != AF_INET6) continue; if (in6p->in6p_ip6.ip6_nxt != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (in6p->in6p_icmp6filt && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) continue; if (last == NULL) { ; } #ifdef IPSEC else if (ipsec_used && ipsec_in_reject(m, last)) { /* do not inject data into pcb */ } #endif else if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, &opts, ip6, n); /* strip intermediate headers */ m_adj(n, off); if (sbappendaddr(&last->in6p_socket->so_rcv, sin6tosa(&rip6src), n, opts) == 0) { soroverflow(last->in6p_socket); m_freem(n); if (opts) m_freem(opts); } else { sorwakeup(last->in6p_socket); } opts = NULL; } last = in6p; } #ifdef IPSEC if (ipsec_used && last && ipsec_in_reject(m, last)) { m_freem(m); IP6_STATDEC(IP6_STAT_DELIVERED); /* do not inject data into pcb */ } else #endif if (last) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, &opts, ip6, m); /* strip intermediate headers */ m_adj(m, off); if (sbappendaddr(&last->in6p_socket->so_rcv, sin6tosa(&rip6src), m, opts) == 0) { soroverflow(last->in6p_socket); m_freem(m); if (opts) m_freem(opts); } else { sorwakeup(last->in6p_socket); } } else { m_freem(m); IP6_STATDEC(IP6_STAT_DELIVERED); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. 
 *
 * Note: RFC 1885 required that an echo reply should be truncated if it
 * did not fit in with (return) path MTU, and KAME code supported the
 * behavior.  However, as a clarification after the RFC, this limitation
 * was removed in a revised version of the spec, RFC 2463.  We had kept the
 * old behavior, with a (non-default) ifdef block, while the new version of
 * the spec was an internet-draft status, and even after the new RFC was
 * published.  But it would rather make sense to clean the obsoleted part
 * up, and to make the code simpler at this stage.
 *
 * Consumes m on all paths (either transmitted via ip6_output() or freed).
 */
static void
icmp6_reflect(struct mbuf *m, size_t off)
{
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp6;
	const struct in6_ifaddr *ia;
	const struct ip6aux *ip6a;
	int plen;
	int type, code;
	struct ifnet *outif = NULL;
	struct in6_addr origdst;
	struct ifnet *rcvif;
	int s;
	bool ip6_src_filled = false;
	int flags;

	/* too short to reflect */
	if (off < sizeof(struct ip6_hdr)) {
		nd6log(LOG_DEBUG,
		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
		    __FILE__, __LINE__);
		goto bad;
	}

	/*
	 * If there are extra headers between IPv6 and ICMPv6, strip
	 * off that header first.
	 */
	CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
	if (off > sizeof(struct ip6_hdr)) {
		size_t l;
		struct ip6_hdr nip6;

		/* save the IPv6 header, drop the extension headers,
		 * then write the saved header back in front */
		l = off - sizeof(struct ip6_hdr);
		m_copydata(m, 0, sizeof(nip6), (void *)&nip6);
		m_adj(m, l);
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
		memcpy(mtod(m, void *), (void *)&nip6, sizeof(nip6));
	} else /* off == sizeof(struct ip6_hdr) */ {
		size_t l;
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
	}
	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
	type = icmp6->icmp6_type; /* keep type for statistics */
	code = icmp6->icmp6_code; /* ditto. */

	origdst = ip6->ip6_dst;
	/*
	 * ip6_input() drops a packet if its src is multicast.
	 * So, the src is never multicast.
	 */
	ip6->ip6_dst = ip6->ip6_src;

	/*
	 * If the incoming packet was addressed directly to us (i.e. unicast),
	 * use dst as the src for the reply.
	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
	 * (for example) when we encounter an error while forwarding procedure
	 * destined to a duplicated address of ours.
	 * Note that ip6_getdstifaddr() may fail if we are in an error handling
	 * procedure of an outgoing packet of our own, in which case we need
	 * to search in the ifaddr list.
	 */
	if (IN6_IS_ADDR_MULTICAST(&origdst)) {
		;
	} else if ((ip6a = ip6_getdstifaddr(m)) != NULL) {
		if ((ip6a->ip6a_flags &
		    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
			ip6->ip6_src = ip6a->ip6a_src;
			ip6_src_filled = true;
		}
	} else {
		union {
			struct sockaddr_in6 sin6;
			struct sockaddr sa;
		} u;
		int _s;
		struct ifaddr *ifa;

		sockaddr_in6_init(&u.sin6, &origdst, 0, 0, 0);

		_s = pserialize_read_enter();
		ifa = ifa_ifwithaddr(&u.sa);
		if (ifa != NULL) {
			ia = ifatoia6(ifa);
			if ((ia->ia6_flags &
			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
				ip6->ip6_src = ia->ia_addr.sin6_addr;
				ip6_src_filled = true;
			}
		}
		pserialize_read_exit(_s);
	}

	if (!ip6_src_filled) {
		int e;
		struct sockaddr_in6 sin6;
		struct route ro;

		/*
		 * This case matches to multicasts, our anycast, or unicasts
		 * that we do not own.  Select a source address based on the
		 * source address of the erroneous packet.
		 */
		/* zone ID should be embedded */
		sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);

		memset(&ro, 0, sizeof(ro));
		e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, NULL, NULL,
		    &ip6->ip6_src);
		rtcache_free(&ro);
		if (e != 0) {
			char ip6buf[INET6_ADDRSTRLEN];
			nd6log(LOG_DEBUG,
			    "source can't be determined: "
			    "dst=%s, error=%d\n",
			    IN6_PRINT(ip6buf, &sin6.sin6_addr), e);
			goto bad;
		}
	}

	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	rcvif = m_get_rcvif(m, &s);
	if (rcvif) {
		/* XXX: This may not be the outgoing interface */
		ip6->ip6_hlim = ND_IFINFO(rcvif)->chlim;
	} else {
		ip6->ip6_hlim = ip6_defhlim;
	}
	m_put_rcvif(rcvif, &s);

	/* recompute the ICMPv6 checksum over the rewritten packet */
	m->m_pkthdr.csum_flags = 0;
	icmp6->icmp6_cksum = 0;
	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), plen);

	/*
	 * XXX option handling
	 */
	m->m_flags &= ~(M_BCAST|M_MCAST);

	/*
	 * Note for icmp6_reflect_pmtu == false
	 * To avoid a "too big" situation at an intermediate router
	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
	 * Note that only echo and node information replies are affected,
	 * since the length of ICMP6 errors is limited to the minimum MTU.
	 */
	flags = icmp6_reflect_pmtu ? 0 : IPV6_MINMTU;
	if (ip6_output(m, NULL, NULL, flags, NULL, NULL, &outif) != 0 &&
	    outif)
		icmp6_ifstat_inc(outif, ifs6_out_error);
	if (outif)
		icmp6_ifoutstat_inc(outif, type, code);

	return;

 bad:
	m_freem(m);
	return;
}

/*
 * Format a "(src=... dst=... tgt=...)" diagnostic string for redirect
 * log messages into buf; returns buf.
 */
static const char *
icmp6_redirect_diag(char *buf, size_t buflen, struct in6_addr *src6,
    struct in6_addr *dst6, struct in6_addr *tgt6)
{
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
	char ip6buft[INET6_ADDRSTRLEN];

	snprintf(buf, buflen, "(src=%s dst=%s tgt=%s)",
	    IN6_PRINT(ip6bufs, src6), IN6_PRINT(ip6bufd, dst6),
	    IN6_PRINT(ip6buft, tgt6));
	return buf;
}

/*
 * Process a received ND Redirect: validate it per RFC 2461 8.1/8.3,
 * update the neighbor cache, optionally install a dynamic host route,
 * and notify sockets via pfctlinput().  Consumes m.
 */
static void
icmp6_redirect_input(struct mbuf *m, int off)
{
	struct ifnet *ifp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct nd_redirect *nd_rd;
	int icmp6len = m->m_pkthdr.len - off;
	char *lladdr = NULL;
	int lladdrlen = 0;
	struct rtentry *rt = NULL;
	int is_router;
	int is_onlink;
	struct in6_addr src6 = ip6->ip6_src;
	struct in6_addr redtgt6;
	struct in6_addr reddst6;
	union nd_opts ndopts;
	struct psref psref;
	char ip6buf[INET6_ADDRSTRLEN];
	char diagbuf[256];

	ifp = m_get_rcvif_psref(m, &psref);
	if (ifp == NULL)
		goto freeit;

	/* XXX if we are router, we don't update route by icmp6 redirect */
	if (ip6_forwarding)
		goto freeit;
	if (!icmp6_rediraccept)
		goto freeit;

	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
	if (nd_rd == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		m_put_rcvif_psref(ifp, &psref);
		return;
	}
	redtgt6 = nd_rd->nd_rd_target;
	reddst6 = nd_rd->nd_rd_dst;

	if (in6_setscope(&redtgt6, ifp, NULL) ||
	    in6_setscope(&reddst6, ifp, NULL)) {
		goto freeit;
	}

	/* validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
		nd6log(LOG_ERR, "ICMP6 redirect sent from %s rejected; "
		    "must be from linklocal\n", IN6_PRINT(ip6buf, &src6));
		goto bad;
	}
	if (ip6->ip6_hlim != 255) {
		nd6log(LOG_ERR, "ICMP6 redirect sent from %s rejected; "
		    "hlim=%d (must be 255)\n",
		    IN6_PRINT(ip6buf, &src6), ip6->ip6_hlim);
		goto bad;
	}
    {
	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
	struct sockaddr_in6 sin6;
	struct in6_addr *gw6;

	sockaddr_in6_init(&sin6, &reddst6, 0, 0, 0);
	rt = rtalloc1(sin6tosa(&sin6), 0);
	if (rt) {
		if (rt->rt_gateway == NULL ||
		    rt->rt_gateway->sa_family != AF_INET6) {
			nd6log(LOG_ERR, "ICMP6 redirect rejected; no route "
			    "with inet6 gateway found for redirect dst: %s\n",
			    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
			    &src6, &reddst6, &redtgt6));
			rt_unref(rt);
			goto bad;
		}

		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
		if (memcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
			nd6log(LOG_ERR, "ICMP6 redirect rejected; "
			    "not equal to gw-for-src=%s (must be same): %s\n",
			    IN6_PRINT(ip6buf, gw6),
			    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
			    &src6, &reddst6, &redtgt6));
			rt_unref(rt);
			goto bad;
		}
	} else {
		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
		    "no route found for redirect dst: %s\n",
		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
		    &src6, &reddst6, &redtgt6));
		goto bad;
	}
	rt_unref(rt);
	rt = NULL;
    }
	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
		    "redirect dst must be unicast: %s\n",
		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
		    &src6, &reddst6, &redtgt6));
		goto bad;
	}

	is_router = is_onlink = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		is_router = 1;	/* router case */
	if (memcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
		is_onlink = 1;	/* on-link destination case */
	if (!is_router && !is_onlink) {
		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
		    "neither router case nor onlink case: %s\n",
		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
		    &src6, &reddst6, &redtgt6));
		goto bad;
	}
	/* validation passed */

	icmp6len -= sizeof(*nd_rd);
	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log(LOG_INFO, "invalid ND option, rejected: %s\n",
		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
		    &src6, &reddst6, &redtgt6));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	/* option length must match the 8-byte-rounded lladdr size */
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log(LOG_INFO, "lladdrlen mismatch for %s "
		    "(if %d, icmp6 packet %d): %s\n",
		    IN6_PRINT(ip6buf, &redtgt6), ifp->if_addrlen,
		    lladdrlen - 2,
		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
		    &src6, &reddst6, &redtgt6));
		goto bad;
	}

	/* RFC 2461 8.3 */
	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);

	m_put_rcvif_psref(ifp, &psref);
	ifp = NULL;

	if (!is_onlink) {	/* better router case.  perform rtredirect. */
		/* perform rtredirect */
		struct sockaddr_in6 sdst;
		struct sockaddr_in6 sgw;
		struct sockaddr_in6 ssrc;
		unsigned long rtcount;
		struct rtentry *newrt = NULL;

		/*
		 * do not install redirect route, if the number of entries
		 * is too much (> hiwat).  note that, the node (= host) will
		 * work just fine even if we do not install redirect route
		 * (there will be additional hops, though).
		 */
		mutex_enter(&icmp6_mtx);
		rtcount = rt_timer_count(icmp6_redirect_timeout_q);
		if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes) {
			mutex_exit(&icmp6_mtx);
			goto freeit;
		}
		if (0 <= icmp6_redirect_hiwat &&
		    rtcount > icmp6_redirect_hiwat) {
			mutex_exit(&icmp6_mtx);
			goto freeit;
		} else if (0 <= icmp6_redirect_lowat &&
		    rtcount > icmp6_redirect_lowat) {
			/*
			 * XXX nuke a victim, install the new one.
			 */
		}

		memset(&sdst, 0, sizeof(sdst));
		memset(&sgw, 0, sizeof(sgw));
		memset(&ssrc, 0, sizeof(ssrc));
		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family =
		    AF_INET6;
		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
		    sizeof(struct sockaddr_in6);
		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
		rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), NULL,
		    RTF_GATEWAY | RTF_HOST, sin6tosa(&ssrc), &newrt);
		if (newrt) {
			/* expire the dynamic route via the redirect queue */
			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
			    icmp6_redirect_timeout_q);
			rt_unref(newrt);
		}
		mutex_exit(&icmp6_mtx);
	}
	/* finally update cached route in each socket via pfctlinput */
    {
	struct sockaddr_in6 sdst;

	sockaddr_in6_init(&sdst, &reddst6, 0, 0, 0);
	pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
#if defined(IPSEC)
	if (ipsec_used)
		key_sa_routechange(sin6tosa(&sdst));
#endif
    }

 freeit:
	if (ifp != NULL)
		m_put_rcvif_psref(ifp, &psref);
	m_freem(m);
	return;

 bad:
	m_put_rcvif_psref(ifp, &psref);
	ICMP6_STATINC(ICMP6_STAT_BADREDIRECT);
	m_freem(m);
}

/*
 * Send an ND Redirect for forwarded packet m0 along route rt
 * [RFC 2461, sec 8.2].  Builds a new packet with target-lladdr and
 * redirected-header options as space allows.  Consumes m0.
 */
void
icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
{
	struct ifnet *ifp;	/* my outgoing interface */
	struct in6_addr *ifp_ll6;
	struct in6_addr *nexthop;
	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
	struct mbuf *m = NULL;	/* newly allocated one */
	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
	struct nd_redirect *nd_rd;
	size_t maxlen;
	u_char *p;
	struct sockaddr_in6 src_sa;

	icmp6_errcount(ICMP6_STAT_OUTERRHIST, ND_REDIRECT, 0);

	/* if we are not router, we don't send icmp6 redirect */
	if (!ip6_forwarding)
		goto fail;

	/* sanity check */
	KASSERT(m0 != NULL);
	KASSERT(rt != NULL);

	ifp = rt->rt_ifp;

	/*
	 * Address check:
	 *  the source address must identify a neighbor, and
	 *  the destination address must not be a multicast address
	 *  [RFC 2461, sec 8.2]
	 */
	sip6 = mtod(m0, struct ip6_hdr *);
	sockaddr_in6_init(&src_sa, &sip6->ip6_src, 0, 0, 0);
	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
		goto fail;
	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
		goto fail;	/* what should we do here? */

	/* rate limit */
	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
		goto fail;

	/*
	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
	 * we almost always ask for an mbuf cluster for simplicity.
	 * (MHLEN < IPV6_MMTU is almost always true)
	 */
	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (m && IPV6_MMTU >= MHLEN) {
#if IPV6_MMTU >= MCLBYTES
		MEXTMALLOC(m, IPV6_MMTU, M_NOWAIT);
#else
		MCLGET(m, M_DONTWAIT);
#endif
	}
	if (!m)
		goto fail;
	m_reset_rcvif(m);
	m->m_len = 0;
	maxlen = M_TRAILINGSPACE(m);
	maxlen = uimin(IPV6_MMTU, maxlen);
	/* just for safety */
	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct nd_redirect) +
	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
		goto fail;
	}

    {
	/* get ip6 linklocal address for ifp(my outgoing interface). */
	struct in6_ifaddr *ia;
	int s = pserialize_read_enter();
	if ((ia = in6ifa_ifpforlinklocal(ifp,
	    IN6_IFF_NOTREADY|IN6_IFF_ANYCAST)) == NULL) {
		pserialize_read_exit(s);
		goto fail;
	}
	/*
	 * NOTE(review): ifp_ll6 keeps pointing into ia after the
	 * pserialize read section ends — looks like it relies on the
	 * ifaddr staying valid here; confirm against ifaddr lifetime
	 * rules.
	 */
	ifp_ll6 = &ia->ia_addr.sin6_addr;
	pserialize_read_exit(s);
    }

	/* get ip6 linklocal address for the router. */
	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
		struct sockaddr_in6 *sin6;
		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
		nexthop = &sin6->sin6_addr;
		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
			nexthop = NULL;
	} else
		nexthop = NULL;

	/* ip6 */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));

	/* ND Redirect */
	nd_rd = (struct nd_redirect *)(ip6 + 1);
	nd_rd->nd_rd_type = ND_REDIRECT;
	nd_rd->nd_rd_code = 0;
	nd_rd->nd_rd_reserved = 0;
	if (rt->rt_flags & RTF_GATEWAY) {
		/*
		 * nd_rd->nd_rd_target must be a link-local address in
		 * better router cases.
		 */
		if (!nexthop)
			goto fail;
		bcopy(nexthop, &nd_rd->nd_rd_target,
		    sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		    sizeof(nd_rd->nd_rd_dst));
	} else {
		/* make sure redtgt == reddst */
		nexthop = &sip6->ip6_dst;
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
		    sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		    sizeof(nd_rd->nd_rd_dst));
	}

	p = (u_char *)(nd_rd + 1);

    {
	/* target lladdr option */
	struct llentry *ln = NULL;
	int len, pad;
	struct nd_opt_hdr *nd_opt;
	char *lladdr;

	ln = nd6_lookup(nexthop, ifp, false);
	if (ln == NULL)
		goto nolladdropt;

	len = sizeof(*nd_opt) + ifp->if_addrlen;
	len = (len + 7) & ~7;	/* round by 8 */
	pad = len - (sizeof(*nd_opt) + ifp->if_addrlen);

	/* safety check */
	if (len + (p - (u_char *)ip6) > maxlen) {
		LLE_RUNLOCK(ln);
		goto nolladdropt;
	}

	if (ln->la_flags & LLE_VALID) {
		nd_opt = (struct nd_opt_hdr *)p;
		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		nd_opt->nd_opt_len = len >> 3;
		lladdr = (char *)(nd_opt + 1);
		memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
		memset(lladdr + ifp->if_addrlen, 0, pad);
		p += len;
	}
	LLE_RUNLOCK(ln);
    }
  nolladdropt:
	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* just to be safe */
	if (m0->m_flags & M_DECRYPTED)
		goto noredhdropt;
	if (p - (u_char *)ip6 > maxlen)
		goto noredhdropt;

    {
	/* redirected header option */
	int len;
	struct nd_opt_rd_hdr *nd_opt_rh;

	/*
	 * compute the maximum size for icmp6 redirect header option.
	 * XXX room for auth header?
	 */
	len = maxlen - (p - (u_char *)ip6);
	len &= ~7;

	if (len < sizeof(*nd_opt_rh)) {
		goto noredhdropt;
	}

	/*
	 * Redirected header option spec (RFC2461 4.6.3) talks nothing
	 * about padding/truncate rule for the original IP packet.
	 * From the discussion on IPv6imp in Feb 1999,
	 * the consensus was:
	 * - "attach as much as possible" is the goal
	 * - pad if not aligned (original size can be guessed by
	 *   original ip6 header)
	 * Following code adds the padding if it is simple enough,
	 * and truncates if not.
	 */
	if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
		/* not enough room, truncate */
		m_adj(m0, (len - sizeof(*nd_opt_rh)) -
		    m0->m_pkthdr.len);
	} else {
		/*
		 * enough room, truncate if not aligned.
		 * we don't pad here for simplicity.
		 */
		int extra;

		extra = m0->m_pkthdr.len % 8;
		if (extra) {
			/* truncate */
			m_adj(m0, -extra);
		}
		len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
	}

	nd_opt_rh = (struct nd_opt_rd_hdr *)p;
	memset(nd_opt_rh, 0, sizeof(*nd_opt_rh));
	nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
	nd_opt_rh->nd_opt_rh_len = len >> 3;
	p += sizeof(*nd_opt_rh);
	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* connect m0 to m */
	m->m_pkthdr.len += m0->m_pkthdr.len;
	m_cat(m, m0);
	m0 = NULL;	/* m now owns the original packet */
    }
  noredhdropt:
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}

	/* XXX: clear embedded link IDs in the inner header */
	in6_clearscope(&sip6->ip6_src);
	in6_clearscope(&sip6->ip6_dst);
	in6_clearscope(&nd_rd->nd_rd_target);
	in6_clearscope(&nd_rd->nd_rd_dst);

	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));

	nd_rd->nd_rd_cksum = 0;
	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
	    sizeof(*ip6), ntohs(ip6->ip6_plen));

	/* send the packet to outside... */
	if (ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL) != 0)
		icmp6_ifstat_inc(ifp, ifs6_out_error);

	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	icmp6_ifstat_inc(ifp, ifs6_out_redirect);
	ICMP6_STATINC(ICMP6_STAT_OUTHIST + ND_REDIRECT);

	return;

 fail:
	if (m)
		m_freem(m);
	if (m0)
		m_freem(m0);
}

/*
 * ICMPv6 socket option processing.
 */
int
icmp6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
	int error = 0;
	struct in6pcb *in6p = sotoin6pcb(so);

	/* non-ICMPv6-level options are handled by raw IPv6 */
	if (sopt->sopt_level != IPPROTO_ICMPV6)
		return rip6_ctloutput(op, so, sopt);

	switch (op) {
	case PRCO_SETOPT:
		switch (sopt->sopt_name) {
		case ICMP6_FILTER:
		    {
			struct icmp6_filter fil;

			error = sockopt_get(sopt, &fil, sizeof(fil));
			if (error)
				break;
			memcpy(in6p->in6p_icmp6filt, &fil,
			    sizeof(struct icmp6_filter));
			error = 0;
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case PRCO_GETOPT:
		switch (sopt->sopt_name) {
		case ICMP6_FILTER:
		    {
			if (in6p->in6p_icmp6filt == NULL) {
				error = EINVAL;
				break;
			}
			error = sockopt_set(sopt, in6p->in6p_icmp6filt,
			    sizeof(struct icmp6_filter));
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}

	return error;
}

/*
 * Perform rate limit check.
 * Returns 0 if it is okay to send the icmp6 packet.
 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
 * limitation.
 *
 * XXX per-destination/type check necessary?
 */
static int
icmp6_ratelimit(
	const struct in6_addr *dst,	/* not used at this moment */
	const int type,			/* not used at this moment */
	const int code)			/* not used at this moment */
{
	int ret;

	ret = 0;	/* okay to send */

	/* PPS limit */
	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
	    icmp6errppslim)) {
		/* The packet is subject to rate limit */
		ret++;
	}

	return ret;
}

/*
 * Look up (or clone) a host route to dst for path-MTU discovery and
 * arm its expiry timer.  Returns a referenced rtentry or NULL.
 */
static struct rtentry *
icmp6_mtudisc_clone(struct sockaddr *dst)
{
	struct rtentry *rt;
	int error;

	rt = rtalloc1(dst, 1);
	if (rt == NULL)
		return NULL;

	/* If we didn't get a host route, allocate one */
	if ((rt->rt_flags & RTF_HOST) == 0) {
		struct rtentry *nrt;

		error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
		if (error) {
			rt_unref(rt);
			return NULL;
		}
		nrt->rt_rmx = rt->rt_rmx;
		rt_unref(rt);
		rt = nrt;
	}

	mutex_enter(&icmp6_mtx);
	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
	    icmp6_mtudisc_timeout_q);
	mutex_exit(&icmp6_mtx);
	if (error) {
		rt_unref(rt);
		return NULL;
	}

	return rt;	/* caller need to call rtfree() */
}

/*
 * rt_timer callback: when a PMTU host route expires, delete it if it
 * was dynamically cloned, otherwise just clear the cached MTU.
 */
static void
icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
{
	struct rtentry *retrt;

	KASSERT(rt != NULL);
	rt_assert_referenced(rt);

	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
	    (RTF_DYNAMIC | RTF_HOST)) {
		rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		rt_unref(rt);
		rt_free(retrt);
	} else {
		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
			rt->rt_rmx.rmx_mtu = 0;
	}
}

/*
 * rt_timer callback: remove an expired redirect-installed dynamic
 * host route.
 */
static void
icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
{
	struct rtentry *retrt;

	KASSERT(rt != NULL);
	rt_assert_referenced(rt);

	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
		rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		rt_unref(rt);
		rt_free(retrt);
	}
}

#ifdef COMPAT_90
/*
 * sysctl helper routine for the net.inet6.icmp6.nd6 nodes.  silly?
 */
static int
sysctl_net_inet6_icmp6_nd6(SYSCTLFN_ARGS)
{
	(void)&name;
	(void)&l;
	(void)&oname;

	if (namelen != 0)
		return (EINVAL);

	return (nd6_sysctl(rnode->sysctl_num, oldp, oldlenp,
	    /*XXXUNCONST*/ __UNCONST(newp), newlen));
}
#endif

/* sysctl handler: export the per-cpu ICMPv6 statistics */
static int
sysctl_net_inet6_icmp6_stats(SYSCTLFN_ARGS)
{
	return (NETSTAT_SYSCTL(icmp6stat_percpu, ICMP6_NSTATS));
}

/*
 * sysctl handler for net.inet6.icmp6.redirtimeout: validate the new
 * value and resize/create/destroy the redirect route timer queue
 * accordingly, all under icmp6_mtx.
 */
static int
sysctl_net_inet6_icmp6_redirtimeout(SYSCTLFN_ARGS)
{
	int error, tmp;
	struct sysctlnode node;

	mutex_enter(&icmp6_mtx);

	node = *rnode;
	node.sysctl_data = &tmp;
	tmp = icmp6_redirtimeout;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		goto out;
	if (tmp < 0) {
		error = EINVAL;
		goto out;
	}
	icmp6_redirtimeout = tmp;

	if (icmp6_redirect_timeout_q != NULL) {
		if (icmp6_redirtimeout == 0) {
			rt_timer_queue_destroy(icmp6_redirect_timeout_q);
		} else {
			rt_timer_queue_change(icmp6_redirect_timeout_q,
			    icmp6_redirtimeout);
		}
	} else if (icmp6_redirtimeout > 0) {
		icmp6_redirect_timeout_q =
		    rt_timer_queue_create(icmp6_redirtimeout);
	}
	error = 0;
out:
	mutex_exit(&icmp6_mtx);
	return error;
}

static void
sysctl_net_inet6_icmp6_setup(struct sysctllog **clog) { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet6", NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET6, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "icmp6", SYSCTL_DESCR("ICMPv6 related settings"), NULL, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("ICMPv6 transmission statistics"), sysctl_net_inet6_icmp6_stats, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_STATS, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "rediraccept", SYSCTL_DESCR("Accept and process redirect messages"), NULL, 0, &icmp6_rediraccept, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_REDIRACCEPT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "redirtimeout", SYSCTL_DESCR("Redirect generated route lifetime"), sysctl_net_inet6_icmp6_redirtimeout, 0, &icmp6_redirtimeout, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_REDIRTIMEOUT, CTL_EOL); #if 0 /* obsoleted */ sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "errratelimit", NULL, NULL, 0, &icmp6_errratelimit, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ERRRATELIMIT, CTL_EOL); #endif sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_prune", SYSCTL_DESCR("Neighbor discovery prune interval"), NULL, 0, &nd6_prune, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_PRUNE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_delay", SYSCTL_DESCR("First probe delay time"), NULL, 0, &nd6_nd_domain.nd_delay, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_DELAY, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_mmaxtries", SYSCTL_DESCR("Number of multicast 
discovery attempts"), NULL, 0, &nd6_nd_domain.nd_mmaxtries, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_MMAXTRIES, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_umaxtries", SYSCTL_DESCR("Number of unicast discovery attempts"), NULL, 0, &nd6_nd_domain.nd_umaxtries, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_UMAXTRIES, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_maxnudhint", SYSCTL_DESCR("Maximum neighbor unreachable hint count"), NULL, 0, &nd6_nd_domain.nd_maxnudhint, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_MAXNUDHINT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "maxqueuelen", SYSCTL_DESCR("max packet queue len for a unresolved ND"), NULL, 1, &nd6_nd_domain.nd_maxqueuelen, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_MAXQLEN, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_useloopback", SYSCTL_DESCR("Use loopback interface for local traffic"), NULL, 0, &nd6_useloopback, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_USELOOPBACK, CTL_EOL); #if 0 /* obsoleted */ sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_proxyall", NULL, NULL, 0, &nd6_proxyall, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_PROXYALL, CTL_EOL); #endif sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nodeinfo", SYSCTL_DESCR("Respond to node information requests"), NULL, 0, &icmp6_nodeinfo, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_NODEINFO, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "errppslimit", SYSCTL_DESCR("Maximum ICMP errors sent per second"), NULL, 0, &icmp6errppslim, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ERRPPSLIMIT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, 
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "mtudisc_hiwat", SYSCTL_DESCR("Low mark on MTU Discovery route timers"), NULL, 0, &icmp6_mtudisc_hiwat, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_MTUDISC_HIWAT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "mtudisc_lowat", SYSCTL_DESCR("Low mark on MTU Discovery route timers"), NULL, 0, &icmp6_mtudisc_lowat, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_MTUDISC_LOWAT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nd6_debug", SYSCTL_DESCR("Enable neighbor discovery debug output"), NULL, 0, &nd6_debug, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_DEBUG, CTL_EOL); #ifdef COMPAT_90 sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "nd6_drlist", SYSCTL_DESCR("Default router list"), sysctl_net_inet6_icmp6_nd6, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, OICMPV6CTL_ND6_DRLIST, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "nd6_prlist", SYSCTL_DESCR("Prefix list"), sysctl_net_inet6_icmp6_nd6, 0, NULL, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, OICMPV6CTL_ND6_PRLIST, CTL_EOL); #endif sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_BOOL, "reflect_pmtu", SYSCTL_DESCR("Use path MTU Discovery for icmpv6 reflect"), NULL, 0, &icmp6_reflect_pmtu, 0, CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_REFLECT_PMTU, CTL_EOL); } void icmp6_statinc(u_int stat) { KASSERT(stat < ICMP6_NSTATS); ICMP6_STATINC(stat); } |
/* (extraction residue removed: stray runs of line numbers, not source code) */
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 | /* $NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Greg Oster * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * * Author: Mark Holland * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
*/ /*************************************************************** * rf_disks.c -- code to perform operations on the actual disks ***************************************************************/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $"); #include <dev/raidframe/raidframevar.h> #include "rf_raid.h" #include "rf_alloclist.h" #include "rf_utils.h" #include "rf_general.h" #include "rf_options.h" #include "rf_kintf.h" #include "rf_netbsd.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/vnode.h> #include <sys/namei.h> /* for pathbuf */ #include <sys/kauth.h> #include <miscfs/specfs/specdev.h> /* for v_rdev */ static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); static void rf_print_label_status( RF_Raid_t *, int, char *, RF_ComponentLabel_t *); static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, RF_ComponentLabel_t *, int, int ); #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) /************************************************************************** * * initialize the disks comprising the array * * We want the spare disks to have regular row,col numbers so that we can * easily substitue a spare for a failed disk. But, the driver code assumes * throughout that the array contains numRow by numCol _non-spare_ disks, so * it's not clear how to fit in the spares. This is an unfortunate holdover * from raidSim. The quick and dirty fix is to make row zero bigger than the * rest, and put all the spares in it. This probably needs to get changed * eventually. 
 *
 **************************************************************************/
int
rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
	RF_RaidDisk_t *disks;
	/* running minimum of component sizes; start at a sentinel maximum */
	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
	RF_RowCol_t c;
	int bs, ret;
	unsigned i, count, foundone = 0, numFailuresThisRow;
	int force;

	force = cfgPtr->force;

	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
	if (ret)
		goto fail;

	disks = raidPtr->Disks;

	/* Open each component, fetch its label, and track failures. */
	numFailuresThisRow = 0;
	for (c = 0; c < raidPtr->numCol; c++) {
		ret = rf_ConfigureDisk(raidPtr,
		    &cfgPtr->devnames[0][c][0],
		    &disks[c], c);

		if (ret)
			goto fail;

		if (disks[c].status == rf_ds_optimal) {
			ret = raidfetch_component_label(raidPtr, c);
			if (ret)
				goto fail;

			/* mark it as failed if the label looks bogus... */
			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) &&
			    !force) {
				disks[c].status = rf_ds_failed;
			}
		}

		if (disks[c].status != rf_ds_optimal) {
			numFailuresThisRow++;
		} else {
			/* live component: fold its size into the minimum */
			if (disks[c].numBlocks < min_numblks)
				min_numblks = disks[c].numBlocks;
			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", c,
			    disks[c].devname, disks[c].numBlocks,
			    disks[c].blockSize, (long int) disks[c].numBlocks *
			    disks[c].blockSize / 1024 / 1024);
		}
	}
	/* XXX fix for n-fault tolerant */
	/* XXX this should probably check to see how many failures
	   we can handle for this configuration!
	*/
	if (numFailuresThisRow > 0)
		raidPtr->status = rf_rs_degraded;

	/* all disks must be the same size & have the same block size, bs must
	 * be a power of 2 */
	bs = 0;
	foundone = 0;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (disks[c].status == rf_ds_optimal) {
			bs = disks[c].blockSize;
			foundone = 1;
			break;
		}
	}
	if (!foundone) {
		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
		ret = EINVAL;
		goto fail;
	}
	/* popcount of bs: exactly one bit set <=> power of two */
	for (count = 0, i = 1; i; i <<= 1)
		if (bs & i)
			count++;
	if (count != 1) {
		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
		ret = EINVAL;
		goto fail;
	}

	if (rf_CheckLabels( raidPtr, cfgPtr )) {
		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
		if (force != 0) {
			printf("raid%d: Fatal errors being ignored.\n",
			       raidPtr->raidid);
		} else {
			ret = EINVAL;
			goto fail;
		}
	}

	/* Enforce a uniform block size and truncate to the smallest disk. */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (disks[c].status == rf_ds_optimal) {
			if (disks[c].blockSize != bs) {
				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
				ret = EINVAL;
				goto fail;
			}
			if (disks[c].numBlocks != min_numblks) {
				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n", c, (int) min_numblks);
				disks[c].numBlocks = min_numblks;
			}
		}
	}

	raidPtr->sectorsPerDisk = min_numblks;
	raidPtr->logBytesPerSector = ffs(bs) - 1;
	raidPtr->bytesPerSector = bs;
	raidPtr->sectorMask = bs - 1;
	return (0);

fail:

	rf_UnconfigureVnodes( raidPtr );

	return (ret);
}
/****************************************************************************
 * set up the data structures describing the spare disks in the array
 * recall from the above comment that the spare disk descriptors are stored
 * in row zero, which is specially expanded to hold them.
****************************************************************************/ int rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) { int i, ret; unsigned int bs; RF_RaidDisk_t *disks; int num_spares_done; num_spares_done = 0; /* The space for the spares should have already been allocated by * ConfigureDisks() */ disks = &raidPtr->Disks[raidPtr->numCol]; for (i = 0; i < raidPtr->numSpare; i++) { ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], &disks[i], raidPtr->numCol + i); if (ret) goto fail; if (disks[i].status != rf_ds_optimal) { RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", &cfgPtr->spare_names[i][0]); } else { disks[i].status = rf_ds_spare; /* change status to * spare */ DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i, disks[i].devname, disks[i].numBlocks, disks[i].blockSize, (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024); } num_spares_done++; } /* check sizes and block sizes on spare disks */ bs = 1 << raidPtr->logBytesPerSector; for (i = 0; i < raidPtr->numSpare; i++) { if (disks[i].blockSize != bs) { RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); ret = EINVAL; goto fail; } if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", disks[i].devname, disks[i].blockSize, raidPtr->sectorsPerDisk); ret = EINVAL; goto fail; } else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", disks[i].devname, raidPtr->sectorsPerDisk, disks[i].numBlocks); disks[i].numBlocks = raidPtr->sectorsPerDisk; } } return (0); fail: /* Release the hold on the main components. We've failed to allocate * a spare, and since we're failing, we need to free things.. 
XXX failing to allocate a spare is *not* that big of a deal... We *can* survive without it, if need be, esp. if we get hot adding working. If we don't fail out here, then we need a way to remove this spare... that should be easier to do here than if we are "live"... */ rf_UnconfigureVnodes( raidPtr ); return (ret); } static int rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) { int ret; size_t entries = raidPtr->numCol + RF_MAXSPARE; /* We allocate RF_MAXSPARE on the first row so that we have room to do hot-swapping of spares */ raidPtr->Disks = RF_MallocAndAdd( entries * sizeof(*raidPtr->Disks), raidPtr->cleanupList); if (raidPtr->Disks == NULL) { ret = ENOMEM; goto fail; } /* get space for device specific stuff.. */ raidPtr->raid_cinfo = RF_MallocAndAdd( entries * sizeof(*raidPtr->raid_cinfo), raidPtr->cleanupList); if (raidPtr->raid_cinfo == NULL) { ret = ENOMEM; goto fail; } return(0); fail: rf_UnconfigureVnodes( raidPtr ); return(ret); } /* configure a single disk during auto-configuration at boot */ int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *auto_config) { RF_RaidDisk_t *disks; RF_RaidDisk_t *diskPtr; RF_RowCol_t c; RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; int bs, ret; int numFailuresThisRow; RF_AutoConfig_t *ac; int parity_good; int mod_counter; int mod_counter_found; #if DEBUG printf("Starting autoconfiguration of RAID set...\n"); #endif ret = rf_AllocDiskStructures(raidPtr, cfgPtr); if (ret) goto fail; disks = raidPtr->Disks; /* assume the parity will be fine.. 
*/ parity_good = RF_RAID_CLEAN; /* Check for mod_counters that are too low */ mod_counter_found = 0; mod_counter = 0; ac = auto_config; while(ac!=NULL) { if (mod_counter_found==0) { mod_counter = ac->clabel->mod_counter; mod_counter_found = 1; } else { if (ac->clabel->mod_counter > mod_counter) { mod_counter = ac->clabel->mod_counter; } } ac->flag = 0; /* clear the general purpose flag */ ac = ac->next; } bs = 0; numFailuresThisRow = 0; for (c = 0; c < raidPtr->numCol; c++) { diskPtr = &disks[c]; /* find this row/col in the autoconfig */ #if DEBUG printf("Looking for %d in autoconfig\n",c); #endif ac = auto_config; while(ac!=NULL) { if (ac->clabel==NULL) { /* big-time bad news. */ goto fail; } if ((ac->clabel->column == c) && (ac->clabel->mod_counter == mod_counter)) { /* it's this one... */ /* flag it as 'used', so we don't free it later. */ ac->flag = 1; #if DEBUG printf("Found: %s at %d\n", ac->devname,c); #endif break; } ac=ac->next; } if (ac==NULL) { /* we didn't find an exact match with a correct mod_counter above... can we find one with an incorrect mod_counter to use instead? (this one, if we find it, will be marked as failed once the set configures) */ ac = auto_config; while(ac!=NULL) { if (ac->clabel==NULL) { /* big-time bad news. */ goto fail; } if (ac->clabel->column == c) { /* it's this one... flag it as 'used', so we don't free it later. */ ac->flag = 1; #if DEBUG printf("Found(low mod_counter): %s at %d\n", ac->devname,c); #endif break; } ac=ac->next; } } if (ac!=NULL) { /* Found it. Configure it.. 
*/ diskPtr->blockSize = ac->clabel->blockSize; diskPtr->numBlocks = rf_component_label_numblocks(ac->clabel); /* Note: rf_protectedSectors is already factored into numBlocks here */ raidPtr->raid_cinfo[c].ci_vp = ac->vp; raidPtr->raid_cinfo[c].ci_dev = ac->dev; memcpy(raidget_component_label(raidPtr, c), ac->clabel, sizeof(*ac->clabel)); snprintf(diskPtr->devname, sizeof(diskPtr->devname), "/dev/%s", ac->devname); /* note the fact that this component was autoconfigured. You'll need this info later. Trust me :) */ diskPtr->auto_configured = 1; diskPtr->dev = ac->dev; /* * we allow the user to specify that * only a fraction of the disks should * be used this is just for debug: it * speeds up the parity scan */ diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; /* XXX these will get set multiple times, but since we're autoconfiguring, they'd better be always the same each time! If not, this is the least of your worries */ bs = diskPtr->blockSize; min_numblks = diskPtr->numBlocks; /* this gets done multiple times, but that's fine -- the serial number will be the same for all components, guaranteed */ raidPtr->serial_number = ac->clabel->serial_number; /* check the last time the label was modified */ if (ac->clabel->mod_counter != mod_counter) { /* Even though we've filled in all of the above, we don't trust this component since its modification counter is not in sync with the rest, and we really consider it to be failed. */ disks[c].status = rf_ds_failed; numFailuresThisRow++; } else { if (ac->clabel->clean != RF_RAID_CLEAN) { parity_good = RF_RAID_DIRTY; } } } else { /* Didn't find it at all!! Component must really be dead */ disks[c].status = rf_ds_failed; snprintf(disks[c].devname, sizeof(disks[c].devname), "component%d", c); numFailuresThisRow++; } } /* XXX fix for n-fault tolerant */ /* XXX this should probably check to see how many failures we can handle for this configuration! 
*/ if (numFailuresThisRow > 0) { raidPtr->status = rf_rs_degraded; raidPtr->numFailures = numFailuresThisRow; } /* close the device for the ones that didn't get used */ ac = auto_config; while(ac!=NULL) { if (ac->flag == 0) { vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); vput(ac->vp); ac->vp = NULL; #if DEBUG printf("Released %s from auto-config set.\n", ac->devname); #endif } ac = ac->next; } raidPtr->mod_counter = mod_counter; /* note the state of the parity, if any */ raidPtr->parity_good = parity_good; raidPtr->sectorsPerDisk = min_numblks; raidPtr->logBytesPerSector = ffs(bs) - 1; raidPtr->bytesPerSector = bs; raidPtr->sectorMask = bs - 1; return (0); fail: rf_UnconfigureVnodes( raidPtr ); return (ret); } /* configure a single disk in the array */ int rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr, RF_RowCol_t col) { char *p; struct pathbuf *pb; struct vnode *vp; int error; p = rf_find_non_white(bf); if (p[strlen(p) - 1] == '\n') { /* strip off the newline */ p[strlen(p) - 1] = '\0'; } (void) strcpy(diskPtr->devname, p); /* Let's start by claiming the component is fine and well... */ diskPtr->status = rf_ds_optimal; raidPtr->raid_cinfo[col].ci_vp = NULL; raidPtr->raid_cinfo[col].ci_dev = 0; if (!strcmp("absent", diskPtr->devname)) { printf("Ignoring missing component at column %d\n", col); snprintf(diskPtr->devname, sizeof(diskPtr->devname), "component%d", col); diskPtr->status = rf_ds_failed; return (0); } pb = pathbuf_create(diskPtr->devname); if (pb == NULL) { printf("pathbuf_create for device: %s failed!\n", diskPtr->devname); return ENOMEM; } error = vn_bdev_openpath(pb, &vp, curlwp); pathbuf_destroy(pb); if (error) { printf("open device: '%s' failed: %d\n", diskPtr->devname, error); if (error == ENXIO) { /* the component isn't there... 
must be dead :-( */ diskPtr->status = rf_ds_failed; return 0; } else { return (error); } } if ((error = rf_getdisksize(vp, diskPtr)) != 0) return (error); /* * If this raidPtr's bytesPerSector is zero, fill it in with this * components blockSize. This will give us something to work with * initially, and if it is wrong, we'll get errors later. */ if (raidPtr->bytesPerSector == 0) raidPtr->bytesPerSector = diskPtr->blockSize; if (diskPtr->status == rf_ds_optimal) { raidPtr->raid_cinfo[col].ci_vp = vp; raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev; /* This component was not automatically configured */ diskPtr->auto_configured = 0; diskPtr->dev = vp->v_rdev; /* we allow the user to specify that only a fraction of the * disks should be used this is just for debug: it speeds up * the parity scan */ diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; } return (0); } static void rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name, RF_ComponentLabel_t *ci_label) { printf("raid%d: Component %s being configured at col: %d\n", raidPtr->raidid, dev_name, column ); printf(" Column: %d Num Columns: %d\n", ci_label->column, ci_label->num_columns); printf(" Version: %d Serial Number: %d Mod Counter: %d\n", ci_label->version, ci_label->serial_number, ci_label->mod_counter); printf(" Clean: %s Status: %d\n", ci_label->clean ? 
	       "Yes" : "No", ci_label->status );
}

/*
 * Compare one component label against the expected geometry and
 * identity of the set.  Returns non-zero if a fatal inconsistency
 * (serial number, row, column, or column count) is found.
 */
static int
rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
    RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
{
	int fatal_error = 0;

	if (serial_number != ci_label->serial_number) {
		printf("%s has a different serial number: %d %d\n",
		       dev_name, serial_number, ci_label->serial_number);
		fatal_error = 1;
	}
	/*
	 * A mod_counter mismatch is reported but deliberately not
	 * fatal here -- presumably the maverick-component logic in
	 * rf_CheckLabels() handles it; verify before changing.
	 */
	if (mod_counter != ci_label->mod_counter) {
		printf("%s has a different modification count: %d %d\n",
		       dev_name, mod_counter, ci_label->mod_counter);
	}

	if (row != ci_label->row) {
		printf("Row out of alignment for: %s\n", dev_name);
		fatal_error = 1;
	}
	if (column != ci_label->column) {
		printf("Column out of alignment for: %s\n", dev_name);
		fatal_error = 1;
	}
	if (raidPtr->numCol != ci_label->num_columns) {
		printf("Number of columns do not match for: %s\n", dev_name);
		fatal_error = 1;
	}
	if (ci_label->clean == 0) {
		/* it's not clean, but that's not fatal */
		printf("%s is not clean!\n", dev_name);
	}
	return(fatal_error);
}

/*
 * Mark the given column as failed (degrading the set) unless the user
 * forced the configuration.  'again' avoids double-counting a column
 * already failed by an earlier pass.
 */
static void
rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
    int again)
{
	printf("Hosed component: %s\n",
	       &cfgPtr->devnames[0][hosed_column][0]);
	if (cfgPtr->force)
		return;
	/* we'll fail this component, as if there are
	   other major errors, we aren't forcing things
	   and we'll abort the config anyways */
	if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
		return;
	raidPtr->Disks[hosed_column].status = rf_ds_failed;
	raidPtr->numFailures++;
	raidPtr->status = rf_rs_degraded;
}

/*
   rf_CheckLabels() - check all the component labels for consistency.
   Return an error if there is anything major amiss.
*/
/*
 * rf_CheckLabels:
 *
 *	Cross-check the component labels of all optimal components of the
 *	set.  If exactly one component disagrees on serial number and/or
 *	mod counter, mark it hosed via rf_handle_hosed() and let
 *	configuration proceed; any wider disagreement is a fatal error.
 *	Records the winning serial number and mod counter in *raidPtr and
 *	sets raidPtr->parity_good.  Returns non-zero on fatal error.
 */
int
rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
{
    int c;
    char *dev_name;
    RF_ComponentLabel_t *ci_label;
    int serial_number = 0;
    int mod_number = 0;
    int fatal_error = 0;
    int mod_values[4];
    int mod_count[4];
    int ser_values[4];
    int ser_count[4];
    int num_ser;
    int num_mod;
    int i;
    int found;
    int hosed_column;
    int too_fatal;
    int parity_good;

    hosed_column = -1;
    too_fatal = 0;

    /* We're going to try to be a little intelligent here.  If one
       component's label is bogus, and we can identify that it's the
       *only* one that's gone, we'll mark it as "failed" and allow
       the configuration to proceed.  This will be the *only* case
       that we'll proceed if there would be (otherwise) fatal errors.

       Basically we simply keep a count of how many components had
       what serial number.  If all but one agree, we simply mark
       the disagreeing component as being failed, and allow
       things to come up "normally".

       We do this first for serial numbers, and then for "mod_counter".
     */

    num_ser = 0;
    num_mod = 0;
    /* Only 2 distinct values ever get used; slots 2/3 are overflow space
       so the "num_ser > 2" check below can fire before indexing past the
       arrays. */
    ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
    ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
    mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
    mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
    for (c = 0; c < raidPtr->numCol; c++) {
        /* Non-optimal components have no trustworthy label; skip. */
        if (raidPtr->Disks[c].status != rf_ds_optimal)
            continue;
        ci_label = raidget_component_label(raidPtr, c);
        /* Tally this component's serial number. */
        found = 0;
        for (i = 0; i < num_ser; i++) {
            if (ser_values[i] == ci_label->serial_number) {
                ser_count[i]++;
                found = 1;
                break;
            }
        }
        if (!found) {
            ser_values[num_ser] = ci_label->serial_number;
            ser_count[num_ser] = 1;
            num_ser++;
            if (num_ser > 2) {
                /* 3+ distinct serial numbers: unrecoverable. */
                fatal_error = 1;
                break;
            }
        }
        /* Tally this component's mod counter. */
        found = 0;
        for (i = 0; i < num_mod; i++) {
            if (mod_values[i] == ci_label->mod_counter) {
                mod_count[i]++;
                found = 1;
                break;
            }
        }
        if (!found) {
            mod_values[num_mod] = ci_label->mod_counter;
            mod_count[num_mod] = 1;
            num_mod++;
            if (num_mod > 2) {
                /* 3+ distinct mod counters: unrecoverable. */
                fatal_error = 1;
                break;
            }
        }
    }
#if DEBUG
    printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    for (i = 0; i < num_ser; i++) {
        printf("%d %d\n", ser_values[i], ser_count[i]);
    }
    printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    for (i = 0; i < num_mod; i++) {
        printf("%d %d\n", mod_values[i], mod_count[i]);
    }
#endif
    serial_number = ser_values[0];
    if (num_ser == 2) {
        if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
            /* Locate the maverick component */
            if (ser_count[1] > ser_count[0]) {
                serial_number = ser_values[1];
            }
            for (c = 0; c < raidPtr->numCol; c++) {
                if (raidPtr->Disks[c].status != rf_ds_optimal)
                    continue;
                ci_label = raidget_component_label(raidPtr, c);
                if (serial_number != ci_label->serial_number) {
                    hosed_column = c;
                    break;
                }
            }
            if (hosed_column != -1)
                rf_handle_hosed(raidPtr, cfgPtr, hosed_column, 0);
        } else {
            /* Both serial numbers have 2+ supporters: no clear maverick. */
            too_fatal = 1;
        }
        if (cfgPtr->parityConfig == '0') {
            /* We've identified two different serial numbers.
               RAID 0 can't cope with that, so we'll punt */
            too_fatal = 1;
        }
    }
    /* record the serial number for later.  If we bail later, setting
       this doesn't matter, otherwise we've got the best guess at the
       correct serial number */
    raidPtr->serial_number = serial_number;

    mod_number = mod_values[0];
    if (num_mod == 2) {
        if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
            /* Locate the maverick component */
            if (mod_count[1] > mod_count[0]) {
                mod_number = mod_values[1];
            } else if (mod_count[1] < mod_count[0]) {
                mod_number = mod_values[0];
            } else {
                /* counts of different modification values
                   are the same.  Assume greater value is
                   the correct one, all other things
                   considered */
                if (mod_values[0] > mod_values[1]) {
                    mod_number = mod_values[0];
                } else {
                    mod_number = mod_values[1];
                }
            }
            for (c = 0; c < raidPtr->numCol; c++) {
                if (raidPtr->Disks[c].status != rf_ds_optimal)
                    continue;
                ci_label = raidget_component_label(raidPtr, c);
                if (mod_number != ci_label->mod_counter) {
                    if (hosed_column == c) {
                        /* same one.  Can deal with it. */
                    } else {
                        /* A *different* component than the serial-number
                           maverick disagrees; recoverable only if all
                           serial numbers agreed. */
                        hosed_column = c;
                        if (num_ser != 1) {
                            too_fatal = 1;
                            break;
                        }
                    }
                }
            }
            if (hosed_column != -1)
                rf_handle_hosed(raidPtr, cfgPtr, hosed_column, 1);
        } else {
            too_fatal = 1;
        }
        if (cfgPtr->parityConfig == '0') {
            /* We've identified two different mod counters.
               RAID 0 can't cope with that, so we'll punt */
            too_fatal = 1;
        }
    }
    raidPtr->mod_counter = mod_number;

    if (too_fatal) {
        /* we've had both a serial number mismatch, and a mod_counter
           mismatch -- and they involved two different components!!
           Bail -- make things fail so that the user must force
           the issue... */
        hosed_column = -1;
        fatal_error = 1;
    }
    if (num_ser > 2) {
        printf("raid%d: Too many different serial numbers!\n",
            raidPtr->raidid);
        fatal_error = 1;
    }
    if (num_mod > 2) {
        printf("raid%d: Too many different mod counters!\n",
            raidPtr->raidid);
        fatal_error = 1;
    }
    /* Any non-optimal component is treated as the hosed one for the
       reporting loop below (its label is ignored, not validated). */
    for (c = 0; c < raidPtr->numCol; c++) {
        if (raidPtr->Disks[c].status != rf_ds_optimal) {
            hosed_column = c;
            break;
        }
    }
    /* we start by assuming the parity will be good, and flee from
       that notion at the slightest sign of trouble */
    parity_good = RF_RAID_CLEAN;
    for (c = 0; c < raidPtr->numCol; c++) {
        dev_name = &cfgPtr->devnames[0][c][0];
        ci_label = raidget_component_label(raidPtr, c);
        if (c == hosed_column) {
            printf("raid%d: Ignoring %s\n", raidPtr->raidid, dev_name);
        } else {
            rf_print_label_status(raidPtr, c, dev_name, ci_label);
            if (rf_check_label_vitals(raidPtr, 0, c, dev_name,
                ci_label, serial_number, mod_number)) {
                fatal_error = 1;
            }
            if (ci_label->clean != RF_RAID_CLEAN) {
                parity_good = RF_RAID_DIRTY;
            }
        }
    }
    if (fatal_error) {
        parity_good = RF_RAID_DIRTY;
    }
    /* we note the state of the parity */
    raidPtr->parity_good = parity_good;
    return (fatal_error);
}

/*
 * rf_add_hot_spare:
 *
 *	Configure sparePtr->component_name as a new hot spare of the set.
 *	Serializes against concurrent spare additions with the
 *	adding_hot_spare flag/condvar, validates the spare's block size
 *	and capacity against the set, then configures its disk queue and
 *	bumps raidPtr->numSpare.  Returns 0 or an errno value.
 */
int
rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
{
    RF_RaidDisk_t *disks;
    RF_DiskQueue_t *spareQueues;
    int ret;
    unsigned int bs;
    int spare_number;

    ret = 0;

    if (raidPtr->numSpare >= RF_MAXSPARE) {
        RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
        return (EINVAL);
    }

    /* Only one spare may be added at a time; wait our turn. */
    rf_lock_mutex2(raidPtr->mutex);
    while (raidPtr->adding_hot_spare == 1) {
        rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
    }
    raidPtr->adding_hot_spare = 1;
    rf_unlock_mutex2(raidPtr->mutex);

    /* the beginning of the spares... */
    disks = &raidPtr->Disks[raidPtr->numCol];

    spare_number = raidPtr->numSpare;

    ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
        &disks[spare_number], raidPtr->numCol + spare_number);

    if (ret)
        goto fail;
    if (disks[spare_number].status != rf_ds_optimal) {
        RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
            sparePtr->component_name);
        rf_close_component(raidPtr,
            raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp, 0);
        ret = EINVAL;
        goto fail;
    } else {
        disks[spare_number].status = rf_ds_spare;
        DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
            spare_number, disks[spare_number].devname,
            disks[spare_number].numBlocks,
            disks[spare_number].blockSize,
            (long int) disks[spare_number].numBlocks *
            disks[spare_number].blockSize / 1024 / 1024);
    }

    /* check sizes and block sizes on the spare disk */
    bs = 1 << raidPtr->logBytesPerSector;
    if (disks[spare_number].blockSize != bs) {
        RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
        rf_close_component(raidPtr,
            raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp, 0);
        ret = EINVAL;
        goto fail;
    }
    if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
        RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", disks[spare_number].devname, disks[spare_number].blockSize, raidPtr->sectorsPerDisk);
        rf_close_component(raidPtr,
            raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp, 0);
        ret = EINVAL;
        goto fail;
    } else {
        if (disks[spare_number].numBlocks > raidPtr->sectorsPerDisk) {
            /* An oversized spare is clipped to the per-disk size of
               the set; the excess is simply unused. */
            RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", disks[spare_number].devname, raidPtr->sectorsPerDisk, disks[spare_number].numBlocks);

            disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
        }
    }

    spareQueues = &raidPtr->Queues[raidPtr->numCol];
    /* NOTE(review): the return value of rf_ConfigureDiskQueue() is
       assigned to ret and falls through to "fail", so a queue setup
       failure is reported -- but numSpare is still incremented below
       regardless.  Presumably intentional; verify against callers. */
    ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
        raidPtr->numCol + spare_number, raidPtr->qType,
        raidPtr->sectorsPerDisk,
        raidPtr->Disks[raidPtr->numCol + spare_number].dev,
        raidPtr->maxOutstanding, &raidPtr->shutdownList,
        raidPtr->cleanupList);

    rf_lock_mutex2(raidPtr->mutex);
    raidPtr->numSpare++;
    rf_unlock_mutex2(raidPtr->mutex);

fail:
    /* Release the add-spare serialization and wake any waiter. */
    rf_lock_mutex2(raidPtr->mutex);
    raidPtr->adding_hot_spare = 0;
    rf_signal_cond2(raidPtr->adding_hot_spare_cv);
    rf_unlock_mutex2(raidPtr->mutex);

    return (ret);
}

/*
 * rf_remove_hot_spare:
 *
 *	Remove a hot spare from the set.  Not implemented; always
 *	returns EINVAL (after complaining if there are no spares).
 */
int
rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
{
#if 0
    int spare_number;
#endif

    if (raidPtr->numSpare == 0) {
        printf("No spares to remove!\n");
        return (EINVAL);
    }

    return (EINVAL); /* XXX not implemented yet */
#if 0
    spare_number = sparePtr->column;

    if (spare_number < 0 || spare_number > raidPtr->numSpare) {
        return (EINVAL);
    }

    /* verify that this spare isn't in use... */

    /* it's gone.. */

    raidPtr->numSpare--;

    return (0);
#endif
}

/*
 * rf_delete_component:
 *
 *	Delete a component from the set.  Only validates the column
 *	index; the removal itself is not implemented and EINVAL is
 *	always returned.
 */
int
rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
{
#if 0
    RF_RaidDisk_t *disks;
#endif

    if ((component->column < 0) ||
        (component->column >= raidPtr->numCol)) {
        return (EINVAL);
    }

#if 0
    disks = &raidPtr->Disks[component->column];
#endif

    /* 1. This component must be marked as 'failed' */

    return (EINVAL); /* Not implemented yet. */
}

/*
 * rf_incorporate_hot_spare:
 *
 *	Incorporate a hot spare as a live component.  Not implemented;
 *	always returns EINVAL.
 */
int
rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
    RF_SingleComponent_t *component)
{
    /* Issues here include how to 'move' this in if there is IO taking
       place (e.g. component queues and such) */
    return (EINVAL); /* Not implemented yet. */
}
| 1309 867 867 860 860 21 862 865 14 860 928 54 914 912 84 34 78 79 891 925 926 924 925 157 924 926 2 158 158 823 926 89 885 25 2 109 105 2 2 898 91 898 4 4 4 4 4 4 3 34 34 34 26 2 2 84 84 78 5 9 9 75 75 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 | /* $NetBSD: kern_sleepq.c,v 1.73 2022/06/29 22:27:01 riastradh Exp $ */ /*- * Copyright (c) 2006, 2007, 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Sleep queue implementation, used by turnstiles and general sleep/wakeup * interfaces. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.73 2022/06/29 22:27:01 riastradh Exp $"); #include <sys/param.h> #include <sys/kernel.h> #include <sys/cpu.h> #include <sys/intr.h> #include <sys/pool.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/systm.h> #include <sys/sleepq.h> #include <sys/ktrace.h> /* * for sleepq_abort: * During autoconfiguration or after a panic, a sleep will simply lower the * priority briefly to allow interrupts, then return. The priority to be * used (IPL_SAFEPRI) is machine-dependent, thus this value is initialized and * maintained in the machine-dependent layers. This priority will typically * be 0, or the lowest priority that is safe for use on the interrupt stack; * it can be made higher to block network software interrupts after panics. */ #ifndef IPL_SAFEPRI #define IPL_SAFEPRI 0 #endif static int sleepq_sigtoerror(lwp_t *, int); /* General purpose sleep table, used by mtsleep() and condition variables. */ sleeptab_t sleeptab __cacheline_aligned; sleepqlock_t sleepq_locks[SLEEPTAB_HASH_SIZE] __cacheline_aligned; /* * sleeptab_init: * * Initialize a sleep table. */ void sleeptab_init(sleeptab_t *st) { static bool again; int i; for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) { if (!again) { mutex_init(&sleepq_locks[i].lock, MUTEX_DEFAULT, IPL_SCHED); } sleepq_init(&st->st_queue[i]); } again = true; } /* * sleepq_init: * * Prepare a sleep queue for use. */ void sleepq_init(sleepq_t *sq) { LIST_INIT(sq); } /* * sleepq_remove: * * Remove an LWP from a sleep queue and wake it up. 
*/ void sleepq_remove(sleepq_t *sq, lwp_t *l) { struct schedstate_percpu *spc; struct cpu_info *ci; KASSERT(lwp_locked(l, NULL)); if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_NULL) == 0) { KASSERT(sq != NULL); LIST_REMOVE(l, l_sleepchain); } else { KASSERT(sq == NULL); } l->l_syncobj = &sched_syncobj; l->l_wchan = NULL; l->l_sleepq = NULL; l->l_flag &= ~LW_SINTR; ci = l->l_cpu; spc = &ci->ci_schedstate; /* * If not sleeping, the LWP must have been suspended. Let whoever * holds it stopped set it running again. */ if (l->l_stat != LSSLEEP) { KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED); lwp_setlock(l, spc->spc_lwplock); return; } /* * If the LWP is still on the CPU, mark it as LSONPROC. It may be * about to call mi_switch(), in which case it will yield. */ if ((l->l_pflag & LP_RUNNING) != 0) { l->l_stat = LSONPROC; l->l_slptime = 0; lwp_setlock(l, spc->spc_lwplock); return; } /* Update sleep time delta, call the wake-up handler of scheduler */ l->l_slpticksum += (getticks() - l->l_slpticks); sched_wakeup(l); /* Look for a CPU to wake up */ l->l_cpu = sched_takecpu(l); ci = l->l_cpu; spc = &ci->ci_schedstate; /* * Set it running. */ spc_lock(ci); lwp_setlock(l, spc->spc_mutex); sched_setrunnable(l); l->l_stat = LSRUN; l->l_slptime = 0; sched_enqueue(l); sched_resched_lwp(l, true); /* LWP & SPC now unlocked, but we still hold sleep queue lock. */ } /* * sleepq_insert: * * Insert an LWP into the sleep queue, optionally sorting by priority. */ static void sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj) { if ((sobj->sobj_flag & SOBJ_SLEEPQ_NULL) != 0) { KASSERT(sq == NULL); return; } KASSERT(sq != NULL); if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) { lwp_t *l2, *l_last = NULL; const pri_t pri = lwp_eprio(l); LIST_FOREACH(l2, sq, l_sleepchain) { l_last = l2; if (lwp_eprio(l2) < pri) { LIST_INSERT_BEFORE(l2, l, l_sleepchain); return; } } /* * Ensure FIFO ordering if no waiters are of lower priority. 
*/ if (l_last != NULL) { LIST_INSERT_AFTER(l_last, l, l_sleepchain); return; } } LIST_INSERT_HEAD(sq, l, l_sleepchain); } /* * sleepq_enqueue: * * Enter an LWP into the sleep queue and prepare for sleep. The sleep * queue must already be locked, and any interlock (such as the kernel * lock) must have be released (see sleeptab_lookup(), sleepq_enter()). */ void sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj, bool catch_p) { lwp_t *l = curlwp; KASSERT(lwp_locked(l, NULL)); KASSERT(l->l_stat == LSONPROC); KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL); KASSERT((l->l_flag & LW_SINTR) == 0); l->l_syncobj = sobj; l->l_wchan = wchan; l->l_sleepq = sq; l->l_wmesg = wmesg; l->l_slptime = 0; l->l_stat = LSSLEEP; if (catch_p) l->l_flag |= LW_SINTR; sleepq_insert(sq, l, sobj); /* Save the time when thread has slept */ l->l_slpticks = getticks(); sched_slept(l); } /* * sleepq_transfer: * * Move an LWP from one sleep queue to another. Both sleep queues * must already be locked. * * The LWP will be updated with the new sleepq, wchan, wmesg, * sobj, and mutex. The interruptible flag will also be updated. */ void sleepq_transfer(lwp_t *l, sleepq_t *from_sq, sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj, kmutex_t *mp, bool catch_p) { KASSERT(l->l_sleepq == from_sq); LIST_REMOVE(l, l_sleepchain); l->l_syncobj = sobj; l->l_wchan = wchan; l->l_sleepq = sq; l->l_wmesg = wmesg; if (catch_p) l->l_flag = LW_SINTR | LW_CATCHINTR; else l->l_flag = ~(LW_SINTR | LW_CATCHINTR); /* * This allows the transfer from one sleepq to another where * it is known that they're both protected by the same lock. */ if (mp != NULL) lwp_setlock(l, mp); sleepq_insert(sq, l, sobj); } /* * sleepq_uncatch: * * Mark the LWP as no longer sleeping interruptibly. */ void sleepq_uncatch(lwp_t *l) { l->l_flag = ~(LW_SINTR | LW_CATCHINTR); } /* * sleepq_block: * * After any intermediate step such as releasing an interlock, switch. 
* sleepq_block() may return early under exceptional conditions, for * example if the LWP's containing process is exiting. * * timo is a timeout in ticks. timo = 0 specifies an infinite timeout. */ int sleepq_block(int timo, bool catch_p, struct syncobj *syncobj) { int error = 0, sig; struct proc *p; lwp_t *l = curlwp; bool early = false; int biglocks = l->l_biglocks; ktrcsw(1, 0, syncobj); /* * If sleeping interruptably, check for pending signals, exits or * core dump events. * * Note the usage of LW_CATCHINTR. This expresses our intent * to catch or not catch sleep interruptions, which might change * while we are sleeping. It is independent from LW_SINTR because * we don't want to leave LW_SINTR set when the LWP is not asleep. */ if (catch_p) { if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) { l->l_flag &= ~LW_CANCELLED; error = EINTR; early = true; } else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0)) early = true; l->l_flag |= LW_CATCHINTR; } else l->l_flag &= ~LW_CATCHINTR; if (early) { /* lwp_unsleep() will release the lock */ lwp_unsleep(l, true); } else { /* * The LWP may have already been awoken if the caller * dropped the sleep queue lock between sleepq_enqueue() and * sleepq_block(). If that happens l_stat will be LSONPROC * and mi_switch() will treat this as a preemption. No need * to do anything special here. */ if (timo) { l->l_flag &= ~LW_STIMO; callout_schedule(&l->l_timeout_ch, timo); } spc_lock(l->l_cpu); mi_switch(l); /* The LWP and sleep queue are now unlocked. */ if (timo) { /* * Even if the callout appears to have fired, we * need to stop it in order to synchronise with * other CPUs. It's important that we do this in * this LWP's context, and not during wakeup, in * order to keep the callout & its cache lines * co-located on the CPU with the LWP. */ (void)callout_halt(&l->l_timeout_ch, NULL); error = (l->l_flag & LW_STIMO) ? 
EWOULDBLOCK : 0; } } /* * LW_CATCHINTR is only modified in this function OR when we * are asleep (with the sleepq locked). We can therefore safely * test it unlocked here as it is guaranteed to be stable by * virtue of us running. * * We do not bother clearing it if set; that would require us * to take the LWP lock, and it doesn't seem worth the hassle * considering it is only meaningful here inside this function, * and is set to reflect intent upon entry. */ if ((l->l_flag & LW_CATCHINTR) != 0 && error == 0) { p = l->l_proc; if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0) error = EINTR; else if ((l->l_flag & LW_PENDSIG) != 0) { /* * Acquiring p_lock may cause us to recurse * through the sleep path and back into this * routine, but is safe because LWPs sleeping * on locks are non-interruptable and we will * not recurse again. */ mutex_enter(p->p_lock); if (((sig = sigispending(l, 0)) != 0 && (sigprop[sig] & SA_STOP) == 0) || (sig = issignal(l)) != 0) error = sleepq_sigtoerror(l, sig); mutex_exit(p->p_lock); } } ktrcsw(0, 0, syncobj); if (__predict_false(biglocks != 0)) { KERNEL_LOCK(biglocks, NULL); } return error; } /* * sleepq_wake: * * Wake zero or more LWPs blocked on a single wait channel. */ void sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp) { lwp_t *l, *next; KASSERT(mutex_owned(mp)); for (l = LIST_FIRST(sq); l != NULL; l = next) { KASSERT(l->l_sleepq == sq); KASSERT(l->l_mutex == mp); next = LIST_NEXT(l, l_sleepchain); if (l->l_wchan != wchan) continue; sleepq_remove(sq, l); if (--expected == 0) break; } mutex_spin_exit(mp); } /* * sleepq_unsleep: * * Remove an LWP from its sleep queue and set it runnable again. * sleepq_unsleep() is called with the LWP's mutex held, and will * release it if "unlock" is true. 
*/ void sleepq_unsleep(lwp_t *l, bool unlock) { sleepq_t *sq = l->l_sleepq; kmutex_t *mp = l->l_mutex; KASSERT(lwp_locked(l, mp)); KASSERT(l->l_wchan != NULL); sleepq_remove(sq, l); if (unlock) { mutex_spin_exit(mp); } } /* * sleepq_timeout: * * Entered via the callout(9) subsystem to time out an LWP that is on a * sleep queue. */ void sleepq_timeout(void *arg) { lwp_t *l = arg; /* * Lock the LWP. Assuming it's still on the sleep queue, its * current mutex will also be the sleep queue mutex. */ lwp_lock(l); if (l->l_wchan == NULL) { /* Somebody beat us to it. */ lwp_unlock(l); return; } l->l_flag |= LW_STIMO; lwp_unsleep(l, true); } /* * sleepq_sigtoerror: * * Given a signal number, interpret and return an error code. */ static int sleepq_sigtoerror(lwp_t *l, int sig) { struct proc *p = l->l_proc; int error; KASSERT(mutex_owned(p->p_lock)); /* * If this sleep was canceled, don't let the syscall restart. */ if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0) error = EINTR; else error = ERESTART; return error; } /* * sleepq_abort: * * After a panic or during autoconfiguration, lower the interrupt * priority level to give pending interrupts a chance to run, and * then return. Called if sleepq_dontsleep() returns non-zero, and * always returns zero. */ int sleepq_abort(kmutex_t *mtx, int unlock) { int s; s = splhigh(); splx(IPL_SAFEPRI); splx(s); if (mtx != NULL && unlock != 0) mutex_exit(mtx); return 0; } /* * sleepq_reinsert: * * Move the position of the lwp in the sleep queue after a possible * change of the lwp's effective priority. */ static void sleepq_reinsert(sleepq_t *sq, lwp_t *l) { KASSERT(l->l_sleepq == sq); if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) { return; } /* * Don't let the sleep queue become empty, even briefly. * cv_signal() and cv_broadcast() inspect it without the * sleep queue lock held and need to see a non-empty queue * head if there are waiters. 
*/ if (LIST_FIRST(sq) == l && LIST_NEXT(l, l_sleepchain) == NULL) { return; } LIST_REMOVE(l, l_sleepchain); sleepq_insert(sq, l, l->l_syncobj); } /* * sleepq_changepri: * * Adjust the priority of an LWP residing on a sleepq. */ void sleepq_changepri(lwp_t *l, pri_t pri) { sleepq_t *sq = l->l_sleepq; KASSERT(lwp_locked(l, NULL)); l->l_priority = pri; sleepq_reinsert(sq, l); } /* * sleepq_changepri: * * Adjust the lended priority of an LWP residing on a sleepq. */ void sleepq_lendpri(lwp_t *l, pri_t pri) { sleepq_t *sq = l->l_sleepq; KASSERT(lwp_locked(l, NULL)); l->l_inheritedprio = pri; l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio); sleepq_reinsert(sq, l); } |
| 3 3 2 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 | /* $NetBSD: tmpfs_vfsops.c,v 1.77 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD 
Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Efficient memory file system. * * tmpfs is a file system that uses NetBSD's virtual memory sub-system * (the well-known UVM) to store file data and metadata in an efficient * way. This means that it does not follow the structure of an on-disk * file system because it simply does not need to. Instead, it uses * memory-specific data structures and algorithms to automatically * allocate and release resources. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.77 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> #include <sys/types.h> #include <sys/kmem.h> #include <sys/mount.h> #include <sys/stat.h> #include <sys/systm.h> #include <sys/vnode.h> #include <sys/kauth.h> #include <sys/module.h> #include <miscfs/genfs/genfs.h> #include <fs/tmpfs/tmpfs.h> #include <fs/tmpfs/tmpfs_args.h> MODULE(MODULE_CLASS_VFS, tmpfs, NULL); struct pool tmpfs_dirent_pool; struct pool tmpfs_node_pool; void tmpfs_init(void) { pool_init(&tmpfs_dirent_pool, sizeof(tmpfs_dirent_t), 0, 0, 0, "tmpfs_dirent", &pool_allocator_nointr, IPL_NONE); pool_init(&tmpfs_node_pool, sizeof(tmpfs_node_t), 0, 0, 0, "tmpfs_node", &pool_allocator_nointr, IPL_NONE); } void tmpfs_done(void) { pool_destroy(&tmpfs_dirent_pool); pool_destroy(&tmpfs_node_pool); } int tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) { struct tmpfs_args *args = data; tmpfs_mount_t *tmp; tmpfs_node_t *root; struct vattr va; struct vnode *vp; uint64_t memlimit; ino_t nodes; int error, flags; bool set_memlimit; bool set_nodes; if (args == NULL) return EINVAL; /* Validate the version. */ if (*data_len < sizeof(*args) || args->ta_version != TMPFS_ARGS_VERSION) return EINVAL; /* Handle retrieval of mount point arguments. */ if (mp->mnt_flag & MNT_GETARGS) { if (mp->mnt_data == NULL) return EIO; tmp = VFS_TO_TMPFS(mp); args->ta_version = TMPFS_ARGS_VERSION; args->ta_nodes_max = tmp->tm_nodes_max; args->ta_size_max = tmp->tm_mem_limit; root = tmp->tm_root; args->ta_root_uid = root->tn_uid; args->ta_root_gid = root->tn_gid; args->ta_root_mode = root->tn_mode; *data_len = sizeof(*args); return 0; } /* Prohibit mounts if there is not enough memory. 
*/ if (tmpfs_mem_info(true) < uvmexp.freetarg) return EINVAL; /* Check for invalid uid and gid arguments */ if (args->ta_root_uid == VNOVAL || args->ta_root_gid == VNOVAL) return EINVAL; /* Get the memory usage limit for this file-system. */ if (args->ta_size_max < PAGE_SIZE) { memlimit = UINT64_MAX; set_memlimit = false; } else { memlimit = args->ta_size_max; set_memlimit = true; } KASSERT(memlimit > 0); if (args->ta_nodes_max <= 3) { nodes = 3 + (memlimit / 1024); set_nodes = false; } else { nodes = args->ta_nodes_max; set_nodes = true; } nodes = MIN(nodes, INT_MAX); KASSERT(nodes >= 3); if (mp->mnt_flag & MNT_UPDATE) { tmp = VFS_TO_TMPFS(mp); if (set_nodes && nodes < tmp->tm_nodes_cnt) return EBUSY; if ((mp->mnt_iflag & IMNT_WANTRDONLY)) { /* Changing from read/write to read-only. */ flags = WRITECLOSE; if ((mp->mnt_flag & MNT_FORCE)) flags |= FORCECLOSE; error = vflush(mp, NULL, flags); if (error) return error; } if (set_memlimit) { if ((error = tmpfs_mntmem_set(tmp, memlimit)) != 0) return error; } if (set_nodes) tmp->tm_nodes_max = nodes; root = tmp->tm_root; root->tn_uid = args->ta_root_uid; root->tn_gid = args->ta_root_gid; root->tn_mode = args->ta_root_mode; return 0; } mp->mnt_flag |= MNT_LOCAL; mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN; mp->mnt_fs_bshift = PAGE_SHIFT; mp->mnt_dev_bshift = DEV_BSHIFT; mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO | IMNT_SHRLOOKUP | IMNT_NCLOOKUP; vfs_getnewfsid(mp); /* Allocate the tmpfs mount structure and fill it. */ tmp = kmem_zalloc(sizeof(tmpfs_mount_t), KM_SLEEP); tmp->tm_nodes_max = nodes; tmp->tm_nodes_cnt = 0; LIST_INIT(&tmp->tm_nodes); mutex_init(&tmp->tm_lock, MUTEX_DEFAULT, IPL_NONE); tmpfs_mntmem_init(tmp, memlimit); mp->mnt_data = tmp; /* Allocate the root node. 
*/ vattr_null(&va); va.va_type = VDIR; va.va_mode = args->ta_root_mode & ALLPERMS; va.va_uid = args->ta_root_uid; va.va_gid = args->ta_root_gid; error = vcache_new(mp, NULL, &va, NOCRED, NULL, &vp); if (error) { mp->mnt_data = NULL; tmpfs_mntmem_destroy(tmp); mutex_destroy(&tmp->tm_lock); kmem_free(tmp, sizeof(*tmp)); return error; } KASSERT(vp != NULL); root = VP_TO_TMPFS_NODE(vp); KASSERT(root != NULL); /* * Parent of the root inode is itself. Also, root inode has no * directory entry (i.e. is never attached), thus hold an extra * reference (link) for it. */ root->tn_links++; root->tn_spec.tn_dir.tn_parent = root; tmp->tm_root = root; vrele(vp); error = set_statvfs_info(path, UIO_USERSPACE, "tmpfs", UIO_SYSSPACE, mp->mnt_op->vfs_name, mp, curlwp); if (error) { (void)tmpfs_unmount(mp, MNT_FORCE); } return error; } int tmpfs_start(struct mount *mp, int flags) { return 0; } int tmpfs_unmount(struct mount *mp, int mntflags) { tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); tmpfs_node_t *node, *cnode; int error, flags = 0; /* Handle forced unmounts. */ if (mntflags & MNT_FORCE) flags |= FORCECLOSE; /* Finalize all pending I/O. */ error = vflush(mp, NULL, flags); if (error != 0) return error; /* * First round, detach and destroy all directory entries. * Also, clear the pointers to the vnodes - they are gone. */ LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { tmpfs_dirent_t *de; node->tn_vnode = NULL; if (node->tn_type != VDIR) { continue; } while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) { cnode = de->td_node; if (cnode && cnode != TMPFS_NODE_WHITEOUT) { cnode->tn_vnode = NULL; } tmpfs_dir_detach(node, de); tmpfs_free_dirent(tmp, de); } /* Extra virtual entry (itself for the root). */ node->tn_links--; } /* Release the reference on root (diagnostic). */ node = tmp->tm_root; node->tn_links--; /* Second round, destroy all inodes. */ while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) { tmpfs_free_node(tmp, node); } /* Throw away the tmpfs_mount structure. 
*/ tmpfs_mntmem_destroy(tmp); mutex_destroy(&tmp->tm_lock); kmem_free(tmp, sizeof(*tmp)); mp->mnt_data = NULL; return 0; } int tmpfs_root(struct mount *mp, int lktype, vnode_t **vpp) { tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root; int error; error = vcache_get(mp, &node, sizeof(node), vpp); if (error) return error; error = vn_lock(*vpp, lktype); if (error) { vrele(*vpp); *vpp = NULL; return error; } return 0; } int tmpfs_vget(struct mount *mp, ino_t ino, int lktype, vnode_t **vpp) { return EOPNOTSUPP; } int tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int lktype, vnode_t **vpp) { tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); tmpfs_node_t *node; tmpfs_fid_t tfh; int error; if (fhp->fid_len != sizeof(tmpfs_fid_t)) { return EINVAL; } memcpy(&tfh, fhp, sizeof(tmpfs_fid_t)); mutex_enter(&tmp->tm_lock); /* XXX big oof .. use a better data structure */ LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { if (node->tn_id == tfh.tf_id) { /* Prevent this node from disappearing. */ atomic_inc_32(&node->tn_holdcount); break; } } mutex_exit(&tmp->tm_lock); if (node == NULL) return ESTALE; error = vcache_get(mp, &node, sizeof(node), vpp); /* If this node has been reclaimed free it now. */ if (atomic_dec_32_nv(&node->tn_holdcount) == TMPFS_NODE_RECLAIMED) { KASSERT(error != 0); tmpfs_free_node(tmp, node); } if (error) return (error == ENOENT ? 
ESTALE : error);

	error = vn_lock(*vpp, lktype);
	if (error) {
		vrele(*vpp);
		*vpp = NULL;
		return error;
	}
	/* Generation mismatch: the inode number was recycled. */
	if (TMPFS_NODE_GEN(node) != tfh.tf_gen) {
		vput(*vpp);
		*vpp = NULL;
		return ESTALE;
	}
	return 0;
}

/*
 * tmpfs_vptofh: convert a vnode into a file handle (inode number
 * plus generation).  Reports the required size via fh_size.
 */
int
tmpfs_vptofh(vnode_t *vp, struct fid *fhp, size_t *fh_size)
{
	tmpfs_fid_t tfh;
	tmpfs_node_t *node;

	/* Buffer too small: tell the caller how much is needed. */
	if (*fh_size < sizeof(tmpfs_fid_t)) {
		*fh_size = sizeof(tmpfs_fid_t);
		return E2BIG;
	}
	*fh_size = sizeof(tmpfs_fid_t);

	node = VP_TO_TMPFS_NODE(vp);
	memset(&tfh, 0, sizeof(tfh));
	tfh.tf_len = sizeof(tmpfs_fid_t);
	tfh.tf_gen = TMPFS_NODE_GEN(node);
	tfh.tf_id = node->tn_id;
	memcpy(fhp, &tfh, sizeof(tfh));

	return 0;
}

/*
 * tmpfs_statvfs: fill in file system statistics.  Block and inode
 * counts are derived from the mount's memory accounting under
 * tm_acc_lock.
 */
int
tmpfs_statvfs(struct mount *mp, struct statvfs *sbp)
{
	tmpfs_mount_t *tmp;
	fsfilcnt_t freenodes;
	size_t avail;

	tmp = VFS_TO_TMPFS(mp);

	sbp->f_iosize = sbp->f_frsize = sbp->f_bsize = PAGE_SIZE;

	mutex_enter(&tmp->tm_acc_lock);
	avail = tmpfs_pages_avail(tmp);
	sbp->f_blocks = (tmpfs_bytes_max(tmp) >> PAGE_SHIFT);
	sbp->f_bavail = sbp->f_bfree = avail;
	sbp->f_bresvd = 0;

	/*
	 * Free inodes are capped both by the configured node limit and
	 * by the memory still available for node structures.
	 */
	freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt,
	    avail * PAGE_SIZE / sizeof(tmpfs_node_t));

	sbp->f_files = tmp->tm_nodes_cnt + freenodes;
	sbp->f_favail = sbp->f_ffree = freenodes;
	sbp->f_fresvd = 0;
	mutex_exit(&tmp->tm_acc_lock);

	copy_statvfs_info(sbp, mp);

	return 0;
}

/*
 * tmpfs_sync: no-op; there is nothing to write back.
 */
int
tmpfs_sync(struct mount *mp, int waitfor, kauth_cred_t uc)
{
	return 0;
}

int
tmpfs_snapshot(struct mount *mp, vnode_t *vp, struct timespec *ctime)
{
	return EOPNOTSUPP;
}

/*
 * tmpfs vfs operations.
 */

extern const struct vnodeopv_desc tmpfs_fifoop_opv_desc;
extern const struct vnodeopv_desc tmpfs_specop_opv_desc;
extern const struct vnodeopv_desc tmpfs_vnodeop_opv_desc;

/* Vnode operation vectors provided by this file system. */
const struct vnodeopv_desc * const tmpfs_vnodeopv_descs[] = {
	&tmpfs_fifoop_opv_desc,
	&tmpfs_specop_opv_desc,
	&tmpfs_vnodeop_opv_desc,
	NULL,
};

/* VFS operations table: the file system's entry points. */
struct vfsops tmpfs_vfsops = {
	.vfs_name = MOUNT_TMPFS,
	.vfs_min_mount_data = sizeof (struct tmpfs_args),
	.vfs_mount = tmpfs_mount,
	.vfs_start = tmpfs_start,
	.vfs_unmount = tmpfs_unmount,
	.vfs_root = tmpfs_root,
	.vfs_quotactl = (void *)eopnotsupp,
	.vfs_statvfs = tmpfs_statvfs,
	.vfs_sync = tmpfs_sync,
	.vfs_vget = tmpfs_vget,
	.vfs_loadvnode = tmpfs_loadvnode,
	.vfs_newvnode = tmpfs_newvnode,
	.vfs_fhtovp = tmpfs_fhtovp,
	.vfs_vptofh = tmpfs_vptofh,
	.vfs_init = tmpfs_init,
	.vfs_done = tmpfs_done,
	.vfs_snapshot = tmpfs_snapshot,
	.vfs_extattrctl = vfs_stdextattrctl,
	.vfs_suspendctl = genfs_suspendctl,
	.vfs_renamelock_enter = genfs_renamelock_enter,
	.vfs_renamelock_exit = genfs_renamelock_exit,
	.vfs_fsync = (void *)eopnotsupp,
	.vfs_opv_descs = tmpfs_vnodeopv_descs
};

/*
 * tmpfs_modcmd: module control; attach/detach the file system on
 * module load/unload.
 */
static int
tmpfs_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return vfs_attach(&tmpfs_vfsops);
	case MODULE_CMD_FINI:
		return vfs_detach(&tmpfs_vfsops);
	default:
		return ENOTTY;
	}
}
| 276 1 1 278 278 275 276 241 238 236 45 44 45 17 17 17 17 17 16 17 17 17 17 18 15 18 18 18 6 6 2 18 18 18 2 18 17 6 5 17 13 13 13 8 17 17 16 17 17 17 6 6 5 1 1 5 5 1 1 1 1 1 1 5 27 27 27 15 15 3 27 27 27 10 10 2 1 1 2 1 9 9 9 2 2 7 7 7 7 1 7 5 5 5 5 4 5 4 4 4 4 4 4 4 5 5 4 4 4 5 5 5 5 3 5 5 5 2 1 1 1 1 1 1 1 1 4 5 18 17 18 19 19 19 19 37 36 37 37 36 36 3 13 13 13 13 13 13 3 37 35 37 37 36 36 35 13 37 13 13 36 11 11 11 11 21 21 21 21 138 139 139 138 137 29 29 28 28 28 28 26 3 3 2 26 26 14 12 12 28 222 157 221 29 210 212 218 169 162 188 188 6 6 5 11 11 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 
887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 
1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 
1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 
2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 | /* $NetBSD: uipc_mbuf.c,v 1.246 2022/04/09 23:38:33 riastradh Exp $ */ /* * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center, and Maxime Villard. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.246 2022/04/09 23:38:33 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_mbuftrace.h" #include "opt_nmbclusters.h" #include "opt_ddb.h" #include "ether.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/atomic.h> #include <sys/cpu.h> #include <sys/proc.h> #include <sys/mbuf.h> #include <sys/kernel.h> #include <sys/syslog.h> #include <sys/domain.h> #include <sys/protosw.h> #include <sys/percpu.h> #include <sys/pool.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <net/if.h> pool_cache_t mb_cache; /* mbuf cache */ static pool_cache_t mcl_cache; /* mbuf cluster cache */ struct mbstat mbstat; int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; static void mb_drain(void *, int); static int mb_ctor(void *, void *, int); static void sysctl_kern_mbuf_setup(void); static struct sysctllog *mbuf_sysctllog; static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); static int m_copyback_internal(struct mbuf **, int, int, const void *, int, int); /* Flags for m_copyback_internal. 
*/ #define CB_COPYBACK 0x0001 /* copyback from cp */ #define CB_PRESERVE 0x0002 /* preserve original data */ #define CB_COW 0x0004 /* do copy-on-write */ #define CB_EXTEND 0x0008 /* extend chain */ static const char mclpool_warnmsg[] = "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); static percpu_t *mbstat_percpu; #ifdef MBUFTRACE struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); struct mowner unknown_mowners[] = { MOWNER_INIT("unknown", "free"), MOWNER_INIT("unknown", "data"), MOWNER_INIT("unknown", "header"), MOWNER_INIT("unknown", "soname"), MOWNER_INIT("unknown", "soopts"), MOWNER_INIT("unknown", "ftable"), MOWNER_INIT("unknown", "control"), MOWNER_INIT("unknown", "oobdata"), }; struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); #endif #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) #define MCLADDREFERENCE(o, n) \ do { \ KASSERT(((o)->m_flags & M_EXT) != 0); \ KASSERT(((n)->m_flags & M_EXT) == 0); \ KASSERT((o)->m_ext.ext_refcnt >= 1); \ (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ (n)->m_ext_ref = (o)->m_ext_ref; \ mowner_ref((n), (n)->m_flags); \ } while (/* CONSTCOND */ 0) static int nmbclusters_limit(void) { #if defined(PMAP_MAP_POOLPAGE) /* direct mapping, doesn't use space in kmem_arena */ vsize_t max_size = physmem / 4; #else vsize_t max_size = MIN(physmem / 4, nkmempages / 4); #endif max_size = max_size * PAGE_SIZE / MCLBYTES; #ifdef NMBCLUSTERS_MAX max_size = MIN(max_size, NMBCLUSTERS_MAX); #endif return max_size; } /* * Initialize the mbuf allocator. 
*/ void mbinit(void) { CTASSERT(sizeof(struct _m_ext) <= MHLEN); CTASSERT(sizeof(struct mbuf) == MSIZE); sysctl_kern_mbuf_setup(); mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", NULL, IPL_VM, mb_ctor, NULL, NULL); KASSERT(mb_cache != NULL); mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl", NULL, IPL_VM, NULL, NULL, NULL); KASSERT(mcl_cache != NULL); pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); /* * Set an arbitrary default limit on the number of mbuf clusters. */ #ifdef NMBCLUSTERS nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit()); #else nmbclusters = MAX(1024, (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); nmbclusters = MIN(nmbclusters, nmbclusters_limit()); #endif /* * Set the hard limit on the mclpool to the number of * mbuf clusters the kernel is to support. Log the limit * reached message max once a minute. */ pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); /* * Set a low water mark for both mbufs and clusters. This should * help ensure that they can be allocated in a memory starvation * situation. This is important for e.g. diskless systems which * must allocate mbufs in order for the pagedaemon to clean pages. */ pool_cache_setlowat(mb_cache, mblowat); pool_cache_setlowat(mcl_cache, mcllowat); #ifdef MBUFTRACE { /* * Attach the unknown mowners. 
*/ int i; MOWNER_ATTACH(&revoked_mowner); for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); i-- > 0; ) MOWNER_ATTACH(&unknown_mowners[i]); } #endif } static void mb_drain(void *arg, int flags) { struct domain *dp; const struct protosw *pr; struct ifnet *ifp; int s; KERNEL_LOCK(1, NULL); s = splvm(); DOMAIN_FOREACH(dp) { for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); } /* XXX we cannot use psref in H/W interrupt */ if (!cpu_intr_p()) { int bound = curlwp_bind(); IFNET_READER_FOREACH(ifp) { struct psref psref; if_acquire(ifp, &psref); if (ifp->if_drain) (*ifp->if_drain)(ifp); if_release(ifp, &psref); } curlwp_bindx(bound); } splx(s); mbstat.m_drain++; KERNEL_UNLOCK_ONE(NULL); } /* * sysctl helper routine for the kern.mbuf subtree. * nmbclusters, mblowat and mcllowat need range * checking and pool tweaking after being reset. */ static int sysctl_kern_mbuf(SYSCTLFN_ARGS) { int error, newval; struct sysctlnode node; node = *rnode; node.sysctl_data = &newval; switch (rnode->sysctl_num) { case MBUF_NMBCLUSTERS: case MBUF_MBLOWAT: case MBUF_MCLLOWAT: newval = *(int*)rnode->sysctl_data; break; default: return EOPNOTSUPP; } error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return error; if (newval < 0) return EINVAL; switch (node.sysctl_num) { case MBUF_NMBCLUSTERS: if (newval < nmbclusters) return EINVAL; if (newval > nmbclusters_limit()) return EINVAL; nmbclusters = newval; pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); break; case MBUF_MBLOWAT: mblowat = newval; pool_cache_setlowat(mb_cache, mblowat); break; case MBUF_MCLLOWAT: mcllowat = newval; pool_cache_setlowat(mcl_cache, mcllowat); break; } return 0; } #ifdef MBUFTRACE static void mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) { struct mowner_counter *mc = v1; struct mowner_user *mo_user = v2; int i; for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { mo_user->mo_counter[i] += 
mc->mc_counter[i]; } } static void mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) { memset(mo_user, 0, sizeof(*mo_user)); CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user); } static int sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) { struct mowner *mo; size_t len = 0; int error = 0; if (namelen != 0) return EINVAL; if (newp != NULL) return EPERM; LIST_FOREACH(mo, &mowners, mo_link) { struct mowner_user mo_user; mowner_convert_to_user(mo, &mo_user); if (oldp != NULL) { if (*oldlenp - len < sizeof(mo_user)) { error = ENOMEM; break; } error = copyout(&mo_user, (char *)oldp + len, sizeof(mo_user)); if (error) break; } len += sizeof(mo_user); } if (error == 0) *oldlenp = len; return error; } #endif /* MBUFTRACE */ void mbstat_type_add(int type, int diff) { struct mbstat_cpu *mb; int s; s = splvm(); mb = percpu_getref(mbstat_percpu); mb->m_mtypes[type] += diff; percpu_putref(mbstat_percpu); splx(s); } static void mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) { struct mbstat_cpu *mbsc = v1; struct mbstat *mbs = v2; int i; for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { mbs->m_mtypes[i] += mbsc->m_mtypes[i]; } } static void mbstat_convert_to_user(struct mbstat *mbs) { memset(mbs, 0, sizeof(*mbs)); mbs->m_drain = mbstat.m_drain; percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs); } static int sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) { struct sysctlnode node; struct mbstat mbs; mbstat_convert_to_user(&mbs); node = *rnode; node.sysctl_data = &mbs; node.sysctl_size = sizeof(mbs); return sysctl_lookup(SYSCTLFN_CALL(&node)); } static void sysctl_kern_mbuf_setup(void) { KASSERT(mbuf_sysctllog == NULL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, 
CTLTYPE_NODE, "mbuf", SYSCTL_DESCR("mbuf control variables"), NULL, 0, NULL, 0, CTL_KERN, KERN_MBUF, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, CTLTYPE_INT, "msize", SYSCTL_DESCR("mbuf base size"), NULL, msize, NULL, 0, CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, CTLTYPE_INT, "mclbytes", SYSCTL_DESCR("mbuf cluster size"), NULL, mclbytes, NULL, 0, CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "nmbclusters", SYSCTL_DESCR("Limit on the number of mbuf clusters"), sysctl_kern_mbuf, 0, &nmbclusters, 0, CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "mblowat", SYSCTL_DESCR("mbuf low water mark"), sysctl_kern_mbuf, 0, &mblowat, 0, CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "mcllowat", SYSCTL_DESCR("mbuf cluster low water mark"), sysctl_kern_mbuf, 0, &mcllowat, 0, CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("mbuf allocation statistics"), sysctl_kern_mbuf_stats, 0, NULL, 0, CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); #ifdef MBUFTRACE sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "mowners", SYSCTL_DESCR("Information about mbuf owners"), sysctl_kern_mbuf_mowners, 0, NULL, 0, CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); #endif } static int mb_ctor(void *arg, void *object, int flags) { struct mbuf *m = object; #ifdef POOL_VTOPHYS m->m_paddr = POOL_VTOPHYS(m); #else m->m_paddr = M_PADDR_INVALID; #endif return 0; } /* * Add mbuf to the end of a chain */ struct mbuf * m_add(struct mbuf *c, struct mbuf *m) { struct mbuf *n; 
	if (c == NULL)
		return m;

	/* Walk to the tail of the chain and append. */
	for (n = c; n->m_next != NULL; n = n->m_next)
		continue;
	n->m_next = m;
	return c;
}

/*
 * m_get: allocate a plain (non-header) mbuf of the given type from
 * the mbuf pool cache.  how is M_WAIT or M_DONTWAIT; returns NULL
 * on failure.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	KASSERT(type != MT_FREE);

	m = pool_cache_get(mb_cache,
	    how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT);
	if (m == NULL)
		return NULL;
	KASSERT(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE);

	mbstat_type_add(type, 1);

	mowner_init(m, type);
	m->m_ext_ref = m;	/* default */
	m->m_type = type;
	m->m_len = 0;
	m->m_next = NULL;
	m->m_nextpkt = NULL;	/* default */
	m->m_data = m->m_dat;
	m->m_flags = 0;		/* default */

	return m;
}

/*
 * m_gethdr: allocate a packet-header mbuf and initialize all of the
 * m_pkthdr fields to their empty defaults.
 */
struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	m = m_get(how, type);
	if (m == NULL)
		return NULL;

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;

	m_reset_rcvif(m);
	m->m_pkthdr.len = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	m->m_pkthdr.segsz = 0;
	m->m_pkthdr.ether_vtag = 0;
	m->m_pkthdr.pkthdr_flags = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	m->m_pkthdr.pattr_class = NULL;
	m->m_pkthdr.pattr_af = AF_UNSPEC;
	m->m_pkthdr.pattr_hdr = NULL;

	return m;
}

/*
 * m_clget: attach a cluster buffer from the cluster pool cache to the
 * given mbuf.  On failure ext_buf is left NULL; the caller must check
 * for M_EXT.
 */
void
m_clget(struct mbuf *m, int how)
{
	m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache,
	    how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT,
	    &m->m_ext_storage.ext_paddr);

	if (m->m_ext_storage.ext_buf == NULL)
		return;

	KASSERT(((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes
	    <= PAGE_SIZE);

	MCLINITREFERENCE(m);
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) |
	    M_EXT|M_EXT_CLUSTER|M_EXT_RW;
	m->m_ext.ext_size = MCLBYTES;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_arg = NULL;
	/* ext_paddr initialized above */

	mowner_ref(m, M_EXT|M_EXT_CLUSTER);
}

/*
 * m_getcl: allocate an mbuf (with a packet header if M_PKTHDR is set
 * in flags) and attach a cluster to it; NULL if either step fails.
 */
struct mbuf *
m_getcl(int how, int type, int flags)
{
	struct mbuf *mp;

	if ((flags & M_PKTHDR) != 0)
		mp = m_gethdr(how, type);
	else
		mp = m_get(how, type);

	if (mp == NULL)
		return NULL;

	MCLGET(mp, how);
	if ((mp->m_flags & M_EXT) != 0)
		return mp;

	m_free(mp);
	return NULL;
}

/*
 * Utility function for M_PREPEND. Do *NOT* use it directly.
*/ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; if (__predict_false(len > MHLEN)) { panic("%s: len > MHLEN", __func__); } KASSERT(len != M_COPYALL); mn = m_get(how, m->m_type); if (mn == NULL) { m_freem(m); return NULL; } if (m->m_flags & M_PKTHDR) { m_move_pkthdr(mn, m); } else { MCLAIM(mn, m->m_owner); } mn->m_next = m; m = mn; if (m->m_flags & M_PKTHDR) { if (len < MHLEN) m_align(m, len); } else { if (len < MLEN) m_align(m, len); } m->m_len = len; return m; } struct mbuf * m_copym(struct mbuf *m, int off, int len, int wait) { /* Shallow copy on M_EXT. */ return m_copy_internal(m, off, len, wait, false); } struct mbuf * m_dup(struct mbuf *m, int off, int len, int wait) { /* Deep copy. */ return m_copy_internal(m, off, len, wait, true); } static inline int m_copylen(int len, int copylen) { return (len == M_COPYALL) ? copylen : uimin(len, copylen); } static struct mbuf * m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; if (off < 0 || (len != M_COPYALL && len < 0)) panic("%s: off %d, len %d", __func__, off, len); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { if (m == NULL) panic("%s: m == NULL, off %d", __func__, off); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = NULL; while (len == M_COPYALL || len > 0) { if (m == NULL) { if (len != M_COPYALL) panic("%s: m == NULL, len %d [!COPYALL]", __func__, len); break; } n = m_get(wait, m->m_type); *np = n; if (n == NULL) goto nospace; MCLAIM(n, m->m_owner); if (copyhdr) { m_copy_pkthdr(n, m); if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = m_copylen(len, m->m_len - off); if (m->m_flags & M_EXT) { if (!deep) { n->m_data = m->m_data + off; MCLADDREFERENCE(m, n); } else { /* * We don't care if MCLGET fails. n->m_len is * recomputed and handles that. 
*/ MCLGET(n, wait); n->m_len = 0; n->m_len = M_TRAILINGSPACE(n); n->m_len = m_copylen(len, n->m_len); n->m_len = uimin(n->m_len, m->m_len - off); memcpy(mtod(n, void *), mtod(m, char *) + off, (unsigned)n->m_len); } } else { memcpy(mtod(n, void *), mtod(m, char *) + off, (unsigned)n->m_len); } if (len != M_COPYALL) len -= n->m_len; off += n->m_len; KASSERT(off <= m->m_len); if (off == m->m_len) { m = m->m_next; off = 0; } np = &n->m_next; } return top; nospace: m_freem(top); return NULL; } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. */ struct mbuf * m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { panic("%s: no header (m = %p)", __func__, m); } n = m_get(how, m->m_type); top = n; if (!n) goto nospace; MCLAIM(n, m->m_owner); m_copy_pkthdr(n, m); n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; MCLADDREFERENCE(m, n); } else { memcpy(mtod(n, char *), mtod(m, char *), n->m_len); } m = m->m_next; while (m) { o = m_get(how, m->m_type); if (!o) goto nospace; MCLAIM(o, m->m_owner); n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; MCLADDREFERENCE(m, n); } else { memcpy(mtod(n, char *), mtod(m, char *), n->m_len); } m = m->m_next; } return top; nospace: m_freem(top); return NULL; } void m_copydata(struct mbuf *m, int off, int len, void *cp) { unsigned int count; struct mbuf *m0 = m; int len0 = len; int off0 = off; void *cp0 = cp; KASSERT(len != M_COPYALL); if (off < 0 || len < 0) panic("m_copydata: off %d, len %d", off, len); while (off > 0) { if (m == NULL) panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", m0, len0, off0, cp0, off, off0 - off); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { if (m == NULL) panic("m_copydata(%p,%d,%d,%p): " "m=NULL, off=%d (%d), len=%d (%d)", m0, len0, off0, cp0, off, off0 
- off, len, len0 - len); count = uimin(m->m_len - off, len); memcpy(cp, mtod(m, char *) + off, count); len -= count; cp = (char *)cp + count; off = 0; m = m->m_next; } } /* * Concatenate mbuf chain n to m. * n might be copied into m (when n->m_len is small), therefore data portion of * n could be copied into an mbuf of different mbuf type. * Any m_pkthdr is not updated. */ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(struct mbuf *mp, int req_len) { int len = req_len; struct mbuf *m; int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == NULL) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. 
*/ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } if (m) { while (m->m_next) (m = m->m_next)->m_len = 0; } } } /* * m_ensure_contig: rearrange an mbuf chain that given length of bytes * would be contiguous and in the data area of an mbuf (therefore, mtod() * would work for a structure of given length). * * => On success, returns true and the resulting mbuf chain; false otherwise. * => The mbuf chain may change, but is always preserved valid. */ bool m_ensure_contig(struct mbuf **m0, int len) { struct mbuf *n = *m0, *m; size_t count, space; KASSERT(len != M_COPYALL); /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) { return true; } m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) { return false; } m = m_get(M_DONTWAIT, n->m_type); if (m == NULL) { return false; } MCLAIM(m, n->m_owner); if (n->m_flags & M_PKTHDR) { m_move_pkthdr(m, n); } } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); m->m_next = n; *m0 = m; return len <= 0; } /* * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. */ struct mbuf * m_pullup(struct mbuf *n, int len) { struct mbuf *m = n; KASSERT(len != M_COPYALL); if (!m_ensure_contig(&m, len)) { KASSERT(m != NULL); m_freem(m); m = NULL; } return m; } /* * ensure that [off, off + len) is contiguous on the mbuf chain "m". * packet chain before "off" is kept untouched. 
* if offp == NULL, the target will start at <retval, 0> on resulting chain. * if offp != NULL, the target will start at <retval, *offp> on resulting chain. * * on error return (NULL return value), original "m" will be freed. * * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) */ struct mbuf * m_pulldown(struct mbuf *m, int off, int len, int *offp) { struct mbuf *n, *o; int hlen, tlen, olen; int sharedcluster; /* Check invalid arguments. */ if (m == NULL) panic("%s: m == NULL", __func__); if (len > MCLBYTES) { m_freem(m); return NULL; } n = m; while (n != NULL && off > 0) { if (n->m_len > off) break; off -= n->m_len; n = n->m_next; } /* Be sure to point non-empty mbuf. */ while (n != NULL && n->m_len == 0) n = n->m_next; if (!n) { m_freem(m); return NULL; /* mbuf chain too short */ } sharedcluster = M_READONLY(n); /* * The target data is on <n, off>. If we got enough data on the mbuf * "n", we're done. */ #ifdef __NO_STRICT_ALIGNMENT if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) #else if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster && ALIGNED_POINTER((mtod(n, char *) + off), uint32_t)) #endif goto ok; /* * When (len <= n->m_len - off) and (off != 0), it is a special case. * Len bytes from <n, off> sit in single mbuf, but the caller does * not like the starting position (off). * * Chop the current mbuf into two pieces, set off to 0. */ if (len <= n->m_len - off) { struct mbuf *mlast; o = m_dup(n, off, n->m_len - off, M_DONTWAIT); if (o == NULL) { m_freem(m); return NULL; /* ENOBUFS */ } KASSERT(o->m_len >= len); for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) ; n->m_len = off; mlast->m_next = n->m_next; n->m_next = o; n = o; off = 0; goto ok; } /* * We need to take hlen from <n, off> and tlen from <n->m_next, 0>, * and construct contiguous mbuf with m_len == len. * * Note that hlen + tlen == len, and tlen > 0. 
*/ hlen = n->m_len - off; tlen = len - hlen; /* * Ensure that we have enough trailing data on mbuf chain. If not, * we can do nothing about the chain. */ olen = 0; for (o = n->m_next; o != NULL; o = o->m_next) olen += o->m_len; if (hlen + olen < len) { m_freem(m); return NULL; /* mbuf chain too short */ } /* * Easy cases first. We need to use m_copydata() to get data from * <n->m_next, 0>. */ if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && !sharedcluster) { m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len); n->m_len += tlen; m_adj(n->m_next, tlen); goto ok; } if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && #ifndef __NO_STRICT_ALIGNMENT ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) && #endif !sharedcluster && n->m_next->m_len >= tlen) { n->m_next->m_data -= hlen; n->m_next->m_len += hlen; memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen); n->m_len -= hlen; n = n->m_next; off = 0; goto ok; } /* * Now, we need to do the hard way. Don't copy as there's no room * on both ends. */ o = m_get(M_DONTWAIT, m->m_type); if (o && len > MLEN) { MCLGET(o, M_DONTWAIT); if ((o->m_flags & M_EXT) == 0) { m_free(o); o = NULL; } } if (!o) { m_freem(m); return NULL; /* ENOBUFS */ } /* get hlen from <n, off> into <o, 0> */ o->m_len = hlen; memcpy(mtod(o, void *), mtod(n, char *) + off, hlen); n->m_len -= hlen; /* get tlen from <n->m_next, 0> into <o, hlen> */ m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len); o->m_len += tlen; m_adj(n->m_next, tlen); o->m_next = n->m_next; n->m_next = o; n = o; off = 0; ok: if (offp) *offp = off; return n; } /* * Like m_pullup(), except a new mbuf is always allocated, and we allow * the amount of empty space before the data in the new mbuf to be specified * (in the event that the caller expects to prepend later). 
 */
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	KASSERT(len != M_COPYALL);

	/* The requested span plus the leading pad must fit in one mbuf. */
	if (len > ((int)MHLEN - dstoff))
		goto bad;
	m = m_get(M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	if (n->m_flags & M_PKTHDR) {
		m_move_pkthdr(m, n);
	}
	/* Reserve dstoff bytes of leading space for later prepends. */
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/* Pull at least max_protohdr bytes per pass when available. */
		count = uimin(uimin(uimax(len, max_protohdr), space),
		    n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);	/* source mbuf drained: free it */
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain was too short to satisfy the request. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return m;

bad:
	m_freem(n);
	return NULL;
}

/*
 * m_split: split a chain at len0 bytes, returning the tail.
 * The head (m0) is truncated in place.
 */
struct mbuf *
m_split(struct mbuf *m0, int len, int wait)
{
	return m_split_internal(m0, len, wait, true);
}

static struct mbuf *
m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	KASSERT(len0 != M_COPYALL);

	/* Walk to the mbuf containing the split point. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return NULL;	/* chain shorter than len0 */
	remain = m->m_len - len;

	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		/* Give the tail its own packet header. */
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return NULL;
		MCLAIM(n, m0->m_owner);
		m_copy_rcvif(n, m0);
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		/* Save for rollback if the recursive split below fails. */
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;

		if (m->m_flags & M_EXT)
			goto extpacket;

		if (remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_len = 0;
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void)m_free(n);
				/* Undo the pkthdr.len truncation above. */
				m0->m_pkthdr.len = len_save;
				return NULL;
			}
			return n;
		} else {
			m_align(n, remain);
		}
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary: just unlink. */
		n = m->m_next;
		m->m_next = NULL;
		return n;
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return NULL;
		MCLAIM(n, m->m_owner);
		m_align(n, remain);
	}

extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the external storage instead of copying. */
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *),
mtod(m, char *) + len, remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = NULL; return n; } /* * Routine to copy from device local memory into mbufs. */ struct mbuf * m_devget(char *buf, int totlen, int off, struct ifnet *ifp) { struct mbuf *m; struct mbuf *top = NULL, **mp = ⊤ char *cp, *epkt; int len; cp = buf; epkt = cp + totlen; if (off) { /* * If 'off' is non-zero, packet is trailer-encapsulated, * so we have to skip the type and length fields. */ cp += off + 2 * sizeof(uint16_t); totlen -= 2 * sizeof(uint16_t); } m = m_gethdr(M_DONTWAIT, MT_DATA); if (m == NULL) return NULL; m_set_rcvif(m, ifp); m->m_pkthdr.len = totlen; m->m_len = MHLEN; while (totlen > 0) { if (top) { m = m_get(M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return NULL; } m->m_len = MLEN; } len = uimin(totlen, epkt - cp); if (len >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m_freem(top); return NULL; } m->m_len = len = uimin(len, MCLBYTES); } else { /* * Place initial small packet/header at end of mbuf. */ if (len < m->m_len) { if (top == 0 && len + max_linkhdr <= m->m_len) m->m_data += max_linkhdr; m->m_len = len; } else len = m->m_len; } memcpy(mtod(m, void *), cp, (size_t)len); cp += len; *mp = m; mp = &m->m_next; totlen -= len; if (cp == epkt) cp = buf; } return top; } /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. 
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif

	if (m0 == NULL)
		return;

	/* CB_EXTEND: the chain may be grown to make room for [off, off+len). */
#if defined(DEBUG)
	error =
#endif
	m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND,
	    M_DONTWAIT);

#if defined(DEBUG)
	/* With CB_EXTEND, failure or head replacement indicates a bug. */
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif
}

/*
 * m_copyback_cow: copy-on-write variant of m_copyback().  Read-only
 * mbufs in the target range are replaced with writable ones.  On
 * failure the whole chain is freed and NULL is returned.
 */
struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KASSERT(len != M_COPYALL);
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW,
	    how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure [off, off+len) of the chain is writable,
 * preserving existing data (CB_PRESERVE|CB_COW).  Returns 0 or an
 * error; *mp may be replaced.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	int origlen = m_length(*mp);
#endif

	error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW,
	    how);
	if (error)
		return error;

#if defined(DEBUG)
	/* The operation must not change the chain's total length. */
	int reslen = 0;
	for (struct mbuf *n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif

	return 0;
}

/*
 * m_copyback_internal: common worker for m_copyback(), m_copyback_cow()
 * and m_makewritable().  Behavior is selected by CB_* flags:
 * CB_COPYBACK copies from vp, CB_PRESERVE keeps existing data when
 * replacing mbufs, CB_EXTEND grows the chain, CB_COW replaces
 * read-only mbufs.
 */
static int
m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp,
    int flags, int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL);

	if (len == M_COPYALL)
		len = m_length(*mp0) - off;

	/*
	 * we don't bother to update "totlen" in the case of CB_COW,
	 * assuming that CB_EXTEND and CB_COW are exclusive.
*/ KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0); mp = mp0; m = *mp; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { int tspace; extend: if ((flags & CB_EXTEND) == 0) goto out; /* * try to make some space at the end of "m". */ mlen = m->m_len; if (off + len >= MINCLSIZE && (m->m_flags & M_EXT) == 0 && m->m_len == 0) { MCLGET(m, how); } tspace = M_TRAILINGSPACE(m); if (tspace > 0) { tspace = uimin(tspace, off + len); KASSERT(tspace > 0); memset(mtod(m, char *) + m->m_len, 0, uimin(off, tspace)); m->m_len += tspace; off += mlen; totlen -= mlen; continue; } /* * need to allocate an mbuf. */ if (off + len >= MINCLSIZE) { n = m_getcl(how, m->m_type, 0); } else { n = m_get(how, m->m_type); } if (n == NULL) { goto out; } n->m_len = uimin(M_TRAILINGSPACE(n), off + len); memset(mtod(n, char *), 0, uimin(n->m_len, off)); m->m_next = n; } mp = &m->m_next; m = m->m_next; } while (len > 0) { mlen = m->m_len - off; if (mlen != 0 && M_READONLY(m)) { /* * This mbuf is read-only. Allocate a new writable * mbuf and try again. */ char *datap; int eatlen; KASSERT((flags & CB_COW) != 0); /* * if we're going to write into the middle of * a mbuf, split it first. */ if (off > 0) { n = m_split_internal(m, off, how, false); if (n == NULL) goto enobufs; m->m_next = n; mp = &m->m_next; m = n; off = 0; continue; } /* * XXX TODO coalesce into the trailingspace of * the previous mbuf when possible. */ /* * allocate a new mbuf. copy packet header if needed. */ n = m_get(how, m->m_type); if (n == NULL) goto enobufs; MCLAIM(n, m->m_owner); if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { m_move_pkthdr(n, m); n->m_len = MHLEN; } else { if (len >= MINCLSIZE) MCLGET(n, M_DONTWAIT); n->m_len = (n->m_flags & M_EXT) ? MCLBYTES : MLEN; } if (n->m_len > len) n->m_len = len; /* * free the region which has been overwritten. * copying data from old mbufs if requested. 
*/ if (flags & CB_PRESERVE) datap = mtod(n, char *); else datap = NULL; eatlen = n->m_len; while (m != NULL && M_READONLY(m) && n->m_type == m->m_type && eatlen > 0) { mlen = uimin(eatlen, m->m_len); if (datap) { m_copydata(m, 0, mlen, datap); datap += mlen; } m->m_data += mlen; m->m_len -= mlen; eatlen -= mlen; if (m->m_len == 0) *mp = m = m_free(m); } if (eatlen > 0) n->m_len -= eatlen; n->m_next = m; *mp = m = n; continue; } mlen = uimin(mlen, len); if (flags & CB_COPYBACK) { memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); cp += mlen; } len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == NULL) { goto extend; } mp = &m->m_next; m = m->m_next; } out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { KASSERT((flags & CB_EXTEND) != 0); m->m_pkthdr.len = totlen; } return 0; enobufs: return ENOBUFS; } /* * Compress the mbuf chain. Return the new mbuf chain on success, NULL on * failure. The first mbuf is preserved, and on success the pointer returned * is the same as the one passed. */ struct mbuf * m_defrag(struct mbuf *m, int how) { struct mbuf *m0, *mn, *n; int sz; KASSERT((m->m_flags & M_PKTHDR) != 0); if (m->m_next == NULL) return m; /* Defrag to single mbuf if at all possible */ if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) { if (m->m_pkthdr.len <= MHLEN) { if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) { KASSERTMSG(M_LEADINGSPACE(m) + M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len), "too small leading %d trailing %d ro? 
%d" " pkthdr.len %d mlen %d", (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), M_READONLY(m), m->m_pkthdr.len, m->m_len); memmove(m->m_pktdat, m->m_data, m->m_len); m->m_data = m->m_pktdat; KASSERT(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len)); } } else { /* Must copy data before adding cluster */ m0 = m_get(how, MT_DATA); if (m0 == NULL) return NULL; KASSERT(m->m_len <= MHLEN); m_copydata(m, 0, m->m_len, mtod(m0, void *)); MCLGET(m, how); if ((m->m_flags & M_EXT) == 0) { m_free(m0); return NULL; } memcpy(m->m_data, mtod(m0, void *), m->m_len); m_free(m0); } KASSERT(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len)); m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len, mtod(m, char *) + m->m_len); m->m_len = m->m_pkthdr.len; m_freem(m->m_next); m->m_next = NULL; return m; } m0 = m_get(how, MT_DATA); if (m0 == NULL) return NULL; mn = m0; sz = m->m_pkthdr.len - m->m_len; KASSERT(sz >= 0); do { if (sz > MLEN) { MCLGET(mn, how); if ((mn->m_flags & M_EXT) == 0) { m_freem(m0); return NULL; } } mn->m_len = MIN(sz, MCLBYTES); m_copydata(m, m->m_pkthdr.len - sz, mn->m_len, mtod(mn, void *)); sz -= mn->m_len; if (sz > 0) { /* need more mbufs */ n = m_get(how, MT_DATA); if (n == NULL) { m_freem(m0); return NULL; } mn->m_next = n; mn = n; } } while (sz > 0); m_freem(m->m_next); m->m_next = m0; return m; } void m_remove_pkthdr(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR); m_tag_delete_chain(m); m->m_flags &= ~M_PKTHDR; memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); } void m_copy_pkthdr(struct mbuf *to, struct mbuf *from) { KASSERT((to->m_flags & M_EXT) == 0); KASSERT((to->m_flags & M_PKTHDR) == 0 || SLIST_FIRST(&to->m_pkthdr.tags) == NULL); KASSERT((from->m_flags & M_PKTHDR) != 0); to->m_pkthdr = from->m_pkthdr; to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; SLIST_INIT(&to->m_pkthdr.tags); m_tag_copy_chain(to, from); } void m_move_pkthdr(struct mbuf *to, struct mbuf *from) { KASSERT((to->m_flags & M_EXT) == 0); KASSERT((to->m_flags & M_PKTHDR) 
== 0 || SLIST_FIRST(&to->m_pkthdr.tags) == NULL); KASSERT((from->m_flags & M_PKTHDR) != 0); to->m_pkthdr = from->m_pkthdr; to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; from->m_flags &= ~M_PKTHDR; } /* * Set the m_data pointer of a newly-allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. */ void m_align(struct mbuf *m, int len) { int buflen, adjust; KASSERT(len != M_COPYALL); KASSERT(M_LEADINGSPACE(m) == 0); buflen = M_BUFSIZE(m); KASSERT(len <= buflen); adjust = buflen - len; m->m_data += adjust &~ (sizeof(long)-1); } /* * Apply function f to the data in an mbuf chain starting "off" bytes from the * beginning, continuing for "len" bytes. */ int m_apply(struct mbuf *m, int off, int len, int (*f)(void *, void *, unsigned int), void *arg) { unsigned int count; int rval; KASSERT(len != M_COPYALL); KASSERT(len >= 0); KASSERT(off >= 0); while (off > 0) { KASSERT(m != NULL); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL); count = uimin(m->m_len - off, len); rval = (*f)(arg, mtod(m, char *) + off, count); if (rval) return rval; len -= count; off = 0; m = m->m_next; } return 0; } /* * Return a pointer to mbuf/offset of location in mbuf chain. */ struct mbuf * m_getptr(struct mbuf *m, int loc, int *off) { while (loc >= 0) { /* Normal end of search */ if (m->m_len > loc) { *off = loc; return m; } loc -= m->m_len; if (m->m_next == NULL) { if (loc == 0) { /* Point at the end of valid data */ *off = m->m_len; return m; } return NULL; } else { m = m->m_next; } } return NULL; } /* * Release a reference to the mbuf external storage. * * => free the mbuf m itself as well. 
*/ static void m_ext_free(struct mbuf *m) { const bool embedded = MEXT_ISEMBEDDED(m); bool dofree = true; u_int refcnt; KASSERT((m->m_flags & M_EXT) != 0); KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); KASSERT((m->m_flags & M_EXT_CLUSTER) == (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); if (__predict_false(m->m_type == MT_FREE)) { panic("mbuf %p already freed", m); } if (__predict_true(m->m_ext.ext_refcnt == 1)) { refcnt = m->m_ext.ext_refcnt = 0; } else { #ifndef __HAVE_ATOMIC_AS_MEMBAR membar_release(); #endif refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); } if (refcnt > 0) { if (embedded) { /* * other mbuf's m_ext_ref still points to us. */ dofree = false; } else { m->m_ext_ref = m; } } else { /* * dropping the last reference */ #ifndef __HAVE_ATOMIC_AS_MEMBAR membar_acquire(); #endif if (!embedded) { m->m_ext.ext_refcnt++; /* XXX */ m_ext_free(m->m_ext_ref); m->m_ext_ref = m; } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { pool_cache_put_paddr(mcl_cache, m->m_ext.ext_buf, m->m_ext.ext_paddr); } else if (m->m_ext.ext_free) { (*m->m_ext.ext_free)(m, m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_arg); /* * 'm' is already freed by the ext_free callback. */ dofree = false; } else { free(m->m_ext.ext_buf, 0); } } if (dofree) { m->m_type = MT_FREE; m->m_data = NULL; pool_cache_put(mb_cache, m); } } /* * Free a single mbuf and associated external storage. Return the * successor, if any. 
*/ struct mbuf * m_free(struct mbuf *m) { struct mbuf *n; mowner_revoke(m, 1, m->m_flags); mbstat_type_add(m->m_type, -1); if (m->m_flags & M_PKTHDR) m_tag_delete_chain(m); n = m->m_next; if (m->m_flags & M_EXT) { m_ext_free(m); } else { if (__predict_false(m->m_type == MT_FREE)) { panic("mbuf %p already freed", m); } m->m_type = MT_FREE; m->m_data = NULL; pool_cache_put(mb_cache, m); } return n; } void m_freem(struct mbuf *m) { if (m == NULL) return; do { m = m_free(m); } while (m); } #if defined(DDB) void m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) { char ch; bool opt_c = false; bool opt_d = false; #if NETHER > 0 bool opt_v = false; const struct mbuf *m0 = NULL; #endif int no = 0; char buf[512]; while ((ch = *(modif++)) != '\0') { switch (ch) { case 'c': opt_c = true; break; case 'd': opt_d = true; break; #if NETHER > 0 case 'v': opt_v = true; m0 = m; break; #endif default: break; } } nextchain: (*pr)("MBUF(%d) %p\n", no, m); snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", m->m_data, m->m_len, m->m_type, buf); if (opt_d) { int i; unsigned char *p = m->m_data; (*pr)(" data:"); for (i = 0; i < m->m_len; i++) { if (i % 16 == 0) (*pr)("\n"); (*pr)(" %02x", p[i]); } (*pr)("\n"); } (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, m->m_nextpkt); (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), (int)M_READONLY(m)); if ((m->m_flags & M_PKTHDR) != 0) { snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" PRIx32 ", segsz=%u\n", m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); } if ((m->m_flags & M_EXT)) { (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " "ext_free=%p, ext_arg=%p\n", m->m_ext.ext_refcnt, m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_free, m->m_ext.ext_arg); } if ((~m->m_flags 
& (M_EXT|M_EXT_PAGES)) == 0) { vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; vaddr_t eva = sva + m->m_ext.ext_size; int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; int i; (*pr)(" pages:"); for (i = 0; i < n; i ++) { (*pr)(" %p", m->m_ext.ext_pgs[i]); } (*pr)("\n"); } if (opt_c) { m = m->m_next; if (m != NULL) { no++; goto nextchain; } } #if NETHER > 0 if (opt_v && m0) m_examine(m0, AF_ETHER, modif, pr); #endif } #endif /* defined(DDB) */ #if defined(MBUFTRACE) void mowner_init_owner(struct mowner *mo, const char *name, const char *descr) { memset(mo, 0, sizeof(*mo)); strlcpy(mo->mo_name, name, sizeof(mo->mo_name)); strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr)); } void mowner_attach(struct mowner *mo) { KASSERT(mo->mo_counters == NULL); mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); /* XXX lock */ LIST_INSERT_HEAD(&mowners, mo, mo_link); } void mowner_detach(struct mowner *mo) { KASSERT(mo->mo_counters != NULL); /* XXX lock */ LIST_REMOVE(mo, mo_link); percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); mo->mo_counters = NULL; } void mowner_init(struct mbuf *m, int type) { struct mowner_counter *mc; struct mowner *mo; int s; m->m_owner = mo = &unknown_mowners[type]; s = splvm(); mc = percpu_getref(mo->mo_counters); mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; percpu_putref(mo->mo_counters); splx(s); } void mowner_ref(struct mbuf *m, int flags) { struct mowner *mo = m->m_owner; struct mowner_counter *mc; int s; s = splvm(); mc = percpu_getref(mo->mo_counters); if ((flags & M_EXT) != 0) mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; if ((flags & M_EXT_CLUSTER) != 0) mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; percpu_putref(mo->mo_counters); splx(s); } void mowner_revoke(struct mbuf *m, bool all, int flags) { struct mowner *mo = m->m_owner; struct mowner_counter *mc; int s; s = splvm(); mc = percpu_getref(mo->mo_counters); if ((flags & M_EXT) != 0) mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; if ((flags & M_EXT_CLUSTER) != 0) 
mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; if (all) mc->mc_counter[MOWNER_COUNTER_RELEASES]++; percpu_putref(mo->mo_counters); splx(s); if (all) m->m_owner = &revoked_mowner; } static void mowner_claim(struct mbuf *m, struct mowner *mo) { struct mowner_counter *mc; int flags = m->m_flags; int s; s = splvm(); mc = percpu_getref(mo->mo_counters); mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; if ((flags & M_EXT) != 0) mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; if ((flags & M_EXT_CLUSTER) != 0) mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; percpu_putref(mo->mo_counters); splx(s); m->m_owner = mo; } void m_claim(struct mbuf *m, struct mowner *mo) { if (m->m_owner == mo || mo == NULL) return; mowner_revoke(m, true, m->m_flags); mowner_claim(m, mo); } void m_claimm(struct mbuf *m, struct mowner *mo) { for (; m != NULL; m = m->m_next) m_claim(m, mo); } #endif /* defined(MBUFTRACE) */ #ifdef DIAGNOSTIC /* * Verify that the mbuf chain is not malformed. Used only for diagnostic. * Panics on error. */ void m_verify_packet(struct mbuf *m) { struct mbuf *n = m; char *low, *high, *dat; int totlen = 0, len; if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { panic("%s: mbuf doesn't have M_PKTHDR", __func__); } while (n != NULL) { if (__predict_false(n->m_type == MT_FREE)) { panic("%s: mbuf already freed (n = %p)", __func__, n); } #if 0 /* * This ought to be a rule of the mbuf API. Unfortunately, * many places don't respect that rule. 
*/ if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { panic("%s: M_PKTHDR set on secondary mbuf", __func__); } #endif if (__predict_false(n->m_nextpkt != NULL)) { panic("%s: m_nextpkt not null (m_nextpkt = %p)", __func__, n->m_nextpkt); } dat = n->m_data; len = n->m_len; if (__predict_false(len < 0)) { panic("%s: incorrect length (len = %d)", __func__, len); } low = M_BUFADDR(n); high = low + M_BUFSIZE(n); if (__predict_false((dat < low) || (dat + len > high))) { panic("%s: m_data not in packet" "(dat = %p, len = %d, low = %p, high = %p)", __func__, dat, len, low, high); } totlen += len; n = n->m_next; } if (__predict_false(totlen != m->m_pkthdr.len)) { panic("%s: inconsistent mbuf length (%d != %d)", __func__, totlen, m->m_pkthdr.len); } } #endif struct m_tag * m_tag_get(int type, int len, int wait) { struct m_tag *t; if (len < 0) return NULL; t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); if (t == NULL) return NULL; t->m_tag_id = type; t->m_tag_len = len; return t; } void m_tag_free(struct m_tag *t) { free(t, M_PACKET_TAGS); } void m_tag_prepend(struct mbuf *m, struct m_tag *t) { KASSERT((m->m_flags & M_PKTHDR) != 0); SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } void m_tag_unlink(struct mbuf *m, struct m_tag *t) { KASSERT((m->m_flags & M_PKTHDR) != 0); SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } void m_tag_delete(struct mbuf *m, struct m_tag *t) { m_tag_unlink(m, t); m_tag_free(t); } void m_tag_delete_chain(struct mbuf *m) { struct m_tag *p, *q; KASSERT((m->m_flags & M_PKTHDR) != 0); p = SLIST_FIRST(&m->m_pkthdr.tags); if (p == NULL) return; while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) m_tag_delete(m, q); m_tag_delete(m, p); } struct m_tag * m_tag_find(const struct mbuf *m, int type) { struct m_tag *p; KASSERT((m->m_flags & M_PKTHDR) != 0); p = SLIST_FIRST(&m->m_pkthdr.tags); while (p != NULL) { if (p->m_tag_id == type) return p; p = SLIST_NEXT(p, m_tag_link); } return NULL; } struct m_tag * m_tag_copy(struct 
m_tag *t)
{
	struct m_tag *p;

	/* Allocate a same-sized tag; M_NOWAIT, so this may fail. */
	p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT);
	if (p == NULL)
		return NULL;
	/* The payload lives immediately after the m_tag header. */
	memcpy(p + 1, t + 1, t->m_tag_len);
	return p;
}

/*
 * Copy two tag chains. The destination mbuf (to) loses any attached
 * tags even if the operation fails. This should not be a problem, as
 * m_tag_copy_chain() is typically called with a newly-allocated
 * destination mbuf.
 *
 * Returns 1 on success, 0 on allocation failure (destination is left
 * with no tags in that case).
 */
int
m_tag_copy_chain(struct mbuf *to, struct mbuf *from)
{
	struct m_tag *p, *t, *tprev = NULL;

	KASSERT((from->m_flags & M_PKTHDR) != 0);

	m_tag_delete_chain(to);
	SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
		t = m_tag_copy(p);
		if (t == NULL) {
			/* Partial copy is useless: drop what we copied. */
			m_tag_delete_chain(to);
			return 0;
		}
		/* Append in order: insert after the previously copied tag. */
		if (tprev == NULL)
			SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t,
			    m_tag_link);
		else
			SLIST_INSERT_AFTER(tprev, t, m_tag_link);
		tprev = t;
	}
	return 1;
}
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 | /* $NetBSD: if_udav.c,v 1.99 2022/08/20 14:09:20 riastradh Exp $ */ /* $nabe: if_udav.c,v 1.3 2003/08/21 16:57:19 nabe Exp $ */ /* * Copyright (c) 2003 * Shingo WATANABE <nabe@nabechan.org>. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * DM9601(DAVICOM USB to Ethernet MAC Controller with Integrated 10/100 PHY) * The spec can be found at the following url. * http://www.davicom.com.tw/big5/download/Data%20Sheet/DM9601-DS-F01-062202s.pdf */ /* * TODO: * Interrupt Endpoint support * External PHYs * powerhook() support? 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: if_udav.c,v 1.99 2022/08/20 14:09:20 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <dev/usb/usbnet.h> #include <dev/usb/if_udavreg.h> /* Function declarations */ static int udav_match(device_t, cfdata_t, void *); static void udav_attach(device_t, device_t, void *); CFATTACH_DECL_NEW(udav, sizeof(struct usbnet), udav_match, udav_attach, usbnet_detach, usbnet_activate); static void udav_chip_init(struct usbnet *); static unsigned udav_uno_tx_prepare(struct usbnet *, struct mbuf *, struct usbnet_chain *); static void udav_uno_rx_loop(struct usbnet *, struct usbnet_chain *, uint32_t); static void udav_uno_stop(struct ifnet *, int); static void udav_uno_mcast(struct ifnet *); static int udav_uno_mii_read_reg(struct usbnet *, int, int, uint16_t *); static int udav_uno_mii_write_reg(struct usbnet *, int, int, uint16_t); static void udav_uno_mii_statchg(struct ifnet *); static int udav_uno_init(struct ifnet *); static void udav_reset(struct usbnet *); static int udav_csr_read(struct usbnet *, int, void *, int); static int udav_csr_write(struct usbnet *, int, void *, int); static int udav_csr_read1(struct usbnet *, int); static int udav_csr_write1(struct usbnet *, int, unsigned char); #if 0 static int udav_mem_read(struct usbnet *, int, void *, int); static int udav_mem_write(struct usbnet *, int, void *, int); static int udav_mem_write1(struct usbnet *, int, unsigned char); #endif /* Macros */ #ifdef UDAV_DEBUG #define DPRINTF(x) if (udavdebug) printf x #define DPRINTFN(n, x) if (udavdebug >= (n)) printf x int udavdebug = 0; #else #define DPRINTF(x) #define DPRINTFN(n, x) #endif #define UDAV_SETBIT(un, reg, x) \ udav_csr_write1(un, reg, udav_csr_read1(un, reg) | (x)) #define UDAV_CLRBIT(un, reg, x) \ udav_csr_write1(un, reg, udav_csr_read1(un, reg) & ~(x)) static const struct udav_type { struct usb_devno udav_dev; uint16_t udav_flags; #define UDAV_EXT_PHY 0x0001 
#define UDAV_NO_PHY 0x0002 } udav_devs [] = { /* Corega USB-TXC */ {{ USB_VENDOR_COREGA, USB_PRODUCT_COREGA_FETHER_USB_TXC }, 0}, /* ShanTou ST268 USB NIC */ {{ USB_VENDOR_SHANTOU, USB_PRODUCT_SHANTOU_ST268_USB_NIC }, 0}, /* ShanTou ADM8515 */ {{ USB_VENDOR_SHANTOU, USB_PRODUCT_SHANTOU_ADM8515 }, 0}, /* SUNRISING SR9600 */ {{ USB_VENDOR_SUNRISING, USB_PRODUCT_SUNRISING_SR9600 }, 0 }, /* SUNRISING QF9700 */ {{ USB_VENDOR_SUNRISING, USB_PRODUCT_SUNRISING_QF9700 }, UDAV_NO_PHY }, /* QUAN DM9601 */ {{USB_VENDOR_QUAN, USB_PRODUCT_QUAN_DM9601 }, 0}, #if 0 /* DAVICOM DM9601 Generic? */ /* XXX: The following ids was obtained from the data sheet. */ {{ 0x0a46, 0x9601 }, 0}, #endif }; #define udav_lookup(v, p) ((const struct udav_type *)usb_lookup(udav_devs, v, p)) static const struct usbnet_ops udav_ops = { .uno_stop = udav_uno_stop, .uno_mcast = udav_uno_mcast, .uno_read_reg = udav_uno_mii_read_reg, .uno_write_reg = udav_uno_mii_write_reg, .uno_statchg = udav_uno_mii_statchg, .uno_tx_prepare = udav_uno_tx_prepare, .uno_rx_loop = udav_uno_rx_loop, .uno_init = udav_uno_init, }; /* Probe */ static int udav_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; return udav_lookup(uaa->uaa_vendor, uaa->uaa_product) != NULL ? 
UMATCH_VENDOR_PRODUCT : UMATCH_NONE; } /* Attach */ static void udav_attach(device_t parent, device_t self, void *aux) { USBNET_MII_DECL_DEFAULT(unm); struct usbnet_mii *unmp; struct usbnet * const un = device_private(self); struct usb_attach_arg *uaa = aux; struct usbd_device *dev = uaa->uaa_device; struct usbd_interface *iface; usbd_status err; usb_interface_descriptor_t *id; usb_endpoint_descriptor_t *ed; char *devinfop; int i; aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(dev, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); un->un_dev = self; un->un_udev = dev; un->un_sc = un; un->un_ops = &udav_ops; un->un_rx_xfer_flags = USBD_SHORT_XFER_OK; un->un_tx_xfer_flags = USBD_FORCE_SHORT_XFER; un->un_rx_list_cnt = UDAV_RX_LIST_CNT; un->un_tx_list_cnt = UDAV_TX_LIST_CNT; un->un_rx_bufsz = UDAV_BUFSZ; un->un_tx_bufsz = UDAV_BUFSZ; /* Move the device into the configured state. */ err = usbd_set_config_no(dev, UDAV_CONFIG_NO, 1); /* idx 0 */ if (err) { aprint_error_dev(self, "failed to set configuration" ", err=%s\n", usbd_errstr(err)); return; } /* get control interface */ err = usbd_device2interface_handle(dev, UDAV_IFACE_INDEX, &iface); if (err) { aprint_error_dev(self, "failed to get interface, err=%s\n", usbd_errstr(err)); return; } un->un_iface = iface; un->un_flags = udav_lookup(uaa->uaa_vendor, uaa->uaa_product)->udav_flags; /* get interface descriptor */ id = usbd_get_interface_descriptor(un->un_iface); /* find endpoints */ un->un_ed[USBNET_ENDPT_RX] = un->un_ed[USBNET_ENDPT_TX] = un->un_ed[USBNET_ENDPT_INTR] = -1; for (i = 0; i < id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(un->un_iface, i); if (ed == NULL) { aprint_error_dev(self, "couldn't get endpoint %d\n", i); return; } if ((ed->bmAttributes & UE_XFERTYPE) == UE_BULK && UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN) un->un_ed[USBNET_ENDPT_RX] = ed->bEndpointAddress; else if ((ed->bmAttributes & UE_XFERTYPE) == UE_BULK && 
UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT) un->un_ed[USBNET_ENDPT_TX] = ed->bEndpointAddress; else if ((ed->bmAttributes & UE_XFERTYPE) == UE_INTERRUPT && UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN) un->un_ed[USBNET_ENDPT_INTR] = ed->bEndpointAddress; } if (un->un_ed[USBNET_ENDPT_RX] == 0 || un->un_ed[USBNET_ENDPT_TX] == 0 || un->un_ed[USBNET_ENDPT_INTR] == 0) { aprint_error_dev(self, "missing endpoint\n"); return; } /* Not supported yet. */ un->un_ed[USBNET_ENDPT_INTR] = 0; usbnet_attach(un); // /* reset the adapter */ // udav_reset(un); /* Get Ethernet Address */ err = udav_csr_read(un, UDAV_PAR, un->un_eaddr, ETHER_ADDR_LEN); if (err) { aprint_error_dev(self, "read MAC address failed\n"); return; } if (ISSET(un->un_flags, UDAV_NO_PHY)) unmp = NULL; else unmp = &unm; /* initialize interface information */ usbnet_attach_ifp(un, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST, 0, unmp); return; } #if 0 /* read memory */ static int udav_mem_read(struct usbnet *un, int offset, void *buf, int len) { usb_device_request_t req; usbd_status err; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if (usbnet_isdying(un)) return 0; offset &= 0xffff; len &= 0xff; req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = UDAV_REQ_MEM_READ; USETW(req.wValue, 0x0000); USETW(req.wIndex, offset); USETW(req.wLength, len); err = usbd_do_request(un->un_udev, &req, buf); if (err) { DPRINTF(("%s: %s: read failed. 
off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); } return err; } /* write memory */ static int udav_mem_write(struct usbnet *un, int offset, void *buf, int len) { usb_device_request_t req; usbd_status err; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if (usbnet_isdying(un)) return 0; offset &= 0xffff; len &= 0xff; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UDAV_REQ_MEM_WRITE; USETW(req.wValue, 0x0000); USETW(req.wIndex, offset); USETW(req.wLength, len); err = usbd_do_request(un->un_udev, &req, buf); if (err) { DPRINTF(("%s: %s: write failed. off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); } return err; } /* write memory */ static int udav_mem_write1(struct usbnet *un, int offset, unsigned char ch) { usb_device_request_t req; usbd_status err; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if (usbnet_isdying(un)) return 0; offset &= 0xffff; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UDAV_REQ_MEM_WRITE1; USETW(req.wValue, ch); USETW(req.wIndex, offset); USETW(req.wLength, 0x0000); err = usbd_do_request(un->un_udev, &req, NULL); if (err) { DPRINTF(("%s: %s: write failed. off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); } return err; } #endif /* read register(s) */ static int udav_csr_read(struct usbnet *un, int offset, void *buf, int len) { usb_device_request_t req; usbd_status err; if (usbnet_isdying(un)) return USBD_IOERROR; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); offset &= 0xff; len &= 0xff; req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = UDAV_REQ_REG_READ; USETW(req.wValue, 0x0000); USETW(req.wIndex, offset); USETW(req.wLength, len); err = usbd_do_request(un->un_udev, &req, buf); if (err) { DPRINTF(("%s: %s: read failed. 
off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); memset(buf, 0, len); } return err; } /* write register(s) */ static int udav_csr_write(struct usbnet *un, int offset, void *buf, int len) { usb_device_request_t req; usbd_status err; if (usbnet_isdying(un)) return USBD_IOERROR; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); offset &= 0xff; len &= 0xff; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UDAV_REQ_REG_WRITE; USETW(req.wValue, 0x0000); USETW(req.wIndex, offset); USETW(req.wLength, len); err = usbd_do_request(un->un_udev, &req, buf); if (err) { DPRINTF(("%s: %s: write failed. off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); } return err; } static int udav_csr_read1(struct usbnet *un, int offset) { uint8_t val = 0; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if (usbnet_isdying(un)) return 0; return udav_csr_read(un, offset, &val, 1) ? 0 : val; } /* write a register */ static int udav_csr_write1(struct usbnet *un, int offset, unsigned char ch) { usb_device_request_t req; usbd_status err; if (usbnet_isdying(un)) return USBD_IOERROR; DPRINTFN(0x200, ("%s: %s: enter\n", device_xname(un->un_dev), __func__)); offset &= 0xff; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UDAV_REQ_REG_WRITE1; USETW(req.wValue, ch); USETW(req.wIndex, offset); USETW(req.wLength, 0x0000); err = usbd_do_request(un->un_udev, &req, NULL); if (err) { DPRINTF(("%s: %s: write failed. 
off=%04x, err=%d\n", device_xname(un->un_dev), __func__, offset, err)); } return err; } static int udav_uno_init(struct ifnet *ifp) { struct usbnet * const un = ifp->if_softc; struct mii_data * const mii = usbnet_mii(un); uint8_t eaddr[ETHER_ADDR_LEN]; int rc = 0; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); memcpy(eaddr, CLLADDR(ifp->if_sadl), sizeof(eaddr)); udav_csr_write(un, UDAV_PAR, eaddr, ETHER_ADDR_LEN); /* Initialize network control register */ /* Disable loopback */ UDAV_CLRBIT(un, UDAV_NCR, UDAV_NCR_LBK0 | UDAV_NCR_LBK1); /* Initialize RX control register */ UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_DIS_LONG | UDAV_RCR_DIS_CRC); /* If we want promiscuous mode, accept all physical frames. */ if (usbnet_ispromisc(un)) UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_ALL | UDAV_RCR_PRMSC); else UDAV_CLRBIT(un, UDAV_RCR, UDAV_RCR_ALL | UDAV_RCR_PRMSC); /* Enable RX */ UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_RXEN); /* clear POWER_DOWN state of internal PHY */ UDAV_SETBIT(un, UDAV_GPCR, UDAV_GPCR_GEP_CNTL0); UDAV_CLRBIT(un, UDAV_GPR, UDAV_GPR_GEPIO0); if (mii && (rc = mii_mediachg(mii)) == ENXIO) rc = 0; if (rc != 0) { return rc; } if (usbnet_isdying(un)) return EIO; return 0; } static void udav_reset(struct usbnet *un) { if (usbnet_isdying(un)) return; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); udav_chip_init(un); } static void udav_chip_init(struct usbnet *un) { /* Select PHY */ #if 1 /* * XXX: force select internal phy. * external phy routines are not tested. 
*/ UDAV_CLRBIT(un, UDAV_NCR, UDAV_NCR_EXT_PHY); #else if (un->un_flags & UDAV_EXT_PHY) { UDAV_SETBIT(un, UDAV_NCR, UDAV_NCR_EXT_PHY); } else { UDAV_CLRBIT(un, UDAV_NCR, UDAV_NCR_EXT_PHY); } #endif UDAV_SETBIT(un, UDAV_NCR, UDAV_NCR_RST); for (int i = 0; i < UDAV_TX_TIMEOUT; i++) { if (usbnet_isdying(un)) return; if (!(udav_csr_read1(un, UDAV_NCR) & UDAV_NCR_RST)) break; delay(10); /* XXX */ } delay(10000); /* XXX */ } #define UDAV_BITS 6 #define UDAV_CALCHASH(addr) \ (ether_crc32_le((addr), ETHER_ADDR_LEN) & ((1 << UDAV_BITS) - 1)) static void udav_uno_mcast(struct ifnet *ifp) { struct usbnet * const un = ifp->if_softc; struct ethercom *ec = usbnet_ec(un); struct ether_multi *enm; struct ether_multistep step; uint8_t hashes[8]; int h = 0; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if (usbnet_isdying(un)) return; if (ISSET(un->un_flags, UDAV_NO_PHY)) { UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_ALL); UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_PRMSC); return; } if (usbnet_ispromisc(un)) { ETHER_LOCK(ec); ec->ec_flags |= ETHER_F_ALLMULTI; ETHER_UNLOCK(ec); UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_ALL | UDAV_RCR_PRMSC); return; } /* first, zot all the existing hash bits */ memset(hashes, 0x00, sizeof(hashes)); hashes[7] |= 0x80; /* broadcast address */ udav_csr_write(un, UDAV_MAR, hashes, sizeof(hashes)); /* now program new ones */ ETHER_LOCK(ec); ETHER_FIRST_MULTI(step, ec, enm); while (enm != NULL) { if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN) != 0) { ec->ec_flags |= ETHER_F_ALLMULTI; ETHER_UNLOCK(ec); UDAV_SETBIT(un, UDAV_RCR, UDAV_RCR_ALL); UDAV_CLRBIT(un, UDAV_RCR, UDAV_RCR_PRMSC); return; } h = UDAV_CALCHASH(enm->enm_addrlo); hashes[h>>3] |= 1 << (h & 0x7); ETHER_NEXT_MULTI(step, enm); } ec->ec_flags &= ~ETHER_F_ALLMULTI; ETHER_UNLOCK(ec); /* disable all multicast */ UDAV_CLRBIT(un, UDAV_RCR, UDAV_RCR_ALL); /* write hash value to the register */ udav_csr_write(un, UDAV_MAR, hashes, sizeof(hashes)); } static unsigned udav_uno_tx_prepare(struct 
usbnet *un, struct mbuf *m, struct usbnet_chain *c) { int total_len; uint8_t *buf = c->unc_buf; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); if ((unsigned)m->m_pkthdr.len > un->un_tx_bufsz - 2) return 0; /* Copy the mbuf data into a contiguous buffer */ m_copydata(m, 0, m->m_pkthdr.len, buf + 2); total_len = m->m_pkthdr.len; if (total_len < UDAV_MIN_FRAME_LEN) { memset(buf + 2 + total_len, 0, UDAV_MIN_FRAME_LEN - total_len); total_len = UDAV_MIN_FRAME_LEN; } /* Frame length is specified in the first 2bytes of the buffer */ buf[0] = (uint8_t)total_len; buf[1] = (uint8_t)(total_len >> 8); total_len += 2; DPRINTF(("%s: %s: send %d bytes\n", device_xname(un->un_dev), __func__, total_len)); return total_len; } static void udav_uno_rx_loop(struct usbnet *un, struct usbnet_chain *c, uint32_t total_len) { struct ifnet *ifp = usbnet_ifp(un); uint8_t *buf = c->unc_buf; uint16_t pkt_len; uint8_t pktstat; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); /* first byte in received data */ pktstat = *buf; total_len -= sizeof(pktstat); buf += sizeof(pktstat); DPRINTF(("%s: RX Status: 0x%02x\n", device_xname(un->un_dev), pktstat)); pkt_len = UGETW(buf); total_len -= sizeof(pkt_len); buf += sizeof(pkt_len); DPRINTF(("%s: RX Length: 0x%02x\n", device_xname(un->un_dev), pkt_len)); if (pktstat & UDAV_RSR_LCS) { if_statinc(ifp, if_collisions); return; } if (pkt_len < sizeof(struct ether_header) || pkt_len > total_len || (pktstat & UDAV_RSR_ERR)) { if_statinc(ifp, if_ierrors); return; } pkt_len -= ETHER_CRC_LEN; DPRINTF(("%s: Rx deliver: 0x%02x\n", device_xname(un->un_dev), pkt_len)); usbnet_enqueue(un, buf, pkt_len, 0, 0, 0); } /* Stop the adapter and free any mbufs allocated to the RX and TX lists. 
*/ static void udav_uno_stop(struct ifnet *ifp, int disable) { struct usbnet * const un = ifp->if_softc; DPRINTF(("%s: %s: enter\n", device_xname(un->un_dev), __func__)); udav_reset(un); } static int udav_uno_mii_read_reg(struct usbnet *un, int phy, int reg, uint16_t *val) { uint8_t data[2]; DPRINTFN(0xff, ("%s: %s: enter, phy=%d reg=0x%04x\n", device_xname(un->un_dev), __func__, phy, reg)); if (usbnet_isdying(un)) { #ifdef DIAGNOSTIC printf("%s: %s: dying\n", device_xname(un->un_dev), __func__); #endif *val = 0; return EINVAL; } /* XXX: one PHY only for the internal PHY */ if (phy != 0) { DPRINTFN(0xff, ("%s: %s: phy=%d is not supported\n", device_xname(un->un_dev), __func__, phy)); *val = 0; return EINVAL; } /* select internal PHY and set PHY register address */ udav_csr_write1(un, UDAV_EPAR, UDAV_EPAR_PHY_ADR0 | (reg & UDAV_EPAR_EROA_MASK)); /* select PHY operation and start read command */ udav_csr_write1(un, UDAV_EPCR, UDAV_EPCR_EPOS | UDAV_EPCR_ERPRR); /* XXX: should be wait? */ /* end read command */ UDAV_CLRBIT(un, UDAV_EPCR, UDAV_EPCR_ERPRR); /* retrieve the result from data registers */ udav_csr_read(un, UDAV_EPDRL, data, 2); *val = data[0] | (data[1] << 8); DPRINTFN(0xff, ("%s: %s: phy=%d reg=0x%04x => 0x%04hx\n", device_xname(un->un_dev), __func__, phy, reg, *val)); return 0; } static int udav_uno_mii_write_reg(struct usbnet *un, int phy, int reg, uint16_t val) { uint8_t data[2]; DPRINTFN(0xff, ("%s: %s: enter, phy=%d reg=0x%04x val=0x%04hx\n", device_xname(un->un_dev), __func__, phy, reg, val)); if (usbnet_isdying(un)) { #ifdef DIAGNOSTIC printf("%s: %s: dying\n", device_xname(un->un_dev), __func__); #endif return EIO; } /* XXX: one PHY only for the internal PHY */ if (phy != 0) { DPRINTFN(0xff, ("%s: %s: phy=%d is not supported\n", device_xname(un->un_dev), __func__, phy)); return EIO; } /* select internal PHY and set PHY register address */ udav_csr_write1(un, UDAV_EPAR, UDAV_EPAR_PHY_ADR0 | (reg & UDAV_EPAR_EROA_MASK)); /* put the value to the data 
registers */ data[0] = val & 0xff; data[1] = (val >> 8) & 0xff; udav_csr_write(un, UDAV_EPDRL, data, 2); /* select PHY operation and start write command */ udav_csr_write1(un, UDAV_EPCR, UDAV_EPCR_EPOS | UDAV_EPCR_ERPRW); /* XXX: should be wait? */ /* end write command */ UDAV_CLRBIT(un, UDAV_EPCR, UDAV_EPCR_ERPRW); return 0; } static void udav_uno_mii_statchg(struct ifnet *ifp) { struct usbnet * const un = ifp->if_softc; struct mii_data * const mii = usbnet_mii(un); DPRINTF(("%s: %s: enter\n", ifp->if_xname, __func__)); if (usbnet_isdying(un)) return; if ((mii->mii_media_status & IFM_ACTIVE) && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { DPRINTF(("%s: %s: got link\n", device_xname(un->un_dev), __func__)); usbnet_set_link(un, true); } } #ifdef _MODULE #include "ioconf.c" #endif USBNET_MODULE(udav) |
| 19 1597 518 1572 1600 1602 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | /* $NetBSD: wapbl.h,v 1.21 2018/12/10 21:19:33 jdolecek Exp $ */ /*- * Copyright (c) 2003,2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_WAPBL_H #define _SYS_WAPBL_H #include <sys/mutex.h> #if defined(_KERNEL) || defined(_KMEMUSER) #include <miscfs/specfs/specdev.h> #endif /* This header file describes the api and data structures for * write ahead physical block logging (WAPBL) support. */ #if defined(_KERNEL_OPT) #include "opt_wapbl.h" #endif #ifdef WAPBL_DEBUG #ifndef WAPBL_DEBUG_PRINT #define WAPBL_DEBUG_PRINT (WAPBL_PRINT_REPLAY | WAPBL_PRINT_OPEN) #endif #if 0 #define WAPBL_DEBUG_BUFBYTES #endif #endif #ifdef WAPBL_DEBUG_PRINT enum { WAPBL_PRINT_OPEN = 0x1, WAPBL_PRINT_FLUSH = 0x2, WAPBL_PRINT_TRUNCATE = 0x4, WAPBL_PRINT_TRANSACTION = 0x8, WAPBL_PRINT_BUFFER = 0x10, WAPBL_PRINT_BUFFER2 = 0x20, WAPBL_PRINT_ALLOC = 0x40, WAPBL_PRINT_INODE = 0x80, WAPBL_PRINT_WRITE = 0x100, WAPBL_PRINT_IO = 0x200, WAPBL_PRINT_REPLAY = 0x400, WAPBL_PRINT_ERROR = 0x800, WAPBL_PRINT_DISCARD = 0x1000, WAPBL_PRINT_BIODONE = 0x2000, }; #define WAPBL_PRINTF(mask, a) if (wapbl_debug_print & (mask)) printf a extern int wapbl_debug_print; #else #define WAPBL_PRINTF(mask, a) #endif /****************************************************************/ #include <sys/queue.h> #include <sys/vnode.h> #include <sys/buf.h> #ifdef _KERNEL struct wapbl_entry; struct wapbl_replay; struct wapbl; struct wapbl_dealloc 
{ TAILQ_ENTRY(wapbl_dealloc) wd_entries; daddr_t wd_blkno; /* address of block */ int wd_len; /* size of block */ }; typedef void (*wapbl_flush_fn_t)(struct mount *, struct wapbl_dealloc *); /* * This structure holds per transaction log information */ struct wapbl_entry { struct wapbl *we_wapbl; SIMPLEQ_ENTRY(wapbl_entry) we_entries; size_t we_bufcount; /* Count of unsynced buffers */ size_t we_reclaimable_bytes; /* Number on disk bytes for this transaction */ int we_error; #ifdef WAPBL_DEBUG_BUFBYTES size_t we_unsynced_bufbytes; /* Byte count of unsynced buffers */ #endif }; /* Start using a log */ int wapbl_start(struct wapbl **, struct mount *, struct vnode *, daddr_t, size_t, size_t, struct wapbl_replay *, wapbl_flush_fn_t, wapbl_flush_fn_t); /* Discard the current transaction, potentially dangerous */ void wapbl_discard(struct wapbl *); /* stop using a log */ int wapbl_stop(struct wapbl *, int); /* * Begin a new transaction or increment transaction recursion * level if called while a transaction is already in progress * by the current process. */ int wapbl_begin(struct wapbl *, const char *, int); /* End a transaction or decrement the transaction recursion level */ void wapbl_end(struct wapbl *); /* * Add a new buffer to the current transaction. The buffers * data will be copied to the current transaction log and the * buffer will be marked B_LOCKED so that it will not be * flushed to disk by the syncer or reallocated. */ void wapbl_add_buf(struct wapbl *, struct buf *); /* Remove a buffer from the current transaction. */ void wapbl_remove_buf(struct wapbl *, struct buf *); void wapbl_resize_buf(struct wapbl *, struct buf *, long, long); /* * This will flush all completed transactions to disk and * start asynchronous writes on the associated buffers */ int wapbl_flush(struct wapbl *, int); /* * Inodes that are allocated but have zero link count * must be registered with the current transaction * so they may be recorded in the log and cleaned up later. 
* registration/unregistration of ino numbers already registered is ok. */ void wapbl_register_inode(struct wapbl *, ino_t, mode_t); void wapbl_unregister_inode(struct wapbl *, ino_t, mode_t); /* * Metadata block deallocations must be registered so * that revocations records can be written and to prevent * the corresponding blocks from being reused as data * blocks until the log is on disk. */ int wapbl_register_deallocation(struct wapbl *, daddr_t, int, bool, void **); void wapbl_unregister_deallocation(struct wapbl *, void *); void wapbl_jlock_assert(struct wapbl *wl); void wapbl_junlock_assert(struct wapbl *wl); void wapbl_print(struct wapbl *wl, int full, void (*pr)(const char *, ...) __printflike(1, 2)); #if defined(WAPBL_DEBUG) || defined(DDB) void wapbl_dump(struct wapbl *); #endif void wapbl_biodone(struct buf *); extern const struct wapbl_ops wapbl_ops; static __inline struct mount * wapbl_vptomp(struct vnode *vp) { struct mount *mp; mp = NULL; if (vp != NULL) { if (vp->v_type == VBLK) mp = spec_node_getmountedfs(vp); else mp = vp->v_mount; } return mp; } static __inline bool wapbl_vphaswapbl(struct vnode *vp) { struct mount *mp; if (vp == NULL) return false; mp = wapbl_vptomp(vp); return mp && mp->mnt_wapbl; } #endif /* _KERNEL */ /****************************************************************/ /* Replay support */ #ifdef WAPBL_INTERNAL LIST_HEAD(wapbl_blk_head, wapbl_blk); struct wapbl_replay { struct vnode *wr_logvp; struct vnode *wr_devvp; daddr_t wr_logpbn; int wr_log_dev_bshift; int wr_fs_dev_bshift; int64_t wr_circ_off; int64_t wr_circ_size; uint32_t wr_generation; void *wr_scratch; struct wapbl_blk_head *wr_blkhash; u_long wr_blkhashmask; int wr_blkhashcnt; off_t wr_inodeshead; off_t wr_inodestail; int wr_inodescnt; struct { uint32_t wr_inumber; uint32_t wr_imode; } *wr_inodes; }; #define wapbl_replay_isopen(wr) ((wr)->wr_scratch != 0) /* Supply this to provide i/o support */ int wapbl_write(void *, size_t, struct vnode *, daddr_t); int 
wapbl_read(void *, size_t, struct vnode *, daddr_t); /****************************************************************/ #else struct wapbl_replay; #endif /* WAPBL_INTERNAL */ /****************************************************************/ int wapbl_replay_start(struct wapbl_replay **, struct vnode *, daddr_t, size_t, size_t); void wapbl_replay_stop(struct wapbl_replay *); void wapbl_replay_free(struct wapbl_replay *); int wapbl_replay_write(struct wapbl_replay *, struct vnode *); int wapbl_replay_can_read(struct wapbl_replay *, daddr_t, long); int wapbl_replay_read(struct wapbl_replay *, void *, daddr_t, long); /****************************************************************/ #endif /* !_SYS_WAPBL_H */ |
| 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
/* $NetBSD: if_urndis.c,v 1.47 2022/03/03 05:56:58 riastradh Exp $ */
/* $OpenBSD: if_urndis.c,v 1.31 2011/07/03 15:47:17 matthew Exp $ */

/*
 * Copyright (c) 2010 Jonathan Armani <armani@openbsd.org>
 * Copyright (c) 2010 Fabien Romano <fabien@openbsd.org>
 * Copyright (c) 2010 Michael Knudsen <mk@openbsd.org>
 * All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Driver for USB RNDIS (Remote NDIS) network devices, built on the
 * shared usbnet(9) framework: this file supplies the control-channel
 * message handling (init/query/set over the control interface) and the
 * per-packet encapsulation/decapsulation for the bulk data pipes.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_urndis.c,v 1.47 2022/03/03 05:56:58 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_usb.h"
#endif

#include <sys/param.h>
#include <sys/kmem.h>

#include <dev/usb/usbnet.h>
#include <dev/usb/usbdevs.h>
#include <dev/usb/usbcdc.h>

#include <dev/ic/rndisreg.h>

/* One transfer in flight per direction; RNDIS_BUFSZ bounds one transfer. */
#define RNDIS_RX_LIST_CNT 1
#define RNDIS_TX_LIST_CNT 1
#define RNDIS_BUFSZ 1562

struct urndis_softc {
    struct usbnet sc_un;        /* usbnet(9) base; must be first (see KASSERT in attach) */

    int sc_ifaceno_ctl;         /* bInterfaceNumber of the control interface */

    /* RNDIS device info */
    uint32_t sc_filter;         /* current packet filter, host byte order */
    uint32_t sc_maxppt;         /* max packets per transfer (from INIT reply) */
    uint32_t sc_maxtsz;         /* max transfer size (from INIT reply) */
    uint32_t sc_palign;         /* packet alignment (from INIT reply) */
};

#ifdef URNDIS_DEBUG
#define DPRINTF(x) do { printf x; } while (0)
#else
#define DPRINTF(x)
#endif

#define DEVNAME(un) (device_xname(un->un_dev))

/* Fixed-size buffer used for every encapsulated response. */
#define URNDIS_RESPONSE_LEN 0x400

#if 0
static void urndis_watchdog(struct ifnet *);
#endif

static int urndis_uno_init(struct ifnet *);
static void urndis_uno_rx_loop(struct usbnet *, struct usbnet_chain *,
    uint32_t);
static unsigned urndis_uno_tx_prepare(struct usbnet *, struct mbuf *,
    struct usbnet_chain *);

static uint32_t urndis_ctrl_handle_init(struct usbnet *,
    const struct rndis_comp_hdr *);
static uint32_t urndis_ctrl_handle_query(struct usbnet *,
    const struct rndis_comp_hdr *, void **, size_t *);
static uint32_t urndis_ctrl_handle_reset(struct usbnet *,
    const struct rndis_comp_hdr *);
static uint32_t urndis_ctrl_handle_status(struct usbnet *,
    const struct rndis_comp_hdr *);

static uint32_t urndis_ctrl_set(struct usbnet *, uint32_t, void *, size_t);

static int urndis_match(device_t, cfdata_t, void *);
static void urndis_attach(device_t, device_t, void *);

/* Callbacks handed to usbnet(9). */
static const struct usbnet_ops urndis_ops = {
    .uno_init = urndis_uno_init,
    .uno_tx_prepare = urndis_uno_tx_prepare,
    .uno_rx_loop = urndis_uno_rx_loop,
};

CFATTACH_DECL_NEW(urndis, sizeof(struct urndis_softc),
    urndis_match, urndis_attach, usbnet_detach, usbnet_activate);

/*
 * Supported devices that we can't match by class IDs.
 */
static const struct usb_devno urndis_devs[] = {
    { USB_VENDOR_HTC, USB_PRODUCT_HTC_ANDROID },
    { USB_VENDOR_SAMSUNG, USB_PRODUCT_SAMSUNG_ANDROID2 },
    { USB_VENDOR_SAMSUNG, USB_PRODUCT_SAMSUNG_ANDROID },
};

/*
 * Build and submit one synchronous control request to the device.
 * rt/r select direction and request; index addresses the interface.
 */
static usbd_status
urndis_ctrl_msg(struct usbnet *un, uint8_t rt, uint8_t r,
    uint16_t index, uint16_t value, void *buf, size_t buflen)
{
    usb_device_request_t req;

    req.bmRequestType = rt;
    req.bRequest = r;
    USETW(req.wValue, value);
    USETW(req.wIndex, index);
    USETW(req.wLength, buflen);

    return usbd_do_request(un->un_udev, &req, buf);
}

/*
 * Send an encapsulated RNDIS command: a class-specific control WRITE to
 * the control interface.
 *
 * NOTE(review): declared to return usbd_status but callers compare the
 * result against RNDIS_STATUS_SUCCESS; this only works because both
 * success codes are 0.  Likewise `return(0)` below stands for "success"
 * when the device is detaching.
 */
static usbd_status
urndis_ctrl_send(struct usbnet *un, void *buf, size_t len)
{
    struct urndis_softc *sc = usbnet_softc(un);
    usbd_status err;

    if (usbnet_isdying(un))
        return(0);

    err = urndis_ctrl_msg(un, UT_WRITE_CLASS_INTERFACE, UR_GET_STATUS,
        sc->sc_ifaceno_ctl, 0, buf, len);

    if (err != USBD_NORMAL_COMPLETION)
        printf("%s: %s\n", DEVNAME(un), usbd_errstr(err));

    return err;
}

/*
 * Read one encapsulated RNDIS response into a freshly allocated
 * URNDIS_RESPONSE_LEN buffer via a class-specific control READ.
 *
 * Returns NULL on transfer error or if the device claims a length
 * larger than the buffer.  On success ownership of the buffer passes
 * to the caller; urndis_ctrl_handle() frees it.
 */
static struct rndis_comp_hdr *
urndis_ctrl_recv(struct usbnet *un)
{
    struct urndis_softc *sc = usbnet_softc(un);
    struct rndis_comp_hdr *hdr;
    char *buf;
    usbd_status err;

    if (usbnet_isdying(un))
        return(0);

    buf = kmem_alloc(URNDIS_RESPONSE_LEN, KM_SLEEP);
    err = urndis_ctrl_msg(un, UT_READ_CLASS_INTERFACE, UR_CLEAR_FEATURE,
        sc->sc_ifaceno_ctl, 0, buf, URNDIS_RESPONSE_LEN);

    /* Short transfers are expected: responses are usually < 0x400 bytes. */
    if (err != USBD_NORMAL_COMPLETION && err != USBD_SHORT_XFER) {
        printf("%s: %s\n", DEVNAME(un), usbd_errstr(err));
        kmem_free(buf, URNDIS_RESPONSE_LEN);
        return NULL;
    }

    hdr = (struct rndis_comp_hdr *)buf;
    DPRINTF(("%s: urndis_ctrl_recv: type %#x len %u\n",
        DEVNAME(un),
        le32toh(hdr->rm_type),
        le32toh(hdr->rm_len)));

    /* Reject device-supplied lengths that overrun our buffer. */
    if (le32toh(hdr->rm_len) > URNDIS_RESPONSE_LEN) {
        printf("%s: ctrl message error: wrong size %u > %u\n",
            DEVNAME(un),
            le32toh(hdr->rm_len),
            URNDIS_RESPONSE_LEN);
        kmem_free(buf, URNDIS_RESPONSE_LEN);
        return NULL;
    }

    return hdr;
}

/*
 * Dispatch a completion/indication message by rm_type.  For QUERY
 * completions, *buf/*bufsz receive a freshly allocated copy of the
 * returned info buffer (caller frees).  Always frees hdr (which was
 * allocated by urndis_ctrl_recv()).  Returns an RNDIS status code.
 */
static uint32_t
urndis_ctrl_handle(struct usbnet *un, struct rndis_comp_hdr *hdr,
    void **buf, size_t *bufsz)
{
    uint32_t rval;

    DPRINTF(("%s: urndis_ctrl_handle\n", DEVNAME(un)));

    if (buf && bufsz) {
        *buf = NULL;
        *bufsz = 0;
    }

    switch (le32toh(hdr->rm_type)) {
    case REMOTE_NDIS_INITIALIZE_CMPLT:
        rval = urndis_ctrl_handle_init(un, hdr);
        break;

    case REMOTE_NDIS_QUERY_CMPLT:
        rval = urndis_ctrl_handle_query(un, hdr, buf, bufsz);
        break;

    case REMOTE_NDIS_RESET_CMPLT:
        rval = urndis_ctrl_handle_reset(un, hdr);
        break;

    case REMOTE_NDIS_KEEPALIVE_CMPLT:
    case REMOTE_NDIS_SET_CMPLT:
        rval = le32toh(hdr->rm_status);
        break;

    case REMOTE_NDIS_INDICATE_STATUS_MSG:
        rval = urndis_ctrl_handle_status(un, hdr);
        break;

    default:
        printf("%s: ctrl message error: unknown event %#x\n",
            DEVNAME(un), le32toh(hdr->rm_type));
        rval = RNDIS_STATUS_FAILURE;
    }

    /* hdr came from urndis_ctrl_recv(); release it in every path. */
    kmem_free(hdr, URNDIS_RESPONSE_LEN);

    return rval;
}

/*
 * Validate an INITIALIZE completion and record the device's transfer
 * limits (sc_maxppt/sc_maxtsz/sc_palign).  Rejects non-connectionless
 * devices, non-802.3 media, and RNDIS version mismatches.
 */
static uint32_t
urndis_ctrl_handle_init(struct usbnet *un, const struct rndis_comp_hdr *hdr)
{
    struct urndis_softc *sc = usbnet_softc(un);
    const struct rndis_init_comp *msg;

    msg = (const struct rndis_init_comp *) hdr;

    DPRINTF(("%s: urndis_ctrl_handle_init: len %u rid %u status %#x "
        "ver_major %u ver_minor %u devflags %#x medium %#x pktmaxcnt %u "
        "pktmaxsz %u align %u aflistoffset %u aflistsz %u\n",
        DEVNAME(un),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid),
        le32toh(msg->rm_status),
        le32toh(msg->rm_ver_major),
        le32toh(msg->rm_ver_minor),
        le32toh(msg->rm_devflags),
        le32toh(msg->rm_medium),
        le32toh(msg->rm_pktmaxcnt),
        le32toh(msg->rm_pktmaxsz),
        le32toh(msg->rm_align),
        le32toh(msg->rm_aflistoffset),
        le32toh(msg->rm_aflistsz)));

    if (le32toh(msg->rm_status) != RNDIS_STATUS_SUCCESS) {
        printf("%s: init failed %#x\n", DEVNAME(un),
            le32toh(msg->rm_status));
        return le32toh(msg->rm_status);
    }

    if (le32toh(msg->rm_devflags) != RNDIS_DF_CONNECTIONLESS) {
        printf("%s: wrong device type (current type: %#x)\n",
            DEVNAME(un), le32toh(msg->rm_devflags));
        return RNDIS_STATUS_FAILURE;
    }

    if (le32toh(msg->rm_medium) != RNDIS_MEDIUM_802_3) {
        printf("%s: medium not 802.3 (current medium: %#x)\n",
            DEVNAME(un), le32toh(msg->rm_medium));
        return RNDIS_STATUS_FAILURE;
    }

    if (le32toh(msg->rm_ver_major) != RNDIS_MAJOR_VERSION ||
        le32toh(msg->rm_ver_minor) != RNDIS_MINOR_VERSION) {
        printf("%s: version not %u.%u (current version: %u.%u)\n",
            DEVNAME(un), RNDIS_MAJOR_VERSION, RNDIS_MINOR_VERSION,
            le32toh(msg->rm_ver_major), le32toh(msg->rm_ver_minor));
        return RNDIS_STATUS_FAILURE;
    }

    sc->sc_maxppt = le32toh(msg->rm_pktmaxcnt);
    sc->sc_maxtsz = le32toh(msg->rm_pktmaxsz);
    sc->sc_palign = 1U << le32toh(msg->rm_align);

    return le32toh(msg->rm_status);
}

/*
 * Validate a QUERY completion and, if buf/bufsz were supplied, copy the
 * device's info buffer into a new allocation for the caller.  The info
 * buffer offset is relative to &msg->rm_rid (i.e., past the common
 * header), hence the RNDIS_HEADER_OFFSET term in the bounds check.
 */
static uint32_t
urndis_ctrl_handle_query(struct usbnet *un, const struct rndis_comp_hdr *hdr,
    void **buf, size_t *bufsz)
{
    const struct rndis_query_comp *msg;

    msg = (const struct rndis_query_comp *) hdr;

    DPRINTF(("%s: urndis_ctrl_handle_query: len %u rid %u status %#x "
        "buflen %u bufoff %u\n",
        DEVNAME(un),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid),
        le32toh(msg->rm_status),
        le32toh(msg->rm_infobuflen),
        le32toh(msg->rm_infobufoffset)));

    if (buf && bufsz) {
        *buf = NULL;
        *bufsz = 0;
    }

    if (le32toh(msg->rm_status) != RNDIS_STATUS_SUCCESS) {
        printf("%s: query failed %#x\n", DEVNAME(un),
            le32toh(msg->rm_status));
        return le32toh(msg->rm_status);
    }

    /* Device-controlled offsets: keep the info buffer inside rm_len. */
    if (le32toh(msg->rm_infobuflen) + le32toh(msg->rm_infobufoffset) +
        RNDIS_HEADER_OFFSET > le32toh(msg->rm_len)) {
        printf("%s: ctrl message error: invalid query info "
            "len/offset/end_position(%u/%u/%u) -> "
            "go out of buffer limit %u\n",
            DEVNAME(un),
            le32toh(msg->rm_infobuflen),
            le32toh(msg->rm_infobufoffset),
            le32toh(msg->rm_infobuflen) +
            le32toh(msg->rm_infobufoffset) +
            (uint32_t)RNDIS_HEADER_OFFSET,
            le32toh(msg->rm_len));
        return RNDIS_STATUS_FAILURE;
    }

    if (buf && bufsz) {
        const char *p;

        /* Caller owns and must free *buf (size *bufsz). */
        *buf = kmem_alloc(le32toh(msg->rm_infobuflen), KM_SLEEP);
        *bufsz = le32toh(msg->rm_infobuflen);

        p = (const char *)&msg->rm_rid;
        p += le32toh(msg->rm_infobufoffset);
        memcpy(*buf, p, le32toh(msg->rm_infobuflen));
    }

    return le32toh(msg->rm_status);
}

/*
 * Handle a RESET completion.  If the device reports rm_adrreset, its
 * packet filter was cleared, so re-program the cached sc_filter.
 */
static uint32_t
urndis_ctrl_handle_reset(struct usbnet *un, const struct rndis_comp_hdr *hdr)
{
    struct urndis_softc *sc = usbnet_softc(un);
    const struct rndis_reset_comp *msg;
    uint32_t rval;

    msg = (const struct rndis_reset_comp *) hdr;

    rval = le32toh(msg->rm_status);

    DPRINTF(("%s: urndis_ctrl_handle_reset: len %u status %#x "
        "adrreset %u\n",
        DEVNAME(un),
        le32toh(msg->rm_len),
        rval,
        le32toh(msg->rm_adrreset)));

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: reset failed %#x\n", DEVNAME(un), rval);
        return rval;
    }

    if (le32toh(msg->rm_adrreset) != 0) {
        uint32_t filter;

        filter = htole32(sc->sc_filter);
        rval = urndis_ctrl_set(un, OID_GEN_CURRENT_PACKET_FILTER,
            &filter, sizeof(filter));
        if (rval != RNDIS_STATUS_SUCCESS) {
            printf("%s: unable to reset data filters\n",
                DEVNAME(un));
            return rval;
        }
    }

    return rval;
}

/*
 * Handle an unsolicited INDICATE_STATUS message.  Link connect/
 * disconnect and offload-config indications are treated as success;
 * anything else is logged and passed through as the status code.
 */
static uint32_t
urndis_ctrl_handle_status(struct usbnet *un, const struct rndis_comp_hdr *hdr)
{
    const struct rndis_status_msg *msg;
    uint32_t rval;

    msg = (const struct rndis_status_msg *)hdr;

    rval = le32toh(msg->rm_status);

    DPRINTF(("%s: urndis_ctrl_handle_status: len %u status %#x "
        "stbuflen %u\n",
        DEVNAME(un),
        le32toh(msg->rm_len), rval,
        le32toh(msg->rm_stbuflen)));

    switch (rval) {
    case RNDIS_STATUS_MEDIA_CONNECT:
    case RNDIS_STATUS_MEDIA_DISCONNECT:
    case RNDIS_STATUS_OFFLOAD_CURRENT_CONFIG:
        rval = RNDIS_STATUS_SUCCESS;
        break;

    default:
        printf("%s: status %#x\n", DEVNAME(un), rval);
    }

    return rval;
}

/*
 * Send REMOTE_NDIS_INITIALIZE_MSG and process the device's reply.
 * Advertises RNDIS_BUFSZ as our maximum transfer size.
 */
static uint32_t
urndis_ctrl_init(struct usbnet *un)
{
    struct rndis_init_req *msg;
    uint32_t rval;
    struct rndis_comp_hdr *hdr;

    msg = kmem_alloc(sizeof(*msg), KM_SLEEP);
    msg->rm_type = htole32(REMOTE_NDIS_INITIALIZE_MSG);
    msg->rm_len = htole32(sizeof(*msg));
    msg->rm_rid = htole32(0);
    msg->rm_ver_major = htole32(RNDIS_MAJOR_VERSION);
    msg->rm_ver_minor = htole32(RNDIS_MINOR_VERSION);
    msg->rm_max_xfersz = htole32(RNDIS_BUFSZ);

    DPRINTF(("%s: urndis_ctrl_init send: type %u len %u rid %u "
        "ver_major %u ver_minor %u max_xfersz %u\n",
        DEVNAME(un),
        le32toh(msg->rm_type),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid),
        le32toh(msg->rm_ver_major),
        le32toh(msg->rm_ver_minor),
        le32toh(msg->rm_max_xfersz)));

    rval = urndis_ctrl_send(un, msg, sizeof(*msg));
    kmem_free(msg, sizeof(*msg));

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: init failed\n", DEVNAME(un));
        return rval;
    }

    if ((hdr = urndis_ctrl_recv(un)) == NULL) {
        printf("%s: unable to get init response\n", DEVNAME(un));
        return RNDIS_STATUS_FAILURE;
    }
    rval = urndis_ctrl_handle(un, hdr, NULL, NULL);

    return rval;
}

#if 0
/*
 * Send REMOTE_NDIS_HALT_MSG (fire-and-forget; no reply expected).
 * Currently unused.
 */
static uint32_t
urndis_ctrl_halt(struct usbnet *un)
{
    struct rndis_halt_req *msg;
    uint32_t rval;

    msg = kmem_alloc(sizeof(*msg), KM_SLEEP);
    msg->rm_type = htole32(REMOTE_NDIS_HALT_MSG);
    msg->rm_len = htole32(sizeof(*msg));
    msg->rm_rid = 0;

    DPRINTF(("%s: urndis_ctrl_halt send: type %u len %u rid %u\n",
        DEVNAME(un),
        le32toh(msg->rm_type),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid)));

    rval = urndis_ctrl_send(un, msg, sizeof(*msg));
    kmem_free(msg, sizeof(*msg));

    if (rval != RNDIS_STATUS_SUCCESS)
        printf("%s: halt failed\n", DEVNAME(un));

    return rval;
}
#endif

/*
 * Send REMOTE_NDIS_QUERY_MSG for `oid`, optionally with a qlen-byte
 * query payload at offset 20 (relative to rm_rid), and hand the reply
 * to urndis_ctrl_handle(); on success *rbuf/*rbufsz receive the info
 * buffer (caller frees).
 */
static uint32_t
urndis_ctrl_query(struct usbnet *un, uint32_t oid,
    void *qbuf, size_t qlen,
    void **rbuf, size_t *rbufsz)
{
    struct rndis_query_req *msg;
    uint32_t rval;
    struct rndis_comp_hdr *hdr;

    msg = kmem_alloc(sizeof(*msg) + qlen, KM_SLEEP);
    msg->rm_type = htole32(REMOTE_NDIS_QUERY_MSG);
    msg->rm_len = htole32(sizeof(*msg) + qlen);
    msg->rm_rid = 0; /* XXX */
    msg->rm_oid = htole32(oid);
    msg->rm_infobuflen = htole32(qlen);
    if (qlen != 0) {
        /* 20 = offset of the info buffer past rm_rid (RNDIS layout). */
        msg->rm_infobufoffset = htole32(20);
        memcpy((char*)msg + 20, qbuf, qlen);
    } else
        msg->rm_infobufoffset = 0;
    msg->rm_devicevchdl = 0;

    DPRINTF(("%s: urndis_ctrl_query send: type %u len %u rid %u oid %#x "
        "infobuflen %u infobufoffset %u devicevchdl %u\n",
        DEVNAME(un),
        le32toh(msg->rm_type),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid),
        le32toh(msg->rm_oid),
        le32toh(msg->rm_infobuflen),
        le32toh(msg->rm_infobufoffset),
        le32toh(msg->rm_devicevchdl)));

    /*
     * NOTE(review): only sizeof(*msg) bytes are sent even when qlen > 0
     * and rm_len says sizeof(*msg) + qlen — confirm whether the payload
     * is intentionally dropped (all current callers pass qlen == 0).
     */
    rval = urndis_ctrl_send(un, msg, sizeof(*msg));
    kmem_free(msg, sizeof(*msg) + qlen);

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: query failed\n", DEVNAME(un));
        return rval;
    }

    if ((hdr = urndis_ctrl_recv(un)) == NULL) {
        printf("%s: unable to get query response\n", DEVNAME(un));
        return RNDIS_STATUS_FAILURE;
    }
    rval = urndis_ctrl_handle(un, hdr, rbuf, rbufsz);

    return rval;
}

/*
 * Send REMOTE_NDIS_SET_MSG for `oid` with a len-byte value at offset 20
 * (relative to rm_rid) and process the completion.
 */
static uint32_t
urndis_ctrl_set(struct usbnet *un, uint32_t oid, void *buf, size_t len)
{
    struct rndis_set_req *msg;
    uint32_t rval;
    struct rndis_comp_hdr *hdr;

    msg = kmem_alloc(sizeof(*msg) + len, KM_SLEEP);
    msg->rm_type = htole32(REMOTE_NDIS_SET_MSG);
    msg->rm_len = htole32(sizeof(*msg) + len);
    msg->rm_rid = 0; /* XXX */
    msg->rm_oid = htole32(oid);
    msg->rm_infobuflen = htole32(len);
    if (len != 0) {
        /* 20 = offset of the info buffer past rm_rid (RNDIS layout). */
        msg->rm_infobufoffset = htole32(20);
        memcpy((char*)msg + 20, buf, len);
    } else
        msg->rm_infobufoffset = 0;
    msg->rm_devicevchdl = 0;

    DPRINTF(("%s: urndis_ctrl_set send: type %u len %u rid %u oid %#x "
        "infobuflen %u infobufoffset %u devicevchdl %u\n",
        DEVNAME(un),
        le32toh(msg->rm_type),
        le32toh(msg->rm_len),
        le32toh(msg->rm_rid),
        le32toh(msg->rm_oid),
        le32toh(msg->rm_infobuflen),
        le32toh(msg->rm_infobufoffset),
        le32toh(msg->rm_devicevchdl)));

    /*
     * NOTE(review): as in urndis_ctrl_query, only sizeof(*msg) bytes are
     * transmitted even though rm_len includes `len` — verify against the
     * device's expectations.
     */
    rval = urndis_ctrl_send(un, msg, sizeof(*msg));
    kmem_free(msg, sizeof(*msg) + len);

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: set failed\n", DEVNAME(un));
        return rval;
    }

    if ((hdr = urndis_ctrl_recv(un)) == NULL) {
        printf("%s: unable to get set response\n", DEVNAME(un));
        return RNDIS_STATUS_FAILURE;
    }
    rval = urndis_ctrl_handle(un, hdr, NULL, NULL);
    if (rval != RNDIS_STATUS_SUCCESS)
        printf("%s: set failed %#x\n", DEVNAME(un), rval);

    return rval;
}

#if 0
/*
 * Set a named RNDIS configuration parameter.  Disabled/unused.
 *
 * NOTE(review): `memcpy(param + 20, ...)` performs pointer arithmetic on
 * a struct pointer (advances 20 * sizeof(*param) bytes, not 20 bytes);
 * presumably `(char *)param + 20` was intended.  Also takes a
 * struct urndis_softc * where every other helper takes struct usbnet *.
 * Fix before re-enabling.
 */
static uint32_t
urndis_ctrl_set_param(struct urndis_softc *un, const char *name,
    uint32_t type,
    void *buf, size_t len)
{
    struct rndis_set_parameter *param;
    uint32_t rval;
    size_t namelen, tlen;

    if (name)
        namelen = strlen(name);
    else
        namelen = 0;
    tlen = sizeof(*param) + len + namelen;
    param = kmem_alloc(tlen, KM_SLEEP);
    param->rm_namelen = htole32(namelen);
    param->rm_valuelen = htole32(len);
    param->rm_type = htole32(type);
    if (namelen != 0) {
        param->rm_nameoffset = htole32(20);
        memcpy(param + 20, name, namelen);
    } else
        param->rm_nameoffset = 0;
    if (len != 0) {
        param->rm_valueoffset = htole32(20 + namelen);
        memcpy(param + 20 + namelen, buf, len);
    } else
        param->rm_valueoffset = 0;

    DPRINTF(("%s: urndis_ctrl_set_param send: nameoffset %u namelen %u "
        "type %#x valueoffset %u valuelen %u\n",
        DEVNAME(un),
        le32toh(param->rm_nameoffset),
        le32toh(param->rm_namelen),
        le32toh(param->rm_type),
        le32toh(param->rm_valueoffset),
        le32toh(param->rm_valuelen)));

    rval = urndis_ctrl_set(un, OID_GEN_RNDIS_CONFIG_PARAMETER, param,
        tlen);
    kmem_free(param, tlen);

    if (rval != RNDIS_STATUS_SUCCESS)
        printf("%s: set param failed %#x\n", DEVNAME(un), rval);

    return rval;
}

/* XXX : adrreset, get it from response */
static uint32_t
urndis_ctrl_reset(struct usbnet *un)
{
    struct rndis_reset_req *reset;
    uint32_t rval;
    struct rndis_comp_hdr *hdr;

    reset = kmem_alloc(sizeof(*reset), KM_SLEEP);
    reset->rm_type = htole32(REMOTE_NDIS_RESET_MSG);
    reset->rm_len = htole32(sizeof(*reset));
    reset->rm_rid = 0; /* XXX rm_rid == reserved ... remove ? */

    DPRINTF(("%s: urndis_ctrl_reset send: type %u len %u rid %u\n",
        DEVNAME(un),
        le32toh(reset->rm_type),
        le32toh(reset->rm_len),
        le32toh(reset->rm_rid)));

    rval = urndis_ctrl_send(un, reset, sizeof(*reset));
    kmem_free(reset, sizeof(*reset));

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: reset failed\n", DEVNAME(un));
        return rval;
    }

    if ((hdr = urndis_ctrl_recv(un)) == NULL) {
        printf("%s: unable to get reset response\n", DEVNAME(un));
        return RNDIS_STATUS_FAILURE;
    }
    rval = urndis_ctrl_handle(un, hdr, NULL, NULL);

    return rval;
}

/*
 * Send REMOTE_NDIS_KEEPALIVE_MSG; on failure attempt a device reset.
 * Disabled/unused (would be driven by the watchdog below).
 */
static uint32_t
urndis_ctrl_keepalive(struct usbnet *un)
{
    struct rndis_keepalive_req *keep;
    uint32_t rval;
    struct rndis_comp_hdr *hdr;

    keep = kmem_alloc(sizeof(*keep), KM_SLEEP);
    keep->rm_type = htole32(REMOTE_NDIS_KEEPALIVE_MSG);
    keep->rm_len = htole32(sizeof(*keep));
    keep->rm_rid = 0; /* XXX rm_rid == reserved ... remove ? */

    DPRINTF(("%s: urndis_ctrl_keepalive: type %u len %u rid %u\n",
        DEVNAME(un),
        le32toh(keep->rm_type),
        le32toh(keep->rm_len),
        le32toh(keep->rm_rid)));

    rval = urndis_ctrl_send(un, keep, sizeof(*keep));
    kmem_free(keep, sizeof(*keep));

    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: keepalive failed\n", DEVNAME(un));
        return rval;
    }

    if ((hdr = urndis_ctrl_recv(un)) == NULL) {
        printf("%s: unable to get keepalive response\n", DEVNAME(un));
        return RNDIS_STATUS_FAILURE;
    }
    rval = urndis_ctrl_handle(un, hdr, NULL, NULL);
    if (rval != RNDIS_STATUS_SUCCESS) {
        printf("%s: keepalive failed %#x\n", DEVNAME(un), rval);
        urndis_ctrl_reset(un);
    }

    return rval;
}
#endif

/*
 * usbnet tx-prepare callback: wrap one mbuf in a REMOTE_NDIS_PACKET_MSG
 * inside the chain buffer.  Returns the number of bytes to transmit, or
 * 0 if the packet (plus header) would not fit in the tx buffer.
 */
static unsigned
urndis_uno_tx_prepare(struct usbnet *un, struct mbuf *m,
    struct usbnet_chain *c)
{
    struct rndis_packet_msg *msg;

    if ((unsigned)m->m_pkthdr.len > un->un_tx_bufsz - sizeof(*msg))
        return 0;

    msg = (struct rndis_packet_msg *)c->unc_buf;

    memset(msg, 0, sizeof(*msg));
    msg->rm_type = htole32(REMOTE_NDIS_PACKET_MSG);
    msg->rm_len = htole32(sizeof(*msg) + m->m_pkthdr.len);

    msg->rm_dataoffset = htole32(RNDIS_DATA_OFFSET);
    msg->rm_datalen = htole32(m->m_pkthdr.len);

    /* Payload starts RNDIS_DATA_OFFSET bytes past the common header. */
    m_copydata(m, 0, m->m_pkthdr.len,
        ((char*)msg + RNDIS_DATA_OFFSET + RNDIS_HEADER_OFFSET));

    DPRINTF(("%s: %s type %#x len %u data(off %u len %u)\n",
        __func__,
        DEVNAME(un),
        le32toh(msg->rm_type),
        le32toh(msg->rm_len),
        le32toh(msg->rm_dataoffset),
        le32toh(msg->rm_datalen)));

    return le32toh(msg->rm_len);
}

/*
 * usbnet rx-loop callback: walk the received transfer, which may hold
 * several concatenated REMOTE_NDIS_PACKET_MSGs, validate each header
 * field against the remaining buffer (all values are attacker/device
 * controlled) and enqueue each Ethernet frame.
 */
static void
urndis_uno_rx_loop(struct usbnet * un, struct usbnet_chain *c,
    uint32_t total_len)
{
    struct rndis_packet_msg *msg;
    struct ifnet *ifp = usbnet_ifp(un);
    int offset;

    offset = 0;

    while (total_len > 1) {
        msg = (struct rndis_packet_msg *)((char*)c->unc_buf + offset);

        DPRINTF(("%s: %s buffer size left %u\n", DEVNAME(un), __func__,
            total_len));

        if (total_len < sizeof(*msg)) {
            printf("%s: urndis_decap invalid buffer total_len %u < "
                "minimum header %zu\n",
                DEVNAME(un),
                total_len,
                sizeof(*msg));
            return;
        }

        /*
         * NOTE(review): the last two arguments both print
         * rm_pktinfooffset; the second was presumably meant to be
         * rm_pktinfolen (debug output only).
         */
        DPRINTF(("%s: urndis_decap total_len %u data(off:%u len:%u) "
            "oobdata(off:%u len:%u nb:%u) perpacket(off:%u len:%u)\n",
            DEVNAME(un),
            le32toh(msg->rm_len),
            le32toh(msg->rm_dataoffset),
            le32toh(msg->rm_datalen),
            le32toh(msg->rm_oobdataoffset),
            le32toh(msg->rm_oobdatalen),
            le32toh(msg->rm_oobdataelements),
            le32toh(msg->rm_pktinfooffset),
            le32toh(msg->rm_pktinfooffset)));

        if (le32toh(msg->rm_type) != REMOTE_NDIS_PACKET_MSG) {
            printf("%s: urndis_decap invalid type %#x != %#x\n",
                DEVNAME(un),
                le32toh(msg->rm_type),
                REMOTE_NDIS_PACKET_MSG);
            return;
        }
        if (le32toh(msg->rm_len) < sizeof(*msg)) {
            printf("%s: urndis_decap invalid msg len %u < %zu\n",
                DEVNAME(un),
                le32toh(msg->rm_len),
                sizeof(*msg));
            return;
        }
        if (le32toh(msg->rm_len) > total_len) {
            printf("%s: urndis_decap invalid msg len %u > buffer "
                "total_len %u\n",
                DEVNAME(un),
                le32toh(msg->rm_len),
                total_len);
            return;
        }

        /* rm_dataoffset is relative to the end of the common header. */
        if (le32toh(msg->rm_dataoffset) +
            le32toh(msg->rm_datalen) + RNDIS_HEADER_OFFSET
                > le32toh(msg->rm_len)) {
            printf("%s: urndis_decap invalid data "
                "len/offset/end_position(%u/%u/%u) -> "
                "go out of receive buffer limit %u\n",
                DEVNAME(un),
                le32toh(msg->rm_datalen),
                le32toh(msg->rm_dataoffset),
                le32toh(msg->rm_dataoffset) +
                le32toh(msg->rm_datalen) +
                (uint32_t)RNDIS_HEADER_OFFSET,
                le32toh(msg->rm_len));
            return;
        }

        if (le32toh(msg->rm_datalen) < sizeof(struct ether_header)) {
            if_statinc(ifp, if_ierrors);
            printf("%s: urndis_decap invalid ethernet size "
                "%d < %zu\n",
                DEVNAME(un),
                le32toh(msg->rm_datalen),
                sizeof(struct ether_header));
            return;
        }

        /*
         * Frame data lives rm_dataoffset bytes past the
         * rm_dataoffset field itself (RNDIS offset convention).
         */
        usbnet_enqueue(un,
            ((char*)&msg->rm_dataoffset +
            le32toh(msg->rm_dataoffset)),
            le32toh(msg->rm_datalen), 0, 0, 0);

        offset += le32toh(msg->rm_len);
        total_len -= le32toh(msg->rm_len);
    }
}

#if 0
/*
 * Watchdog hook: send a keepalive on tx timeout.  Disabled/unused.
 *
 * NOTE(review): references `un`, which is never defined in this
 * function (only `sc` is); this would not compile if re-enabled.
 */
static void
urndis_watchdog(struct ifnet *ifp)
{
    struct urndis_softc *sc = usbnet_softc(un);

    if (un->un_dying)
        return;

    if_statinc(ifp, if_oerrors);
    printf("%s: watchdog timeout\n", DEVNAME(un));

    urndis_ctrl_keepalive(un);
}
#endif

/*
 * usbnet init callback: (re-)run the RNDIS INITIALIZE handshake when
 * the interface is brought up.  Returns 0 or EIO.
 */
static int
urndis_uno_init(struct ifnet *ifp)
{
    struct usbnet *un = ifp->if_softc;

    KASSERT(IFNET_LOCKED(ifp));

    if (urndis_ctrl_init(un) != RNDIS_STATUS_SUCCESS)
        return EIO;

    return 0;
}

/*
 * Autoconf match: accept the Wireless/RF/RNDIS interface class triple,
 * or any vendor/product listed in urndis_devs[].
 */
static int
urndis_match(device_t parent, cfdata_t match, void *aux)
{
    struct usbif_attach_arg *uiaa = aux;
    usb_interface_descriptor_t *id;

    if (!uiaa->uiaa_iface)
        return UMATCH_NONE;

    id = usbd_get_interface_descriptor(uiaa->uiaa_iface);
    if (id == NULL)
        return UMATCH_NONE;

    if (id->bInterfaceClass == UICLASS_WIRELESS &&
        id->bInterfaceSubClass == UISUBCLASS_RF &&
        id->bInterfaceProtocol == UIPROTO_RNDIS)
        return UMATCH_IFACECLASS_IFACESUBCLASS_IFACEPROTO;

    return usb_lookup(urndis_devs, uiaa->uiaa_vendor,
        uiaa->uiaa_product) != NULL ?
        UMATCH_VENDOR_PRODUCT : UMATCH_NONE;
}

/*
 * Autoconf attach: locate the data interface via the CDC union
 * descriptor (falling back to the control interface), find the bulk
 * in/out endpoints across alternate settings, run the RNDIS INITIALIZE
 * handshake, query the permanent MAC address, program the initial
 * packet filter, and hand off to usbnet_attach_ifp().
 */
static void
urndis_attach(device_t parent, device_t self, void *aux)
{
    struct urndis_softc *sc = device_private(self);
    struct usbnet * const un = &sc->sc_un;
    struct usbif_attach_arg *uiaa = aux;
    struct usbd_device *dev = uiaa->uiaa_device;
    usb_interface_descriptor_t *id;
    usb_endpoint_descriptor_t *ed;
    usb_config_descriptor_t *cd;
    struct usbd_interface *iface_ctl;
    const usb_cdc_union_descriptor_t *ud;
    const usb_cdc_header_descriptor_t *desc;
    usbd_desc_iter_t iter;
    int if_ctl, if_data;
    int i, j, altcnt;
    void *buf;
    size_t bufsz;
    uint32_t filter;
    char *devinfop;

    /* usbnet_softc() relies on sc_un being first in the softc. */
    KASSERT((void *)sc == un);

    aprint_naive("\n");
    aprint_normal("\n");
    devinfop = usbd_devinfo_alloc(dev, 0);
    aprint_normal_dev(self, "%s\n", devinfop);
    usbd_devinfo_free(devinfop);

    un->un_dev = self;
    un->un_udev = dev;
    un->un_sc = sc;
    un->un_ops = &urndis_ops;
    un->un_rx_xfer_flags = USBD_SHORT_XFER_OK;
    un->un_tx_xfer_flags = USBD_FORCE_SHORT_XFER;
    un->un_rx_list_cnt = RNDIS_RX_LIST_CNT;
    un->un_tx_list_cnt = RNDIS_TX_LIST_CNT;
    un->un_rx_bufsz = RNDIS_BUFSZ;
    un->un_tx_bufsz = RNDIS_BUFSZ;

    iface_ctl = uiaa->uiaa_iface;
    un->un_iface = uiaa->uiaa_iface;
    id = usbd_get_interface_descriptor(iface_ctl);
    if_ctl = id->bInterfaceNumber;
    sc->sc_ifaceno_ctl = if_ctl;
    if_data = -1;

    /*
     * Scan class-specific descriptors for a CDC union descriptor that
     * names the slave (data) interface.
     */
    usb_desc_iter_init(un->un_udev, &iter);
    while ((desc = (const void *)usb_desc_iter_next(&iter)) != NULL) {
        if (desc->bDescriptorType != UDESC_CS_INTERFACE) {
            continue;
        }
        switch (desc->bDescriptorSubtype) {
        case UDESCSUB_CDC_UNION:
            /* XXX bail out when found first? */
            ud = (const usb_cdc_union_descriptor_t *)desc;
            if (if_data == -1)
                if_data = ud->bSlaveInterface[0];
            break;
        }
    }

    if (if_data == -1) {
        DPRINTF(("urndis_attach: no union interface\n"));
        un->un_iface = iface_ctl;
    } else {
        DPRINTF(("urndis_attach: union interface: ctl %u, data %u\n",
            if_ctl, if_data));
        /* Claim the matching data interface from autoconf. */
        for (i = 0; i < uiaa->uiaa_nifaces; i++) {
            if (uiaa->uiaa_ifaces[i] != NULL) {
                id = usbd_get_interface_descriptor(
                    uiaa->uiaa_ifaces[i]);
                if (id != NULL && id->bInterfaceNumber ==
                    if_data) {
                    un->un_iface = uiaa->uiaa_ifaces[i];
                    uiaa->uiaa_ifaces[i] = NULL;
                }
            }
        }
    }

    if (un->un_iface == NULL) {
        aprint_error("%s: no data interface\n", DEVNAME(un));
        return;
    }

    id = usbd_get_interface_descriptor(un->un_iface);
    cd = usbd_get_config_descriptor(un->un_udev);
    altcnt = usbd_get_no_alts(cd, id->bInterfaceNumber);

    /* Try each alternate setting until one has bulk in + out. */
    for (j = 0; j < altcnt; j++) {
        if (usbd_set_interface(un->un_iface, j)) {
            aprint_error("%s: interface alternate setting %u "
                "failed\n", DEVNAME(un), j);
            return;
        }
        /* Find endpoints. */
        id = usbd_get_interface_descriptor(un->un_iface);
        un->un_ed[USBNET_ENDPT_RX] = un->un_ed[USBNET_ENDPT_TX] = 0;
        for (i = 0; i < id->bNumEndpoints; i++) {
            ed = usbd_interface2endpoint_descriptor(
                un->un_iface, i);
            if (!ed) {
                aprint_error("%s: no descriptor for bulk "
                    "endpoint %u\n", DEVNAME(un), i);
                return;
            }
            if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN &&
                UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
                un->un_ed[USBNET_ENDPT_RX] =
                    ed->bEndpointAddress;
            } else if (
                UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT &&
                UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
                un->un_ed[USBNET_ENDPT_TX] =
                    ed->bEndpointAddress;
            }
        }

        if (un->un_ed[USBNET_ENDPT_RX] != 0 &&
            un->un_ed[USBNET_ENDPT_TX] != 0) {
            DPRINTF(("%s: in=%#x, out=%#x\n",
                DEVNAME(un),
                un->un_ed[USBNET_ENDPT_RX],
                un->un_ed[USBNET_ENDPT_TX]));
            break;
        }
    }

    if (un->un_ed[USBNET_ENDPT_RX] == 0)
        aprint_error("%s: could not find data bulk in\n",
            DEVNAME(un));
    if (un->un_ed[USBNET_ENDPT_TX] == 0)
        aprint_error("%s: could not find data bulk out\n",DEVNAME(un));
    if (un->un_ed[USBNET_ENDPT_RX] == 0 ||
        un->un_ed[USBNET_ENDPT_TX] == 0)
        return;

#if 0
    ifp->if_watchdog = urndis_watchdog;
#endif

    usbnet_attach(un);

    if (urndis_ctrl_init(un) != RNDIS_STATUS_SUCCESS) {
        aprint_error("%s: unable to initialize hardware\n",
            DEVNAME(un));
        return;
    }

    if (urndis_ctrl_query(un, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
        &buf, &bufsz) != RNDIS_STATUS_SUCCESS) {
        aprint_error("%s: unable to get hardware address\n",
            DEVNAME(un));
        return;
    }

    if (bufsz == ETHER_ADDR_LEN) {
        memcpy(un->un_eaddr, buf, ETHER_ADDR_LEN);
        kmem_free(buf, bufsz);
    } else {
        aprint_error("%s: invalid address\n", DEVNAME(un));
        if (buf && bufsz)
            kmem_free(buf, bufsz);
        return;
    }

    /* Initialize packet filter */
    sc->sc_filter = RNDIS_PACKET_TYPE_BROADCAST;
    sc->sc_filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
    filter = htole32(sc->sc_filter);
    if (urndis_ctrl_set(un, OID_GEN_CURRENT_PACKET_FILTER, &filter,
        sizeof(filter)) != RNDIS_STATUS_SUCCESS) {
        aprint_error("%s: unable to set data filters\n", DEVNAME(un));
        return;
    }

    usbnet_attach_ifp(un, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST,
        0, NULL);
}

#ifdef _MODULE
#include "ioconf.c"
#endif

USBNET_MODULE(urndis)
| 3 3 3 38 38 36 166 166 166 166 166 166 166 166 166 166 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 | /* $NetBSD: uhub.c,v 1.161 2022/04/06 22:01:45 mlelstv Exp $ */ /* $FreeBSD: src/sys/dev/usb/uhub.c,v 1.18 1999/11/17 22:33:43 n_hibma Exp $ */ /* $OpenBSD: uhub.c,v 1.86 2015/06/29 18:27:40 mpi Exp $ */ /* * Copyright (c) 1998, 2004 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net) at * Carlstedt Research & Technology. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * USB spec: http://www.usb.org/developers/docs/usbspec.zip */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uhub.c,v 1.161 2022/04/06 22:01:45 mlelstv Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/bus.h> #include <sys/device.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/proc.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/kcov.h> #include <sys/sdt.h> #include <dev/usb/usb.h> #include <dev/usb/usb_sdt.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdivar.h> #include <dev/usb/usbhist.h> SDT_PROBE_DEFINE1(usb, hub, explore, start, "struct usbd_device *"/*hub*/); SDT_PROBE_DEFINE1(usb, hub, explore, done, "struct usbd_device *"/*hub*/); SDT_PROBE_DEFINE3(usb, hub, explore, rescan, "struct usbd_device *"/*hub*/, "int"/*portno*/, "struct usbd_port *"/*port*/); SDT_PROBE_DEFINE5(usb, hub, explore, portstat, "struct usbd_device *"/*hub*/, "int"/*portno*/, "int"/*status*/, "int"/*change*/, "int"/*reattach*/); SDT_PROBE_DEFINE3(usb, hub, explore, disconnect, "struct usbd_device *"/*hub*/, "int"/*portno*/, "struct usbd_port *"/*port*/); SDT_PROBE_DEFINE5(usb, hub, explore, reset, "struct usbd_device *"/*hub*/, "int"/*portno*/, "struct usbd_port *"/*port*/, "int"/*status*/, "int"/*change*/); SDT_PROBE_DEFINE4(usb, hub, explore, connect, "struct usbd_device *"/*hub*/, "int"/*portno*/, "struct usbd_port *"/*port*/, "int"/*speed*/); 
SDT_PROBE_DEFINE4(usb, hub, explore, connected, "struct usbd_device *"/*hub*/, "int"/*portno*/, "struct usbd_port *"/*port*/, "int"/*speed*/); SDT_PROBE_DEFINE2(usb, hub, interrupt, , "struct usbd_device *"/*hub*/, "usbd_status"/*status*/); #ifdef USB_DEBUG #ifndef UHUB_DEBUG #define uhubdebug 0 #else static int uhubdebug = 0; SYSCTL_SETUP(sysctl_hw_uhub_setup, "sysctl hw.uhub setup") { int err; const struct sysctlnode *rnode; const struct sysctlnode *cnode; err = sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "uhub", SYSCTL_DESCR("uhub global controls"), NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL); if (err) goto fail; /* control debugging printfs */ err = sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", SYSCTL_DESCR("Enable debugging output"), NULL, 0, &uhubdebug, sizeof(uhubdebug), CTL_CREATE, CTL_EOL); if (err) goto fail; return; fail: aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, err); } #endif /* UHUB_DEBUG */ #endif /* USB_DEBUG */ #define DPRINTF(FMT,A,B,C,D) USBHIST_LOGN(uhubdebug,1,FMT,A,B,C,D) #define DPRINTFN(N,FMT,A,B,C,D) USBHIST_LOGN(uhubdebug,N,FMT,A,B,C,D) #define UHUBHIST_FUNC() USBHIST_FUNC() #define UHUBHIST_CALLED(name) USBHIST_CALLED(uhubdebug) #define UHUBHIST_CALLARGS(FMT,A,B,C,D) \ USBHIST_CALLARGS(uhubdebug,FMT,A,B,C,D) struct uhub_softc { device_t sc_dev; /* base device */ struct usbd_device *sc_hub; /* USB device */ int sc_proto; /* device protocol */ struct usbd_pipe *sc_ipipe; /* interrupt pipe */ kmutex_t sc_lock; kcondvar_t sc_cv; uint8_t *sc_statusbuf; uint8_t *sc_statuspend; uint8_t *sc_status; size_t sc_statuslen; bool sc_explorepending; bool sc_first_explore; bool sc_running; bool sc_rescan; struct lwp *sc_exploring; }; #define UHUB_IS_HIGH_SPEED(sc) \ ((sc)->sc_proto == UDPROTO_HSHUBSTT || (sc)->sc_proto == UDPROTO_HSHUBMTT) #define UHUB_IS_SINGLE_TT(sc) ((sc)->sc_proto == UDPROTO_HSHUBSTT) #define PORTSTAT_ISSET(sc, port) \ 
((sc)->sc_status[(port) / 8] & (1 << ((port) % 8))) Static usbd_status uhub_explore(struct usbd_device *); Static void uhub_intr(struct usbd_xfer *, void *, usbd_status); /* * We need two attachment points: * hub to usb and hub to hub * Every other driver only connects to hubs */ static int uhub_match(device_t, cfdata_t, void *); static void uhub_attach(device_t, device_t, void *); static int uhub_rescan(device_t, const char *, const int *); static void uhub_childdet(device_t, device_t); static int uhub_detach(device_t, int); CFATTACH_DECL3_NEW(uhub, sizeof(struct uhub_softc), uhub_match, uhub_attach, uhub_detach, NULL, uhub_rescan, uhub_childdet, DVF_DETACH_SHUTDOWN); CFATTACH_DECL3_NEW(uroothub, sizeof(struct uhub_softc), uhub_match, uhub_attach, uhub_detach, NULL, uhub_rescan, uhub_childdet, DVF_DETACH_SHUTDOWN); /* * Setting this to 1 makes sure than an uhub attaches even at higher * priority than ugen when ugen_override is set to 1. This allows to * probe the whole USB bus and attach functions with ugen. 
*/ int uhub_ubermatch = 0; static usbd_status usbd_get_hub_desc(struct usbd_device *dev, usb_hub_descriptor_t *hd, int speed) { usb_device_request_t req; usbd_status err; int nports; UHUBHIST_FUNC(); UHUBHIST_CALLED(); /* don't issue UDESC_HUB to SS hub, or it would stall */ if (dev->ud_depth != 0 && USB_IS_SS(dev->ud_speed)) { usb_hub_ss_descriptor_t hssd; int rmvlen; memset(&hssd, 0, sizeof(hssd)); req.bmRequestType = UT_READ_CLASS_DEVICE; req.bRequest = UR_GET_DESCRIPTOR; USETW2(req.wValue, UDESC_SS_HUB, 0); USETW(req.wIndex, 0); USETW(req.wLength, USB_HUB_SS_DESCRIPTOR_SIZE); DPRINTFN(1, "getting sshub descriptor", 0, 0, 0, 0); err = usbd_do_request(dev, &req, &hssd); nports = hssd.bNbrPorts; if (dev->ud_depth != 0 && nports > UHD_SS_NPORTS_MAX) { DPRINTF("num of ports %jd exceeds maxports %jd", nports, UHD_SS_NPORTS_MAX, 0, 0); nports = hd->bNbrPorts = UHD_SS_NPORTS_MAX; } rmvlen = (nports + 7) / 8; hd->bDescLength = USB_HUB_DESCRIPTOR_SIZE + (rmvlen > 1 ? rmvlen : 1) - 1; memcpy(hd->DeviceRemovable, hssd.DeviceRemovable, rmvlen); hd->bDescriptorType = hssd.bDescriptorType; hd->bNbrPorts = hssd.bNbrPorts; hd->wHubCharacteristics[0] = hssd.wHubCharacteristics[0]; hd->wHubCharacteristics[1] = hssd.wHubCharacteristics[1]; hd->bPwrOn2PwrGood = hssd.bPwrOn2PwrGood; hd->bHubContrCurrent = hssd.bHubContrCurrent; } else { req.bmRequestType = UT_READ_CLASS_DEVICE; req.bRequest = UR_GET_DESCRIPTOR; USETW2(req.wValue, UDESC_HUB, 0); USETW(req.wIndex, 0); USETW(req.wLength, USB_HUB_DESCRIPTOR_SIZE); DPRINTFN(1, "getting hub descriptor", 0, 0, 0, 0); err = usbd_do_request(dev, &req, hd); nports = hd->bNbrPorts; if (!err && nports > 7) { USETW(req.wLength, USB_HUB_DESCRIPTOR_SIZE + (nports+1) / 8); err = usbd_do_request(dev, &req, hd); } } return err; } static usbd_status usbd_set_hub_depth(struct usbd_device *dev, int depth) { usb_device_request_t req; req.bmRequestType = UT_WRITE_CLASS_DEVICE; req.bRequest = UR_SET_HUB_DEPTH; USETW(req.wValue, depth); USETW(req.wIndex, 
0);
	USETW(req.wLength, 0);

	return usbd_do_request(dev, &req, 0);
}

/*
 * uhub_match(parent, match, aux)
 *
 *	Autoconf match: claim any device whose class is UDCLASS_HUB.
 */
static int
uhub_match(device_t parent, cfdata_t match, void *aux)
{
	struct usb_attach_arg *uaa = aux;
	int matchvalue;

	UHUBHIST_FUNC(); UHUBHIST_CALLED();

	/*
	 * With uhub_ubermatch set, outbid every other driver (even a
	 * ugen override) so hubs always get the hub driver and the
	 * whole bus can be probed.
	 */
	if (uhub_ubermatch)
		matchvalue = UMATCH_HIGHEST+1;
	else
		matchvalue = UMATCH_DEVCLASS_DEVSUBCLASS;

	DPRINTFN(5, "uaa=%#jx", (uintptr_t)uaa, 0, 0, 0);
	/*
	 * The subclass for hubs seems to be 0 for some and 1 for others,
	 * so we just ignore the subclass.
	 */
	if (uaa->uaa_class == UDCLASS_HUB)
		return matchvalue;
	return UMATCH_NONE;
}

/*
 * uhub_attach(parent, self, aux)
 *
 *	Autoconf attach: select the hub's configuration, read its hub
 *	descriptor, allocate per-port state, power the ports up and open
 *	the status-change interrupt pipe.  Runs in the USB event thread
 *	(asserted below); errors bail out via the cleanup labels.
 */
static void
uhub_attach(device_t parent, device_t self, void *aux)
{
	struct uhub_softc *sc = device_private(self);
	struct usb_attach_arg *uaa = aux;
	struct usbd_device *dev = uaa->uaa_device;
	char *devinfop;
	usbd_status err;
	struct usbd_hub *hub = NULL;
	usb_hub_descriptor_t hubdesc;
	int p, port, nports, nremov, pwrdly;
	struct usbd_interface *iface;
	usb_endpoint_descriptor_t *ed;
	struct usbd_tt *tts = NULL;

	UHUBHIST_FUNC(); UHUBHIST_CALLED();

	KASSERT(usb_in_event_thread(parent));

	/* Balanced by config_pending_decr() after the first explore. */
	config_pending_incr(self);

	sc->sc_dev = self;
	sc->sc_hub = dev;
	sc->sc_proto = uaa->uaa_proto;

	devinfop = usbd_devinfo_alloc(dev, 1);
	aprint_naive("\n");
	aprint_normal(": %s\n", devinfop);
	usbd_devinfo_free(devinfop);

	/* Announce the transaction translator layout of a HS hub. */
	if (dev->ud_depth > 0 && UHUB_IS_HIGH_SPEED(sc)) {
		aprint_normal_dev(self, "%s transaction translator%s\n",
		    UHUB_IS_SINGLE_TT(sc) ? "single" : "multiple",
		    UHUB_IS_SINGLE_TT(sc) ? "" : "s");
	}

	err = usbd_set_config_index(dev, 0, 1);
	if (err) {
		DPRINTF("configuration failed, sc %#jx error %jd",
		    (uintptr_t)sc, err, 0, 0);
		goto bad2;
	}

	if (dev->ud_depth > USB_HUB_MAX_DEPTH) {
		aprint_error_dev(self,
		    "hub depth (%d) exceeded, hub ignored\n",
		    USB_HUB_MAX_DEPTH);
		goto bad2;
	}

	/* Get hub descriptor.
*/ memset(&hubdesc, 0, sizeof(hubdesc)); err = usbd_get_hub_desc(dev, &hubdesc, dev->ud_speed); nports = hubdesc.bNbrPorts; if (err) { DPRINTF("getting hub descriptor failed, uhub%jd error %jd", device_unit(self), err, 0, 0); goto bad2; } for (nremov = 0, port = 1; port <= nports; port++) if (!UHD_NOT_REMOV(&hubdesc, port)) nremov++; aprint_verbose_dev(self, "%d port%s with %d removable, %s powered\n", nports, nports != 1 ? "s" : "", nremov, dev->ud_selfpowered ? "self" : "bus"); if (nports == 0) { aprint_debug_dev(self, "no ports, hub ignored\n"); goto bad; } hub = kmem_alloc(sizeof(*hub) + (nports-1) * sizeof(struct usbd_port), KM_SLEEP); dev->ud_hub = hub; dev->ud_hub->uh_hubsoftc = sc; hub->uh_explore = uhub_explore; hub->uh_hubdesc = hubdesc; if (USB_IS_SS(dev->ud_speed) && dev->ud_depth != 0) { aprint_debug_dev(self, "setting hub depth %u\n", dev->ud_depth - 1); err = usbd_set_hub_depth(dev, dev->ud_depth - 1); if (err) { aprint_error_dev(self, "can't set depth\n"); goto bad; } } /* Set up interrupt pipe. 
*/ err = usbd_device2interface_handle(dev, 0, &iface); if (err) { aprint_error_dev(self, "no interface handle\n"); goto bad; } if (UHUB_IS_HIGH_SPEED(sc) && !UHUB_IS_SINGLE_TT(sc)) { err = usbd_set_interface(iface, 1); if (err) aprint_error_dev(self, "can't enable multiple TTs\n"); } ed = usbd_interface2endpoint_descriptor(iface, 0); if (ed == NULL) { aprint_error_dev(self, "no endpoint descriptor\n"); goto bad; } if ((ed->bmAttributes & UE_XFERTYPE) != UE_INTERRUPT) { aprint_error_dev(self, "bad interrupt endpoint\n"); goto bad; } sc->sc_statuslen = (nports + 1 + 7) / 8; sc->sc_statusbuf = kmem_alloc(sc->sc_statuslen, KM_SLEEP); sc->sc_statuspend = kmem_zalloc(sc->sc_statuslen, KM_SLEEP); sc->sc_status = kmem_alloc(sc->sc_statuslen, KM_SLEEP); mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SOFTUSB); cv_init(&sc->sc_cv, "uhubex"); /* force initial scan */ memset(sc->sc_status, 0xff, sc->sc_statuslen); sc->sc_explorepending = true; err = usbd_open_pipe_intr(iface, ed->bEndpointAddress, USBD_SHORT_XFER_OK|USBD_MPSAFE, &sc->sc_ipipe, sc, sc->sc_statusbuf, sc->sc_statuslen, uhub_intr, USBD_DEFAULT_INTERVAL); if (err) { aprint_error_dev(self, "cannot open interrupt pipe\n"); goto bad; } /* Wait with power off for a while if we are not a root hub */ if (dev->ud_powersrc->up_parent != NULL) usbd_delay_ms(dev, USB_POWER_DOWN_TIME); usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, dev, sc->sc_dev); /* * To have the best chance of success we do things in the exact same * order as Windows 98. This should not be necessary, but some * devices do not follow the USB specs to the letter. 
* * These are the events on the bus when a hub is attached: * Get device and config descriptors (see attach code) * Get hub descriptor (see above) * For all ports * turn on power * wait for power to become stable * (all below happens in explore code) * For all ports * clear C_PORT_CONNECTION * For all ports * get port status * if device connected * wait 100 ms * turn on reset * wait * clear C_PORT_RESET * get port status * proceed with device attachment */ if (UHUB_IS_HIGH_SPEED(sc) && nports > 0) { tts = kmem_alloc((UHUB_IS_SINGLE_TT(sc) ? 1 : nports) * sizeof(struct usbd_tt), KM_SLEEP); } /* Set up data structures */ for (p = 1; p <= nports; p++) { struct usbd_port *up = &hub->uh_ports[p - 1]; up->up_dev = NULL; up->up_parent = dev; up->up_portno = p; if (dev->ud_selfpowered) /* Self powered hub, give ports maximum current. */ up->up_power = USB_MAX_POWER; else up->up_power = USB_MIN_POWER; up->up_restartcnt = 0; up->up_reattach = 0; if (UHUB_IS_HIGH_SPEED(sc)) { up->up_tt = &tts[UHUB_IS_SINGLE_TT(sc) ? 0 : p - 1]; up->up_tt->utt_hub = hub; } else { up->up_tt = NULL; } } /* XXX should check for none, individual, or ganged power? */ pwrdly = dev->ud_hub->uh_hubdesc.bPwrOn2PwrGood * UHD_PWRON_FACTOR + USB_EXTRA_POWER_UP_TIME; for (port = 1; port <= nports; port++) { /* Turn the power on. */ err = usbd_set_port_feature(dev, port, UHF_PORT_POWER); if (err) aprint_error_dev(self, "port %d power on failed, %s\n", port, usbd_errstr(err)); DPRINTF("uhub%jd turn on port %jd power", device_unit(self), port, 0, 0); } /* Wait for stable power if we are not a root hub */ if (dev->ud_powersrc->up_parent != NULL) usbd_delay_ms(dev, pwrdly); /* The usual exploration will finish the setup. 
*/ sc->sc_running = true; sc->sc_first_explore = true; if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); return; bad: if (sc->sc_status) kmem_free(sc->sc_status, sc->sc_statuslen); if (sc->sc_statuspend) kmem_free(sc->sc_statuspend, sc->sc_statuslen); if (sc->sc_statusbuf) kmem_free(sc->sc_statusbuf, sc->sc_statuslen); if (hub) kmem_free(hub, sizeof(*hub) + (nports-1) * sizeof(struct usbd_port)); dev->ud_hub = NULL; bad2: config_pending_decr(self); } usbd_status uhub_explore(struct usbd_device *dev) { usb_hub_descriptor_t *hd = &dev->ud_hub->uh_hubdesc; struct uhub_softc *sc = dev->ud_hub->uh_hubsoftc; struct usbd_port *up; struct usbd_device *subdev; usbd_status err; int speed; int port; int change, status, reconnect, rescan; UHUBHIST_FUNC(); UHUBHIST_CALLARGS("uhub%jd dev=%#jx addr=%jd speed=%ju", device_unit(sc->sc_dev), (uintptr_t)dev, dev->ud_addr, dev->ud_speed); KASSERT(usb_in_event_thread(sc->sc_dev)); if (!sc->sc_running) return USBD_NOT_STARTED; /* Ignore hubs that are too deep. */ if (dev->ud_depth > USB_HUB_MAX_DEPTH) return USBD_TOO_DEEP; SDT_PROBE1(usb, hub, explore, start, dev); /* Process rescan if requested. 
*/ mutex_enter(&sc->sc_lock); rescan = sc->sc_rescan; sc->sc_rescan = false; mutex_exit(&sc->sc_lock); if (rescan) { for (port = 1; port <= hd->bNbrPorts; port++) { SDT_PROBE3(usb, hub, explore, rescan, dev, port, &dev->ud_hub->uh_ports[port - 1]); subdev = dev->ud_hub->uh_ports[port - 1].up_dev; if (subdev == NULL) continue; usbd_reattach_device(sc->sc_dev, subdev, port, NULL); } } if (PORTSTAT_ISSET(sc, 0)) { /* hub status change */ usb_hub_status_t hs; err = usbd_get_hub_status(dev, &hs); if (err) { DPRINTF("uhub%jd get hub status failed, err %jd", device_unit(sc->sc_dev), err, 0, 0); } else { /* just acknowledge */ status = UGETW(hs.wHubStatus); change = UGETW(hs.wHubChange); SDT_PROBE5(usb, hub, explore, portstat, dev, /*portno*/0, status, change, /*reattach*/0); DPRINTF("uhub%jd s/c=%jx/%jx", device_unit(sc->sc_dev), status, change, 0); if (change & UHS_LOCAL_POWER) usbd_clear_hub_feature(dev, UHF_C_HUB_LOCAL_POWER); if (change & UHS_OVER_CURRENT) usbd_clear_hub_feature(dev, UHF_C_HUB_OVER_CURRENT); } } for (port = 1; port <= hd->bNbrPorts; port++) { up = &dev->ud_hub->uh_ports[port - 1]; /* reattach is needed after firmware upload */ reconnect = up->up_reattach; up->up_reattach = 0; status = change = 0; /* don't check if no change summary notification */ if (PORTSTAT_ISSET(sc, port) || reconnect) { err = usbd_get_port_status(dev, port, &up->up_status); if (err) { DPRINTF("uhub%jd get port stat failed, err %jd", device_unit(sc->sc_dev), err, 0, 0); continue; } status = UGETW(up->up_status.wPortStatus); change = UGETW(up->up_status.wPortChange); DPRINTF("uhub%jd port %jd: s/c=%jx/%jx", device_unit(sc->sc_dev), port, status, change); } SDT_PROBE5(usb, hub, explore, portstat, dev, port, status, change, reconnect); if (!change && !reconnect) { /* No status change, just do recursive explore. 
*/ if (up->up_dev != NULL && up->up_dev->ud_hub != NULL) up->up_dev->ud_hub->uh_explore(up->up_dev); continue; } if (change & UPS_C_PORT_ENABLED) { DPRINTF("uhub%jd port %jd C_PORT_ENABLED", device_unit(sc->sc_dev), port, 0, 0); usbd_clear_port_feature(dev, port, UHF_C_PORT_ENABLE); if (change & UPS_C_CONNECT_STATUS) { /* Ignore the port error if the device vanished. */ } else if (status & UPS_PORT_ENABLED) { aprint_error_dev(sc->sc_dev, "illegal enable change, port %d\n", port); } else { /* Port error condition. */ if (up->up_restartcnt) /* no message first time */ aprint_error_dev(sc->sc_dev, "port error, restarting port %d\n", port); if (up->up_restartcnt++ < USBD_RESTART_MAX) goto disco; else aprint_error_dev(sc->sc_dev, "port error, giving up port %d\n", port); } } if (change & UPS_C_PORT_RESET) { /* * some xHCs set PortResetChange instead of CSC * when port is reset. */ if ((status & UPS_CURRENT_CONNECT_STATUS) != 0) { change |= UPS_C_CONNECT_STATUS; } usbd_clear_port_feature(dev, port, UHF_C_PORT_RESET); } if (change & UPS_C_BH_PORT_RESET) { /* * some xHCs set WarmResetChange instead of CSC * when port is reset. */ if ((status & UPS_CURRENT_CONNECT_STATUS) != 0) { change |= UPS_C_CONNECT_STATUS; } usbd_clear_port_feature(dev, port, UHF_C_BH_PORT_RESET); } if (change & UPS_C_PORT_LINK_STATE) usbd_clear_port_feature(dev, port, UHF_C_PORT_LINK_STATE); if (change & UPS_C_PORT_CONFIG_ERROR) usbd_clear_port_feature(dev, port, UHF_C_PORT_CONFIG_ERROR); /* XXX handle overcurrent and resume events! */ if (!reconnect && !(change & UPS_C_CONNECT_STATUS)) { /* No status change, just do recursive explore. */ if (up->up_dev != NULL && up->up_dev->ud_hub != NULL) up->up_dev->ud_hub->uh_explore(up->up_dev); continue; } /* We have a connect status change, handle it. 
*/ DPRINTF("uhub%jd status change port %jd", device_unit(sc->sc_dev), port, 0, 0); usbd_clear_port_feature(dev, port, UHF_C_PORT_CONNECTION); /* * If there is already a device on the port the change status * must mean that is has disconnected. Looking at the * current connect status is not enough to figure this out * since a new unit may have been connected before we handle * the disconnect. */ disco: if (up->up_dev != NULL) { /* Disconnected */ DPRINTF("uhub%jd device addr=%jd disappeared on " "port %jd", device_unit(sc->sc_dev), up->up_dev->ud_addr, port, 0); SDT_PROBE3(usb, hub, explore, disconnect, dev, port, up); usb_disconnect_port(up, sc->sc_dev, DETACH_FORCE); usbd_clear_port_feature(dev, port, UHF_C_PORT_CONNECTION); } if (!(status & UPS_CURRENT_CONNECT_STATUS)) { /* Nothing connected, just ignore it. */ DPRINTFN(3, "uhub%jd port %jd !CURRENT_CONNECT_STATUS", device_unit(sc->sc_dev), port, 0, 0); SDT_PROBE3(usb, hub, explore, disconnect, dev, port, up); usb_disconnect_port(up, sc->sc_dev, DETACH_FORCE); usbd_clear_port_feature(dev, port, UHF_C_PORT_CONNECTION); continue; } /* Connected */ DPRINTF("unit %jd dev->speed=%ju dev->depth=%ju", device_unit(sc->sc_dev), dev->ud_speed, dev->ud_depth, 0); /* Wait for maximum device power up time. */ usbd_delay_ms(dev, USB_PORT_POWERUP_DELAY); /* Reset port, which implies enabling it. 
*/ if (usbd_reset_port(dev, port, &up->up_status)) { aprint_error_dev(sc->sc_dev, "port %d reset failed\n", port); continue; } #if 0 /* Get port status again, it might have changed during reset */ err = usbd_get_port_status(dev, port, &up->up_status); if (err) { DPRINTF("uhub%jd port %jd get port status failed, " "err %jd", device_unit(sc->sc_dev), port, err, 0); continue; } #endif /* * Use the port status from the reset to check for the device * disappearing, the port enable status, and the port speed */ status = UGETW(up->up_status.wPortStatus); change = UGETW(up->up_status.wPortChange); SDT_PROBE5(usb, hub, explore, reset, dev, port, up, status, change); DPRINTF("uhub%jd port %jd after reset: s/c=%jx/%jx", device_unit(sc->sc_dev), port, status, change); if (!(status & UPS_CURRENT_CONNECT_STATUS)) { /* Nothing connected, just ignore it. */ #ifdef DIAGNOSTIC aprint_debug_dev(sc->sc_dev, "port %d, device disappeared after reset\n", port); #endif continue; } if (!(status & UPS_PORT_ENABLED)) { /* Not allowed send/receive packet. */ #ifdef DIAGNOSTIC printf("%s: port %d, device not enabled\n", device_xname(sc->sc_dev), port); #endif continue; } /* port reset may cause Warm Reset Change, drop it. */ if (change & UPS_C_BH_PORT_RESET) usbd_clear_port_feature(dev, port, UHF_C_BH_PORT_RESET); /* * Figure out device speed from power bit of port status. * USB 2.0 ch 11.24.2.7.1 * USB 3.1 ch 10.16.2.6.1 */ int sts = status; if ((sts & UPS_PORT_POWER) == 0) sts &= ~UPS_PORT_POWER_SS; if (sts & UPS_HIGH_SPEED) speed = USB_SPEED_HIGH; else if (sts & UPS_LOW_SPEED) speed = USB_SPEED_LOW; else { /* * If there is no power bit set, it is certainly * a Super Speed device, so use the speed of its * parent hub. */ if (sts & UPS_PORT_POWER) speed = USB_SPEED_FULL; else speed = dev->ud_speed; } /* * Reduce the speed, otherwise we won't setup the proper * transfer methods. 
*/ if (speed > dev->ud_speed) speed = dev->ud_speed; DPRINTF("uhub%jd speed %ju", device_unit(sc->sc_dev), speed, 0, 0); /* * To check whether port has power, * check UPS_PORT_POWER_SS bit if port speed is SS, and * check UPS_PORT_POWER bit if port speed is HS/FS/LS. */ if (USB_IS_SS(speed)) { /* SS hub port */ if (!(status & UPS_PORT_POWER_SS)) aprint_normal_dev(sc->sc_dev, "strange, connected port %d has no power\n", port); } else { /* HS/FS/LS hub port */ if (!(status & UPS_PORT_POWER)) aprint_normal_dev(sc->sc_dev, "strange, connected port %d has no power\n", port); } if (dev->ud_bus->ub_hctype == USBHCTYPE_VHCI) { kcov_remote_enter(KCOV_REMOTE_VHCI, KCOV_REMOTE_VHCI_ID(dev->ud_bus->ub_busnum, port)); } SDT_PROBE4(usb, hub, explore, connect, dev, port, up, speed); /* Get device info and set its address. */ err = usbd_new_device(sc->sc_dev, dev->ud_bus, dev->ud_depth + 1, speed, port, up); if (dev->ud_bus->ub_hctype == USBHCTYPE_VHCI) { kcov_remote_leave(KCOV_REMOTE_VHCI, KCOV_REMOTE_VHCI_ID(dev->ud_bus->ub_busnum, port)); } /* XXX retry a few times? */ if (err) { DPRINTF("uhub%jd: usbd_new_device failed, error %jd", device_unit(sc->sc_dev), err, 0, 0); /* Avoid addressing problems by disabling. */ /* usbd_reset_port(dev, port, &up->status); */ /* * The unit refused to accept a new address, or had * some other serious problem. Since we cannot leave * at 0 we have to disable the port instead. */ aprint_error_dev(sc->sc_dev, "device problem, disabling port %d\n", port); usbd_clear_port_feature(dev, port, UHF_PORT_ENABLE); } else { SDT_PROBE4(usb, hub, explore, connected, dev, port, up, speed); /* The port set up succeeded, reset error count. 
*/ up->up_restartcnt = 0; if (up->up_dev->ud_hub) up->up_dev->ud_hub->uh_explore(up->up_dev); } } mutex_enter(&sc->sc_lock); sc->sc_explorepending = false; for (int i = 0; i < sc->sc_statuslen; i++) { if (sc->sc_statuspend[i] != 0) { memcpy(sc->sc_status, sc->sc_statuspend, sc->sc_statuslen); memset(sc->sc_statuspend, 0, sc->sc_statuslen); usb_needs_explore(sc->sc_hub); break; } } mutex_exit(&sc->sc_lock); if (sc->sc_first_explore) { config_pending_decr(sc->sc_dev); sc->sc_first_explore = false; } SDT_PROBE1(usb, hub, explore, done, dev); return USBD_NORMAL_COMPLETION; } /* * Called from process context when the hub is gone. * Detach all devices on active ports. */ static int uhub_detach(device_t self, int flags) { struct uhub_softc *sc = device_private(self); struct usbd_hub *hub = sc->sc_hub->ud_hub; struct usbd_port *rup; int nports, port, rc; UHUBHIST_FUNC(); UHUBHIST_CALLED(); DPRINTF("uhub%jd flags=%jd", device_unit(self), flags, 0, 0); if (hub == NULL) /* Must be partially working */ return 0; /* XXXSMP usb */ KERNEL_LOCK(1, curlwp); nports = hub->uh_hubdesc.bNbrPorts; for (port = 1; port <= nports; port++) { rup = &hub->uh_ports[port - 1]; if (rup->up_dev == NULL) continue; if ((rc = usb_disconnect_port(rup, self, flags)) != 0) { /* XXXSMP usb */ KERNEL_UNLOCK_ONE(curlwp); return rc; } } pmf_device_deregister(self); usbd_abort_pipe(sc->sc_ipipe); usbd_close_pipe(sc->sc_ipipe); usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_hub, sc->sc_dev); if (hub->uh_ports[0].up_tt) kmem_free(hub->uh_ports[0].up_tt, (UHUB_IS_SINGLE_TT(sc) ? 
1 : nports) * sizeof(struct usbd_tt));
	kmem_free(hub,
	    sizeof(*hub) + (nports-1) * sizeof(struct usbd_port));
	sc->sc_hub->ud_hub = NULL;
	if (sc->sc_status)
		kmem_free(sc->sc_status, sc->sc_statuslen);
	if (sc->sc_statuspend)
		kmem_free(sc->sc_statuspend, sc->sc_statuslen);
	if (sc->sc_statusbuf)
		kmem_free(sc->sc_statusbuf, sc->sc_statuslen);
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_lock);

	/* XXXSMP usb */
	KERNEL_UNLOCK_ONE(curlwp);

	return 0;
}

/*
 * uhub_rescan(self, ifattr, locators)
 *
 *	Autoconf rescan hook: flag the hub for a rescan and kick the
 *	USB event thread; the actual work happens in uhub_explore().
 */
static int
uhub_rescan(device_t self, const char *ifattr, const int *locators)
{
	struct uhub_softc *sc = device_private(self);

	UHUBHIST_FUNC(); UHUBHIST_CALLARGS("uhub%jd",
	    device_unit(sc->sc_dev), 0, 0, 0);

	KASSERT(KERNEL_LOCKED_P());

	/* Trigger bus exploration. */
	/* XXX locators */
	mutex_enter(&sc->sc_lock);
	sc->sc_rescan = true;
	mutex_exit(&sc->sc_lock);
	usb_needs_explore(sc->sc_hub);

	return 0;
}

/*
 * uhub_childdet(self, child)
 *
 *	Called when a device has been detached from the hub: scrub
 *	every reference to child from the per-port subdevice tables,
 *	freeing a table once no interfaces remain claimed on it.
 */
static void
uhub_childdet(device_t self, device_t child)
{
	struct uhub_softc *sc = device_private(self);
	struct usbd_device *devhub = sc->sc_hub;
	struct usbd_device *dev;
	int nports;
	int port;
	int i;

	KASSERT(KERNEL_LOCKED_P());

	if (!devhub->ud_hub)
		/* should never happen; children are only created after init */
		panic("hub not fully initialised, but child deleted?");

	nports = devhub->ud_hub->uh_hubdesc.bNbrPorts;
	for (port = 1; port <= nports; port++) {
		dev = devhub->ud_hub->uh_ports[port - 1].up_dev;
		if (!dev || dev->ud_subdevlen == 0)
			continue;
		for (i = 0; i < dev->ud_subdevlen; i++) {
			if (dev->ud_subdevs[i] == child) {
				dev->ud_subdevs[i] = NULL;
				dev->ud_nifaces_claimed--;
			}
		}
		if (dev->ud_nifaces_claimed == 0) {
			kmem_free(dev->ud_subdevs,
			    dev->ud_subdevlen * sizeof(device_t));
			dev->ud_subdevs = NULL;
			dev->ud_subdevlen = 0;
		}
	}
}

/*
 * Hub interrupt.
 * This is an indication that some port has changed status.
 * Notify the bus event handler thread that we need
 * to be explored again.
*/
/*
 * NOTE(review): the prototype above declares this Static; the
 * definition omits it.  Linkage is still internal because of the
 * earlier static declaration, but the two should match -- confirm
 * against other NetBSD USB drivers.
 */
void
uhub_intr(struct usbd_xfer *xfer, void *addr, usbd_status status)
{
	struct uhub_softc *sc = addr;

	UHUBHIST_FUNC(); UHUBHIST_CALLARGS("called! uhub%jd status=%jx",
	    device_unit(sc->sc_dev), status, 0, 0);

	SDT_PROBE2(usb, hub, interrupt, , sc->sc_hub, status);

	if (status == USBD_STALLED)
		usbd_clear_endpoint_stall_async(sc->sc_ipipe);
	else if (status == USBD_NORMAL_COMPLETION) {
		mutex_enter(&sc->sc_lock);

		DPRINTFN(5, "uhub%jd: explore pending %jd",
		    device_unit(sc->sc_dev), sc->sc_explorepending, 0, 0);

		/* merge port bitmap into pending interrupts list */
		for (size_t i = 0; i < sc->sc_statuslen; i++) {
			sc->sc_statuspend[i] |= sc->sc_statusbuf[i];
			DPRINTFN(5, "uhub%jd: pending/new ports "
			    "[%jd] %#jx/%#jx", device_unit(sc->sc_dev),
			    i, sc->sc_statuspend[i], sc->sc_statusbuf[i]);
		}

		/*
		 * If no explore is in flight, snapshot the accumulated
		 * bits into sc_status for uhub_explore() to consume,
		 * clear the pending set, and wake the event thread.
		 * Otherwise the bits stay in sc_statuspend and the
		 * finishing explore will requeue itself.
		 */
		if (!sc->sc_explorepending) {
			sc->sc_explorepending = true;

			memcpy(sc->sc_status, sc->sc_statuspend,
			    sc->sc_statuslen);
			memset(sc->sc_statuspend, 0, sc->sc_statuslen);

			for (size_t i = 0; i < sc->sc_statuslen; i++) {
				DPRINTFN(5, "uhub%jd: exploring ports "
				    "[%jd] %#jx", device_unit(sc->sc_dev),
				    i, sc->sc_status[i], 0);
			}

			usb_needs_explore(sc->sc_hub);
		}
		mutex_exit(&sc->sc_lock);
	}
}
| 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 | /* $NetBSD: if_aue.c,v 1.191 2022/08/20 14:08:59 riastradh Exp $ */ /* * Copyright (c) 1997, 1998, 1999, 2000 
* Bill Paul <wpaul@ee.columbia.edu>. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/dev/usb/if_aue.c,v 1.11 2000/01/14 01:36:14 wpaul Exp $ */ /* * ADMtek AN986 Pegasus and AN8511 Pegasus II USB to ethernet driver. * Datasheet is available from http://www.admtek.com.tw. 
* * Written by Bill Paul <wpaul@ee.columbia.edu> * Electrical Engineering Department * Columbia University, New York City */ /* * The Pegasus chip uses four USB "endpoints" to provide 10/100 ethernet * support: the control endpoint for reading/writing registers, burst * read endpoint for packet reception, burst write for packet transmission * and one for "interrupts." The chip uses the same RX filter scheme * as the other ADMtek ethernet parts: one perfect filter entry for the * the station address and a 64-bit multicast hash table. The chip supports * both MII and HomePNA attachments. * * Since the maximum data transfer speed of USB is supposed to be 12Mbps, * you're never really going to get 100Mbps speeds from this device. I * think the idea is to allow the device to connect to 10 or 100Mbps * networks, not necessarily to provide 100Mbps performance. Also, since * the controller uses an external PHY chip, it's possible that board * designers might simply choose a 10Mbps PHY. * * Registers are accessed using usbd_do_request(). Packet transfers are * done using usbd_transfer() and friends. */ /* * Ported to NetBSD and somewhat rewritten by Lennart Augustsson. 
 */

/*
 * TODO:
 * better error messages from rxstat
 * more error checks
 * investigate short rx problem
 * proper cleanup on errors
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_aue.c,v 1.191 2022/08/20 14:08:59 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_usb.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>

#include <dev/usb/usbnet.h>
#include <dev/usb/usbhist.h>
#include <dev/usb/if_auereg.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/if_inarp.h>
#endif

#ifdef USB_DEBUG
#ifndef AUE_DEBUG
#define auedebug 0
#else
/* Debug verbosity; runtime-tunable through sysctl hw.aue.debug. */
static int auedebug = 10;

/* Create the hw.aue sysctl subtree with a single "debug" knob. */
SYSCTL_SETUP(sysctl_hw_aue_setup, "sysctl hw.aue setup")
{
	int err;
	const struct sysctlnode *rnode;
	const struct sysctlnode *cnode;

	err = sysctl_createv(clog, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "aue",
	    SYSCTL_DESCR("aue global controls"),
	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);

	if (err)
		goto fail;

	/* control debugging printfs */
	err = sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Enable debugging output"),
	    NULL, 0, &auedebug, sizeof(auedebug), CTL_CREATE, CTL_EOL);
	if (err)
		goto fail;

	return;
fail:
	aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, err);
}
#endif /* AUE_DEBUG */
#endif /* USB_DEBUG */

/* usbhist(9)-based debug logging wrappers, gated on auedebug. */
#define DPRINTF(FMT,A,B,C,D)	USBHIST_LOGN(auedebug,1,FMT,A,B,C,D)
#define DPRINTFN(N,FMT,A,B,C,D)	USBHIST_LOGN(auedebug,N,FMT,A,B,C,D)
#define AUEHIST_FUNC()		USBHIST_FUNC()
#define AUEHIST_CALLED(name)	USBHIST_CALLED(auedebug)
#define AUEHIST_CALLARGS(FMT,A,B,C,D) \
				USBHIST_CALLARGS(auedebug,FMT,A,B,C,D)
#define AUEHIST_CALLARGSN(N,FMT,A,B,C,D) \
				USBHIST_CALLARGSN(auedebug,N,FMT,A,B,C,D)

#define AUE_TX_LIST_CNT		1
#define AUE_RX_LIST_CNT		1

struct aue_softc {
	struct usbnet		aue_un;		/* usbnet state; first member (attach KASSERTs sc == un) */
	struct usbnet_intr	aue_intr;	/* interrupt-pipe configuration */
	struct aue_intrpkt	aue_ibuf;	/* buffer for interrupt-pipe status packets */
};

#define AUE_TIMEOUT		1000	/* poll-loop iteration limit */
#define AUE_BUFSZ		1536
#define AUE_MIN_FRAMELEN	60
#define AUE_TX_TIMEOUT		10000	/* ms */
#define AUE_INTR_INTERVAL	100	/* ms */

/*
 * Various supported device vendors/products.
 */
struct aue_type {
	struct usb_devno	aue_dev;
	uint16_t		aue_flags;
#define LSYS	0x0001		/* use Linksys reset */
#define PNA	0x0002		/* has Home PNA */
#define PII	0x0004		/* Pegasus II chip */
};

static const struct aue_type aue_devs[] = {
	{{ USB_VENDOR_3COM,		USB_PRODUCT_3COM_3C460B},	  PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX1},	  PNA | PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX2},	  PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_UFE1000},	  LSYS },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX4},	  PNA },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX5},	  PNA },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX6},	  PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX7},	  PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX8},	  PII },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX9},	  PNA },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_XX10},	  0 },
	{{ USB_VENDOR_ABOCOM,		USB_PRODUCT_ABOCOM_DSB650TX_PNA}, 0 },
	{{ USB_VENDOR_ACCTON,		USB_PRODUCT_ACCTON_USB320_EC},	  0 },
	{{ USB_VENDOR_ACCTON,		USB_PRODUCT_ACCTON_SS1001},	  PII },
	{{ USB_VENDOR_ADMTEK,		USB_PRODUCT_ADMTEK_PEGASUS},	  PNA },
	{{ USB_VENDOR_ADMTEK,		USB_PRODUCT_ADMTEK_PEGASUSII},	  PII },
	{{ USB_VENDOR_ADMTEK,		USB_PRODUCT_ADMTEK_PEGASUSII_2},  PII },
	{{ USB_VENDOR_ADMTEK,		USB_PRODUCT_ADMTEK_PEGASUSII_3},  PII },
	{{ USB_VENDOR_AEI,		USB_PRODUCT_AEI_USBTOLAN},	  PII },
	{{ USB_VENDOR_BELKIN,		USB_PRODUCT_BELKIN_USB2LAN},	  PII },
	{{ USB_VENDOR_BILLIONTON,	USB_PRODUCT_BILLIONTON_USB100},	  0 },
	{{ USB_VENDOR_BILLIONTON,	USB_PRODUCT_BILLIONTON_USBLP100}, PNA },
	{{ USB_VENDOR_BILLIONTON,	USB_PRODUCT_BILLIONTON_USBEL100}, 0 },
	{{ USB_VENDOR_BILLIONTON,	USB_PRODUCT_BILLIONTON_USBE100},  PII },
	{{ USB_VENDOR_COMPAQ,		USB_PRODUCT_COMPAQ_HNE200},	  PII },
	{{ USB_VENDOR_COREGA,		USB_PRODUCT_COREGA_FETHER_USB_TX}, 0 },
	{{ USB_VENDOR_COREGA,		USB_PRODUCT_COREGA_FETHER_USB_TXS},PII },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX4},	  LSYS | PII },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX1},	  LSYS },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX},	  LSYS },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX_PNA},  PNA },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX3},	  LSYS | PII },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650TX2},	  LSYS | PII },
	{{ USB_VENDOR_DLINK,		USB_PRODUCT_DLINK_DSB650},	  0 },
	{{ USB_VENDOR_ELECOM,		USB_PRODUCT_ELECOM_LDUSBTX0},	  0 },
	{{ USB_VENDOR_ELECOM,		USB_PRODUCT_ELECOM_LDUSBTX1},	  LSYS },
	{{ USB_VENDOR_ELECOM,		USB_PRODUCT_ELECOM_LDUSBTX2},	  0 },
	{{ USB_VENDOR_ELECOM,		USB_PRODUCT_ELECOM_LDUSBTX3},	  LSYS },
	{{ USB_VENDOR_ELECOM,		USB_PRODUCT_ELECOM_LDUSBLTX},	  PII },
	{{ USB_VENDOR_ELSA,		USB_PRODUCT_ELSA_USB2ETHERNET},	  0 },
	{{ USB_VENDOR_HAWKING,		USB_PRODUCT_HAWKING_UF100},	  PII },
	{{ USB_VENDOR_HP,		USB_PRODUCT_HP_HN210E},		  PII },
	{{ USB_VENDOR_IODATA,		USB_PRODUCT_IODATA_USBETTX},	  0 },
	{{ USB_VENDOR_IODATA,		USB_PRODUCT_IODATA_USBETTXS},	  PII },
	{{ USB_VENDOR_IODATA,		USB_PRODUCT_IODATA_ETXUS2},	  PII },
	{{ USB_VENDOR_KINGSTON,		USB_PRODUCT_KINGSTON_KNU101TX},	  0 },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB10TX1},	  LSYS | PII },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB10T},	  LSYS },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB100TX},	  LSYS },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB100H1},	  LSYS | PNA },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB10TA},	  LSYS },
	{{ USB_VENDOR_LINKSYS,		USB_PRODUCT_LINKSYS_USB10TX2},	  LSYS | PII },
	{{ USB_VENDOR_MELCO,		USB_PRODUCT_MELCO_LUATX1},	  0 },
	{{ USB_VENDOR_MELCO,		USB_PRODUCT_MELCO_LUATX5},	  0 },
	{{ USB_VENDOR_MELCO,		USB_PRODUCT_MELCO_LUA2TX5},	  PII },
	{{ USB_VENDOR_MICROSOFT,	USB_PRODUCT_MICROSOFT_MN110},	  PII },
	{{ USB_VENDOR_NETGEAR,		USB_PRODUCT_NETGEAR_FA101},	  PII },
	{{ USB_VENDOR_SIEMENS,		USB_PRODUCT_SIEMENS_SPEEDSTREAM}, PII },
	{{ USB_VENDOR_SMARTBRIDGES,	USB_PRODUCT_SMARTBRIDGES_SMARTNIC},PII },
	{{ USB_VENDOR_SMC,		USB_PRODUCT_SMC_2202USB},	  0 },
	{{ USB_VENDOR_SMC,		USB_PRODUCT_SMC_2206USB},	  PII },
	{{ USB_VENDOR_SOHOWARE,		USB_PRODUCT_SOHOWARE_NUB100},	  0 },
};

#define aue_lookup(v, p) ((const struct 
aue_type *)usb_lookup(aue_devs, v, p))

static int	aue_match(device_t, cfdata_t, void *);
static void	aue_attach(device_t, device_t, void *);

CFATTACH_DECL_NEW(aue, sizeof(struct aue_softc), aue_match, aue_attach,
    usbnet_detach, usbnet_activate);

static void	aue_reset_pegasus_II(struct aue_softc *);

static void	aue_uno_stop(struct ifnet *, int);
static void	aue_uno_mcast(struct ifnet *);
static int	aue_uno_mii_read_reg(struct usbnet *, int, int, uint16_t *);
static int	aue_uno_mii_write_reg(struct usbnet *, int, int, uint16_t);
static void	aue_uno_mii_statchg(struct ifnet *);
static unsigned	aue_uno_tx_prepare(struct usbnet *, struct mbuf *,
		    struct usbnet_chain *);
static void	aue_uno_rx_loop(struct usbnet *, struct usbnet_chain *,
		    uint32_t);
static int	aue_uno_init(struct ifnet *);
static void	aue_uno_intr(struct usbnet *, usbd_status);

/* Driver callbacks handed to the shared usbnet(9) framework. */
static const struct usbnet_ops aue_ops = {
	.uno_stop = aue_uno_stop,
	.uno_mcast = aue_uno_mcast,
	.uno_read_reg = aue_uno_mii_read_reg,
	.uno_write_reg = aue_uno_mii_write_reg,
	.uno_statchg = aue_uno_mii_statchg,
	.uno_tx_prepare = aue_uno_tx_prepare,
	.uno_rx_loop = aue_uno_rx_loop,
	.uno_init = aue_uno_init,
	.uno_intr = aue_uno_intr,
};

static uint32_t	aue_crc(void *);
static void	aue_reset(struct aue_softc *);

static int	aue_csr_read_1(struct aue_softc *, int);
static int	aue_csr_write_1(struct aue_softc *, int, int);
static int	aue_csr_read_2(struct aue_softc *, int);
static int	aue_csr_write_2(struct aue_softc *, int, int);

/* Read-modify-write helpers for single-byte registers. */
#define AUE_SETBIT(sc, reg, x)	\
	aue_csr_write_1(sc, reg, aue_csr_read_1(sc, reg) | (x))

#define AUE_CLRBIT(sc, reg, x)	\
	aue_csr_write_1(sc, reg, aue_csr_read_1(sc, reg) & ~(x))

/*
 * Read one byte from register 'reg' with a vendor control request.
 * Returns 0 on any failure (including a dying device), so callers
 * cannot distinguish an error from a register value of zero.
 */
static int
aue_csr_read_1(struct aue_softc *sc, int reg)
{
	struct usbnet * const un = &sc->aue_un;
	usb_device_request_t req;
	usbd_status err;
	uByte val = 0;

	if (usbnet_isdying(un))
		return 0;

	req.bmRequestType = UT_READ_VENDOR_DEVICE;
	req.bRequest = AUE_UR_READREG;
	USETW(req.wValue, 0);
	USETW(req.wIndex, reg);
	USETW(req.wLength, 1);

	err = usbd_do_request(un->un_udev, &req, &val);
	if (err) {
		AUEHIST_FUNC();
		AUEHIST_CALLARGS("aue%jd: reg=%#jx err=%jd",
		    device_unit(un->un_dev), reg, err, 0);
		return 0;
	}

	return val;
}

/* Read a 16-bit register; returns 0 on any failure (same caveat as above). */
static int
aue_csr_read_2(struct aue_softc *sc, int reg)
{
	struct usbnet * const un = &sc->aue_un;
	usb_device_request_t req;
	usbd_status err;
	uWord val;

	if (usbnet_isdying(un))
		return 0;

	req.bmRequestType = UT_READ_VENDOR_DEVICE;
	req.bRequest = AUE_UR_READREG;
	USETW(req.wValue, 0);
	USETW(req.wIndex, reg);
	USETW(req.wLength, 2);

	err = usbd_do_request(un->un_udev, &req, &val);
	if (err) {
		AUEHIST_FUNC();
		AUEHIST_CALLARGS("aue%jd: reg=%#jx err=%jd",
		    device_unit(un->un_dev), reg, err, 0);
		return 0;
	}

	return UGETW(val);
}

/* Write one byte to register 'reg'; returns 0 on success, -1 on error. */
static int
aue_csr_write_1(struct aue_softc *sc, int reg, int aval)
{
	struct usbnet * const un = &sc->aue_un;
	usb_device_request_t req;
	usbd_status err;
	uByte val;

	if (usbnet_isdying(un))
		return 0;

	val = aval;
	req.bmRequestType = UT_WRITE_VENDOR_DEVICE;
	req.bRequest = AUE_UR_WRITEREG;
	USETW(req.wValue, val);
	USETW(req.wIndex, reg);
	USETW(req.wLength, 1);

	err = usbd_do_request(un->un_udev, &req, &val);
	if (err) {
		AUEHIST_FUNC();
		AUEHIST_CALLARGS("aue%jd: reg=%#jx err=%jd",
		    device_unit(un->un_dev), reg, err, 0);
		return -1;
	}

	return 0;
}

/* Write a 16-bit value to register 'reg'; returns 0 on success, -1 on error. */
static int
aue_csr_write_2(struct aue_softc *sc, int reg, int aval)
{
	struct usbnet * const un = &sc->aue_un;
	usb_device_request_t req;
	usbd_status err;
	uWord val;

	if (usbnet_isdying(un))
		return 0;

	USETW(val, aval);
	req.bmRequestType = UT_WRITE_VENDOR_DEVICE;
	req.bRequest = AUE_UR_WRITEREG;
	USETW(req.wValue, aval);
	USETW(req.wIndex, reg);
	USETW(req.wLength, 2);

	err = usbd_do_request(un->un_udev, &req, &val);
	if (err) {
		AUEHIST_FUNC();
		AUEHIST_CALLARGS("aue%jd: reg=%#jx err=%jd",
		    device_unit(un->un_dev), reg, err, 0);
		return -1;
	}

	return 0;
}

/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 */
static int
aue_eeprom_getword(struct aue_softc *sc, int addr)
{
	struct usbnet * const un = &sc->aue_un;
	int i;

	AUEHIST_FUNC(); AUEHIST_CALLED();

	aue_csr_write_1(sc, AUE_EE_REG, addr);
	aue_csr_write_1(sc, AUE_EE_CTL, AUE_EECTL_READ);

	/* Busy-wait for the EEPROM controller to signal completion. */
	for (i = 0; i < AUE_TIMEOUT; i++) {
		if (aue_csr_read_1(sc, AUE_EE_CTL) & AUE_EECTL_DONE)
			break;
	}

	if (i == AUE_TIMEOUT) {
		printf("%s: EEPROM read timed out\n",
		    device_xname(un->un_dev));
	}

	/* Data register is read regardless; stale on timeout. */
	return aue_csr_read_2(sc, AUE_EE_DATA);
}

/*
 * Read the MAC from the EEPROM. It's at offset 0.
 */
static void
aue_read_mac(struct usbnet *un)
{
	struct aue_softc *sc = usbnet_softc(un);
	int i;
	int off = 0;
	int word;

	AUEHIST_FUNC();
	AUEHIST_CALLARGS("aue%jd: enter",
	    device_unit(un->un_dev), 0, 0, 0);

	/* Three 16-bit words yield the six-byte station address. */
	for (i = 0; i < 3; i++) {
		word = aue_eeprom_getword(sc, off + i);
		un->un_eaddr[2 * i] = (u_char)word;
		un->un_eaddr[2 * i + 1] = (u_char)(word >> 8);
	}
}

/* usbnet MII read callback: read PHY register via the chip's MII engine. */
static int
aue_uno_mii_read_reg(struct usbnet *un, int phy, int reg, uint16_t *val)
{
	struct aue_softc *sc = usbnet_softc(un);
	int i;

	AUEHIST_FUNC();

#if 0
	/*
	 * The Am79C901 HomePNA PHY actually contains
	 * two transceivers: a 1Mbps HomePNA PHY and a
	 * 10Mbps full/half duplex ethernet PHY with
	 * NWAY autoneg. However in the ADMtek adapter,
	 * only the 1Mbps PHY is actually connected to
	 * anything, so we ignore the 10Mbps one. It
	 * happens to be configured for MII address 3,
	 * so we filter that out.
	 */
	if (sc->aue_vendor == USB_VENDOR_ADMTEK &&
	    sc->aue_product == USB_PRODUCT_ADMTEK_PEGASUS) {
		if (phy == 3) {
			*val = 0;
			return EINVAL;
		}
	}
#endif

	aue_csr_write_1(sc, AUE_PHY_ADDR, phy);
	aue_csr_write_1(sc, AUE_PHY_CTL, reg | AUE_PHYCTL_READ);

	/* Poll for completion, bailing out early if the device is going away. */
	for (i = 0; i < AUE_TIMEOUT; i++) {
		if (usbnet_isdying(un)) {
			*val = 0;
			return ENXIO;
		}
		if (aue_csr_read_1(sc, AUE_PHY_CTL) & AUE_PHYCTL_DONE)
			break;
	}

	if (i == AUE_TIMEOUT) {
		AUEHIST_CALLARGS("aue%jd: phy=%#jx reg=%#jx read timed out",
		    device_unit(un->un_dev), phy, reg, 0);
		*val = 0;
		return ETIMEDOUT;
	}

	*val = aue_csr_read_2(sc, AUE_PHY_DATA);

	AUEHIST_CALLARGSN(11, "aue%jd: phy=%#jx reg=%#jx => 0x%04jx",
	    device_unit(un->un_dev), phy, reg, *val);

	return 0;
}

/* usbnet MII write callback: write PHY register via the chip's MII engine. */
static int
aue_uno_mii_write_reg(struct usbnet *un, int phy, int reg, uint16_t val)
{
	struct aue_softc *sc = usbnet_softc(un);
	int i;

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(11, "aue%jd: phy=%jd reg=%jd data=0x%04jx",
	    device_unit(un->un_dev), phy, reg, val);

#if 0
	if (sc->aue_vendor == USB_VENDOR_ADMTEK &&
	    sc->aue_product == USB_PRODUCT_ADMTEK_PEGASUS) {
		if (phy == 3)
			return EINVAL;
	}
#endif

	/* Data must be loaded before the write command is issued. */
	aue_csr_write_2(sc, AUE_PHY_DATA, val);
	aue_csr_write_1(sc, AUE_PHY_ADDR, phy);
	aue_csr_write_1(sc, AUE_PHY_CTL, reg | AUE_PHYCTL_WRITE);

	for (i = 0; i < AUE_TIMEOUT; i++) {
		if (usbnet_isdying(un))
			return ENXIO;
		if (aue_csr_read_1(sc, AUE_PHY_CTL) & AUE_PHYCTL_DONE)
			break;
	}

	if (i == AUE_TIMEOUT) {
		DPRINTF("aue%jd: phy=%#jx reg=%#jx val=%#jx write timed out",
		    device_unit(un->un_dev), phy, reg, val);
		return ETIMEDOUT;
	}

	return 0;
}

/* usbnet statchg callback: propagate MII speed/duplex into chip registers. */
static void
aue_uno_mii_statchg(struct ifnet *ifp)
{
	struct usbnet *un = ifp->if_softc;
	struct aue_softc *sc = usbnet_softc(un);
	struct mii_data *mii = usbnet_mii(un);
	const bool hadlink __diagused = usbnet_havelink(un);

	AUEHIST_FUNC(); AUEHIST_CALLED();
	AUEHIST_CALLARGSN(5, "aue%jd: ifp=%#jx link=%jd",
	    device_unit(un->un_dev), (uintptr_t)ifp, hadlink, 0);

	/* Quiesce RX/TX while the speed/duplex bits are changed. */
	AUE_CLRBIT(sc, AUE_CTL0, AUE_CTL0_RX_ENB | AUE_CTL0_TX_ENB);

	if (IFM_SUBTYPE(mii->mii_media_active) ==
	    IFM_100_TX) {
		AUE_SETBIT(sc, AUE_CTL1, AUE_CTL1_SPEEDSEL);
	} else {
		AUE_CLRBIT(sc, AUE_CTL1, AUE_CTL1_SPEEDSEL);
	}

	if ((mii->mii_media_active & IFM_FDX) != 0)
		AUE_SETBIT(sc, AUE_CTL1, AUE_CTL1_DUPLEX);
	else
		AUE_CLRBIT(sc, AUE_CTL1, AUE_CTL1_DUPLEX);

	/* Re-enable RX/TX now that media settings are in place. */
	AUE_SETBIT(sc, AUE_CTL0, AUE_CTL0_RX_ENB | AUE_CTL0_TX_ENB);

	if (mii->mii_media_status & IFM_ACTIVE &&
	    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
		usbnet_set_link(un, true);
	}

	/*
	 * Set the LED modes on the LinkSys adapter.
	 * This turns on the 'dual link LED' bin in the auxmode
	 * register of the Broadcom PHY.
	 */
	if (!usbnet_isdying(un) && (un->un_flags & LSYS)) {
		uint16_t auxmode;

		aue_uno_mii_read_reg(un, 0, 0x1b, &auxmode);
		aue_uno_mii_write_reg(un, 0, 0x1b, auxmode | 0x04);
	}

	if (usbnet_havelink(un) != hadlink) {
		DPRINTFN(5, "aue%jd: exit link %jd",
		    device_unit(un->un_dev), usbnet_havelink(un), 0, 0);
	}
}

#define AUE_POLY	0xEDB88320	/* reflected CRC-32 polynomial */
#define AUE_BITS	6		/* hash index width: 64-bit table */

/*
 * Hash a six-byte ethernet address into a bit index for the 64-entry
 * multicast hash table: low AUE_BITS bits of the CRC-32 of the address.
 */
static uint32_t
aue_crc(void *addrv)
{
	uint32_t idx, bit, data, crc;
	char *addr = addrv;

	/* Compute CRC for the address value. */
	crc = 0xFFFFFFFF;	/* initial value */

	for (idx = 0; idx < 6; idx++) {
		for (data = *addr++, bit = 0; bit < 8; bit++, data >>= 1)
			crc = (crc >> 1) ^ (((crc ^ data) & 1) ?
			    AUE_POLY : 0);
	}

	return crc & ((1 << AUE_BITS) - 1);
}

/* usbnet mcast callback: reprogram the 64-bit multicast hash filter. */
static void
aue_uno_mcast(struct ifnet *ifp)
{
	struct usbnet * const un = ifp->if_softc;
	struct aue_softc * const sc = usbnet_softc(un);
	struct ethercom *ec = usbnet_ec(un);
	struct ether_multi *enm;
	struct ether_multistep step;
	uint32_t h = 0, i;
	uint8_t hashtbl[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(5, "aue%jd: enter",
	    device_unit(un->un_dev), 0, 0, 0);

	if (usbnet_ispromisc(un)) {
		ETHER_LOCK(ec);
allmulti:
		/* Promiscuous or address range seen: accept all multicast. */
		ec->ec_flags |= ETHER_F_ALLMULTI;
		ETHER_UNLOCK(ec);
		AUE_SETBIT(sc, AUE_CTL0, AUE_CTL0_ALLMULTI);
		return;
	}

	/* now program new ones */
	ETHER_LOCK(ec);
	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		/* A range of addresses cannot be expressed in the hash. */
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
		    ETHER_ADDR_LEN) != 0) {
			goto allmulti;
		}

		h = aue_crc(enm->enm_addrlo);
		hashtbl[h >> 3] |= 1 << (h & 0x7);
		ETHER_NEXT_MULTI(step, enm);
	}
	ec->ec_flags &= ~ETHER_F_ALLMULTI;
	ETHER_UNLOCK(ec);

	AUE_CLRBIT(sc, AUE_CTL0, AUE_CTL0_ALLMULTI);

	/* write the hashtable */
	for (i = 0; i < 8; i++)
		aue_csr_write_1(sc, AUE_MAR0 + i, hashtbl[i]);
}

/* Extra reset dance required by Pegasus II (PII) parts. */
static void
aue_reset_pegasus_II(struct aue_softc *sc)
{
	/* Magic constants taken from Linux driver.
*/ aue_csr_write_1(sc, AUE_REG_1D, 0); aue_csr_write_1(sc, AUE_REG_7B, 2); #if 0 if ((un->un_flags & PNA) && mii_mode) aue_csr_write_1(sc, AUE_REG_81, 6); else #endif aue_csr_write_1(sc, AUE_REG_81, 2); } static void aue_reset(struct aue_softc *sc) { struct usbnet * const un = &sc->aue_un; int i; AUEHIST_FUNC(); AUEHIST_CALLARGSN(2, "aue%jd: enter", device_unit(un->un_dev), 0, 0, 0); AUE_SETBIT(sc, AUE_CTL1, AUE_CTL1_RESETMAC); for (i = 0; i < AUE_TIMEOUT; i++) { if (usbnet_isdying(un)) return; if (!(aue_csr_read_1(sc, AUE_CTL1) & AUE_CTL1_RESETMAC)) break; } if (i == AUE_TIMEOUT) printf("%s: reset failed\n", device_xname(un->un_dev)); #if 0 /* XXX what is mii_mode supposed to be */ if (sc->sc_mii_mode && (un->un_flags & PNA)) aue_csr_write_1(sc, AUE_GPIO1, 0x34); else aue_csr_write_1(sc, AUE_GPIO1, 0x26); #endif /* * The PHY(s) attached to the Pegasus chip may be held * in reset until we flip on the GPIO outputs. Make sure * to set the GPIO pins high so that the PHY(s) will * be enabled. * * Note: We force all of the GPIO pins low first, *then* * enable the ones we want. */ if (un->un_flags & LSYS) { /* Grrr. LinkSys has to be different from everyone else. */ aue_csr_write_1(sc, AUE_GPIO0, AUE_GPIO_SEL0 | AUE_GPIO_SEL1); } else { aue_csr_write_1(sc, AUE_GPIO0, AUE_GPIO_OUT0 | AUE_GPIO_SEL0); } aue_csr_write_1(sc, AUE_GPIO0, AUE_GPIO_OUT0 | AUE_GPIO_SEL0 | AUE_GPIO_SEL1); if (un->un_flags & PII) aue_reset_pegasus_II(sc); /* Wait a little while for the chip to get its brains in order. */ delay(10000); /* XXX */ //usbd_delay_ms(un->un_udev, 10); /* XXX */ DPRINTFN(2, "aue%jd: exit", device_unit(un->un_dev), 0, 0, 0); } /* * Probe for a Pegasus chip. */ static int aue_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; /* * Some manufacturers use the same vendor and product id for * different devices. 
We need to sanity check the DeviceClass * in this case * Currently known guilty products: * 0x050d/0x0121 Belkin Bluetooth and USB2LAN * * If this turns out to be more common, we could use a quirk * table. */ if (uaa->uaa_vendor == USB_VENDOR_BELKIN && uaa->uaa_product == USB_PRODUCT_BELKIN_USB2LAN) { usb_device_descriptor_t *dd; dd = usbd_get_device_descriptor(uaa->uaa_device); if (dd != NULL && dd->bDeviceClass != UDCLASS_IN_INTERFACE) return UMATCH_NONE; } return aue_lookup(uaa->uaa_vendor, uaa->uaa_product) != NULL ? UMATCH_VENDOR_PRODUCT : UMATCH_NONE; } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static void aue_attach(device_t parent, device_t self, void *aux) { USBNET_MII_DECL_DEFAULT(unm); struct aue_softc * const sc = device_private(self); struct usbnet * const un = &sc->aue_un; struct usb_attach_arg *uaa = aux; char *devinfop; struct usbd_device *dev = uaa->uaa_device; usbd_status err; usb_interface_descriptor_t *id; usb_endpoint_descriptor_t *ed; int i; AUEHIST_FUNC(); AUEHIST_CALLARGSN(2, "aue%jd: enter sc=%#jx", device_unit(self), (uintptr_t)sc, 0, 0); KASSERT((void *)sc == un); aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(uaa->uaa_device, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); un->un_dev = self; un->un_udev = dev; un->un_sc = sc; un->un_ops = &aue_ops; un->un_intr = &sc->aue_intr; un->un_rx_xfer_flags = USBD_SHORT_XFER_OK; un->un_tx_xfer_flags = USBD_FORCE_SHORT_XFER; un->un_rx_list_cnt = AUE_RX_LIST_CNT; un->un_tx_list_cnt = AUE_RX_LIST_CNT; un->un_rx_bufsz = AUE_BUFSZ; un->un_tx_bufsz = AUE_BUFSZ; sc->aue_intr.uni_buf = &sc->aue_ibuf; sc->aue_intr.uni_bufsz = sizeof(sc->aue_ibuf); sc->aue_intr.uni_interval = AUE_INTR_INTERVAL; err = usbd_set_config_no(dev, AUE_CONFIG_NO, 1); if (err) { aprint_error_dev(self, "failed to set configuration" ", err=%s\n", usbd_errstr(err)); return; } err = usbd_device2interface_handle(dev, 
	    AUE_IFACE_IDX, &un->un_iface);
	if (err) {
		aprint_error_dev(self, "getting interface handle failed\n");
		return;
	}

	un->un_flags = aue_lookup(uaa->uaa_vendor,
	    uaa->uaa_product)->aue_flags;

	id = usbd_get_interface_descriptor(un->un_iface);

	/* Find endpoints. */
	for (i = 0; i < id->bNumEndpoints; i++) {
		ed = usbd_interface2endpoint_descriptor(un->un_iface, i);
		if (ed == NULL) {
			aprint_error_dev(self,
			    "couldn't get endpoint descriptor %d\n", i);
			return;
		}
		if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN &&
		    UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
			un->un_ed[USBNET_ENDPT_RX] = ed->bEndpointAddress;
		} else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT &&
			   UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) {
			un->un_ed[USBNET_ENDPT_TX] = ed->bEndpointAddress;
		} else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN &&
			   UE_GET_XFERTYPE(ed->bmAttributes) == UE_INTERRUPT) {
			un->un_ed[USBNET_ENDPT_INTR] = ed->bEndpointAddress;
		}
	}

	/* All three pipes (bulk in/out + interrupt in) are required. */
	if (un->un_ed[USBNET_ENDPT_RX] == 0 ||
	    un->un_ed[USBNET_ENDPT_TX] == 0 ||
	    un->un_ed[USBNET_ENDPT_INTR] == 0) {
		aprint_error_dev(self, "missing endpoint\n");
		return;
	}

	/* First level attach. */
	usbnet_attach(un);

	/* Reset the adapter and get station address from the EEPROM. */
	aue_reset(sc);
	aue_read_mac(un);

	usbnet_attach_ifp(un, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST,
	    0, &unm);
}

/*
 * usbnet interrupt-pipe callback: account TX errors/collisions reported
 * in the periodic status packet.
 */
static void
aue_uno_intr(struct usbnet *un, usbd_status status)
{
	struct ifnet *ifp = usbnet_ifp(un);
	struct aue_softc *sc = usbnet_softc(un);
	struct aue_intrpkt *p = &sc->aue_ibuf;

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(20, "aue%jd: enter txstat0 %#jx\n",
	    device_unit(un->un_dev), p->aue_txstat0, 0, 0);

	if (p->aue_txstat0)
		if_statinc(ifp, if_oerrors);

	if (p->aue_txstat0 & (AUE_TXSTAT0_LATECOLL | AUE_TXSTAT0_EXCESSCOLL))
		if_statinc(ifp, if_collisions);
}

/*
 * usbnet RX callback: validate the 4-byte status trailer appended by
 * the chip and pass good frames (minus trailer and CRC) up the stack.
 */
static void
aue_uno_rx_loop(struct usbnet *un, struct usbnet_chain *c, uint32_t total_len)
{
	struct ifnet *ifp = usbnet_ifp(un);
	uint8_t *buf = c->unc_buf;
	struct aue_rxpkt r;
	uint32_t pktlen;

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(10, "aue%jd: enter len %ju",
	    device_unit(un->un_dev), total_len, 0, 0);

	/* Too short to contain the 4-byte status trailer plus FCS. */
	if (total_len <= 4 + ETHER_CRC_LEN) {
		if_statinc(ifp, if_ierrors);
		return;
	}

	/* Status trailer lives in the last 4 bytes of the transfer. */
	memcpy(&r, buf + total_len - 4, sizeof(r));

	/* Turn off all the non-error bits in the rx status word. */
	r.aue_rxstat &= AUE_RXSTAT_MASK;
	if (r.aue_rxstat) {
		if_statinc(ifp, if_ierrors);
		return;
	}

	/* No errors; receive the packet. */
	pktlen = total_len - ETHER_CRC_LEN - 4;

	usbnet_enqueue(un, buf, pktlen, 0, 0, 0);
}

/*
 * usbnet TX callback: copy the mbuf chain into the transfer buffer,
 * prefixed with a 2-byte little-endian frame length.  Returns the
 * bulk-transfer length, or 0 if the frame does not fit.
 */
static unsigned
aue_uno_tx_prepare(struct usbnet *un, struct mbuf *m, struct usbnet_chain *c)
{
	uint8_t *buf = c->unc_buf;
	int total_len;

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(10, "aue%jd: enter pktlen=%jd",
	    device_unit(un->un_dev), m->m_pkthdr.len, 0, 0);

	if ((unsigned)m->m_pkthdr.len > un->un_tx_bufsz - 2)
		return 0;

	/*
	 * Copy the mbuf data into a contiguous buffer, leaving two
	 * bytes at the beginning to hold the frame length.
	 */
	m_copydata(m, 0, m->m_pkthdr.len, buf + 2);

	/*
	 * The ADMtek documentation says that the packet length is
	 * supposed to be specified in the first two bytes of the
	 * transfer, however it actually seems to ignore this info
	 * and base the frame size on the bulk transfer length.
	 */
	buf[0] = (uint8_t)m->m_pkthdr.len;
	buf[1] = (uint8_t)(m->m_pkthdr.len >> 8);
	total_len = m->m_pkthdr.len + 2;

	DPRINTFN(5, "aue%jd: send %jd bytes",
	    device_unit(un->un_dev), total_len, 0, 0);

	return total_len;
}

/*
 * usbnet init callback: reset the chip, program the station address
 * and RX mode, and enable the receiver/transmitter.
 */
static int
aue_uno_init(struct ifnet *ifp)
{
	struct usbnet * const un = ifp->if_softc;
	struct aue_softc *sc = usbnet_softc(un);
	int i;
	const u_char *eaddr;

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(5, "aue%jd: enter link=%jd",
	    device_unit(un->un_dev), usbnet_havelink(un), 0, 0);

	/* Reset the interface. */
	aue_reset(sc);

	/* Program the perfect-filter entry with the interface address. */
	eaddr = CLLADDR(ifp->if_sadl);
	for (i = 0; i < ETHER_ADDR_LEN; i++)
		aue_csr_write_1(sc, AUE_PAR0 + i, eaddr[i]);

	/* If we want promiscuous mode, set the allframes bit. */
	if (usbnet_ispromisc(un))
		AUE_SETBIT(sc, AUE_CTL2, AUE_CTL2_RX_PROMISC);
	else
		AUE_CLRBIT(sc, AUE_CTL2, AUE_CTL2_RX_PROMISC);

	/* Enable RX and TX */
	aue_csr_write_1(sc, AUE_CTL0, AUE_CTL0_RXSTAT_APPEND | AUE_CTL0_RX_ENB);
	AUE_SETBIT(sc, AUE_CTL0, AUE_CTL0_TX_ENB);
	AUE_SETBIT(sc, AUE_CTL2, AUE_CTL2_EP3_CLR);

	return 0;
}

/* usbnet stop callback: disable RX/TX and reset the chip. */
static void
aue_uno_stop(struct ifnet *ifp, int disable)
{
	struct usbnet * const un = ifp->if_softc;
	struct aue_softc * const sc = usbnet_softc(un);

	AUEHIST_FUNC();
	AUEHIST_CALLARGSN(5, "aue%jd: enter",
	    device_unit(un->un_dev), 0, 0, 0);

	aue_csr_write_1(sc, AUE_CTL0, 0);
	aue_csr_write_1(sc, AUE_CTL1, 0);
	aue_reset(sc);
}

#ifdef _MODULE
#include "ioconf.c"
#endif

USBNET_MODULE(aue)
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 | /* $NetBSD: sysmon_envsys.c,v 1.151 2022/05/20 21:31:24 andvar Exp $ */ /*- * Copyright (c) 2007, 2008 Juan Romero Pardines. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2000 Zembu Labs, Inc. * All rights reserved. * * Author: Jason R. Thorpe <thorpej@zembu.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Zembu Labs, Inc. * 4. 
Neither the name of Zembu Labs nor the names of its employees may * be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY ZEMBU LABS, INC. ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WAR- * RANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DIS- * CLAIMED. IN NO EVENT SHALL ZEMBU LABS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Environmental sensor framework for sysmon, exported to userland * with proplib(3). 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysmon_envsys.c,v 1.151 2022/05/20 21:31:24 andvar Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/kmem.h>
#include <sys/rndsource.h>
#include <sys/module.h>
#include <sys/once.h>

#include <dev/sysmon/sysmonvar.h>
#include <dev/sysmon/sysmon_envsysvar.h>
#include <dev/sysmon/sysmon_taskq.h>

/*
 * Global state.  sme_global_mtx serializes access to both the device
 * list (sysmon_envsys_list) and the global proplib dictionary
 * (sme_propd) that is exported to userland.
 */
kmutex_t sme_global_mtx;
prop_dictionary_t sme_propd;
struct sysmon_envsys_lh sysmon_envsys_list;

/* Next first-sensor index handed to a registering device (compat-40 API). */
static uint32_t sysmon_envsys_next_sensor_index;

static struct sysmon_envsys *sysmon_envsys_find_40(u_int);
static void sysmon_envsys_destroy_plist(prop_array_t);
static void sme_remove_userprops(void);
static int sme_add_property_dictionary(struct sysmon_envsys *, prop_array_t,
	prop_dictionary_t);
static sme_event_drv_t * sme_add_sensor_dictionary(struct sysmon_envsys *,
	prop_array_t, prop_dictionary_t, envsys_data_t *);
static void sme_initial_refresh(void *);
static uint32_t sme_get_max_value(struct sysmon_envsys *,
    bool (*)(const envsys_data_t*), bool);

MODULE(MODULE_CLASS_DRIVER, sysmon_envsys, "sysmon,sysmon_taskq,sysmon_power");

static struct sysmon_opvec sysmon_envsys_opvec = {
	sysmonopen_envsys, sysmonclose_envsys, sysmonioctl_envsys,
	NULL, NULL, NULL
};

ONCE_DECL(once_envsys);

/*
 * sme_preinit:
 *
 *	+ One-time initialization of the device list, the global mutex
 *	  and the global dictionary; invoked via RUN_ONCE from both the
 *	  init and register paths.
 */
static int
sme_preinit(void)
{
	LIST_INIT(&sysmon_envsys_list);
	mutex_init(&sme_global_mtx, MUTEX_DEFAULT, IPL_NONE);
	sme_propd = prop_dictionary_create();

	return 0;
}

/*
 * sysmon_envsys_init:
 *
 * 	+ Initialize global mutex, dictionary and the linked list, and
 * 	  attach the envsys minor to the sysmon device.
 */
int
sysmon_envsys_init(void)
{
	int error;

	(void)RUN_ONCE(&once_envsys, sme_preinit);

	error = sysmon_attach_minor(SYSMON_MINOR_ENVSYS, &sysmon_envsys_opvec);

	return error;
}

/*
 * sysmon_envsys_fini:
 *
 *	+ Detach the envsys minor; refused with EBUSY while any envsys
 *	  device is still registered.
 */
int
sysmon_envsys_fini(void)
{
	int error;

	if ( ! LIST_EMPTY(&sysmon_envsys_list))
		error = EBUSY;
	else
		error = sysmon_attach_minor(SYSMON_MINOR_ENVSYS, NULL);

	if (error == 0)
		mutex_destroy(&sme_global_mtx);

	// XXX: prop_dictionary ??? (sme_propd is never released here)

	return error;
}

/*
 * sysmonopen_envsys:
 *
 *	+ Open the system monitor device.
 */
int
sysmonopen_envsys(dev_t dev, int flag, int mode, struct lwp *l)
{
	return 0;
}

/*
 * sysmonclose_envsys:
 *
 *	+ Close the system monitor device.
 */
int
sysmonclose_envsys(dev_t dev, int flag, int mode, struct lwp *l)
{
	return 0;
}

/*
 * sysmonioctl_envsys:
 *
 *	+ Perform a sysmon envsys control request.
 */
int
sysmonioctl_envsys(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct sysmon_envsys *sme = NULL;
	int error = 0;
	u_int oidx;

	switch (cmd) {
	/*
	 * To update the global dictionary with latest data from devices.
	 */
	case ENVSYS_GETDICTIONARY:
	    {
		struct plistref *plist = (struct plistref *)data;

		/*
		 * Update dictionaries on all sysmon envsys devices
		 * registered.
		 */
		mutex_enter(&sme_global_mtx);
		LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) {
			sysmon_envsys_acquire(sme, false);
			error = sme_update_dictionary(sme);
			if (error) {
				DPRINTF(("%s: sme_update_dictionary, "
				    "error=%d\n", __func__, error));
				sysmon_envsys_release(sme, false);
				mutex_exit(&sme_global_mtx);
				return error;
			}
			sysmon_envsys_release(sme, false);
		}
		mutex_exit(&sme_global_mtx);
		/*
		 * Copy global dictionary to userland.
		 */
		error = prop_dictionary_copyout_ioctl(plist, cmd, sme_propd);
		break;
	    }
	/*
	 * To set properties on multiple devices.
	 */
	case ENVSYS_SETDICTIONARY:
	    {
		const struct plistref *plist = (const struct plistref *)data;
		prop_dictionary_t udict;
		prop_object_iterator_t iter, iter2;
		prop_object_t obj, obj2;
		prop_array_t array_u, array_k;
		const char *devname = NULL;

		/* Setting properties requires write access. */
		if ((flag & FWRITE) == 0)
			return EPERM;

		/*
		 * Get dictionary from userland.
*/ error = prop_dictionary_copyin_ioctl(plist, cmd, &udict); if (error) { DPRINTF(("%s: copyin_ioctl error=%d\n", __func__, error)); break; } iter = prop_dictionary_iterator(udict); if (!iter) { prop_object_release(udict); return ENOMEM; } /* * Iterate over the userland dictionary and process * the list of devices. */ while ((obj = prop_object_iterator_next(iter))) { array_u = prop_dictionary_get_keysym(udict, obj); if (prop_object_type(array_u) != PROP_TYPE_ARRAY) { prop_object_iterator_release(iter); prop_object_release(udict); return EINVAL; } devname = prop_dictionary_keysym_value(obj); DPRINTF(("%s: processing the '%s' array requests\n", __func__, devname)); /* * find the correct sme device. */ sme = sysmon_envsys_find(devname); if (!sme) { DPRINTF(("%s: NULL sme\n", __func__)); prop_object_iterator_release(iter); prop_object_release(udict); return EINVAL; } /* * Find the correct array object with the string * supplied by the userland dictionary. */ array_k = prop_dictionary_get(sme_propd, devname); if (prop_object_type(array_k) != PROP_TYPE_ARRAY) { DPRINTF(("%s: array device failed\n", __func__)); sysmon_envsys_release(sme, false); prop_object_iterator_release(iter); prop_object_release(udict); return EINVAL; } iter2 = prop_array_iterator(array_u); if (!iter2) { sysmon_envsys_release(sme, false); prop_object_iterator_release(iter); prop_object_release(udict); return ENOMEM; } /* * Iterate over the array of dictionaries to * process the list of sensors and properties. */ while ((obj2 = prop_object_iterator_next(iter2))) { /* * do the real work now. 
*/ error = sme_userset_dictionary(sme, obj2, array_k); if (error) { sysmon_envsys_release(sme, false); prop_object_iterator_release(iter2); prop_object_iterator_release(iter); prop_object_release(udict); return error; } } sysmon_envsys_release(sme, false); prop_object_iterator_release(iter2); } prop_object_iterator_release(iter); prop_object_release(udict); break; } /* * To remove all properties from all devices registered. */ case ENVSYS_REMOVEPROPS: { const struct plistref *plist = (const struct plistref *)data; prop_dictionary_t udict; prop_object_t obj; if ((flag & FWRITE) == 0) return EPERM; error = prop_dictionary_copyin_ioctl(plist, cmd, &udict); if (error) { DPRINTF(("%s: copyin_ioctl error=%d\n", __func__, error)); break; } obj = prop_dictionary_get(udict, "envsys-remove-props"); if (!obj || !prop_bool_true(obj)) { DPRINTF(("%s: invalid 'envsys-remove-props'\n", __func__)); return EINVAL; } prop_object_release(udict); sme_remove_userprops(); break; } /* * Compatibility ioctls with the old interface, only implemented * ENVSYS_GTREDATA and ENVSYS_GTREINFO; enough to make old * applications work. */ case ENVSYS_GTREDATA: { struct envsys_tre_data *tred = (void *)data; envsys_data_t *edata = NULL; bool found = false; tred->validflags = 0; sme = sysmon_envsys_find_40(tred->sensor); if (!sme) break; oidx = tred->sensor; tred->sensor = SME_SENSOR_IDX(sme, tred->sensor); DPRINTFOBJ(("%s: sensor=%d oidx=%d dev=%s nsensors=%d\n", __func__, tred->sensor, oidx, sme->sme_name, sme->sme_nsensors)); TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) { if (edata->sensor == tred->sensor) { found = true; break; } } if (!found) { sysmon_envsys_release(sme, false); error = ENODEV; break; } if (tred->sensor < sme->sme_nsensors) { if ((sme->sme_flags & SME_POLL_ONLY) == 0) { mutex_enter(&sme->sme_mtx); sysmon_envsys_refresh_sensor(sme, edata); mutex_exit(&sme->sme_mtx); } /* * copy required values to the old interface. 
*/ tred->sensor = edata->sensor; tred->cur.data_us = edata->value_cur; tred->cur.data_s = edata->value_cur; tred->max.data_us = edata->value_max; tred->max.data_s = edata->value_max; tred->min.data_us = edata->value_min; tred->min.data_s = edata->value_min; tred->avg.data_us = 0; tred->avg.data_s = 0; if (edata->units == ENVSYS_BATTERY_CHARGE) tred->units = ENVSYS_INDICATOR; else tred->units = edata->units; tred->validflags |= ENVSYS_FVALID; tred->validflags |= ENVSYS_FCURVALID; if (edata->flags & ENVSYS_FPERCENT) { tred->validflags |= ENVSYS_FMAXVALID; tred->validflags |= ENVSYS_FFRACVALID; } if (edata->state == ENVSYS_SINVALID) { tred->validflags &= ~ENVSYS_FCURVALID; tred->cur.data_us = tred->cur.data_s = 0; } DPRINTFOBJ(("%s: sensor=%s tred->cur.data_s=%d\n", __func__, edata->desc, tred->cur.data_s)); DPRINTFOBJ(("%s: tred->validflags=%d tred->units=%d" " tred->sensor=%d\n", __func__, tred->validflags, tred->units, tred->sensor)); } tred->sensor = oidx; sysmon_envsys_release(sme, false); break; } case ENVSYS_GTREINFO: { struct envsys_basic_info *binfo = (void *)data; envsys_data_t *edata = NULL; bool found = false; binfo->validflags = 0; sme = sysmon_envsys_find_40(binfo->sensor); if (!sme) break; oidx = binfo->sensor; binfo->sensor = SME_SENSOR_IDX(sme, binfo->sensor); TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) { if (edata->sensor == binfo->sensor) { found = true; break; } } if (!found) { sysmon_envsys_release(sme, false); error = ENODEV; break; } binfo->validflags |= ENVSYS_FVALID; if (binfo->sensor < sme->sme_nsensors) { if (edata->units == ENVSYS_BATTERY_CHARGE) binfo->units = ENVSYS_INDICATOR; else binfo->units = edata->units; /* * previously, the ACPI sensor names included the * device name. Include that in compatibility code. 
*/ if (strncmp(sme->sme_name, "acpi", 4) == 0) (void)snprintf(binfo->desc, sizeof(binfo->desc), "%s %s", sme->sme_name, edata->desc); else (void)strlcpy(binfo->desc, edata->desc, sizeof(binfo->desc)); } DPRINTFOBJ(("%s: binfo->units=%d binfo->validflags=%d\n", __func__, binfo->units, binfo->validflags)); DPRINTFOBJ(("%s: binfo->desc=%s binfo->sensor=%d\n", __func__, binfo->desc, binfo->sensor)); binfo->sensor = oidx; sysmon_envsys_release(sme, false); break; } default: error = ENOTTY; break; } return error; } /* * sysmon_envsys_create: * * + Allocates a new sysmon_envsys object and initializes the * stuff for sensors and events. */ struct sysmon_envsys * sysmon_envsys_create(void) { struct sysmon_envsys *sme; CTASSERT(SME_CALLOUT_INVALID == 0); sme = kmem_zalloc(sizeof(*sme), KM_SLEEP); TAILQ_INIT(&sme->sme_sensors_list); LIST_INIT(&sme->sme_events_list); mutex_init(&sme->sme_mtx, MUTEX_DEFAULT, IPL_NONE); mutex_init(&sme->sme_work_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK); cv_init(&sme->sme_condvar, "sme_wait"); return sme; } /* * sysmon_envsys_destroy: * * + Removes all sensors from the tail queue, destroys the callout * and frees the sysmon_envsys object. */ void sysmon_envsys_destroy(struct sysmon_envsys *sme) { envsys_data_t *edata; KASSERT(sme != NULL); while (!TAILQ_EMPTY(&sme->sme_sensors_list)) { edata = TAILQ_FIRST(&sme->sme_sensors_list); TAILQ_REMOVE(&sme->sme_sensors_list, edata, sensors_head); } mutex_destroy(&sme->sme_mtx); mutex_destroy(&sme->sme_work_mtx); cv_destroy(&sme->sme_condvar); kmem_free(sme, sizeof(*sme)); } /* * sysmon_envsys_sensor_attach: * * + Attaches a sensor into a sysmon_envsys device checking that units * is set to a valid type and description is unique and not empty. */ int sysmon_envsys_sensor_attach(struct sysmon_envsys *sme, envsys_data_t *edata) { const struct sme_descr_entry *sdt_units; envsys_data_t *oedata; KASSERT(sme != NULL || edata != NULL); /* * Find the correct units for this sensor. 
	 */
	sdt_units = sme_find_table_entry(SME_DESC_UNITS, edata->units);
	if (sdt_units == NULL || sdt_units->type == -1)
		return EINVAL;

	/*
	 * Check that description is not empty or duplicate.
	 */
	if (strlen(edata->desc) == 0)
		return EINVAL;

	mutex_enter(&sme->sme_mtx);
	sysmon_envsys_acquire(sme, true);
	TAILQ_FOREACH(oedata, &sme->sme_sensors_list, sensors_head) {
		if (strcmp(oedata->desc, edata->desc) == 0) {
			sysmon_envsys_release(sme, true);
			mutex_exit(&sme->sme_mtx);
			return EEXIST;
		}
	}
	/*
	 * Ok, the sensor has been added into the device queue.
	 */
	TAILQ_INSERT_TAIL(&sme->sme_sensors_list, edata, sensors_head);

	/*
	 * Give the sensor an index position.
	 */
	edata->sensor = sme->sme_nsensors;
	sme->sme_nsensors++;
	sysmon_envsys_release(sme, true);
	mutex_exit(&sme->sme_mtx);

	DPRINTF(("%s: attached #%d (%s), units=%d (%s)\n", __func__,
	    edata->sensor, edata->desc, sdt_units->type, sdt_units->desc));

	return 0;
}

/*
 * sysmon_envsys_sensor_detach:
 *
 * 	+ Detachs a sensor from a sysmon_envsys device and decrements the
 * 	  sensors count on success.
 */
int
sysmon_envsys_sensor_detach(struct sysmon_envsys *sme, envsys_data_t *edata)
{
	envsys_data_t *oedata;
	bool found = false;
	bool destroy = false;

	/*
	 * NOTE(review): '||' only guarantees that one of the two pointers
	 * is non-NULL, yet both are dereferenced below; '&&' was probably
	 * intended — confirm before changing.
	 */
	KASSERT(sme != NULL || edata != NULL);

	/*
	 * Check the sensor is already on the list.
	 */
	mutex_enter(&sme->sme_mtx);
	sysmon_envsys_acquire(sme, true);
	TAILQ_FOREACH(oedata, &sme->sme_sensors_list, sensors_head) {
		if (oedata->sensor == edata->sensor) {
			found = true;
			break;
		}
	}

	if (!found) {
		sysmon_envsys_release(sme, true);
		mutex_exit(&sme->sme_mtx);
		return EINVAL;
	}

	/*
	 * remove it, unhook from rnd(4), and decrement the sensors count.
	 */
	if (oedata->flags & ENVSYS_FHAS_ENTROPY)
		rnd_detach_source(&oedata->rnd_src);
	sme_event_unregister_sensor(sme, edata);
	mutex_enter(&sme->sme_work_mtx);
	if (LIST_EMPTY(&sme->sme_events_list)) {
		/* Last event gone: stop the callout, destroy it below. */
		if (sme->sme_callout_state == SME_CALLOUT_READY)
			sme_events_halt_callout(sme);
		destroy = true;
	}
	mutex_exit(&sme->sme_work_mtx);
	TAILQ_REMOVE(&sme->sme_sensors_list, edata, sensors_head);
	sme->sme_nsensors--;
	sysmon_envsys_release(sme, true);
	mutex_exit(&sme->sme_mtx);

	/* Destroy the event machinery outside of sme_mtx. */
	if (destroy)
		sme_events_destroy(sme);

	return 0;
}

/*
 * sysmon_envsys_register:
 *
 *	+ Register a sysmon envsys device.
 *	+ Create array of dictionaries for a device.
 */
int
sysmon_envsys_register(struct sysmon_envsys *sme)
{
	struct sme_evdrv {
		SLIST_ENTRY(sme_evdrv) evdrv_head;
		sme_event_drv_t *evdrv;
	};
	SLIST_HEAD(, sme_evdrv) sme_evdrv_list;
	struct sme_evdrv *evdv = NULL;
	struct sysmon_envsys *lsme;
	prop_array_t array = NULL;
	prop_dictionary_t dict, dict2;
	envsys_data_t *edata = NULL;
	sme_event_drv_t *this_evdrv;
	int nevent;
	int error = 0;
	char rnd_name[sizeof(edata->rnd_src.name)];

	KASSERT(sme != NULL);
	KASSERT(sme->sme_name != NULL);

	(void)RUN_ONCE(&once_envsys, sme_preinit);

	/*
	 * Check if requested sysmon_envsys device is valid
	 * and does not exist already in the list.
	 */
	mutex_enter(&sme_global_mtx);
	LIST_FOREACH(lsme, &sysmon_envsys_list, sme_list) {
	       if (strcmp(lsme->sme_name, sme->sme_name) == 0) {
		       mutex_exit(&sme_global_mtx);
		       return EEXIST;
	       }
	}
	mutex_exit(&sme_global_mtx);

	/*
	 * sanity check: if SME_DISABLE_REFRESH is not set,
	 * the sme_refresh function callback must be non NULL.
	 */
	if ((sme->sme_flags & SME_DISABLE_REFRESH) == 0)
		if (!sme->sme_refresh)
			return EINVAL;

	/*
	 * If the list of sensors is empty, there's no point to continue...
	 */
	if (TAILQ_EMPTY(&sme->sme_sensors_list)) {
		DPRINTF(("%s: sensors list empty for %s\n", __func__,
		    sme->sme_name));
		return ENOTSUP;
	}

	/*
	 * Initialize the singly linked list for driver events.
	 */
	SLIST_INIT(&sme_evdrv_list);

	array = prop_array_create();
	if (!array)
		return ENOMEM;

	/*
	 * Iterate over all sensors and create a dictionary per sensor.
	 * We must respect the order in which the sensors were added.
	 */
	TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) {
		dict = prop_dictionary_create();
		if (!dict) {
			error = ENOMEM;
			goto out2;
		}

		/*
		 * Create all objects in sensor's dictionary.
		 */
		this_evdrv = sme_add_sensor_dictionary(sme, array,
		    dict, edata);
		if (this_evdrv) {
			evdv = kmem_zalloc(sizeof(*evdv), KM_SLEEP);
			evdv->evdrv = this_evdrv;
			SLIST_INSERT_HEAD(&sme_evdrv_list, evdv, evdrv_head);
		}
	}

	/*
	 * If the array does not contain any object (sensor), there's
	 * no need to attach the driver.
	 */
	if (prop_array_count(array) == 0) {
		error = EINVAL;
		DPRINTF(("%s: empty array for '%s'\n", __func__,
		    sme->sme_name));
		goto out;
	}

	/*
	 * Add the dictionary for the global properties of this device.
	 */
	dict2 = prop_dictionary_create();
	if (!dict2) {
		error = ENOMEM;
		goto out;
	}

	error = sme_add_property_dictionary(sme, array, dict2);
	if (error) {
		prop_object_release(dict2);
		goto out;
	}

	/*
	 * Add the array into the global dictionary for the driver.
	 *
	 * <dict>
	 * 	<key>foo0</key>
	 * 	<array>
	 * 		...
	 */
	mutex_enter(&sme_global_mtx);
	if (!prop_dictionary_set(sme_propd, sme->sme_name, array)) {
		error = EINVAL;
		mutex_exit(&sme_global_mtx);
		DPRINTF(("%s: prop_dictionary_set for '%s'\n", __func__,
		    sme->sme_name));
		goto out;
	}

	/*
	 * Add the device into the list.
	 */
	LIST_INSERT_HEAD(&sysmon_envsys_list, sme, sme_list);
	sme->sme_fsensor = sysmon_envsys_next_sensor_index;
	sysmon_envsys_next_sensor_index += sme->sme_nsensors;
	mutex_exit(&sme_global_mtx);

out:
	/*
	 * No errors?  Make an initial data refresh if was requested,
	 * then register the events that were set in the driver.  Do
	 * the refresh first in case it is needed to establish the
	 * limits or max_value needed by some events.
	 */
	if (error == 0) {
		nevent = 0;

		/*
		 * Hook the sensor into rnd(4) entropy pool if requested
		 */
		TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) {
			if (edata->flags & ENVSYS_FHAS_ENTROPY) {
				uint32_t rnd_type, rnd_flag = 0;
				size_t n;
				int tail = 1;

				snprintf(rnd_name, sizeof(rnd_name), "%s-%s",
				    sme->sme_name, edata->desc);
				n = strlen(rnd_name);
				/*
				 * 1) Remove trailing white space(s).
				 * 2) If space exist, replace it with '-'
				 */
				while (--n) {
					if (rnd_name[n] == ' ') {
						if (tail != 0)
							rnd_name[n] = '\0';
						else
							rnd_name[n] = '-';
					} else
						tail = 0;
				}
				rnd_flag |= RND_FLAG_COLLECT_TIME;
				rnd_flag |= RND_FLAG_ESTIMATE_TIME;

				/* Pick the entropy source type by units. */
				switch (edata->units) {
				case ENVSYS_STEMP:
				case ENVSYS_SFANRPM:
				case ENVSYS_INTEGER:
					rnd_type = RND_TYPE_ENV;
					rnd_flag |= RND_FLAG_COLLECT_VALUE;
					rnd_flag |= RND_FLAG_ESTIMATE_VALUE;
					break;
				case ENVSYS_SVOLTS_AC:
				case ENVSYS_SVOLTS_DC:
				case ENVSYS_SOHMS:
				case ENVSYS_SWATTS:
				case ENVSYS_SAMPS:
				case ENVSYS_SWATTHOUR:
				case ENVSYS_SAMPHOUR:
					rnd_type = RND_TYPE_POWER;
					rnd_flag |= RND_FLAG_COLLECT_VALUE;
					rnd_flag |= RND_FLAG_ESTIMATE_VALUE;
					break;
				default:
					rnd_type = RND_TYPE_UNKNOWN;
					break;
				}
				rnd_attach_source(&edata->rnd_src, rnd_name,
				    rnd_type, rnd_flag);
			}
		}

		if (sme->sme_flags & SME_INIT_REFRESH) {
			sysmon_task_queue_sched(0, sme_initial_refresh, sme);
			DPRINTF(("%s: scheduled initial refresh for '%s'\n",
				__func__, sme->sme_name));
		}
		SLIST_FOREACH(evdv, &sme_evdrv_list, evdrv_head) {
			sysmon_task_queue_sched(0,
			    sme_event_drvadd, evdv->evdrv);
			nevent++;
		}
		DPRINTF(("%s: driver '%s' registered (nsens=%d nevent=%d)\n",
		    __func__, sme->sme_name, sme->sme_nsensors, nevent));
	}

out2:
	/* The temporary event-driver list is always freed. */
	while (!SLIST_EMPTY(&sme_evdrv_list)) {
		evdv = SLIST_FIRST(&sme_evdrv_list);
		SLIST_REMOVE_HEAD(&sme_evdrv_list, evdrv_head);
		kmem_free(evdv, sizeof(*evdv));
	}
	if (!error)
		return 0;

	/*
	 * Ugh... something wasn't right; unregister all events and sensors
	 * previously assigned and destroy the array with all its objects.
	 */
	DPRINTF(("%s: failed to register '%s' (%d)\n", __func__,
	    sme->sme_name, error));
	sme_event_unregister_all(sme);
	while (!TAILQ_EMPTY(&sme->sme_sensors_list)) {
		edata = TAILQ_FIRST(&sme->sme_sensors_list);
		TAILQ_REMOVE(&sme->sme_sensors_list, edata, sensors_head);
	}
	sysmon_envsys_destroy_plist(array);

	return error;
}

/*
 * sysmon_envsys_destroy_plist:
 *
 * 	+ Remove all objects from the array of dictionaries that is
 * 	  created in a sysmon envsys device.
 */
static void
sysmon_envsys_destroy_plist(prop_array_t array)
{
	prop_object_iterator_t iter, iter2;
	prop_dictionary_t dict;
	prop_object_t obj;

	KASSERT(array != NULL);
	KASSERT(prop_object_type(array) == PROP_TYPE_ARRAY);

	DPRINTFOBJ(("%s: objects in array=%d\n", __func__,
	    prop_array_count(array)));

	iter = prop_array_iterator(array);
	if (!iter)
		return;

	while ((dict = prop_object_iterator_next(iter))) {
		KASSERT(prop_object_type(dict) == PROP_TYPE_DICTIONARY);
		iter2 = prop_dictionary_iterator(dict);
		if (!iter2)
			goto out;
		DPRINTFOBJ(("%s: iterating over dictionary\n", __func__));
		while ((obj = prop_object_iterator_next(iter2)) != NULL) {
			DPRINTFOBJ(("%s: obj=%s\n", __func__,
			    prop_dictionary_keysym_value(obj)));
			prop_dictionary_remove(dict,
			    prop_dictionary_keysym_value(obj));
			/* Removal invalidates the iterator; restart it. */
			prop_object_iterator_reset(iter2);
		}
		prop_object_iterator_release(iter2);
		DPRINTFOBJ(("%s: objects in dictionary:%d\n",
		    __func__, prop_dictionary_count(dict)));
		prop_object_release(dict);
	}

out:
	prop_object_iterator_release(iter);
	prop_object_release(array);
}

/*
 * sysmon_envsys_unregister:
 *
 * 	+ Unregister a sysmon envsys device.
 */
void
sysmon_envsys_unregister(struct sysmon_envsys *sme)
{
	prop_array_t array;
	struct sysmon_envsys *osme;
	envsys_data_t *edata;

	KASSERT(sme != NULL);

	/*
	 * Decrement global sensors counter and the first_sensor index
	 * for remaining devices in the list (only used for compatibility
	 * with previous API), and remove the device from the list.
	 */
	mutex_enter(&sme_global_mtx);
	sysmon_envsys_next_sensor_index -= sme->sme_nsensors;
	LIST_FOREACH(osme, &sysmon_envsys_list, sme_list) {
		if (osme->sme_fsensor >= sme->sme_fsensor)
			osme->sme_fsensor -= sme->sme_nsensors;
	}
	LIST_REMOVE(sme, sme_list);
	mutex_exit(&sme_global_mtx);

	while ((edata = TAILQ_FIRST(&sme->sme_sensors_list)) != NULL) {
		sysmon_envsys_sensor_detach(sme, edata);
	}

	/*
	 * Unregister all events associated with device.
	 */
	sme_event_unregister_all(sme);

	/*
	 * Remove the device (and all its objects) from the global dictionary.
	 */
	array = prop_dictionary_get(sme_propd, sme->sme_name);
	if (array && prop_object_type(array) == PROP_TYPE_ARRAY) {
		mutex_enter(&sme_global_mtx);
		prop_dictionary_remove(sme_propd, sme->sme_name);
		mutex_exit(&sme_global_mtx);
		sysmon_envsys_destroy_plist(array);
	}
	/*
	 * And finally destroy the sysmon_envsys object.
	 */
	sysmon_envsys_destroy(sme);
}

/*
 * sysmon_envsys_find:
 *
 *	+ Find a sysmon envsys device and mark it as busy
 *	  once it's available.
 */
struct sysmon_envsys *
sysmon_envsys_find(const char *name)
{
	struct sysmon_envsys *sme;

	mutex_enter(&sme_global_mtx);
	LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) {
		if (strcmp(sme->sme_name, name) == 0) {
			sysmon_envsys_acquire(sme, false);
			break;
		}
	}
	mutex_exit(&sme_global_mtx);

	return sme;
}

/*
 * Compatibility function with the old API: look a device up by a
 * global sensor index instead of by name.
 */
struct sysmon_envsys *
sysmon_envsys_find_40(u_int idx)
{
	struct sysmon_envsys *sme;

	mutex_enter(&sme_global_mtx);
	LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) {
		if (idx >= sme->sme_fsensor &&
		    idx < (sme->sme_fsensor + sme->sme_nsensors)) {
			sysmon_envsys_acquire(sme, false);
			break;
		}
	}
	mutex_exit(&sme_global_mtx);

	return sme;
}

/*
 * sysmon_envsys_acquire:
 *
 * 	+ Wait until a sysmon envsys device is available and mark
 * 	  it as busy.
 */
void
sysmon_envsys_acquire(struct sysmon_envsys *sme, bool locked)
{
	KASSERT(sme != NULL);

	/* 'locked' means the caller already holds sme->sme_mtx. */
	if (locked) {
		while (sme->sme_flags & SME_FLAG_BUSY)
			cv_wait(&sme->sme_condvar, &sme->sme_mtx);
		sme->sme_flags |= SME_FLAG_BUSY;
	} else {
		mutex_enter(&sme->sme_mtx);
		while (sme->sme_flags & SME_FLAG_BUSY)
			cv_wait(&sme->sme_condvar, &sme->sme_mtx);
		sme->sme_flags |= SME_FLAG_BUSY;
		mutex_exit(&sme->sme_mtx);
	}
}

/*
 * sysmon_envsys_release:
 *
 * 	+ Unmark a sysmon envsys device as busy, and notify
 * 	  waiters.
 */
void
sysmon_envsys_release(struct sysmon_envsys *sme, bool locked)
{
	KASSERT(sme != NULL);

	if (locked) {
		sme->sme_flags &= ~SME_FLAG_BUSY;
		cv_broadcast(&sme->sme_condvar);
	} else {
		mutex_enter(&sme->sme_mtx);
		sme->sme_flags &= ~SME_FLAG_BUSY;
		cv_broadcast(&sme->sme_condvar);
		mutex_exit(&sme->sme_mtx);
	}
}

/*
 * sme_initial_refresh:
 *
 * 	+ Do an initial refresh of the sensors in a device just after
 * 	  interrupts are enabled in the autoconf(9) process.
 *
 */
static void
sme_initial_refresh(void *arg)
{
	struct sysmon_envsys *sme = arg;
	envsys_data_t *edata;

	mutex_enter(&sme->sme_mtx);
	sysmon_envsys_acquire(sme, true);
	TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head)
		sysmon_envsys_refresh_sensor(sme, edata);
	sysmon_envsys_release(sme, true);
	mutex_exit(&sme->sme_mtx);
}

/*
 * sme_sensor_dictionary_get:
 *
 * 	+ Returns a dictionary of a device specified by its index
 * 	  position.
 */
prop_dictionary_t
sme_sensor_dictionary_get(prop_array_t array, const char *index)
{
	prop_object_iterator_t iter;
	prop_dictionary_t dict;
	prop_object_t obj;

	/*
	 * NOTE(review): '||' only asserts one of the two arguments is
	 * non-NULL; '&&' was probably intended.
	 */
	KASSERT(array != NULL || index != NULL);

	iter = prop_array_iterator(array);
	if (!iter)
		return NULL;

	/* Returns NULL if the iterator is exhausted without a match. */
	while ((dict = prop_object_iterator_next(iter))) {
		obj = prop_dictionary_get(dict, "index");
		if (prop_string_equals_string(obj, index))
			break;
	}

	prop_object_iterator_release(iter);
	return dict;
}

/*
 * sme_remove_userprops:
 *
 * 	+ Remove all properties from all devices that were set by
 * 	  the ENVSYS_SETDICTIONARY ioctl.
 */
static void
sme_remove_userprops(void)
{
	struct sysmon_envsys *sme;
	prop_array_t array;
	prop_dictionary_t sdict;
	envsys_data_t *edata = NULL;
	char tmp[ENVSYS_DESCLEN];
	char rnd_name[sizeof(edata->rnd_src.name)];
	sysmon_envsys_lim_t lims;
	const struct sme_descr_entry *sdt_units;
	uint32_t props;
	int ptype;

	mutex_enter(&sme_global_mtx);
	LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) {
		sysmon_envsys_acquire(sme, false);
		array = prop_dictionary_get(sme_propd, sme->sme_name);

		TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) {
			(void)snprintf(tmp, sizeof(tmp), "sensor%d",
				       edata->sensor);
			sdict = sme_sensor_dictionary_get(array, tmp);
			KASSERT(sdict != NULL);

			/* ptype records which event type must be rebuilt. */
			ptype = 0;
			if (edata->upropset & PROP_BATTCAP) {
				prop_dictionary_remove(sdict,
				    "critical-capacity");
				ptype = PENVSYS_EVENT_CAPACITY;
			}

			if (edata->upropset & PROP_BATTWARN) {
				prop_dictionary_remove(sdict,
				    "warning-capacity");
				ptype = PENVSYS_EVENT_CAPACITY;
			}

			if (edata->upropset & PROP_BATTHIGH) {
				prop_dictionary_remove(sdict,
				    "high-capacity");
				ptype = PENVSYS_EVENT_CAPACITY;
			}

			if (edata->upropset & PROP_BATTMAX) {
				prop_dictionary_remove(sdict,
				    "maximum-capacity");
				ptype = PENVSYS_EVENT_CAPACITY;
			}

			if (edata->upropset & PROP_WARNMAX) {
				prop_dictionary_remove(sdict, "warning-max");
				ptype = PENVSYS_EVENT_LIMITS;
			}

			if (edata->upropset & PROP_WARNMIN) {
				prop_dictionary_remove(sdict, "warning-min");
				ptype = PENVSYS_EVENT_LIMITS;
			}

			if (edata->upropset & PROP_CRITMAX) {
				prop_dictionary_remove(sdict, "critical-max");
				ptype = PENVSYS_EVENT_LIMITS;
			}

			if (edata->upropset & PROP_CRITMIN) {
				prop_dictionary_remove(sdict, "critical-min");
				ptype = PENVSYS_EVENT_LIMITS;
			}

			if (edata->upropset & PROP_RFACT) {
				(void)sme_sensor_upint32(sdict, "rfact", 0);
				edata->rfact = 0;
			}

			if (edata->upropset & PROP_DESC)
				(void)sme_sensor_upstring(sdict,
			  	    "description", edata->desc);

			if (ptype == 0)
				continue;

			/*
			 * If there were any limit values removed, we
			 * need to revert to initial limits.
			 *
			 * First, tell the driver that we need it to
			 * restore any h/w limits which may have been
			 * changed to stored, boot-time values.
			 */
			if (sme->sme_set_limits) {
				DPRINTF(("%s: reset limits for %s %s\n",
				    __func__, sme->sme_name, edata->desc));
				(*sme->sme_set_limits)(sme, edata, NULL, NULL);
			}

			/*
			 * Next, we need to retrieve those initial limits.
			 */
			props = 0;
			edata->upropset &= ~PROP_LIMITS;
			if (sme->sme_get_limits) {
				DPRINTF(("%s: retrieve limits for %s %s\n",
				    __func__, sme->sme_name, edata->desc));
				lims = edata->limits;
				(*sme->sme_get_limits)(sme, edata, &lims,
				    &props);
			}

			/*
			 * If the sensor is providing entropy data,
			 * get rid of the rndsrc;  we'll provide a new
			 * one shortly.
			 */
			if (edata->flags & ENVSYS_FHAS_ENTROPY)
				rnd_detach_source(&edata->rnd_src);

			/*
			 * Remove the old limits event, if any
			 */
			sme_event_unregister(sme, edata->desc,
			    PENVSYS_EVENT_LIMITS);

			/*
			 * Create and install a new event (which will
			 * update the dictionary) with the correct
			 * units.
			 */
			sdt_units = sme_find_table_entry(SME_DESC_UNITS,
			    edata->units);

			if (props & PROP_LIMITS) {
				DPRINTF(("%s: install limits for %s %s\n",
				    __func__, sme->sme_name, edata->desc));

				sme_event_register(sdict, edata, sme, &lims,
				    props, PENVSYS_EVENT_LIMITS,
				    sdt_units->crittype);
			}

			/* Finally, if the sensor provides entropy,
			 * create an additional event entry and attach
			 * the rndsrc
			 */
			if (edata->flags & ENVSYS_FHAS_ENTROPY) {
				sme_event_register(sdict, edata, sme, &lims,
				    props, PENVSYS_EVENT_NULL,
				    sdt_units->crittype);
				snprintf(rnd_name, sizeof(rnd_name), "%s-%s",
				    sme->sme_name, edata->desc);
				rnd_attach_source(&edata->rnd_src, rnd_name,
				    RND_TYPE_ENV, RND_FLAG_COLLECT_VALUE|
						  RND_FLAG_COLLECT_TIME|
						  RND_FLAG_ESTIMATE_VALUE|
						  RND_FLAG_ESTIMATE_TIME);
			}
		}

		/*
		 * Restore default timeout value.
		 */
		mutex_enter(&sme->sme_work_mtx);
		sme->sme_events_timeout = SME_EVENTS_DEFTIMEOUT;
		sme_schedule_callout(sme);
		mutex_exit(&sme->sme_work_mtx);
		sysmon_envsys_release(sme, false);
	}
	mutex_exit(&sme_global_mtx);
}

/*
 * sme_add_property_dictionary:
 *
 * 	+ Add global properties into a device.
 */
static int
sme_add_property_dictionary(struct sysmon_envsys *sme, prop_array_t array,
			    prop_dictionary_t dict)
{
	prop_dictionary_t pdict;
	uint64_t timo;
	const char *class;
	int error = 0;

	pdict = prop_dictionary_create();
	if (!pdict)
		return EINVAL;

	/*
	 * Add the 'refresh-timeout' and 'dev-class' objects into the
	 * 'device-properties' dictionary.
	 *
	 * 	...
	 * 	<dict>
	 * 		<key>device-properties</key>
	 * 		<dict>
	 * 			<key>refresh-timeout</key>
	 * 			<integer>120</integer>
	 * 			<key>device-class</key>
	 * 			<string>class_name</string>
	 * 		</dict>
	 * 	</dict>
	 * 	...
	 *
	 */
	mutex_enter(&sme->sme_work_mtx);
	if (sme->sme_events_timeout == 0) {
		sme->sme_events_timeout = SME_EVENTS_DEFTIMEOUT;
		sme_schedule_callout(sme);
	}
	timo = sme->sme_events_timeout;
	mutex_exit(&sme->sme_work_mtx);
	if (!prop_dictionary_set_uint64(pdict, "refresh-timeout", timo)) {
		error = EINVAL;
		goto out;
	}
	if (sme->sme_class == SME_CLASS_BATTERY)
		class = "battery";
	else if (sme->sme_class == SME_CLASS_ACADAPTER)
		class = "ac-adapter";
	else
		class = "other";
	if (!prop_dictionary_set_string_nocopy(pdict, "device-class", class)) {
		error = EINVAL;
		goto out;
	}

	if (!prop_dictionary_set(dict, "device-properties", pdict)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Add the device dictionary into the sysmon envsys array.
	 */
	if (!prop_array_add(array, dict))
		error = EINVAL;

out:
	prop_object_release(pdict);
	return error;
}

/*
 * sme_add_sensor_dictionary:
 *
 * 	+ Adds the sensor objects into the dictionary and returns a pointer
 * 	  to a sme_event_drv_t object if a monitoring flag was set
 * 	  (or NULL otherwise).
*/ static sme_event_drv_t * sme_add_sensor_dictionary(struct sysmon_envsys *sme, prop_array_t array, prop_dictionary_t dict, envsys_data_t *edata) { const struct sme_descr_entry *sdt; int error; sme_event_drv_t *sme_evdrv_t = NULL; char indexstr[ENVSYS_DESCLEN]; bool mon_supported, allow_rfact; /* * Add the index sensor string. * * ... * <key>index</eyr * <string>sensor0</string> * ... */ (void)snprintf(indexstr, sizeof(indexstr), "sensor%d", edata->sensor); if (sme_sensor_upstring(dict, "index", indexstr)) goto bad; /* * ... * <key>description</key> * <string>blah blah</string> * ... */ if (sme_sensor_upstring(dict, "description", edata->desc)) goto bad; /* * Add the monitoring boolean object: * * ... * <key>monitoring-supported</key> * <true/> * ... * * always false on Battery {capacity,charge}, Drive and Indicator types. * They cannot be monitored. * */ if ((edata->flags & ENVSYS_FMONNOTSUPP) || (edata->units == ENVSYS_INDICATOR) || (edata->units == ENVSYS_DRIVE) || (edata->units == ENVSYS_BATTERY_CAPACITY) || (edata->units == ENVSYS_BATTERY_CHARGE)) mon_supported = false; else mon_supported = true; if (sme_sensor_upbool(dict, "monitoring-supported", mon_supported)) goto out; /* * Add the allow-rfact boolean object, true if * ENVSYS_FCHANGERFACT is set, false otherwise. * * ... * <key>allow-rfact</key> * <true/> * ... */ if (edata->units == ENVSYS_SVOLTS_DC || edata->units == ENVSYS_SVOLTS_AC) { if (edata->flags & ENVSYS_FCHANGERFACT) allow_rfact = true; else allow_rfact = false; if (sme_sensor_upbool(dict, "allow-rfact", allow_rfact)) goto out; } error = sme_update_sensor_dictionary(dict, edata, (edata->state == ENVSYS_SVALID)); if (error < 0) goto bad; else if (error) goto out; /* * ... * </dict> * * Add the dictionary into the array. * */ if (!prop_array_add(array, dict)) { DPRINTF(("%s: prop_array_add\n", __func__)); goto bad; } /* * Register new event(s) if any monitoring flag was set or if * the sensor provides entropy for rnd(4). 
*/ if (edata->flags & (ENVSYS_FMONANY | ENVSYS_FHAS_ENTROPY)) { sme_evdrv_t = kmem_zalloc(sizeof(*sme_evdrv_t), KM_SLEEP); sme_evdrv_t->sed_sdict = dict; sme_evdrv_t->sed_edata = edata; sme_evdrv_t->sed_sme = sme; sdt = sme_find_table_entry(SME_DESC_UNITS, edata->units); sme_evdrv_t->sed_powertype = sdt->crittype; } out: return sme_evdrv_t; bad: prop_object_release(dict); return NULL; } /* * Find the maximum of all currently reported values. * The provided callback decides whether a sensor is part of the * maximum calculation (by returning true) or ignored (callback * returns false). Example usage: callback selects temperature * sensors in a given thermal zone, the function calculates the * maximum currently reported temperature in this zone. * If the parameter "refresh" is true, new values will be acquired * from the hardware, if not, the last reported value will be used. */ uint32_t sysmon_envsys_get_max_value(bool (*predicate)(const envsys_data_t*), bool refresh) { struct sysmon_envsys *sme; uint32_t maxv, v; maxv = 0; mutex_enter(&sme_global_mtx); LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) { sysmon_envsys_acquire(sme, false); v = sme_get_max_value(sme, predicate, refresh); sysmon_envsys_release(sme, false); if (v > maxv) maxv = v; } mutex_exit(&sme_global_mtx); return maxv; } static uint32_t sme_get_max_value(struct sysmon_envsys *sme, bool (*predicate)(const envsys_data_t*), bool refresh) { envsys_data_t *edata; uint32_t maxv, v; /* * Iterate over all sensors that match the predicate */ maxv = 0; TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) { if (!(*predicate)(edata)) continue; /* * refresh sensor data */ mutex_enter(&sme->sme_mtx); sysmon_envsys_refresh_sensor(sme, edata); mutex_exit(&sme->sme_mtx); v = edata->value_cur; if (v > maxv) maxv = v; } return maxv; } /* * sme_update_dictionary: * * + Update per-sensor dictionaries with new values if there were * changes, otherwise the object in dictionary is untouched. 
*/ int sme_update_dictionary(struct sysmon_envsys *sme) { envsys_data_t *edata; prop_object_t array, dict, obj, obj2; uint64_t timo; int error = 0; /* * Retrieve the array of dictionaries in device. */ array = prop_dictionary_get(sme_propd, sme->sme_name); if (prop_object_type(array) != PROP_TYPE_ARRAY) { DPRINTF(("%s: not an array (%s)\n", __func__, sme->sme_name)); return EINVAL; } /* * Get the last dictionary on the array, this contains the * 'device-properties' sub-dictionary. */ obj = prop_array_get(array, prop_array_count(array) - 1); if (!obj || prop_object_type(obj) != PROP_TYPE_DICTIONARY) { DPRINTF(("%s: not a device-properties dictionary\n", __func__)); return EINVAL; } obj2 = prop_dictionary_get(obj, "device-properties"); if (!obj2) return EINVAL; /* * Update the 'refresh-timeout' property. */ mutex_enter(&sme->sme_work_mtx); timo = sme->sme_events_timeout; mutex_exit(&sme->sme_work_mtx); if (!prop_dictionary_set_uint64(obj2, "refresh-timeout", timo)) return EINVAL; /* * - iterate over all sensors. * - fetch new data. * - check if data in dictionary is different than new data. * - update dictionary if there were changes. */ DPRINTF(("%s: updating '%s' with nsensors=%d\n", __func__, sme->sme_name, sme->sme_nsensors)); /* * Don't bother with locking when traversing the queue, * the device is already marked as busy; if a sensor * is going to be removed or added it will have to wait. */ TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) { /* * refresh sensor data via sme_envsys_refresh_sensor */ mutex_enter(&sme->sme_mtx); sysmon_envsys_refresh_sensor(sme, edata); mutex_exit(&sme->sme_mtx); /* * retrieve sensor's dictionary. */ dict = prop_array_get(array, edata->sensor); if (prop_object_type(dict) != PROP_TYPE_DICTIONARY) { DPRINTF(("%s: not a dictionary (%d:%s)\n", __func__, edata->sensor, sme->sme_name)); return EINVAL; } /* * update sensor's state. 
*/ error = sme_update_sensor_dictionary(dict, edata, true); if (error) break; } return error; } int sme_update_sensor_dictionary(prop_object_t dict, envsys_data_t *edata, bool value_update) { const struct sme_descr_entry *sdt; int error = 0; sdt = sme_find_table_entry(SME_DESC_STATES, edata->state); if (sdt == NULL) { printf("sme_update_sensor_dictionary: cannot update sensor %d " "state %d unknown\n", edata->sensor, edata->state); return EINVAL; } DPRINTFOBJ(("%s: sensor #%d type=%d (%s) flags=%d\n", __func__, edata->sensor, sdt->type, sdt->desc, edata->flags)); error = sme_sensor_upstring(dict, "state", sdt->desc); if (error) return (-error); /* * update sensor's type. */ sdt = sme_find_table_entry(SME_DESC_UNITS, edata->units); if (sdt == NULL) return EINVAL; DPRINTFOBJ(("%s: sensor #%d units=%d (%s)\n", __func__, edata->sensor, sdt->type, sdt->desc)); error = sme_sensor_upstring(dict, "type", sdt->desc); if (error) return (-error); if (value_update) { /* * update sensor's current value. */ error = sme_sensor_upint32(dict, "cur-value", edata->value_cur); if (error) return error; } /* * Battery charge and Indicator types do not * need the remaining objects, so skip them. */ if (edata->units == ENVSYS_INDICATOR || edata->units == ENVSYS_BATTERY_CHARGE) return error; /* * update sensor flags. */ if (edata->flags & ENVSYS_FPERCENT) { error = sme_sensor_upbool(dict, "want-percentage", true); if (error) return error; } if (value_update) { /* * update sensor's {max,min}-value. */ if (edata->flags & ENVSYS_FVALID_MAX) { error = sme_sensor_upint32(dict, "max-value", edata->value_max); if (error) return error; } if (edata->flags & ENVSYS_FVALID_MIN) { error = sme_sensor_upint32(dict, "min-value", edata->value_min); if (error) return error; } /* * update 'rpms' only for ENVSYS_SFANRPM sensors. */ if (edata->units == ENVSYS_SFANRPM) { error = sme_sensor_upuint32(dict, "rpms", edata->rpms); if (error) return error; } /* * update 'rfact' only for ENVSYS_SVOLTS_[AD]C sensors. 
*/ if (edata->units == ENVSYS_SVOLTS_AC || edata->units == ENVSYS_SVOLTS_DC) { error = sme_sensor_upint32(dict, "rfact", edata->rfact); if (error) return error; } } /* * update 'drive-state' only for ENVSYS_DRIVE sensors. */ if (edata->units == ENVSYS_DRIVE) { sdt = sme_find_table_entry(SME_DESC_DRIVE_STATES, edata->value_cur); if (sdt == NULL) return EINVAL; error = sme_sensor_upstring(dict, "drive-state", sdt->desc); if (error) return error; } /* * update 'battery-capacity' only for ENVSYS_BATTERY_CAPACITY * sensors. */ if (edata->units == ENVSYS_BATTERY_CAPACITY) { sdt = sme_find_table_entry(SME_DESC_BATTERY_CAPACITY, edata->value_cur); if (sdt == NULL) return EINVAL; error = sme_sensor_upstring(dict, "battery-capacity", sdt->desc); if (error) return error; } return error; } /* * sme_userset_dictionary: * * + Parse the userland dictionary and run the appropriate tasks * that were specified. */ int sme_userset_dictionary(struct sysmon_envsys *sme, prop_dictionary_t udict, prop_array_t array) { const struct sme_descr_entry *sdt; envsys_data_t *edata; prop_dictionary_t dict, tdict = NULL; prop_object_t obj, obj1, obj2, tobj = NULL; uint32_t props; uint64_t refresh_timo = 0; sysmon_envsys_lim_t lims; int i, error = 0; const char *blah; bool targetfound = false; /* * The user wanted to change the refresh timeout value for this * device. * * Get the 'device-properties' object from the userland dictionary. */ obj = prop_dictionary_get(udict, "device-properties"); if (obj && prop_object_type(obj) == PROP_TYPE_DICTIONARY) { /* * Get the 'refresh-timeout' property for this device. 
*/ obj1 = prop_dictionary_get(obj, "refresh-timeout"); if (obj1 && prop_object_type(obj1) == PROP_TYPE_NUMBER) { targetfound = true; refresh_timo = prop_number_unsigned_value(obj1); if (refresh_timo < 1) error = EINVAL; else { mutex_enter(&sme->sme_work_mtx); if (sme->sme_events_timeout != refresh_timo) { sme->sme_events_timeout = refresh_timo; sme_schedule_callout(sme); } mutex_exit(&sme->sme_work_mtx); } } return error; } else if (!obj) { /* * Get sensor's index from userland dictionary. */ obj = prop_dictionary_get(udict, "index"); if (!obj) return EINVAL; if (prop_object_type(obj) != PROP_TYPE_STRING) { DPRINTF(("%s: 'index' not a string\n", __func__)); return EINVAL; } } else return EINVAL; /* * Don't bother with locking when traversing the queue, * the device is already marked as busy; if a sensor * is going to be removed or added it will have to wait. */ TAILQ_FOREACH(edata, &sme->sme_sensors_list, sensors_head) { /* * Get a dictionary and check if it's our sensor by checking * at its index position. */ dict = prop_array_get(array, edata->sensor); obj1 = prop_dictionary_get(dict, "index"); /* * is it our sensor? */ if (!prop_string_equals(obj1, obj)) continue; props = 0; /* * Check if a new description operation was * requested by the user and set new description. */ obj2 = prop_dictionary_get(udict, "description"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_STRING) { targetfound = true; blah = prop_string_value(obj2); /* * Check for duplicate description. */ for (i = 0; i < sme->sme_nsensors; i++) { if (i == edata->sensor) continue; tdict = prop_array_get(array, i); tobj = prop_dictionary_get(tdict, "description"); if (prop_string_equals(obj2, tobj)) { error = EEXIST; goto out; } } /* * Update the object in dictionary. 
*/ mutex_enter(&sme->sme_mtx); error = sme_sensor_upstring(dict, "description", blah); if (error) { mutex_exit(&sme->sme_mtx); goto out; } DPRINTF(("%s: sensor%d changed desc to: %s\n", __func__, edata->sensor, blah)); edata->upropset |= PROP_DESC; mutex_exit(&sme->sme_mtx); } /* * did the user want to change the rfact? */ obj2 = prop_dictionary_get(udict, "rfact"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; if (edata->flags & ENVSYS_FCHANGERFACT) { mutex_enter(&sme->sme_mtx); edata->rfact = prop_number_signed_value(obj2); edata->upropset |= PROP_RFACT; mutex_exit(&sme->sme_mtx); DPRINTF(("%s: sensor%d changed rfact to %d\n", __func__, edata->sensor, edata->rfact)); } else { error = ENOTSUP; goto out; } } sdt = sme_find_table_entry(SME_DESC_UNITS, edata->units); /* * did the user want to set a critical capacity event? */ obj2 = prop_dictionary_get(udict, "critical-capacity"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_critmin = prop_number_signed_value(obj2); props |= PROP_BATTCAP; } /* * did the user want to set a warning capacity event? */ obj2 = prop_dictionary_get(udict, "warning-capacity"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_warnmin = prop_number_signed_value(obj2); props |= PROP_BATTWARN; } /* * did the user want to set a high capacity event? */ obj2 = prop_dictionary_get(udict, "high-capacity"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_warnmin = prop_number_signed_value(obj2); props |= PROP_BATTHIGH; } /* * did the user want to set a maximum capacity event? */ obj2 = prop_dictionary_get(udict, "maximum-capacity"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_warnmin = prop_number_signed_value(obj2); props |= PROP_BATTMAX; } /* * did the user want to set a critical max event? 
*/ obj2 = prop_dictionary_get(udict, "critical-max"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_critmax = prop_number_signed_value(obj2); props |= PROP_CRITMAX; } /* * did the user want to set a warning max event? */ obj2 = prop_dictionary_get(udict, "warning-max"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_warnmax = prop_number_signed_value(obj2); props |= PROP_WARNMAX; } /* * did the user want to set a critical min event? */ obj2 = prop_dictionary_get(udict, "critical-min"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_critmin = prop_number_signed_value(obj2); props |= PROP_CRITMIN; } /* * did the user want to set a warning min event? */ obj2 = prop_dictionary_get(udict, "warning-min"); if (obj2 && prop_object_type(obj2) == PROP_TYPE_NUMBER) { targetfound = true; lims.sel_warnmin = prop_number_signed_value(obj2); props |= PROP_WARNMIN; } if (props && (edata->flags & ENVSYS_FMONNOTSUPP) != 0) { error = ENOTSUP; goto out; } if (props || (edata->flags & ENVSYS_FHAS_ENTROPY) != 0) { error = sme_event_register(dict, edata, sme, &lims, props, (edata->flags & ENVSYS_FPERCENT)? PENVSYS_EVENT_CAPACITY: PENVSYS_EVENT_LIMITS, sdt->crittype); if (error == EEXIST) error = 0; if (error) goto out; } /* * All objects in dictionary were processed. */ break; } out: /* * invalid target? return the error. */ if (!targetfound) error = EINVAL; return error; } /* * + sysmon_envsys_foreach_sensor * * Walk through the devices' sensor lists and execute the callback. * If the callback returns false, the remainder of the current * device's sensors are skipped. 
 */
void
sysmon_envsys_foreach_sensor(sysmon_envsys_callback_t func, void *arg,
			     bool refresh)
{
	struct sysmon_envsys *sme;
	envsys_data_t *sensor;

	mutex_enter(&sme_global_mtx);
	LIST_FOREACH(sme, &sysmon_envsys_list, sme_list) {
		sysmon_envsys_acquire(sme, false);
		TAILQ_FOREACH(sensor, &sme->sme_sensors_list, sensors_head) {
			/* optionally fetch fresh data before the callback */
			if (refresh) {
				mutex_enter(&sme->sme_mtx);
				sysmon_envsys_refresh_sensor(sme, sensor);
				mutex_exit(&sme->sme_mtx);
			}
			if (!(*func)(sme, sensor, arg))
				break;
		}
		sysmon_envsys_release(sme, false);
	}
	mutex_exit(&sme_global_mtx);
}

/*
 * Call the sensor's refresh function, and collect/stir entropy.
 * Caller is expected to hold sme->sme_mtx where serialization matters
 * (see callers above) -- NOTE(review): not asserted here, confirm.
 */
void
sysmon_envsys_refresh_sensor(struct sysmon_envsys *sme, envsys_data_t *edata)
{

	/* drivers with SME_DISABLE_REFRESH update their sensors themselves */
	if ((sme->sme_flags & SME_DISABLE_REFRESH) == 0)
		(*sme->sme_refresh)(sme, edata);

	/*
	 * Only feed rnd(4) when the sensor is valid and the value
	 * actually changed, to avoid polluting the pool with constants.
	 */
	if (edata->flags & ENVSYS_FHAS_ENTROPY &&
	    edata->state != ENVSYS_SINVALID &&
	    edata->value_prev != edata->value_cur)
		rnd_add_uint32(&edata->rnd_src, edata->value_cur);
	edata->value_prev = edata->value_cur;
}

/*
 * Module control entry point for sysmon_envsys(9).
 */
static int
sysmon_envsys_modcmd(modcmd_t cmd, void *arg)
{
	int ret;

	switch (cmd) {
	case MODULE_CMD_INIT:
		ret = sysmon_envsys_init();
		break;
	case MODULE_CMD_FINI:
		ret = sysmon_envsys_fini();
		break;
	case MODULE_CMD_STAT:
	default:
		ret = ENOTTY;
	}

	return ret;
}
| 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | /* $NetBSD: rf_shutdown.c,v 1.21 2021/07/21 23:10:12 oster Exp $ */ /* * rf_shutdown.c */ /* * Copyright (c) 1996 Carnegie-Mellon University. * All rights reserved. * * Author: Jim Zelenka * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Maintain lists of cleanup functions. Also, mechanisms for coordinating * thread startup and shutdown. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_shutdown.c,v 1.21 2021/07/21 23:10:12 oster Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_archs.h"
#include "rf_shutdown.h"
#include "rf_debugMem.h"

#ifndef RF_DEBUG_SHUTDOWN
#define RF_DEBUG_SHUTDOWN 0
#endif

static void rf_FreeShutdownEnt(RF_ShutdownList_t *);

/* Release a single shutdown-list entry. */
static void
rf_FreeShutdownEnt(RF_ShutdownList_t *ent)
{
	free(ent, M_RAIDFRAME);
}

/*
 * Prepend a cleanup function (and its argument) to the shutdown list
 * *listp.  With RF_DEBUG_SHUTDOWN the call site (file/line) is recorded
 * for diagnostics.
 */
#if RF_DEBUG_SHUTDOWN
void
_rf_ShutdownCreate(RF_ShutdownList_t **listp, void (*cleanup)(void *arg),
		   void *arg, char *file, int line)
#else
void
_rf_ShutdownCreate(RF_ShutdownList_t **listp, void (*cleanup)(void *arg),
		   void *arg)
#endif
{
	RF_ShutdownList_t *ent;

	/*
	 * Have to directly allocate memory here, since we start up before
	 * and shutdown after RAIDframe internal allocation system.
	 */
	ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t),
					   M_RAIDFRAME, M_WAITOK);
	ent->cleanup = cleanup;
	ent->arg = arg;
#if RF_DEBUG_SHUTDOWN
	ent->file = file;
	ent->line = line;
#endif
	/* push onto the head: entries run in LIFO order at shutdown */
	ent->next = *listp;
	*listp = ent;
}

/*
 * Run every cleanup function on *list (most recently added first),
 * freeing each entry, and leave *list empty.
 */
void
rf_ShutdownList(RF_ShutdownList_t **list)
{
	RF_ShutdownList_t *r, *next;
#if RF_DEBUG_SHUTDOWN
	char *file;
	int line;
#endif

	for (r = *list; r; r = next) {
		next = r->next;
#if RF_DEBUG_SHUTDOWN
		file = r->file;
		line = r->line;

		if (rf_shutdownDebug) {
			printf("call shutdown, created %s:%d\n", file, line);
		}
#endif
		r->cleanup(r->arg);
#if RF_DEBUG_SHUTDOWN
		if (rf_shutdownDebug) {
			printf("completed shutdown, created %s:%d\n",
			    file, line);
		}
#endif
		rf_FreeShutdownEnt(r);
	}
	*list = NULL;
}
| 78 739 10 10 385 4 4 4 4 375 1 375 374 375 76 76 76 74 74 5 3 366 366 784 739 783 756 783 70 782 738 738 1 400 396 10 10 358 733 731 8 8 8 733 733 703 731 731 7 732 727 395 727 726 727 725 727 395 395 386 722 727 727 679 667 667 4 665 703 468 7 464 463 464 96 380 376 376 13 376 376 2 94 3 665 665 665 354 275 85 75 73 72 35 26 10 26 25 25 24 622 622 733 724 727 726 727 725 726 727 727 727 726 726 727 346 346 1 1 1 1 1 1 1 1 350 350 350 350 350 350 350 350 3 3 3 349 349 349 350 350 350 350 349 1 1 349 350 350 1 350 34 34 34 34 34 33 1 34 34 34 31 32 32 34 34 8 7 7 7 7 7 7 7 11 11 11 11 11 11 11 11 11 11 11 737 734 734 734 734 734 733 733 733 732 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 
864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 
1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 | /* $NetBSD: ufs_lookup.c,v 1.156 2022/08/06 18:26:42 andvar Exp $ */ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.156 2022/08/06 18:26:42 andvar Exp $");

#ifdef _KERNEL_OPT
#include "opt_ffs.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/proc.h>
#include <sys/kmem.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <miscfs/genfs/genfs.h>

/* dirchk: enable extra directory-entry sanity checks (on with DIAGNOSTIC) */
#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif

/* True when the on-disk byte order differs from the host's. */
#if BYTE_ORDER == LITTLE_ENDIAN
# define ENDIANSWAP(needswap) ((needswap) == 0)
#else
# define ENDIANSWAP(needswap) ((needswap) != 0)
#endif

/*
 * In the old 4.2BSD format (fsfmt) on a swapped filesystem, d_namlen and
 * d_type occupy each other's bytes; pick the right field.
 */
#define NAMLEN(fsfmt, needswap, dp) \
    ((fsfmt) && ENDIANSWAP(needswap) ? (dp)->d_type : (dp)->d_namlen)

/* Swap d_namlen and d_type in place (old-format/byte-swapped dirs). */
static void
ufs_dirswap(struct direct *dirp)
{
	uint8_t tmp = dirp->d_namlen;
	dirp->d_namlen = dirp->d_type;
	dirp->d_type = tmp;
}

/*
 * Bookkeeping for the search for a free directory slot during
 * CREATE/RENAME lookups.
 */
struct slotinfo {
	enum {
		NONE,		/* need to search a slot for our new entry */
		COMPACT,	/* a compaction can make a slot in the current
				   DIRBLKSIZ block */
		FOUND,		/* found a slot (or no need to search) */
	} status;
	doff_t offset;		/* offset of area with free space.
				   a special value -1 for invalid */
	int size;		/* size of area at slotoffset */
	int freespace;		/* accumulated amount of space free in
				   the current DIRBLKSIZ block */
	int needed;		/* size of the entry we're seeking */
};

/*
 * Set ulr_count: 0 when ulr_offset is at a DIRBLKSIZ boundary (no
 * previous entry in the block), else the distance from the previous
 * entry's offset.
 */
static void
calc_count(struct ufs_lookup_results *results, int dirblksiz, doff_t prevoff)
{
	if ((results->ulr_offset & (dirblksiz - 1)) == 0)
		results->ulr_count = 0;
	else
		results->ulr_count = results->ulr_offset - prevoff;
}

/* Initialize slot state; FOUND here means "no slot search needed". */
static void
slot_init(struct slotinfo *slot)
{
	slot->status = FOUND;
	slot->offset = -1;
	slot->freespace = slot->size = slot->needed = 0;
}

#ifdef UFS_DIRHASH
/*
 * Ask dirhash for a free slot of the needed size; returns the offset
 * at which the linear scan may stop (end of useful directory data),
 * or dp->i_size when dirhash can't help.
 */
static doff_t
slot_findfree(struct slotinfo *slot, struct inode *dp)
{
	if (slot->status == FOUND)
		return dp->i_size;

	slot->offset = ufsdirhash_findfree(dp, slot->needed, &slot->size);
	if (slot->offset < 0)
		return dp->i_size;

	slot->status = COMPACT;
	doff_t enduseful = ufsdirhash_enduseful(dp);
	if (enduseful < 0)
		return dp->i_size;
	return enduseful;
}
#endif

/* Reuse a whiteout entry's slot at the current lookup offset. */
static void
slot_white(struct slotinfo *slot, uint16_t reclen,
    struct ufs_lookup_results *results)
{
	slot->status = FOUND;
	slot->offset = results->ulr_offset;
	slot->size = reclen;
	results->ulr_reclen = slot->size;
}

/*
 * Account one entry's free space ("size" bytes spare within a record of
 * "reclen" at "offset"): take the slot outright if it is big enough,
 * otherwise accumulate free space toward a COMPACT (compaction) slot.
 */
static void
slot_update(struct slotinfo *slot, int size, uint16_t reclen, doff_t offset)
{
	if (size >= slot->needed) {
		slot->status = FOUND;
		slot->offset = offset;
		slot->size = reclen;
	} else if (slot->status == NONE) {
		slot->freespace += size;
		if (slot->offset == -1)
			slot->offset = offset;
		if (slot->freespace >= slot->needed) {
			slot->status = COMPACT;
			slot->size = offset + reclen - slot->offset;
		}
	}
}

/*
 * Return an indication of where the new directory entry should be put.
 * If we didn't find a slot, then set results->ulr_count to 0 indicating
 * that the new slot belongs at the end of the directory. If we found a slot,
 * then the new entry can be put in the range from results->ulr_offset to
 * results->ulr_offset + results->ulr_count.
 */
static int
slot_estimate(const struct slotinfo *slot, int dirblksiz, int nameiop,
    doff_t prevoff, doff_t enduseful, const struct inode *ip,
    struct ufs_lookup_results *results)
{
	if (slot->status == NONE) {
		results->ulr_offset = roundup(ip->i_size, dirblksiz);
		results->ulr_count = 0;
		enduseful = results->ulr_offset;
	} else if (nameiop == DELETE) {
		results->ulr_offset = slot->offset;
		calc_count(results, dirblksiz, prevoff);
	} else {
		results->ulr_offset = slot->offset;
		results->ulr_count = slot->size;
		if (enduseful < slot->offset + slot->size)
			enduseful = slot->offset + slot->size;
	}
	results->ulr_endoff = roundup(enduseful, dirblksiz);
#if 0 /* commented out by dbj. none of the on disk fields changed */
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
#endif
	return EJUSTRETURN;
}

/*
 * Check if we can delete inode tdp in directory vdp with inode ip and creds.
 * On failure the reference on tdp is dropped (vrele) before returning.
 */
static int
ufs_can_delete(struct vnode *tdp, struct vnode *vdp, struct inode *ip,
    kauth_cred_t cred)
{
	int error;

#ifdef UFS_ACL
	/*
	 * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt
	 *
	 * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD
	 */

	/*
	 * XXX: Is this check required?
	 */
	error = VOP_ACCESS(vdp, VEXEC, cred);
	if (error)
		goto out;

#if 0
	/* Moved to ufs_remove, ufs_rmdir because they hold the lock */
	error = VOP_ACCESSX(tdp, VDELETE, cred);
	if (error == 0)
		return (0);
#endif

	error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred);
	if (error == 0)
		return (0);

	error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred);
	if (error)
		goto out;

#endif /* !UFS_ACL */

	/*
	 * Write access to directory required to delete files.
	 */
	error = VOP_ACCESS(vdp, VWRITE, cred);
	if (error)
		goto out;

	if (!(ip->i_mode & ISVTX))
		return 0;

	/*
	 * If directory is "sticky", then user must own
	 * the directory, or the file in it, else she
	 * may not delete it (unless she's root). This
	 * implements append-only directories.
	 */
	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, tdp, vdp,
	    genfs_can_sticky(vdp, cred, ip->i_uid, VTOI(tdp)->i_uid));
	if (error) {
		/*
		 * Report the sticky-bit denial as EPERM regardless of
		 * the kauth return value.  NOTE(review): the override
		 * looks intentional (POSIX specifies EPERM here) --
		 * confirm before changing.
		 */
		error = EPERM;
		goto out;
	}
	return 0;

out:
	vrele(tdp);
	return error;
}

/*
 * Resolve the vnode for directory entry inode "foundino" found in vdp
 * (whose inode is ip).  Looking up "." returns a fresh reference to vdp
 * itself; "same" turns that case into EISDIR.
 */
static int
ufs_getino(struct vnode *vdp, struct inode *ip, ino_t foundino,
    struct vnode **tdp, bool same)
{

	if (ip->i_number == foundino) {
		if (same)
			return EISDIR;
		vref(vdp);
		*tdp = vdp;
		return 0;
	}
	return vcache_get(vdp->v_mount, &foundino, sizeof(foundino), tdp);
}

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be "."., but the caller must check to ensure it does an vrele and vput
 * instead of two vputs.
* * Overall outline of ufs_lookup: * * check accessibility of directory * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(void *v) { struct vop_lookup_v2_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap = v; struct vnode *vdp = ap->a_dvp; /* vnode for directory being searched */ struct inode *dp = VTOI(vdp); /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ struct slotinfo slot; int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* previous value of ulr_offset */ struct vnode *tdp; /* returned by vcache_get */ doff_t enduseful; /* pointer past last used dir slot. used for directory truncation. */ u_long bmask; /* block offset mask */ int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; kauth_cred_t cred = cnp->cn_cred; int flags; int nameiop = cnp->cn_nameiop; struct ufsmount *ump = dp->i_ump; const int needswap = UFS_MPNEEDSWAP(ump); int dirblksiz = ump->um_dirblksiz; ino_t foundino; struct ufs_lookup_results *results; int iswhiteout; /* temp result from cache_lookup() */ const int fsfmt = FSFMT(vdp); uint16_t reclen; flags = cnp->cn_flags; bp = NULL; *vpp = NULL; endsearch = 0; /* silence compiler warning */ /* * Check accessibility of directory. 
*/ if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) return (error); if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && (nameiop == DELETE || nameiop == RENAME)) return (EROFS); /* * We now have a segment name to search for, and a directory to search. * * Before tediously performing a linear scan of the directory, * check the name cache to see if the directory/name pair * we are looking for is known already. */ if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { if (iswhiteout) { cnp->cn_flags |= ISWHITEOUT; } return *vpp == NULLVP ? ENOENT : 0; } /* May need to restart the lookup with an exclusive lock. */ if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) { return ENOLCK; } /* * Produce the auxiliary lookup results into i_crap. Increment * its serial number so elsewhere we can tell if we're using * stale results. This should not be done this way. XXX. */ results = &dp->i_crap; dp->i_crapcounter++; if (iswhiteout) { /* * The namecache set iswhiteout without finding a * cache entry. As of this writing (20121014), this * can happen if there was a whiteout entry that has * been invalidated by the lookup. It is not clear if * it is correct to set ISWHITEOUT in this case or * not; however, doing so retains the prior behavior, * so we'll go with that until some clearer answer * appears. XXX */ cnp->cn_flags |= ISWHITEOUT; } /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slot_init(&slot); if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slot.status = NONE; slot.needed = UFS_DIRECTSIZ(cnp->cn_namelen); } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. 
Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ bmask = vdp->v_mount->mnt_stat.f_iosize - 1; #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = slot_findfree(&slot, dp); /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (void *)((char *)bp->b_data + (results->ulr_offset & bmask)); reclen = ufs_rw16(ep->d_reclen, needswap); goto foundentry; case ENOENT: results->ulr_offset = roundup(dp->i_size, dirblksiz); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ if (nameiop != LOOKUP || results->ulr_diroff == 0 || results->ulr_diroff >= dp->i_size) { entryoffsetinblock = 0; results->ulr_offset = 0; numdirpasses = 1; } else { results->ulr_offset = results->ulr_diroff; entryoffsetinblock = results->ulr_offset & bmask; if (entryoffsetinblock != 0 && (error = ufs_blkatoff(vdp, (off_t)results->ulr_offset, NULL, &bp, false))) goto out; numdirpasses = 2; namecache_count_2passes(); } prevoff = results->ulr_offset; endsearch = roundup(dp->i_size, dirblksiz); enduseful = 0; searchloop: while (results->ulr_offset < endsearch) { preempt_point(); /* * If necessary, get the next directory block. 
*/ if ((results->ulr_offset & bmask) == 0) { if (bp != NULL) brelse(bp, 0); error = ufs_blkatoff(vdp, (off_t)results->ulr_offset, NULL, &bp, false); if (error) goto out; entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZ * boundary, have to start looking for free space again. */ if (slot.status == NONE && (entryoffsetinblock & (dirblksiz - 1)) == 0) { slot.offset = -1; slot.freespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ KASSERT(bp != NULL); ep = (void *)((char *)bp->b_data + entryoffsetinblock); const char *msg; reclen = ufs_rw16(ep->d_reclen, needswap); if ((reclen == 0 && (msg = "null entry")) || (dirchk && (msg = ufs_dirbadentry(vdp, ep, entryoffsetinblock)))) { ufs_dirbad(dp, results->ulr_offset, msg); reclen = dirblksiz - (entryoffsetinblock & (dirblksiz - 1)); goto next; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slot.status != FOUND) { int size = reclen; if (ep->d_ino != 0) size -= UFS_DIRSIZ(fsfmt, ep, needswap); if (size > 0) slot_update(&slot, size, reclen, results->ulr_offset); } if (ep->d_ino == 0) goto next; /* * Check for a name match. */ const uint16_t namlen = NAMLEN(fsfmt, needswap, ep); if (namlen != cnp->cn_namelen || memcmp(cnp->cn_nameptr, ep->d_name, (size_t)namlen)) goto next; #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen, and release directory buffer. */ if (!fsfmt && ep->d_type == DT_WHT) { slot_white(&slot, reclen, results); /* * This is used to set results->ulr_endoff, which may * be used by ufs_direnter() as a length to truncate * the directory to. Therefore, it must point past the * end of the last non-empty directory entry. 
We don't * know where that is in this case, so we effectively * disable shrinking by using the existing size of the * directory. * * Note that we wouldn't expect to shrink the * directory while rewriting an existing entry anyway. */ enduseful = endsearch; cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } foundino = ufs_rw32(ep->d_ino, needswap); results->ulr_reclen = reclen; goto found; next: prevoff = results->ulr_offset; results->ulr_offset += reclen; entryoffsetinblock += reclen; if (ep->d_ino) enduseful = results->ulr_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; results->ulr_offset = 0; endsearch = results->ulr_diroff; goto searchloop; } if (bp != NULL) brelse(bp, 0); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (cnp->cn_flags & DOWHITEOUT) && (cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred); else error = VOP_ACCESS(vdp, VWRITE, cred); if (error) goto out; error = slot_estimate(&slot, dirblksiz, nameiop, prevoff, enduseful, dp, results); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * * NB - if the directory is unlocked, then this * information cannot be used. */ goto out; } /* * Insert name into cache (as non-existent) if appropriate. 
*/ if (nameiop != CREATE) { cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); } error = ENOENT; goto out; found: if (numdirpasses == 2) namecache_count_pass2(); /* * Check that directory length properly reflects presence * of this entry. */ const uint64_t newisize = results->ulr_offset + UFS_DIRSIZ(fsfmt, ep, needswap); if (newisize > dp->i_size) { ufs_dirbad(dp, results->ulr_offset, "i_size too small"); dp->i_size = newisize; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); } brelse(bp, 0); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) results->ulr_diroff = results->ulr_offset & ~(dirblksiz - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * Lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* * Return pointer to current entry in results->ulr_offset, * and distance past previous entry (if there * is a previous entry in this block) in results->ulr_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ calc_count(results, dirblksiz, prevoff); if ((error = ufs_getino(vdp, dp, foundino, &tdp, false)) != 0) goto out; if ((error = ufs_can_delete(tdp, vdp, dp, cred)) != 0) goto out; *vpp = tdp; goto out; } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && (flags & ISLASTCN)) { if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred); else error = VOP_ACCESS(vdp, VWRITE, cred); if (error) goto out; /* * Careful about locking second inode. * This can only occur if the target is ".". 
*/ if ((error = ufs_getino(vdp, dp, foundino, &tdp, true)) != 0) goto out; *vpp = tdp; goto out; } if ((error = ufs_getino(vdp, dp, foundino, &tdp, false)) != 0) goto out; *vpp = tdp; /* * Insert name into cache if appropriate. */ cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); error = 0; out: return error; } void ufs_dirbad(struct inode *ip, doff_t offset, const char *how) { struct mount *mp = ITOV(ip)->v_mount; void (*p)(const char *, ...) __printflike(1, 2) = (mp->mnt_flag & MNT_RDONLY) == 0 ? panic : printf; (*p)("%s: bad dir ino %ju at offset %d: %s\n", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, offset, how); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than FFS_MAXNAMLEN * name must be as long as advertised, and null terminated */ const char * ufs_dirbadentry(const struct vnode *dp, const struct direct *ep, int entryoffsetinblock) { const struct ufsmount *ump = VFSTOUFS(dp->v_mount); const int needswap = UFS_MPNEEDSWAP(ump); const int dirblksiz = ump->um_dirblksiz; const int maxsize = dirblksiz - (entryoffsetinblock & (dirblksiz - 1)); const int fsfmt = FSFMT(dp); const uint8_t namlen = NAMLEN(fsfmt, needswap, ep); const uint16_t reclen = ufs_rw16(ep->d_reclen, needswap); const int dirsiz = (int)UFS_DIRSIZ(fsfmt, ep, needswap); const char *name = ep->d_name; const char *str; #ifdef DIAGNOSTIC static char buf[512]; #endif if ((reclen & 0x3) != 0) str = "not rounded"; else if (reclen > maxsize) str = "too big"; else if (reclen < dirsiz) str = "too small"; #if FFS_MAXNAMLEN < 255 else if (namlen > FFS_MAXNAMLEN) str = "long name"; #endif else str = NULL; if (str) { #ifdef DIAGNOSTIC snprintf(buf, sizeof(buf), "Bad dir (%s), reclen=%#x, " "namlen=%d, dirsiz=%d <= reclen=%d <= maxsize=%d, " "flags=%#x, entryoffsetinblock=%d, dirblksiz=%d", str, reclen, namlen, dirsiz, 
reclen, maxsize, dp->v_mount->mnt_flag, entryoffsetinblock, dirblksiz); str = buf; #endif return str; } if (ep->d_ino == 0) return NULL; for (uint8_t i = 0; i < namlen; i++) if (name[i] == '\0') { str = "NUL in name"; #ifdef DIAGNOSTIC snprintf(buf, sizeof(buf), "%s [%s] i=%d, namlen=%d", str, name, i, namlen); str = buf; #endif return str; } if (name[namlen]) { str = "missing NUL in name"; #ifdef DIAGNOSTIC snprintf(buf, sizeof(buf), "%s [%*.*s] namlen=%d", str, namlen, namlen, name, namlen); str = buf; #endif return str; } return NULL; } /* * Construct a new directory entry after a call to namei, using the * name in the componentname argument cnp. The argument ip is the * inode to which the new directory entry will refer. */ void ufs_makedirentry(struct inode *ip, struct componentname *cnp, struct direct *newdirp) { size_t namelen = cnp->cn_namelen; newdirp->d_ino = ip->i_number; newdirp->d_namlen = namelen; memcpy(newdirp->d_name, cnp->cn_nameptr, namelen); /* NUL terminate and zero out padding */ memset(&newdirp->d_name[namelen], 0, UFS_NAMEPAD(namelen)); if (FSFMT(ITOV(ip))) newdirp->d_type = 0; else newdirp->d_type = IFTODT(ip->i_mode); } static int ufs_dirgrow(struct vnode *dvp, const struct ufs_lookup_results *ulr, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { const kauth_cred_t cr = cnp->cn_cred; const struct ufsmount *ump = VFSTOUFS(dvp->v_mount); const int needswap = UFS_MPNEEDSWAP(ump); const int dirblksiz = ump->um_dirblksiz; const int fsfmt = FSFMT(dvp); const u_int newentrysize = UFS_DIRSIZ(0, dirp, 0); struct inode *dp = VTOI(dvp); int error, ret, blkoff; struct timespec ts; struct buf *bp; /* * If ulr_count is 0, then namei could find no * space in the directory. Here, ulr_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. 
*/ if (ulr->ulr_offset & (dirblksiz - 1)) panic("%s: newblk", __func__); if ((error = UFS_BALLOC(dvp, (off_t)ulr->ulr_offset, dirblksiz, cr, B_CLRBUF | B_SYNC, &bp)) != 0) { return error; } dp->i_size = ulr->ulr_offset + dirblksiz; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, dp->i_size); dirp->d_reclen = ufs_rw16(dirblksiz, needswap); dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); if (fsfmt && ENDIANSWAP(needswap)) ufs_dirswap(dirp); blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); memcpy((char *)bp->b_data + blkoff, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, ulr->ulr_offset); ufsdirhash_add(dp, dirp, ulr->ulr_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, ulr->ulr_offset); } #endif error = VOP_BWRITE(bp->b_vp, bp); vfs_timestamp(&ts); ret = UFS_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); if (error == 0) return ret; return error; } static int #if __GNUC_PREREQ__(5, 3) /* This gets miscompiled by gcc 5.3 PR/51094 */ __attribute__((__optimize__("no-tree-vrp"))) #endif ufs_dircompact(struct vnode *dvp, const struct ufs_lookup_results *ulr, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { const struct ufsmount *ump = VFSTOUFS(dvp->v_mount); const int needswap = UFS_MPNEEDSWAP(ump); const int fsfmt = FSFMT(dvp); const u_int newentrysize = UFS_DIRSIZ(0, dirp, 0); struct inode *dp = VTOI(dvp); struct buf *bp; u_int dsize; struct direct *ep, *nep; int error, loc, spacefree; char *dirbuf; uint16_t reclen; UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); /* * If ulr_count is non-zero, then namei found space for the new * entry in the range ulr_offset to ulr_offset + ulr_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. 
*/ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZ. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { #ifdef DIAGNOSTIC printf("%s: reached 4.2-only block, not supposed to happen\n", __func__); #endif dp->i_size = ulr->ulr_offset + ulr->ulr_count; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); } /* * Get the block containing the space for the new directory entry. */ error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); if (error) return error; /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region ulr_offset to * ulr_offset + ulr_count would yield the space. */ ep = (void *)dirbuf; dsize = (ep->d_ino != 0) ? UFS_DIRSIZ(fsfmt, ep, needswap) : 0; reclen = ufs_rw16(ep->d_reclen, needswap); spacefree = reclen - dsize; for (loc = reclen; loc < ulr->ulr_count; ) { nep = (void *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = ufs_rw16(dsize, needswap); ep = (void *)((char *)ep + dsize); reclen = ufs_rw16(nep->d_reclen, needswap); loc += reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't memcpy it. 
*/ spacefree += reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = UFS_DIRSIZ(fsfmt, nep, needswap); spacefree += reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, ulr->ulr_offset + ((char *)nep - dirbuf), ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy(ep, nep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0 || (ufs_rw32(ep->d_ino, needswap) == UFS_WINO && memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("%s: too big", __func__); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("%s: nospace", __func__); dirp->d_reclen = spacefree; ep->d_reclen = ufs_rw16(dsize, needswap); ep = (void *)((char *)ep + dsize); } dirp->d_reclen = ufs_rw16(dirp->d_reclen, needswap); dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); if (fsfmt && ENDIANSWAP(needswap)) ufs_dirswap(dirp); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy(ep, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { const int dirblkmsk = ump->um_dirblksiz - 1; ufsdirhash_checkblock(dp, dirbuf - (ulr->ulr_offset & dirblkmsk), ulr->ulr_offset & ~dirblkmsk); } #endif error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. 
Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { const kauth_cred_t cr = cnp->cn_cred; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, ulr->ulr_endoff); #endif (void) UFS_TRUNCATE(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); } UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); return error; } /* * Write a directory entry after a call to namei, using the parameters * that ufs_lookup left in nameidata and in the ufs_lookup_results. * * DVP is the directory to be updated. It must be locked. * ULR is the ufs_lookup_results structure from the final lookup step. * TVP is not used. (XXX: why is it here? remove it) * DIRP is the new directory entry contents. * CNP is the componentname from the final lookup step. * NEWDIRBP is not used and (XXX) should be removed. The previous * comment here said it was used by the now-removed softupdates code. * * The link count of the target inode is *not* incremented; the * caller does that. * * If ulr->ulr_count is 0, ufs_lookup did not find space to insert the * directory entry. ulr_offset, which is the place to put the entry, * should be on a block boundary (and should be at the end of the * directory AFAIK) and a fresh block is allocated to put the new * directory entry in. * * If ulr->ulr_count is not zero, ufs_lookup found a slot to insert * the entry into. This slot ranges from ulr_offset to ulr_offset + * ulr_count. However, this slot may already be partially populated * requiring compaction. See notes below. * * Furthermore, if ulr_count is not zero and ulr_endoff is not the * same as i_size, the directory is truncated to size ulr_endoff. 
*/ int ufs_direnter(struct vnode *dvp, const struct ufs_lookup_results *ulr, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { if (ulr->ulr_count == 0) return ufs_dirgrow(dvp, ulr, tvp, dirp, cnp, newdirbp); else return ufs_dircompact(dvp, ulr, tvp, dirp, cnp, newdirbp); } /* * Remove a directory entry after a call to namei, using the * parameters that ufs_lookup left in nameidata and in the * ufs_lookup_results. * * DVP is the directory to be updated. It must be locked. * ULR is the ufs_lookup_results structure from the final lookup step. * IP, if not null, is the inode being unlinked. * FLAGS may contain DOWHITEOUT. * ISRMDIR is not used and (XXX) should be removed. * * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout * instead of being cleared. * * ulr->ulr_offset contains the position of the directory entry * to be removed. * * ulr->ulr_reclen contains the size of the directory entry to be * removed. * * ulr->ulr_count contains the size of the *previous* directory * entry. This allows finding it, for free space management. If * ulr_count is 0, the target entry is at the beginning of the * directory. (Does this ever happen? The first entry should be ".", * which should only be removed at rmdir time. Does rmdir come here * to clear out the "." and ".." entries? Perhaps, but I doubt it.) * * The space is marked free by adding it to the record length (not * name length) of the preceding entry. If the first entry becomes * free, it is marked free by setting the inode number to 0. * * The link count of IP is decremented. Note that this is not the * inverse behavior of ufs_direnter, which does not adjust link * counts. Sigh. 
*/ int ufs_dirremove(struct vnode *dvp, const struct ufs_lookup_results *ulr, struct inode *ip, int flags, int isrmdir) { struct inode *dp = VTOI(dvp); struct direct *ep; struct buf *bp; int error; const int needswap = UFS_MPNEEDSWAP(dp->i_ump); uint16_t reclen; UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to UFS_WINO. */ error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, &ep, &bp, true); if (error) return (error); ep->d_ino = ufs_rw32(UFS_WINO, needswap); ep->d_type = DT_WHT; goto out; } if ((error = ufs_blkatoff(dvp, (off_t)(ulr->ulr_offset - ulr->ulr_count), &ep, &bp, true)) != 0) return (error); reclen = ufs_rw16(ep->d_reclen, needswap); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when ulr_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : (void *)((char *)ep + reclen), ulr->ulr_offset); #endif if (ulr->ulr_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen = ufs_rw16(reclen + ulr->ulr_reclen, needswap); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { int dirblksiz = ip->i_ump->um_dirblksiz; ufsdirhash_checkblock(dp, (char *)ep - ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), ulr->ulr_offset & ~(dirblksiz - 1)); } #endif out: if (ip) { ip->i_nlink--; DIP_ASSIGN(ip, nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0); } /* * XXX did it ever occur to anyone that it might be a good * idea to restore ip->i_nlink if this fails? Or something? * Currently on error return from this function the state of * ip->i_nlink depends on what happened, and callers * definitely do not take this into account. 
*/ error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_nlink == 0) UFS_SNAPGONE(ITOV(ip)); UFS_WAPBL_UPDATE(dvp, NULL, NULL, 0); return (error); } /* * Rewrite an existing directory entry to point at the inode supplied. * * DP is the directory to update. * OFFSET is the position of the entry in question. It may come * from ulr_offset of a ufs_lookup_results. * OIP is the old inode the directory previously pointed to. * NEWINUM is the number of the new inode. * NEWTYPE is the new value for the type field of the directory entry. * (This is ignored if the fs doesn't support that.) * ISRMDIR is not used and (XXX) should be removed. * IFLAGS are added to DP's inode flags. * * The link count of OIP is decremented. Note that the link count of * the new inode is *not* incremented. Yay for symmetry. */ int ufs_dirrewrite(struct inode *dp, off_t offset, struct inode *oip, ino_t newinum, int newtype, int isrmdir, int iflags) { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; error = ufs_blkatoff(vdp, offset, &ep, &bp, true); if (error) return (error); ep->d_ino = ufs_rw32(newinum, UFS_MPNEEDSWAP(dp->i_ump)); if (!FSFMT(vdp)) ep->d_type = newtype; oip->i_nlink--; DIP_ASSIGN(oip, nlink, oip->i_nlink); oip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP); error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= iflags; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) UFS_SNAPGONE(ITOV(oip)); UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. 
* * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. */ int ufs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) { doff_t off; struct dirtemplate dbuf; struct direct *dp = (void *)&dbuf; int error; size_t count; const int needswap = UFS_IPNEEDSWAP(ip); const int fsfmt = FSFMT(ITOV(ip)); #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += ufs_rw16(dp->d_reclen, needswap)) { error = ufs_bufio(UIO_READ, ITOV(ip), dp, MINDIRSIZ, off, IO_NODELOCKED, cred, &count, NULL); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ ino_t ino = ufs_rw32(dp->d_ino, needswap); if (ino == 0 || ino == UFS_WINO) continue; /* accept only "." and ".." */ const uint8_t namlen = NAMLEN(fsfmt, needswap, dp); if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && ino == ip->i_number) continue; if (dp->d_name[1] == '.' && ino == parentino) continue; return (0); } return (1); } #define UFS_DIRRABLKS 0 int ufs_dirrablks = UFS_DIRRABLKS; /* * ufs_blkatoff: Return buffer with the contents of block "offset" from * the beginning of directory "vp". If "res" is non-NULL, fill it in with * a pointer to the remaining space in the directory. If the caller intends * to modify the buffer returned, "modify" must be true. 
 */
int
ufs_blkatoff(struct vnode *vp, off_t offset, void *v, struct buf **bpp,
    bool modify)
{
	char **res = v;
	struct inode *ip __diagused;
	struct buf *bp;
	daddr_t lbn;
	const int dirrablks = ufs_dirrablks;
	daddr_t *blks;
	int *blksizes;
	int run, error;
	struct mount *mp = vp->v_mount;
	const int bshift = mp->mnt_fs_bshift;
	const int bsize = 1 << bshift;
	off_t eof;

	/* Scratch arrays for the main block plus read-ahead blocks. */
	blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
	blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
	ip = VTOI(vp);
	KASSERT(vp->v_size == ip->i_size);
	GOP_SIZE(vp, vp->v_size, &eof, 0);
	lbn = offset >> bshift;
	/*
	 * Build a run of up to 1 + dirrablks consecutive logical blocks,
	 * stopping at EOF or at the first partial block.
	 */
	for (run = 0; run <= dirrablks;) {
		const off_t curoff = lbn << bshift;
		const int size = MIN(eof - curoff, bsize);

		if (size == 0) {
			break;
		}
		KASSERT(curoff < eof);
		blks[run] = lbn;
		blksizes[run] = size;
		lbn++;
		run++;
		if (size != bsize) {
			/* partial (last) block: no further read-ahead */
			break;
		}
	}
	KASSERT(run >= 1);
	error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1],
	    run - 1, (modify ? B_MODIFY : 0), &bp);
	if (error != 0) {
		*bpp = NULL;
		goto out;
	}
	if (res) {
		/* Point RES at OFFSET's position within the block. */
		*res = (char *)bp->b_data + (offset & (bsize - 1));
	}
	*bpp = bp;

 out:
	kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
	kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
	return error;
}
|
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 | /* $OpenBSD: if_rum.c,v 1.40 2006/09/18 16:20:20 damien Exp $ */ /* $NetBSD: if_rum.c,v 1.70 2022/08/12 19:13:36 riastradh Exp $ */ /*- * Copyright (c) 2005-2007 Damien Bergamini <damien.bergamini@free.fr> * Copyright (c) 2006 Niall O'Higgins <niallo@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ /*- * Ralink Technology RT2501USB/RT2601USB chipset driver * http://www.ralinktech.com.tw/ */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: if_rum.c,v 1.70 2022/08/12 19:13:36 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/sockio.h> #include <sys/sysctl.h> #include <sys/mbuf.h> #include <sys/kernel.h> #include <sys/socket.h> #include <sys/systm.h> #include <sys/module.h> #include <sys/conf.h> #include <sys/device.h> #include <sys/bus.h> #include <machine/endian.h> #include <sys/intr.h> #include <net/bpf.h> #include <net/if.h> #include <net/if_arp.h> #include <net/if_dl.h> #include <net/if_ether.h> #include <net/if_media.h> #include <net/if_types.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> #include <net80211/ieee80211_netbsd.h> #include <net80211/ieee80211_var.h> #include <net80211/ieee80211_amrr.h> #include <net80211/ieee80211_radiotap.h> #include <dev/firmload.h> #include <dev/usb/usb.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/if_rumreg.h> #include <dev/usb/if_rumvar.h> #ifdef RUM_DEBUG #define DPRINTF(x) do { if (rum_debug) printf x; } while (0) #define DPRINTFN(n, x) do { if (rum_debug >= (n)) printf x; } while (0) int rum_debug = 1; #else #define DPRINTF(x) #define DPRINTFN(n, x) #endif /* various supported device vendors/products */ static const struct usb_devno rum_devs[] = { { USB_VENDOR_ABOCOM, USB_PRODUCT_ABOCOM_HWU54DM }, { USB_VENDOR_ABOCOM, USB_PRODUCT_ABOCOM_RT2573_2 }, { USB_VENDOR_ABOCOM, USB_PRODUCT_ABOCOM_RT2573_3 }, { USB_VENDOR_ABOCOM, USB_PRODUCT_ABOCOM_RT2573_4 }, { USB_VENDOR_ABOCOM, USB_PRODUCT_ABOCOM_WUG2700 }, { USB_VENDOR_AMIT, USB_PRODUCT_AMIT_CGWLUSB2GO }, { USB_VENDOR_ASUSTEK, USB_PRODUCT_ASUSTEK_WL167G_2 }, { USB_VENDOR_ASUSTEK, USB_PRODUCT_ASUSTEK_WL167G_3 }, { USB_VENDOR_BELKIN, USB_PRODUCT_BELKIN_F5D7050A }, { USB_VENDOR_BELKIN, 
USB_PRODUCT_BELKIN_F5D9050V3 }, { USB_VENDOR_BELKIN, USB_PRODUCT_BELKIN_F5D9050C }, { USB_VENDOR_CISCOLINKSYS, USB_PRODUCT_CISCOLINKSYS_WUSB200 }, { USB_VENDOR_CISCOLINKSYS, USB_PRODUCT_CISCOLINKSYS_WUSB54GC }, { USB_VENDOR_CISCOLINKSYS, USB_PRODUCT_CISCOLINKSYS_WUSB54GR }, { USB_VENDOR_CONCEPTRONIC, USB_PRODUCT_CONCEPTRONIC_C54RU2 }, { USB_VENDOR_CONCEPTRONIC, USB_PRODUCT_CONCEPTRONIC_RT2573 }, { USB_VENDOR_COREGA, USB_PRODUCT_COREGA_CGWLUSB2GL }, { USB_VENDOR_COREGA, USB_PRODUCT_COREGA_CGWLUSB2GPX }, { USB_VENDOR_DICKSMITH, USB_PRODUCT_DICKSMITH_CWD854F }, { USB_VENDOR_DICKSMITH, USB_PRODUCT_DICKSMITH_RT2573 }, { USB_VENDOR_DLINK2, USB_PRODUCT_DLINK2_DWLG122C1 }, { USB_VENDOR_DLINK2, USB_PRODUCT_DLINK2_WUA1340 }, { USB_VENDOR_DLINK2, USB_PRODUCT_DLINK2_DWA110 }, { USB_VENDOR_DLINK2, USB_PRODUCT_DLINK2_DWA111 }, { USB_VENDOR_EDIMAX, USB_PRODUCT_EDIMAX_EW7318 }, { USB_VENDOR_EDIMAX, USB_PRODUCT_EDIMAX_EW7618 }, { USB_VENDOR_GIGABYTE, USB_PRODUCT_GIGABYTE_GNWB01GS }, { USB_VENDOR_GIGABYTE, USB_PRODUCT_GIGABYTE_GNWI05GS }, { USB_VENDOR_GIGASET, USB_PRODUCT_GIGASET_RT2573 }, { USB_VENDOR_GOODWAY, USB_PRODUCT_GOODWAY_RT2573 }, { USB_VENDOR_GUILLEMOT, USB_PRODUCT_GUILLEMOT_HWGUSB254LB }, { USB_VENDOR_GUILLEMOT, USB_PRODUCT_GUILLEMOT_HWGUSB254V2AP }, { USB_VENDOR_HUAWEI3COM, USB_PRODUCT_HUAWEI3COM_RT2573 }, { USB_VENDOR_MELCO, USB_PRODUCT_MELCO_G54HP }, { USB_VENDOR_MELCO, USB_PRODUCT_MELCO_SG54HP }, { USB_VENDOR_MELCO, USB_PRODUCT_MELCO_SG54HG }, { USB_VENDOR_MELCO, USB_PRODUCT_MELCO_WLIUCG }, { USB_VENDOR_MSI, USB_PRODUCT_MSI_RT2573 }, { USB_VENDOR_MSI, USB_PRODUCT_MSI_RT2573_2 }, { USB_VENDOR_MSI, USB_PRODUCT_MSI_RT2573_3 }, { USB_VENDOR_MSI, USB_PRODUCT_MSI_RT2573_4 }, { USB_VENDOR_NOVATECH, USB_PRODUCT_NOVATECH_RT2573 }, { USB_VENDOR_PLANEX2, USB_PRODUCT_PLANEX2_GWUS54HP }, { USB_VENDOR_PLANEX2, USB_PRODUCT_PLANEX2_GWUS54MINI2 }, { USB_VENDOR_PLANEX2, USB_PRODUCT_PLANEX2_GWUSMM }, { USB_VENDOR_QCOM, USB_PRODUCT_QCOM_RT2573 }, { USB_VENDOR_QCOM, 
USB_PRODUCT_QCOM_RT2573_2 }, { USB_VENDOR_QCOM, USB_PRODUCT_QCOM_RT2573_3 }, { USB_VENDOR_RALINK, USB_PRODUCT_RALINK_RT2573 }, { USB_VENDOR_RALINK, USB_PRODUCT_RALINK_RT2671 }, { USB_VENDOR_SITECOMEU, USB_PRODUCT_SITECOMEU_WL113R2 }, { USB_VENDOR_SITECOMEU, USB_PRODUCT_SITECOMEU_WL172 }, { USB_VENDOR_SPARKLAN, USB_PRODUCT_SPARKLAN_RT2573 }, { USB_VENDOR_SURECOM, USB_PRODUCT_SURECOM_RT2573 }, { USB_VENDOR_SYNET, USB_PRODUCT_SYNET_MWP54SS }, { USB_VENDOR_ZYXEL, USB_PRODUCT_ZYXEL_RT2573 } }; static int rum_attachhook(void *); static int rum_alloc_tx_list(struct rum_softc *); static void rum_free_tx_list(struct rum_softc *); static int rum_alloc_rx_list(struct rum_softc *); static void rum_free_rx_list(struct rum_softc *); static int rum_media_change(struct ifnet *); static void rum_next_scan(void *); static void rum_task(void *); static int rum_newstate(struct ieee80211com *, enum ieee80211_state, int); static void rum_txeof(struct usbd_xfer *, void *, usbd_status); static void rum_rxeof(struct usbd_xfer *, void *, usbd_status); static uint8_t rum_rxrate(const struct rum_rx_desc *); static int rum_ack_rate(struct ieee80211com *, int); static uint16_t rum_txtime(int, int, uint32_t); static uint8_t rum_plcp_signal(int); static void rum_setup_tx_desc(struct rum_softc *, struct rum_tx_desc *, uint32_t, uint16_t, int, int); static int rum_tx_data(struct rum_softc *, struct mbuf *, struct ieee80211_node *); static void rum_start(struct ifnet *); static void rum_watchdog(struct ifnet *); static int rum_ioctl(struct ifnet *, u_long, void *); static void rum_eeprom_read(struct rum_softc *, uint16_t, void *, int); static uint32_t rum_read(struct rum_softc *, uint16_t); static void rum_read_multi(struct rum_softc *, uint16_t, void *, int); static void rum_write(struct rum_softc *, uint16_t, uint32_t); static void rum_write_multi(struct rum_softc *, uint16_t, void *, size_t); static void rum_bbp_write(struct rum_softc *, uint8_t, uint8_t); static uint8_t rum_bbp_read(struct 
rum_softc *, uint8_t); static void rum_rf_write(struct rum_softc *, uint8_t, uint32_t); static void rum_select_antenna(struct rum_softc *); static void rum_enable_mrr(struct rum_softc *); static void rum_set_txpreamble(struct rum_softc *); static void rum_set_basicrates(struct rum_softc *); static void rum_select_band(struct rum_softc *, struct ieee80211_channel *); static void rum_set_chan(struct rum_softc *, struct ieee80211_channel *); static void rum_enable_tsf_sync(struct rum_softc *); static void rum_update_slot(struct rum_softc *); static void rum_set_bssid(struct rum_softc *, const uint8_t *); static void rum_set_macaddr(struct rum_softc *, const uint8_t *); static void rum_update_promisc(struct rum_softc *); static const char *rum_get_rf(int); static void rum_read_eeprom(struct rum_softc *); static int rum_bbp_init(struct rum_softc *); static int rum_init(struct ifnet *); static void rum_stop(struct ifnet *, int); static int rum_load_microcode(struct rum_softc *, const u_char *, size_t); static int rum_prepare_beacon(struct rum_softc *); static void rum_newassoc(struct ieee80211_node *, int); static void rum_amrr_start(struct rum_softc *, struct ieee80211_node *); static void rum_amrr_timeout(void *); static void rum_amrr_update(struct usbd_xfer *, void *, usbd_status); static const struct { uint32_t reg; uint32_t val; } rum_def_mac[] = { RT2573_DEF_MAC }; static const struct { uint8_t reg; uint8_t val; } rum_def_bbp[] = { RT2573_DEF_BBP }; static const struct rfprog { uint8_t chan; uint32_t r1, r2, r3, r4; } rum_rf5226[] = { RT2573_RF5226 }, rum_rf5225[] = { RT2573_RF5225 }; static int rum_match(device_t, cfdata_t, void *); static void rum_attach(device_t, device_t, void *); static int rum_detach(device_t, int); static int rum_activate(device_t, enum devact); CFATTACH_DECL_NEW(rum, sizeof(struct rum_softc), rum_match, rum_attach, rum_detach, rum_activate); static int rum_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; 
	return (usb_lookup(rum_devs, uaa->uaa_vendor, uaa->uaa_product) != NULL) ?
	    UMATCH_VENDOR_PRODUCT : UMATCH_NONE;
}

/*
 * Load the "rum-rt2573" firmware image via firmload(9) and feed it to
 * the device's 8051 microcontroller.  On success RT2573_FWLOADED is set
 * in sc_flags; returns 0, or an errno on failure.
 */
static int
rum_attachhook(void *xsc)
{
	struct rum_softc *sc = xsc;
	firmware_handle_t fwh;
	const char *name = "rum-rt2573";
	u_char *ucode;
	size_t size;
	int error;

	if ((error = firmware_open("rum", name, &fwh)) != 0) {
		printf("%s: failed firmware_open of file %s (error %d)\n",
		    device_xname(sc->sc_dev), name, error);
		return error;
	}
	size = firmware_get_size(fwh);
	ucode = firmware_malloc(size);
	if (ucode == NULL) {
		printf("%s: failed to allocate firmware memory\n",
		    device_xname(sc->sc_dev));
		firmware_close(fwh);
		return ENOMEM;
	}
	error = firmware_read(fwh, 0, ucode, size);
	/* Handle is no longer needed once the image is in memory. */
	firmware_close(fwh);
	if (error != 0) {
		printf("%s: failed to read firmware (error %d)\n",
		    device_xname(sc->sc_dev), error);
		firmware_free(ucode, size);
		return error;
	}

	if (rum_load_microcode(sc, ucode, size) != 0) {
		printf("%s: could not load 8051 microcode\n",
		    device_xname(sc->sc_dev));
		firmware_free(ucode, size);
		return ENXIO;
	}

	firmware_free(ucode, size);
	sc->sc_flags |= RT2573_FWLOADED;

	return 0;
}

static void
rum_attach(device_t parent, device_t self, void *aux)
{
	struct rum_softc *sc = device_private(self);
	struct usb_attach_arg *uaa = aux;
	struct ieee80211com *ic = &sc->sc_ic;
	struct ifnet *ifp = &sc->sc_if;
	usb_interface_descriptor_t *id;
	usb_endpoint_descriptor_t *ed;
	usbd_status error;
	char *devinfop;
	int i, ntries;
	uint32_t tmp;

	sc->sc_dev = self;
	sc->sc_udev = uaa->uaa_device;
	sc->sc_flags = 0;

	aprint_naive("\n");
	aprint_normal("\n");

	devinfop = usbd_devinfo_alloc(sc->sc_udev, 0);
	aprint_normal_dev(self, "%s\n", devinfop);
	usbd_devinfo_free(devinfop);

	error = usbd_set_config_no(sc->sc_udev, RT2573_CONFIG_NO, 0);
	if (error != 0) {
		aprint_error_dev(self, "failed to set configuration"
		    ", err=%s\n", usbd_errstr(error));
		return;
	}

	/* get the first interface handle */
	error = usbd_device2interface_handle(sc->sc_udev, RT2573_IFACE_INDEX,
	    &sc->sc_iface);
	if (error != 0) {
aprint_error_dev(self, "could not get interface handle\n"); return; } /* * Find endpoints. */ id = usbd_get_interface_descriptor(sc->sc_iface); sc->sc_rx_no = sc->sc_tx_no = -1; for (i = 0; i < id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(sc->sc_iface, i); if (ed == NULL) { aprint_error_dev(self, "no endpoint descriptor for iface %d\n", i); return; } if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN && UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) sc->sc_rx_no = ed->bEndpointAddress; else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT && UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) sc->sc_tx_no = ed->bEndpointAddress; } if (sc->sc_rx_no == -1 || sc->sc_tx_no == -1) { aprint_error_dev(self, "missing endpoint\n"); return; } usb_init_task(&sc->sc_task, rum_task, sc, 0); callout_init(&sc->sc_scan_ch, 0); sc->amrr.amrr_min_success_threshold = 1; sc->amrr.amrr_max_success_threshold = 10; callout_init(&sc->sc_amrr_ch, 0); /* retrieve RT2573 rev. no */ for (ntries = 0; ntries < 1000; ntries++) { if ((tmp = rum_read(sc, RT2573_MAC_CSR0)) != 0) break; DELAY(1000); } if (ntries == 1000) { aprint_error_dev(self, "timeout waiting for chip to settle\n"); return; } /* retrieve MAC address and various other things from EEPROM */ rum_read_eeprom(sc); aprint_normal_dev(self, "MAC/BBP RT%04x (rev 0x%05x), RF %s, address %s\n", sc->macbbp_rev, tmp, rum_get_rf(sc->rf_rev), ether_sprintf(ic->ic_myaddr)); ic->ic_ifp = ifp; ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ ic->ic_state = IEEE80211_S_INIT; /* set device capabilities */ ic->ic_caps = IEEE80211_C_IBSS | /* IBSS mode supported */ IEEE80211_C_MONITOR | /* monitor mode supported */ IEEE80211_C_HOSTAP | /* HostAp mode supported */ IEEE80211_C_TXPMGT | /* tx power management */ IEEE80211_C_SHPREAMBLE | /* short preamble supported */ IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_WPA; /* 802.11i */ if (sc->rf_rev == 
RT2573_RF_5225 || sc->rf_rev == RT2573_RF_5226) { /* set supported .11a rates */ ic->ic_sup_rates[IEEE80211_MODE_11A] = ieee80211_std_rateset_11a; /* set supported .11a channels */ for (i = 34; i <= 46; i += 4) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_5GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_A; } for (i = 36; i <= 64; i += 4) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_5GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_A; } for (i = 100; i <= 140; i += 4) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_5GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_A; } for (i = 149; i <= 165; i += 4) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_5GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_A; } } /* set supported .11b and .11g rates */ ic->ic_sup_rates[IEEE80211_MODE_11B] = ieee80211_std_rateset_11b; ic->ic_sup_rates[IEEE80211_MODE_11G] = ieee80211_std_rateset_11g; /* set supported .11b and .11g channels (1 through 14) */ for (i = 1; i <= 14; i++) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_2GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_CCK | IEEE80211_CHAN_OFDM | IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; } ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = rum_init; ifp->if_ioctl = rum_ioctl; ifp->if_start = rum_start; ifp->if_watchdog = rum_watchdog; IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); IFQ_SET_READY(&ifp->if_snd); memcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ); if_attach(ifp); ieee80211_ifattach(ic); ic->ic_newassoc = rum_newassoc; /* override state transition machine */ sc->sc_newstate = ic->ic_newstate; ic->ic_newstate = rum_newstate; /* XXX media locking needs revisiting */ mutex_init(&sc->sc_media_mtx, MUTEX_DEFAULT, IPL_SOFTUSB); ieee80211_media_init_with_lock(ic, rum_media_change, ieee80211_media_status, &sc->sc_media_mtx); bpf_attach2(ifp, DLT_IEEE802_11_RADIO, 
sizeof(struct ieee80211_frame) + IEEE80211_RADIOTAP_HDRLEN, &sc->sc_drvbpf); sc->sc_rxtap_len = sizeof(sc->sc_rxtapu); sc->sc_rxtap.wr_ihdr.it_len = htole16(sc->sc_rxtap_len); sc->sc_rxtap.wr_ihdr.it_present = htole32(RT2573_RX_RADIOTAP_PRESENT); sc->sc_txtap_len = sizeof(sc->sc_txtapu); sc->sc_txtap.wt_ihdr.it_len = htole16(sc->sc_txtap_len); sc->sc_txtap.wt_ihdr.it_present = htole32(RT2573_TX_RADIOTAP_PRESENT); ieee80211_announce(ic); usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); return; } static int rum_detach(device_t self, int flags) { struct rum_softc *sc = device_private(self); struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &sc->sc_if; int s; if (!ifp->if_softc) return 0; pmf_device_deregister(self); s = splusb(); rum_stop(ifp, 1); callout_halt(&sc->sc_scan_ch, NULL); callout_halt(&sc->sc_amrr_ch, NULL); usb_rem_task_wait(sc->sc_udev, &sc->sc_task, USB_TASKQ_DRIVER, NULL); bpf_detach(ifp); ieee80211_ifdetach(ic); /* free all nodes */ if_detach(ifp); splx(s); usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev); return 0; } static int rum_alloc_tx_list(struct rum_softc *sc) { struct rum_tx_data *data; int i, error; sc->tx_cur = sc->tx_queued = 0; for (i = 0; i < RUM_TX_LIST_COUNT; i++) { data = &sc->tx_data[i]; data->sc = sc; error = usbd_create_xfer(sc->sc_tx_pipeh, RT2573_TX_DESC_SIZE + IEEE80211_MAX_LEN, USBD_FORCE_SHORT_XFER, 0, &data->xfer); if (error) { printf("%s: could not allocate tx xfer\n", device_xname(sc->sc_dev)); goto fail; } data->buf = usbd_get_buffer(data->xfer); /* clean Tx descriptor */ memset(data->buf, 0, RT2573_TX_DESC_SIZE); } return 0; fail: rum_free_tx_list(sc); return error; } static void rum_free_tx_list(struct rum_softc *sc) { struct rum_tx_data *data; int i; for (i = 0; i < RUM_TX_LIST_COUNT; i++) { data = &sc->tx_data[i]; if (data->xfer != NULL) { 
			usbd_destroy_xfer(data->xfer);
			data->xfer = NULL;
		}
		if (data->ni != NULL) {
			ieee80211_free_node(data->ni);
			data->ni = NULL;
		}
	}
}

/*
 * Allocate one USB xfer and one mbuf cluster per Rx slot.
 * On any failure, release whatever was allocated so far and
 * return the errno.
 */
static int
rum_alloc_rx_list(struct rum_softc *sc)
{
	struct rum_rx_data *data;
	int i, error;

	for (i = 0; i < RUM_RX_LIST_COUNT; i++) {
		data = &sc->rx_data[i];

		data->sc = sc;

		error = usbd_create_xfer(sc->sc_rx_pipeh, MCLBYTES,
		    0, 0, &data->xfer);
		if (error) {
			printf("%s: could not allocate rx xfer\n",
			    device_xname(sc->sc_dev));
			goto fail;
		}
		MGETHDR(data->m, M_DONTWAIT, MT_DATA);
		if (data->m == NULL) {
			printf("%s: could not allocate rx mbuf\n",
			    device_xname(sc->sc_dev));
			error = ENOMEM;
			goto fail;
		}
		MCLGET(data->m, M_DONTWAIT);
		if (!(data->m->m_flags & M_EXT)) {
			printf("%s: could not allocate rx mbuf cluster\n",
			    device_xname(sc->sc_dev));
			error = ENOMEM;
			goto fail;
		}
		/* Receive directly into the mbuf cluster. */
		data->buf = mtod(data->m, uint8_t *);
	}
	return 0;

fail:	rum_free_rx_list(sc);
	return error;
}

/* Release the Rx xfers and mbufs set up by rum_alloc_rx_list(). */
static void
rum_free_rx_list(struct rum_softc *sc)
{
	struct rum_rx_data *data;
	int i;

	for (i = 0; i < RUM_RX_LIST_COUNT; i++) {
		data = &sc->rx_data[i];

		if (data->xfer != NULL) {
			usbd_destroy_xfer(data->xfer);
			data->xfer = NULL;
		}
		if (data->m != NULL) {
			m_freem(data->m);
			data->m = NULL;
		}
	}
}

static int
rum_media_change(struct ifnet *ifp)
{
	int error;

	error = ieee80211_media_change(ifp);
	if (error != ENETRESET)
		return error;

	/* Only restart the hardware if the interface is up and running. */
	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) ==
	    (IFF_UP | IFF_RUNNING))
		rum_init(ifp);

	return 0;
}

/*
 * This function is called periodically (every 200ms) during scanning to
 * switch from one channel to another.
 */
static void
rum_next_scan(void *arg)
{
	struct rum_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;
	int s;

	s = splnet();
	if (ic->ic_state == IEEE80211_S_SCAN)
		ieee80211_next_scan(ic);
	splx(s);
}

/*
 * USB task: perform the 802.11 state transition recorded by rum_newstate().
 * Device I/O can sleep, so it runs here rather than in net80211's caller.
 * Finishes by invoking the saved net80211 newstate handler.
 */
static void
rum_task(void *arg)
{
	struct rum_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;
	enum ieee80211_state ostate;
	struct ieee80211_node *ni;
	uint32_t tmp;

	ostate = ic->ic_state;

	switch (sc->sc_state) {
	case IEEE80211_S_INIT:
		if (ostate == IEEE80211_S_RUN) {
			/* abort TSF synchronization */
			tmp = rum_read(sc, RT2573_TXRX_CSR9);
			rum_write(sc, RT2573_TXRX_CSR9, tmp & ~0x00ffffff);
		}
		break;

	case IEEE80211_S_SCAN:
		rum_set_chan(sc, ic->ic_curchan);
		/* hop to the next channel in 200ms (hz / 5) */
		callout_reset(&sc->sc_scan_ch, hz / 5, rum_next_scan, sc);
		break;

	case IEEE80211_S_AUTH:
		rum_set_chan(sc, ic->ic_curchan);
		break;

	case IEEE80211_S_ASSOC:
		rum_set_chan(sc, ic->ic_curchan);
		break;

	case IEEE80211_S_RUN:
		rum_set_chan(sc, ic->ic_curchan);

		ni = ic->ic_bss;

		if (ic->ic_opmode != IEEE80211_M_MONITOR) {
			rum_update_slot(sc);
			rum_enable_mrr(sc);
			rum_set_txpreamble(sc);
			rum_set_basicrates(sc);
			rum_set_bssid(sc, ni->ni_bssid);
		}

		if (ic->ic_opmode == IEEE80211_M_HOSTAP ||
		    ic->ic_opmode == IEEE80211_M_IBSS)
			rum_prepare_beacon(sc);

		if (ic->ic_opmode != IEEE80211_M_MONITOR)
			rum_enable_tsf_sync(sc);

		if (ic->ic_opmode == IEEE80211_M_STA) {
			/* fake a join to init the tx rate */
			rum_newassoc(ic->ic_bss, 1);

			/* enable automatic rate adaptation in STA mode */
			if (ic->ic_fixed_rate == IEEE80211_FIXED_RATE_NONE)
				rum_amrr_start(sc, ni);
		}
		break;
	}

	sc->sc_newstate(ic, sc->sc_state, sc->sc_arg);
}

/*
 * net80211 newstate hook: record the requested transition and defer the
 * actual device work to rum_task() in the USB task thread.
 */
static int
rum_newstate(struct ieee80211com *ic, enum ieee80211_state nstate, int arg)
{
	struct rum_softc *sc = ic->ic_ifp->if_softc;

	/*
	 * XXXSMP: This does not wait for the task, if it is in flight,
	 * to complete.  If this code works at all, it must rely on the
	 * kernel lock to serialize with the USB task thread.
	 */
	usb_rem_task(sc->sc_udev, &sc->sc_task);
	callout_stop(&sc->sc_scan_ch);
	callout_stop(&sc->sc_amrr_ch);

	/* do it in a process context */
	sc->sc_state = nstate;
	sc->sc_arg = arg;

	usb_add_task(sc->sc_udev, &sc->sc_task, USB_TASKQ_DRIVER);
	return 0;
}

/* quickly determine if a given rate is CCK or OFDM */
#define RUM_RATE_IS_OFDM(rate) ((rate) >= 12 && (rate) != 22)

#define RUM_ACK_SIZE 14	/* 10 + 4(FCS) */
#define RUM_CTS_SIZE 14	/* 10 + 4(FCS) */

/*
 * Tx bulk-transfer completion callback: release the node reference taken
 * at submit time, account the packet, and restart the transmit queue.
 */
static void
rum_txeof(struct usbd_xfer *xfer, void *priv, usbd_status status)
{
	struct rum_tx_data *data = priv;
	struct rum_softc *sc = data->sc;
	struct ifnet *ifp = &sc->sc_if;
	int s;

	if (status != USBD_NORMAL_COMPLETION) {
		/* silent on teardown; anything else is a real Tx error */
		if (status == USBD_NOT_STARTED || status == USBD_CANCELLED)
			return;

		printf("%s: could not transmit buffer: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(status));

		if (status == USBD_STALLED)
			usbd_clear_endpoint_stall_async(sc->sc_tx_pipeh);

		if_statinc(ifp, if_oerrors);
		return;
	}

	s = splnet();

	ieee80211_free_node(data->ni);
	data->ni = NULL;

	sc->tx_queued--;
	if_statinc(ifp, if_opackets);

	DPRINTFN(10, ("tx done\n"));

	sc->sc_tx_timer = 0;
	ifp->if_flags &= ~IFF_OACTIVE;
	rum_start(ifp);

	splx(s);
}

/*
 * Rx bulk-transfer completion callback: validate the frame, swap in a
 * fresh cluster so the received one can be handed to net80211, tap BPF,
 * then resubmit the xfer ("skip:" path always requeues).
 */
static void
rum_rxeof(struct usbd_xfer *xfer, void *priv, usbd_status status)
{
	struct rum_rx_data *data = priv;
	struct rum_softc *sc = data->sc;
	struct ieee80211com *ic = &sc->sc_ic;
	struct ifnet *ifp = &sc->sc_if;
	struct rum_rx_desc *desc;
	struct ieee80211_frame *wh;
	struct ieee80211_node *ni;
	struct mbuf *mnew, *m;
	int s, len;

	if (status != USBD_NORMAL_COMPLETION) {
		if (status == USBD_NOT_STARTED || status == USBD_CANCELLED)
			return;

		if (status == USBD_STALLED)
			usbd_clear_endpoint_stall_async(sc->sc_rx_pipeh);
		goto skip;
	}

	usbd_get_xfer_status(xfer, NULL, NULL, &len, NULL);

	/* must at least hold the Rx descriptor and a minimal 802.11 header */
	if (len < (int)(RT2573_RX_DESC_SIZE +
	    sizeof(struct ieee80211_frame_min))) {
		DPRINTF(("%s: xfer too short %d\n", device_xname(sc->sc_dev),
		    len));
		if_statinc(ifp, if_ierrors);
		goto skip;
	}

	desc = (struct rum_rx_desc *)data->buf;

	if (le32toh(desc->flags) & RT2573_RX_CRC_ERROR) {
		/*
		 * This should not happen since we did not request to receive
		 * those frames when we filled RT2573_TXRX_CSR0.
		 */
		DPRINTFN(5, ("CRC error\n"));
		if_statinc(ifp, if_ierrors);
		goto skip;
	}

	/* replacement cluster first; on failure the old data is just dropped */
	MGETHDR(mnew, M_DONTWAIT, MT_DATA);
	if (mnew == NULL) {
		printf("%s: could not allocate rx mbuf\n",
		    device_xname(sc->sc_dev));
		if_statinc(ifp, if_ierrors);
		goto skip;
	}
	MCLGET(mnew, M_DONTWAIT);
	if (!(mnew->m_flags & M_EXT)) {
		printf("%s: could not allocate rx mbuf cluster\n",
		    device_xname(sc->sc_dev));
		m_freem(mnew);
		if_statinc(ifp, if_ierrors);
		goto skip;
	}

	m = data->m;
	data->m = mnew;
	data->buf = mtod(data->m, uint8_t *);

	/* finalize mbuf */
	m_set_rcvif(m, ifp);
	/* payload starts right after the hardware Rx descriptor */
	m->m_data = (void *)(desc + 1);
	m->m_pkthdr.len = m->m_len = (le32toh(desc->flags) >> 16) & 0xfff;

	s = splnet();

	if (sc->sc_drvbpf != NULL) {
		struct rum_rx_radiotap_header *tap = &sc->sc_rxtap;

		tap->wr_flags = IEEE80211_RADIOTAP_F_FCS;
		tap->wr_rate = rum_rxrate(desc);
		tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq);
		tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags);
		tap->wr_antenna = sc->rx_ant;
		tap->wr_antsignal = desc->rssi;

		bpf_mtap2(sc->sc_drvbpf, tap, sc->sc_rxtap_len, m, BPF_D_IN);
	}

	wh = mtod(m, struct ieee80211_frame *);
	ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh);

	/* send the frame to the 802.11 layer */
	ieee80211_input(ic, m, ni, desc->rssi, 0);

	/* node is no longer needed */
	ieee80211_free_node(ni);

	splx(s);

	DPRINTFN(15, ("rx done\n"));

skip:	/* setup a new transfer */
	usbd_setup_xfer(xfer, data, data->buf, MCLBYTES, USBD_SHORT_XFER_OK,
	    USBD_NO_TIMEOUT, rum_rxeof);
	usbd_transfer(xfer);
}

/*
 * This function is only used by the Rx radiotap code. It returns the rate at
 * which a given frame was received.
 */
static uint8_t
rum_rxrate(const struct rum_rx_desc *desc)
{
	if (le32toh(desc->flags) & RT2573_RX_OFDM) {
		/* reverse function of rum_plcp_signal */
		switch (desc->rate) {
		case 0xb:	return 12;
		case 0xf:	return 18;
		case 0xa:	return 24;
		case 0xe:	return 36;
		case 0x9:	return 48;
		case 0xd:	return 72;
		case 0x8:	return 96;
		case 0xc:	return 108;
		}
	} else {
		/* CCK: hardware reports rate in units of 100kbps (10 = 1Mbps) */
		if (desc->rate == 10)
			return 2;
		if (desc->rate == 20)
			return 4;
		if (desc->rate == 55)
			return 11;
		if (desc->rate == 110)
			return 22;
	}
	return 2;	/* should not get there */
}

/*
 * Return the expected ack rate for a frame transmitted at rate `rate'.
 * XXX: this should depend on the destination node basic rate set.
 */
static int
rum_ack_rate(struct ieee80211com *ic, int rate)
{
	switch (rate) {
	/* CCK rates */
	case 2:
		return 2;
	case 4:
	case 11:
	case 22:
		return (ic->ic_curmode == IEEE80211_MODE_11B) ? 4 : rate;

	/* OFDM rates */
	case 12:
	case 18:
		return 12;
	case 24:
	case 36:
		return 24;
	case 48:
	case 72:
	case 96:
	case 108:
		return 48;
	}

	/* default to 1Mbps */
	return 2;
}

/*
 * Compute the duration (in us) needed to transmit `len' bytes at rate `rate'.
 * The function automatically determines the operating mode depending on the
 * given rate. `flags' indicates whether short preamble is in use or not.
 */
static uint16_t
rum_txtime(int len, int rate, uint32_t flags)
{
	uint16_t txtime;

	if (RUM_RATE_IS_OFDM(rate)) {
		/* IEEE Std 802.11a-1999, pp. 37 */
		txtime = (8 + 4 * len + 3 + rate - 1) / rate;
		txtime = 16 + 4 + 4 * txtime + 6;
	} else {
		/* IEEE Std 802.11b-1999, pp. 28 */
		txtime = (16 * len + rate - 1) / rate;
		/* long preamble+header is mandatory at 1Mbps (rate == 2) */
		if (rate != 2 && (flags & IEEE80211_F_SHPREAMBLE))
			txtime += 72 + 24;
		else
			txtime += 144 + 48;
	}
	return txtime;
}

/* Map a net80211 rate (in 500kbps units) to the chip's PLCP SIGNAL code. */
static uint8_t
rum_plcp_signal(int rate)
{
	switch (rate) {
	/* CCK rates (returned values are device-dependent) */
	case 2:		return 0x0;
	case 4:		return 0x1;
	case 11:	return 0x2;
	case 22:	return 0x3;

	/* OFDM rates (cf IEEE Std 802.11a-1999, pp.
 14 Table 80) */
	case 12:	return 0xb;
	case 18:	return 0xf;
	case 24:	return 0xa;
	case 36:	return 0xe;
	case 48:	return 0x9;
	case 72:	return 0xd;
	case 96:	return 0x8;
	case 108:	return 0xc;

	/* unsupported rates (should not get there) */
	default:	return 0xff;
	}
}

/*
 * Fill in the hardware Tx descriptor for a `len'-byte frame sent at
 * `rate': flags, WME parameters and the PLCP length/signal fields.
 */
static void
rum_setup_tx_desc(struct rum_softc *sc, struct rum_tx_desc *desc,
    uint32_t flags, uint16_t xflags, int len, int rate)
{
	struct ieee80211com *ic = &sc->sc_ic;
	uint16_t plcp_length;
	int remainder;

	desc->flags = htole32(flags);
	desc->flags |= htole32(RT2573_TX_VALID);
	desc->flags |= htole32(len << 16);

	desc->xflags = htole16(xflags);

	desc->wme = htole16(
	    RT2573_QID(0) |
	    RT2573_AIFSN(2) |
	    RT2573_LOGCWMIN(4) |
	    RT2573_LOGCWMAX(10));

	/* setup PLCP fields */
	desc->plcp_signal = rum_plcp_signal(rate);
	desc->plcp_service = 4;

	len += IEEE80211_CRC_LEN;
	if (RUM_RATE_IS_OFDM(rate)) {
		desc->flags |= htole32(RT2573_TX_OFDM);

		plcp_length = len & 0xfff;
		desc->plcp_length_hi = plcp_length >> 6;
		desc->plcp_length_lo = plcp_length & 0x3f;
	} else {
		/* CCK: PLCP length is the airtime in microseconds */
		plcp_length = (16 * len + rate - 1) / rate;
		if (rate == 22) {
			remainder = (16 * len) % 22;
			if (remainder != 0 && remainder < 7)
				desc->plcp_service |= RT2573_PLCP_LENGEXT;
		}
		desc->plcp_length_hi = plcp_length >> 8;
		desc->plcp_length_lo = plcp_length & 0xff;

		if (rate != 2 && (ic->ic_flags & IEEE80211_F_SHPREAMBLE))
			desc->plcp_signal |= 0x08;
	}
}

#define RUM_TX_TIMEOUT 5000

/*
 * Encrypt (if needed), pick a Tx rate, optionally send an RTS or
 * CTS-to-self protection frame, then queue `m0' on the Tx ring.
 * Consumes `m0' in all cases; `ni' is owned by the ring until rum_txeof.
 */
static int
rum_tx_data(struct rum_softc *sc, struct mbuf *m0, struct ieee80211_node *ni)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct rum_tx_desc *desc;
	struct rum_tx_data *data;
	struct ieee80211_frame *wh;
	struct ieee80211_key *k;
	uint32_t flags = 0;
	uint16_t dur;
	usbd_status error;
	int rate, xferlen, pktlen, needrts = 0, needcts = 0;

	wh = mtod(m0, struct ieee80211_frame *);

	if (wh->i_fc[1] & IEEE80211_FC1_WEP) {
		k = ieee80211_crypto_encap(ic, ni, m0);
		if (k == NULL) {
			m_freem(m0);
			return ENOBUFS;
		}

		/* packet header may have moved, reset our local pointer */
		wh = mtod(m0, struct ieee80211_frame *);
	}

	/* compute actual packet length (including CRC and crypto overhead) */
	pktlen = m0->m_pkthdr.len + IEEE80211_CRC_LEN;

	/* pickup a rate */
	if (IEEE80211_IS_MULTICAST(wh->i_addr1) ||
	    ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) ==
	    IEEE80211_FC0_TYPE_MGT)) {
		/* mgmt/multicast frames are sent at the lowest avail. rate */
		rate = ni->ni_rates.rs_rates[0];
	} else if (ic->ic_fixed_rate != IEEE80211_FIXED_RATE_NONE) {
		rate = ic->ic_bss->ni_rates.rs_rates[ic->ic_fixed_rate];
	} else
		rate = ni->ni_rates.rs_rates[ni->ni_txrate];
	if (rate == 0)
		rate = 2;	/* XXX should not happen */
	rate &= IEEE80211_RATE_VAL;

	/* check if RTS/CTS or CTS-to-self protection must be used */
	if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) {
		/* multicast frames are not sent at OFDM rates in 802.11b/g */
		if (pktlen > ic->ic_rtsthreshold) {
			needrts = 1;	/* RTS/CTS based on frame length */
		} else if ((ic->ic_flags & IEEE80211_F_USEPROT) &&
		    RUM_RATE_IS_OFDM(rate)) {
			if (ic->ic_protmode == IEEE80211_PROT_CTSONLY)
				needcts = 1;	/* CTS-to-self */
			else if (ic->ic_protmode == IEEE80211_PROT_RTSCTS)
				needrts = 1;	/* RTS/CTS */
		}
	}
	if (needrts || needcts) {
		struct mbuf *mprot;
		int protrate, ackrate;

		/* protection frames go at the lowest basic rate of the band */
		protrate = IEEE80211_IS_CHAN_5GHZ(ni->ni_chan) ? 12 : 2;
		ackrate = rum_ack_rate(ic, rate);

		/* NAV duration: data + ACK (+ CTS for RTS/CTS) + SIFS gaps */
		dur = rum_txtime(pktlen, rate, ic->ic_flags) +
		      rum_txtime(RUM_ACK_SIZE, ackrate, ic->ic_flags) +
		      2 * sc->sifs;
		if (needrts) {
			dur += rum_txtime(RUM_CTS_SIZE, rum_ack_rate(ic,
			    protrate), ic->ic_flags) + sc->sifs;
			mprot = ieee80211_get_rts(ic, wh, dur);
		} else {
			mprot = ieee80211_get_cts_to_self(ic, dur);
		}
		if (mprot == NULL) {
			aprint_error_dev(sc->sc_dev,
			    "couldn't allocate protection frame\n");
			m_freem(m0);
			return ENOBUFS;
		}

		data = &sc->tx_data[sc->tx_cur];
		desc = (struct rum_tx_desc *)data->buf;

		/* avoid multiple free() of the same node for each fragment */
		data->ni = ieee80211_ref_node(ni);

		m_copydata(mprot, 0, mprot->m_pkthdr.len,
		    data->buf + RT2573_TX_DESC_SIZE);
		rum_setup_tx_desc(sc, desc,
		    (needrts ? RT2573_TX_NEED_ACK : 0) | RT2573_TX_MORE_FRAG,
		    0, mprot->m_pkthdr.len, protrate);

		/* no roundup necessary here */
		xferlen = RT2573_TX_DESC_SIZE + mprot->m_pkthdr.len;

		/* XXX may want to pass the protection frame to BPF */

		/* mbuf is no longer needed */
		m_freem(mprot);

		usbd_setup_xfer(data->xfer, data, data->buf, xferlen,
		    USBD_FORCE_SHORT_XFER, RUM_TX_TIMEOUT, rum_txeof);
		error = usbd_transfer(data->xfer);
		if (error != USBD_NORMAL_COMPLETION &&
		    error != USBD_IN_PROGRESS) {
			m_freem(m0);
			return error;
		}

		sc->tx_queued++;
		sc->tx_cur = (sc->tx_cur + 1) % RUM_TX_LIST_COUNT;

		/* data frame follows the protection frame after SIFS */
		flags |= RT2573_TX_LONG_RETRY | RT2573_TX_IFS_SIFS;
	}

	data = &sc->tx_data[sc->tx_cur];
	desc = (struct rum_tx_desc *)data->buf;

	data->ni = ni;

	if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) {
		flags |= RT2573_TX_NEED_ACK;

		dur = rum_txtime(RUM_ACK_SIZE, rum_ack_rate(ic, rate),
		    ic->ic_flags) + sc->sifs;
		*(uint16_t *)wh->i_dur = htole16(dur);

		/* tell hardware to set timestamp in probe responses */
		if ((wh->i_fc[0] &
		    (IEEE80211_FC0_TYPE_MASK | IEEE80211_FC0_SUBTYPE_MASK)) ==
		    (IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_PROBE_RESP))
			flags |= RT2573_TX_TIMESTAMP;
	}

	if (sc->sc_drvbpf != NULL) {
		struct rum_tx_radiotap_header *tap = &sc->sc_txtap;

		tap->wt_flags = 0;
		tap->wt_rate = rate;
		tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq);
		tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags);
		tap->wt_antenna = sc->tx_ant;

		bpf_mtap2(sc->sc_drvbpf, tap, sc->sc_txtap_len, m0, BPF_D_OUT);
	}

	m_copydata(m0, 0, m0->m_pkthdr.len, data->buf + RT2573_TX_DESC_SIZE);
	rum_setup_tx_desc(sc, desc, flags, 0, m0->m_pkthdr.len, rate);

	/* align end on a 4-bytes boundary */
	xferlen = (RT2573_TX_DESC_SIZE + m0->m_pkthdr.len + 3) & ~3;

	/*
	 * No space left in the last URB to store the extra 4 bytes, force
	 * sending of another URB.
	 */
	if ((xferlen % 64) == 0)
		xferlen += 4;

	DPRINTFN(10, ("sending data frame len=%zu rate=%u xfer len=%u\n",
	    (size_t)m0->m_pkthdr.len + RT2573_TX_DESC_SIZE, rate, xferlen));

	/* mbuf is no longer needed */
	m_freem(m0);

	usbd_setup_xfer(data->xfer, data, data->buf, xferlen,
	    USBD_FORCE_SHORT_XFER, RUM_TX_TIMEOUT, rum_txeof);
	error = usbd_transfer(data->xfer);
	if (error != USBD_NORMAL_COMPLETION && error != USBD_IN_PROGRESS)
		return error;

	sc->tx_queued++;
	sc->tx_cur = (sc->tx_cur + 1) % RUM_TX_LIST_COUNT;

	return 0;
}

/*
 * if_start handler: drain management frames first, then the data queue,
 * backing off (IFF_OACTIVE) when the Tx ring is nearly full.
 */
static void
rum_start(struct ifnet *ifp)
{
	struct rum_softc *sc = ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;
	struct ether_header *eh;
	struct ieee80211_node *ni;
	struct mbuf *m0;

	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
		return;

	for (;;) {
		IF_POLL(&ic->ic_mgtq, m0);
		if (m0 != NULL) {
			/* keep one slot free so the ring never wraps on itself */
			if (sc->tx_queued >= RUM_TX_LIST_COUNT - 1) {
				ifp->if_flags |= IFF_OACTIVE;
				break;
			}
			IF_DEQUEUE(&ic->ic_mgtq, m0);

			/* management frames carry their node in the pkthdr */
			ni = M_GETCTX(m0, struct ieee80211_node *);
			M_CLEARCTX(m0);
			bpf_mtap3(ic->ic_rawbpf, m0, BPF_D_OUT);
			if (rum_tx_data(sc, m0, ni) != 0)
				break;
		} else {
			if (ic->ic_state != IEEE80211_S_RUN)
				break;
			IFQ_POLL(&ifp->if_snd, m0);
			if (m0 == NULL)
				break;
			if (sc->tx_queued >= RUM_TX_LIST_COUNT - 1) {
				ifp->if_flags |= IFF_OACTIVE;
				break;
			}
			IFQ_DEQUEUE(&ifp->if_snd, m0);
			if (m0->m_len < (int)sizeof(struct ether_header) &&
			    !(m0 = m_pullup(m0, sizeof(struct ether_header))))
				continue;

			eh = mtod(m0, struct ether_header *);
			ni = ieee80211_find_txnode(ic, eh->ether_dhost);
			if (ni == NULL) {
				m_freem(m0);
				continue;
			}
			bpf_mtap(ifp, m0, BPF_D_OUT);
			m0 = ieee80211_encap(ic, m0, ni);
			if (m0 == NULL) {
				ieee80211_free_node(ni);
				continue;
			}
			bpf_mtap3(ic->ic_rawbpf, m0, BPF_D_OUT);
			if (rum_tx_data(sc, m0, ni) != 0) {
				ieee80211_free_node(ni);
				if_statinc(ifp, if_oerrors);
				break;
			}
		}

		/* arm the 5-second Tx watchdog for the queued frame */
		sc->sc_tx_timer = 5;
		ifp->if_timer = 1;
	}
}

/* if_watchdog handler: detect stuck transmissions (continues next chunk). */
static void
rum_watchdog(struct ifnet *ifp)
{
	struct rum_softc *sc = ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;

	ifp->if_timer =
	    0;

	if (sc->sc_tx_timer > 0) {
		if (--sc->sc_tx_timer == 0) {
			printf("%s: device timeout\n",
			    device_xname(sc->sc_dev));
			/*rum_init(ifp); XXX needs a process context! */
			if_statinc(ifp, if_oerrors);
			return;
		}
		ifp->if_timer = 1;
	}

	ieee80211_watchdog(ic);
}

/*
 * ifnet ioctl handler: interface flags, multicast filters, and everything
 * else delegated to net80211.  ENETRESET from any path triggers a reinit
 * when the interface is up and running.
 */
static int
rum_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
#define IS_RUNNING(ifp) \
	(((ifp)->if_flags & IFF_UP) && ((ifp)->if_flags & IFF_RUNNING))
	struct rum_softc *sc = ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFFLAGS:
		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
			break;
		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
		case IFF_UP|IFF_RUNNING:
			/* already running: only the promisc bit may have changed */
			rum_update_promisc(sc);
			break;
		case IFF_UP:
			rum_init(ifp);
			break;
		case IFF_RUNNING:
			rum_stop(ifp, 1);
			break;
		case 0:
			break;
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* chip has no multicast filter; just accept the list change */
		if ((error = ether_ioctl(ifp, cmd, data)) == ENETRESET) {
			error = 0;
		}
		break;

	default:
		error = ieee80211_ioctl(ic, cmd, data);
	}

	if (error == ENETRESET) {
		if (IS_RUNNING(ifp) &&
		    (ic->ic_roaming != IEEE80211_ROAMING_MANUAL))
			rum_init(ifp);
		error = 0;
	}

	splx(s);

	return error;
#undef IS_RUNNING
}

/*
 * Read `len' bytes at EEPROM offset `addr' via a vendor control request.
 * On failure the buffer is zeroed so callers never see stale data.
 */
static void
rum_eeprom_read(struct rum_softc *sc, uint16_t addr, void *buf, int len)
{
	usb_device_request_t req;
	usbd_status error;

	req.bmRequestType = UT_READ_VENDOR_DEVICE;
	req.bRequest = RT2573_READ_EEPROM;
	USETW(req.wValue, 0);
	USETW(req.wIndex, addr);
	USETW(req.wLength, len);

	error = usbd_do_request(sc->sc_udev, &req, buf);
	if (error != 0) {
		printf("%s: could not read EEPROM: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(error));
		memset(buf, 0, len);
	}
}

/* Read one 32-bit MAC register (host byte order). */
static uint32_t
rum_read(struct rum_softc *sc, uint16_t reg)
{
	uint32_t val;

	rum_read_multi(sc, reg, &val, sizeof(val));

	return le32toh(val);
}

/* Read `len' bytes of MAC register space; zeroed on failure. */
static void
rum_read_multi(struct rum_softc *sc, uint16_t reg, void *buf, int len)
{
	usb_device_request_t req;
	usbd_status error;

	req.bmRequestType = UT_READ_VENDOR_DEVICE;
	req.bRequest = RT2573_READ_MULTI_MAC;
	USETW(req.wValue, 0);
	USETW(req.wIndex, reg);
	USETW(req.wLength, len);

	error = usbd_do_request(sc->sc_udev, &req, buf);
	if (error != 0) {
		printf("%s: could not multi read MAC register: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(error));
		memset(buf, 0, len);
	}
}

/* Write one 32-bit MAC register (value given in host byte order). */
static void
rum_write(struct rum_softc *sc, uint16_t reg, uint32_t val)
{
	uint32_t tmp = htole32(val);

	rum_write_multi(sc, reg, &tmp, sizeof(tmp));
}

/* Write a buffer into MAC register space, in chunks the device accepts. */
static void
rum_write_multi(struct rum_softc *sc, uint16_t reg, void *buf, size_t len)
{
	usb_device_request_t req;
	usbd_status error;
	int offset;

	req.bmRequestType = UT_WRITE_VENDOR_DEVICE;
	req.bRequest = RT2573_WRITE_MULTI_MAC;
	USETW(req.wValue, 0);

	/* write at most 64 bytes at a time */
	for (offset = 0; offset < len; offset += 64) {
		USETW(req.wIndex, reg + offset);
		USETW(req.wLength, MIN(len - offset, 64));

		error = usbd_do_request(sc->sc_udev, &req,
		    (char *)buf + offset);
		if (error != 0) {
			printf("%s: could not multi write MAC register: %s\n",
			    device_xname(sc->sc_dev), usbd_errstr(error));
		}
	}
}

/* Write a BBP register through PHY_CSR3, waiting for the busy bit first. */
static void
rum_bbp_write(struct rum_softc *sc, uint8_t reg, uint8_t val)
{
	uint32_t tmp;
	int ntries;

	for (ntries = 0; ntries < 5; ntries++) {
		if (!(rum_read(sc, RT2573_PHY_CSR3) & RT2573_BBP_BUSY))
			break;
	}
	if (ntries == 5) {
		printf("%s: could not write to BBP\n",
		    device_xname(sc->sc_dev));
		return;
	}

	tmp = RT2573_BBP_BUSY | (reg & 0x7f) << 8 | val;
	rum_write(sc, RT2573_PHY_CSR3, tmp);
}

/* Read a BBP register through PHY_CSR3; returns 0 on timeout. */
static uint8_t
rum_bbp_read(struct rum_softc *sc, uint8_t reg)
{
	uint32_t val;
	int ntries;

	for (ntries = 0; ntries < 5; ntries++) {
		if (!(rum_read(sc, RT2573_PHY_CSR3) & RT2573_BBP_BUSY))
			break;
	}
	if (ntries == 5) {
		printf("%s: could not read BBP\n", device_xname(sc->sc_dev));
		return 0;
	}

	val = RT2573_BBP_BUSY | RT2573_BBP_READ | reg << 8;
	rum_write(sc, RT2573_PHY_CSR3, val);

	/* poll until the chip clears BUSY and the value is latched */
	for (ntries = 0; ntries < 100; ntries++) {
		val = rum_read(sc, RT2573_PHY_CSR3);
		if (!(val & RT2573_BBP_BUSY))
			return val & 0xff;
		DELAY(1);
	}

	printf("%s: could not read BBP\n", device_xname(sc->sc_dev));
	return 0;
}

static void
rum_rf_write(struct rum_softc *sc, uint8_t reg, uint32_t val)
{
	/* Write a 20-bit value to RF register `reg' (0-3) via PHY_CSR4. */
	uint32_t tmp;
	int ntries;

	for (ntries = 0; ntries < 5; ntries++) {
		if (!(rum_read(sc, RT2573_PHY_CSR4) & RT2573_RF_BUSY))
			break;
	}
	if (ntries == 5) {
		printf("%s: could not write to RF\n",
		    device_xname(sc->sc_dev));
		return;
	}

	tmp = RT2573_RF_BUSY | RT2573_RF_20BIT | (val & 0xfffff) << 2 |
	    (reg & 3);
	rum_write(sc, RT2573_PHY_CSR4, tmp);

	/* remember last written value in sc */
	sc->rf_regs[reg] = val;

	DPRINTFN(15, ("RF R[%u] <- 0x%05x\n", reg & 3, val & 0xfffff));
}

/* Reprogram the antenna-selection BBP registers with Rx momentarily off. */
static void
rum_select_antenna(struct rum_softc *sc)
{
	uint8_t bbp4, bbp77;
	uint32_t tmp;

	bbp4 = rum_bbp_read(sc, 4);
	bbp77 = rum_bbp_read(sc, 77);

	/* TBD */

	/* make sure Rx is disabled before switching antenna */
	tmp = rum_read(sc, RT2573_TXRX_CSR0);
	rum_write(sc, RT2573_TXRX_CSR0, tmp | RT2573_DISABLE_RX);

	rum_bbp_write(sc, 4, bbp4);
	rum_bbp_write(sc, 77, bbp77);

	/* restore the previous Rx state */
	rum_write(sc, RT2573_TXRX_CSR0, tmp);
}

/*
 * Enable multi-rate retries for frames sent at OFDM rates.
 * In 802.11b/g mode, allow fallback to CCK rates.
 */
static void
rum_enable_mrr(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	uint32_t tmp;

	tmp = rum_read(sc, RT2573_TXRX_CSR4);

	tmp &= ~RT2573_MRR_CCK_FALLBACK;
	if (!IEEE80211_IS_CHAN_5GHZ(ic->ic_curchan))
		tmp |= RT2573_MRR_CCK_FALLBACK;
	tmp |= RT2573_MRR_ENABLED;

	rum_write(sc, RT2573_TXRX_CSR4, tmp);
}

/* Mirror net80211's short-preamble flag into TXRX_CSR4. */
static void
rum_set_txpreamble(struct rum_softc *sc)
{
	uint32_t tmp;

	tmp = rum_read(sc, RT2573_TXRX_CSR4);

	tmp &= ~RT2573_SHORT_PREAMBLE;
	if (sc->sc_ic.ic_flags & IEEE80211_F_SHPREAMBLE)
		tmp |= RT2573_SHORT_PREAMBLE;

	rum_write(sc, RT2573_TXRX_CSR4, tmp);
}

/* Program the basic rate set bitmap matching the current PHY mode. */
static void
rum_set_basicrates(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;

	/* update basic rate set */
	if (ic->ic_curmode == IEEE80211_MODE_11B) {
		/* 11b basic rates: 1, 2Mbps */
		rum_write(sc, RT2573_TXRX_CSR5, 0x3);
	} else if (ic->ic_curmode == IEEE80211_MODE_11A) {
		/* 11a basic rates: 6, 12, 24Mbps */
		rum_write(sc, RT2573_TXRX_CSR5, 0x150);
	} else {
		/* 11b/g basic rates: 1, 2, 5.5, 11Mbps */
		rum_write(sc, RT2573_TXRX_CSR5, 0xf);
	}
}

/*
 * Reprogram MAC/BBP to switch to a new band. Values taken from the reference
 * driver.
 */
static void
rum_select_band(struct rum_softc *sc, struct ieee80211_channel *c)
{
	uint8_t bbp17, bbp35, bbp96, bbp97, bbp98, bbp104;
	uint32_t tmp;

	/* update all BBP registers that depend on the band */
	bbp17 = 0x20; bbp96 = 0x48; bbp104 = 0x2c;
	bbp35 = 0x50; bbp97 = 0x48; bbp98 = 0x48;
	if (IEEE80211_IS_CHAN_5GHZ(c)) {
		bbp17 += 0x08; bbp96 += 0x10; bbp104 += 0x0c;
		bbp35 += 0x10; bbp97 += 0x10; bbp98 += 0x10;
	}
	/* external LNA needs extra gain compensation */
	if ((IEEE80211_IS_CHAN_2GHZ(c) && sc->ext_2ghz_lna) ||
	    (IEEE80211_IS_CHAN_5GHZ(c) && sc->ext_5ghz_lna)) {
		bbp17 += 0x10; bbp96 += 0x10; bbp104 += 0x10;
	}

	sc->bbp17 = bbp17;
	rum_bbp_write(sc, 17, bbp17);
	rum_bbp_write(sc, 96, bbp96);
	rum_bbp_write(sc, 104, bbp104);

	if ((IEEE80211_IS_CHAN_2GHZ(c) && sc->ext_2ghz_lna) ||
	    (IEEE80211_IS_CHAN_5GHZ(c) && sc->ext_5ghz_lna)) {
		rum_bbp_write(sc, 75, 0x80);
		rum_bbp_write(sc, 86, 0x80);
		rum_bbp_write(sc, 88, 0x80);
	}

	rum_bbp_write(sc, 35, bbp35);
	rum_bbp_write(sc, 97, bbp97);
	rum_bbp_write(sc, 98, bbp98);

	/* route power-amplifier enable to the active band */
	tmp = rum_read(sc, RT2573_PHY_CSR0);
	tmp &= ~(RT2573_PA_PE_2GHZ | RT2573_PA_PE_5GHZ);
	if (IEEE80211_IS_CHAN_2GHZ(c))
		tmp |= RT2573_PA_PE_2GHZ;
	else
		tmp |= RT2573_PA_PE_5GHZ;
	rum_write(sc, RT2573_PHY_CSR0, tmp);

	/* 802.11a uses a 16 microseconds short interframe space */
	sc->sifs = IEEE80211_IS_CHAN_5GHZ(c) ? 16 : 10;
}

/*
 * Tune the radio to channel `c': look up the RF program for the channel,
 * clamp Tx power into the hardware range (spilling the excess into BBP
 * register 94), and perform the triple RF write sequence from the
 * reference driver.
 */
static void
rum_set_chan(struct rum_softc *sc, struct ieee80211_channel *c)
{
	struct ieee80211com *ic = &sc->sc_ic;
	const struct rfprog *rfprog;
	uint8_t bbp3, bbp94 = RT2573_BBPR94_DEFAULT;
	int8_t power;
	u_int i, chan;

	chan = ieee80211_chan2ieee(ic, c);
	if (chan == 0 || chan == IEEE80211_CHAN_ANY)
		return;

	/* select the appropriate RF settings based on what EEPROM says */
	rfprog = (sc->rf_rev == RT2573_RF_5225 ||
	    sc->rf_rev == RT2573_RF_2527) ? rum_rf5225 : rum_rf5226;

	/* find the settings for this channel (we know it exists) */
	for (i = 0; rfprog[i].chan != chan; i++);

	power = sc->txpow[i];
	if (power < 0) {
		/* out-of-range power is folded into BBP R94 */
		bbp94 += power;
		power = 0;
	} else if (power > 31) {
		bbp94 += power - 31;
		power = 31;
	}

	/*
	 * If we are switching from the 2GHz band to the 5GHz band or
	 * vice-versa, BBP registers need to be reprogrammed.
	 */
	if (c->ic_flags != ic->ic_curchan->ic_flags) {
		rum_select_band(sc, c);
		rum_select_antenna(sc);
	}
	ic->ic_curchan = c;

	rum_rf_write(sc, RT2573_RF1, rfprog[i].r1);
	rum_rf_write(sc, RT2573_RF2, rfprog[i].r2);
	rum_rf_write(sc, RT2573_RF3, rfprog[i].r3 | power << 7);
	rum_rf_write(sc, RT2573_RF4, rfprog[i].r4 | sc->rffreq << 10);

	/* second pass sets the tune bit in RF3 */
	rum_rf_write(sc, RT2573_RF1, rfprog[i].r1);
	rum_rf_write(sc, RT2573_RF2, rfprog[i].r2);
	rum_rf_write(sc, RT2573_RF3, rfprog[i].r3 | power << 7 | 1);
	rum_rf_write(sc, RT2573_RF4, rfprog[i].r4 | sc->rffreq << 10);

	rum_rf_write(sc, RT2573_RF1, rfprog[i].r1);
	rum_rf_write(sc, RT2573_RF2, rfprog[i].r2);
	rum_rf_write(sc, RT2573_RF3, rfprog[i].r3 | power << 7);
	rum_rf_write(sc, RT2573_RF4, rfprog[i].r4 | sc->rffreq << 10);

	DELAY(10);

	/* enable smart mode for MIMO-capable RFs */
	bbp3 = rum_bbp_read(sc, 3);
	bbp3 &= ~RT2573_SMART_MODE;
	if (sc->rf_rev == RT2573_RF_5225 || sc->rf_rev == RT2573_RF_2527)
		bbp3 |= RT2573_SMART_MODE;
	rum_bbp_write(sc, 3, bbp3);

	if (bbp94 != RT2573_BBPR94_DEFAULT)
		rum_bbp_write(sc, 94, bbp94);
}

/*
 * Enable TSF synchronization and tell h/w to start sending beacons for IBSS
 * and HostAP operating modes.
 */
static void
rum_enable_tsf_sync(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	uint32_t tmp;

	if (ic->ic_opmode != IEEE80211_M_STA) {
		/*
		 * Change default 16ms TBTT adjustment to 8ms.
		 * Must be done before enabling beacon generation.
		 */
		rum_write(sc, RT2573_TXRX_CSR10, 1 << 12 | 8);
	}

	tmp = rum_read(sc, RT2573_TXRX_CSR9) & 0xff000000;

	/* set beacon interval (in 1/16ms unit) */
	tmp |= ic->ic_bss->ni_intval * 16;

	tmp |= RT2573_TSF_TICKING | RT2573_ENABLE_TBTT;
	if (ic->ic_opmode == IEEE80211_M_STA)
		tmp |= RT2573_TSF_MODE(1);
	else
		tmp |= RT2573_TSF_MODE(2) | RT2573_GENERATE_BEACON;

	rum_write(sc, RT2573_TXRX_CSR9, tmp);
}

/* Program the MAC slot time from the net80211 short-slot flag. */
static void
rum_update_slot(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	uint8_t slottime;
	uint32_t tmp;

	slottime = (ic->ic_flags & IEEE80211_F_SHSLOT) ? 9 : 20;

	tmp = rum_read(sc, RT2573_MAC_CSR9);
	tmp = (tmp & ~0xff) | slottime;
	rum_write(sc, RT2573_MAC_CSR9, tmp);

	DPRINTF(("setting slot time to %uus\n", slottime));
}

/* Load the 6-byte BSSID into MAC_CSR4/5 (little-endian packing). */
static void
rum_set_bssid(struct rum_softc *sc, const uint8_t *bssid)
{
	uint32_t tmp;

	tmp = bssid[0] | bssid[1] << 8 | bssid[2] << 16 | bssid[3] << 24;
	rum_write(sc, RT2573_MAC_CSR4, tmp);

	tmp = bssid[4] | bssid[5] << 8 | RT2573_ONE_BSSID << 16;
	rum_write(sc, RT2573_MAC_CSR5, tmp);
}

/* Load the 6-byte MAC address into MAC_CSR2/3 (little-endian packing). */
static void
rum_set_macaddr(struct rum_softc *sc, const uint8_t *addr)
{
	uint32_t tmp;

	tmp = addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24;
	rum_write(sc, RT2573_MAC_CSR2, tmp);

	tmp = addr[4] | addr[5] << 8 | 0xff << 16;
	rum_write(sc, RT2573_MAC_CSR3, tmp);
}

/* Toggle the hardware "drop frames not addressed to me" Rx filter bit. */
static void
rum_update_promisc(struct rum_softc *sc)
{
	struct ifnet *ifp = sc->sc_ic.ic_ifp;
	uint32_t tmp;

	tmp = rum_read(sc, RT2573_TXRX_CSR0);

	tmp &= ~RT2573_DROP_NOT_TO_ME;
	if (!(ifp->if_flags & IFF_PROMISC))
		tmp |= RT2573_DROP_NOT_TO_ME;

	rum_write(sc, RT2573_TXRX_CSR0, tmp);

	DPRINTF(("%s promiscuous mode\n", (ifp->if_flags & IFF_PROMISC) ?
	    "entering" : "leaving"));
}

/* Human-readable name for an RF chip revision (for attach messages). */
static const char *
rum_get_rf(int rev)
{
	switch (rev) {
	case RT2573_RF_2527:	return "RT2527 (MIMO XR)";
	case RT2573_RF_2528:	return "RT2528";
	case RT2573_RF_5225:	return "RT5225 (MIMO XR)";
	case RT2573_RF_5226:	return "RT5226";
	default:		return "unknown";
	}
}

/*
 * Pull calibration data out of the EEPROM: MAC address, RF/antenna
 * configuration, LNA presence, RSSI corrections, frequency offset,
 * per-channel Tx power and default BBP register values.
 */
static void
rum_read_eeprom(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	uint16_t val;
#ifdef RUM_DEBUG
	int i;
#endif

	/* read MAC/BBP type */
	rum_eeprom_read(sc, RT2573_EEPROM_MACBBP, &val, 2);
	sc->macbbp_rev = le16toh(val);

	/* read MAC address */
	rum_eeprom_read(sc, RT2573_EEPROM_ADDRESS, ic->ic_myaddr, 6);

	rum_eeprom_read(sc, RT2573_EEPROM_ANTENNA, &val, 2);
	val = le16toh(val);
	sc->rf_rev = (val >> 11) & 0x1f;
	sc->hw_radio = (val >> 10) & 0x1;
	sc->rx_ant = (val >> 4) & 0x3;
	sc->tx_ant = (val >> 2) & 0x3;
	sc->nb_ant = val & 0x3;

	DPRINTF(("RF revision=%d\n", sc->rf_rev));

	rum_eeprom_read(sc, RT2573_EEPROM_CONFIG2, &val, 2);
	val = le16toh(val);
	sc->ext_5ghz_lna = (val >> 6) & 0x1;
	sc->ext_2ghz_lna = (val >> 4) & 0x1;

	DPRINTF(("External 2GHz LNA=%d\nExternal 5GHz LNA=%d\n",
	    sc->ext_2ghz_lna, sc->ext_5ghz_lna));

	/* 0xff means "not calibrated"; keep the default correction then */
	rum_eeprom_read(sc, RT2573_EEPROM_RSSI_2GHZ_OFFSET, &val, 2);
	val = le16toh(val);
	if ((val & 0xff) != 0xff)
		sc->rssi_2ghz_corr = (int8_t)(val & 0xff);	/* signed */

	rum_eeprom_read(sc, RT2573_EEPROM_RSSI_5GHZ_OFFSET, &val, 2);
	val = le16toh(val);
	if ((val & 0xff) != 0xff)
		sc->rssi_5ghz_corr = (int8_t)(val & 0xff);	/* signed */

	DPRINTF(("RSSI 2GHz corr=%d\nRSSI 5GHz corr=%d\n",
	    sc->rssi_2ghz_corr, sc->rssi_5ghz_corr));

	rum_eeprom_read(sc, RT2573_EEPROM_FREQ_OFFSET, &val, 2);
	val = le16toh(val);
	if ((val & 0xff) != 0xff)
		sc->rffreq = val & 0xff;

	DPRINTF(("RF freq=%d\n", sc->rffreq));

	/* read Tx power for all a/b/g channels */
	rum_eeprom_read(sc, RT2573_EEPROM_TXPOWER, sc->txpow, 14);
	/* XXX default Tx power for 802.11a channels */
	memset(sc->txpow + 14, 24, sizeof(sc->txpow) - 14);
#ifdef RUM_DEBUG
	for (i = 0; i < 14; i++)
		DPRINTF(("Channel=%d Tx power=%d\n", i + 1, sc->txpow[i]));
#endif

	/* read default values for BBP registers */
	rum_eeprom_read(sc, RT2573_EEPROM_BBP_BASE, sc->bbp_prom, 2 * 16);
#ifdef RUM_DEBUG
	for (i = 0; i < 14; i++) {
		if (sc->bbp_prom[i].reg == 0 || sc->bbp_prom[i].reg == 0xff)
			continue;
		DPRINTF(("BBP R%d=%02x\n", sc->bbp_prom[i].reg,
		    sc->bbp_prom[i].val));
	}
#endif
}

/*
 * Wait for the BBP to come alive, then load default register values
 * followed by the vendor overrides read from the EEPROM.
 * Returns 0 or EIO on timeout.
 */
static int
rum_bbp_init(struct rum_softc *sc)
{
	unsigned int i, ntries;
	uint8_t val;

	/* wait for BBP to be ready */
	for (ntries = 0; ntries < 100; ntries++) {
		val = rum_bbp_read(sc, 0);
		if (val != 0 && val != 0xff)
			break;
		DELAY(1000);
	}
	if (ntries == 100) {
		printf("%s: timeout waiting for BBP\n",
		    device_xname(sc->sc_dev));
		return EIO;
	}

	/* initialize BBP registers to default values */
	for (i = 0; i < __arraycount(rum_def_bbp); i++)
		rum_bbp_write(sc, rum_def_bbp[i].reg, rum_def_bbp[i].val);

	/* write vendor-specific BBP values (from EEPROM) */
	for (i = 0; i < 16; i++) {
		if (sc->bbp_prom[i].reg == 0 || sc->bbp_prom[i].reg == 0xff)
			continue;
		rum_bbp_write(sc, sc->bbp_prom[i].reg, sc->bbp_prom[i].val);
	}

	return 0;
}

/*
 * if_init handler: load firmware if needed, reset and program the
 * hardware, open the bulk pipes, allocate the Tx/Rx rings and kick off
 * reception (continues in the next chunk).
 */
static int
rum_init(struct ifnet *ifp)
{
	struct rum_softc *sc = ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;
	uint32_t tmp;
	usbd_status error = 0;
	unsigned int i, ntries;

	if ((sc->sc_flags & RT2573_FWLOADED) == 0) {
		if (rum_attachhook(sc))
			goto fail;
	}

	rum_stop(ifp, 0);

	/* initialize MAC registers to default values */
	for (i = 0; i < __arraycount(rum_def_mac); i++)
		rum_write(sc, rum_def_mac[i].reg, rum_def_mac[i].val);

	/* set host ready */
	rum_write(sc, RT2573_MAC_CSR1, 3);
	rum_write(sc, RT2573_MAC_CSR1, 0);

	/* wait for BBP/RF to wakeup */
	for (ntries = 0; ntries < 1000; ntries++) {
		if (rum_read(sc, RT2573_MAC_CSR12) & 8)
			break;
		rum_write(sc, RT2573_MAC_CSR12, 4);	/* force wakeup */
		DELAY(1000);
	}
	if (ntries == 1000) {
		printf("%s: timeout waiting for BBP/RF to wakeup\n",
		    device_xname(sc->sc_dev));
		goto fail;
	}

	if ((error = rum_bbp_init(sc)) != 0)
		goto fail;

	/* select default channel */
	rum_select_band(sc, ic->ic_curchan);
	rum_select_antenna(sc);
	rum_set_chan(sc, ic->ic_curchan);

	/* clear STA registers */
	rum_read_multi(sc, RT2573_STA_CSR0, sc->sta, sizeof(sc->sta));

	IEEE80211_ADDR_COPY(ic->ic_myaddr, CLLADDR(ifp->if_sadl));
	rum_set_macaddr(sc, ic->ic_myaddr);

	/* initialize ASIC */
	rum_write(sc, RT2573_MAC_CSR1, 4);

	/*
	 * Allocate xfer for AMRR statistics requests.
	 */
	struct usbd_pipe *pipe0 = usbd_get_pipe0(sc->sc_udev);
	error = usbd_create_xfer(pipe0, sizeof(sc->sta), 0, 0,
	    &sc->amrr_xfer);
	if (error) {
		printf("%s: could not allocate AMRR xfer\n",
		    device_xname(sc->sc_dev));
		goto fail;
	}

	/*
	 * Open Tx and Rx USB bulk pipes.
	 */
	error = usbd_open_pipe(sc->sc_iface, sc->sc_tx_no, USBD_EXCLUSIVE_USE,
	    &sc->sc_tx_pipeh);
	if (error != 0) {
		printf("%s: could not open Tx pipe: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(error));
		goto fail;
	}
	error = usbd_open_pipe(sc->sc_iface, sc->sc_rx_no, USBD_EXCLUSIVE_USE,
	    &sc->sc_rx_pipeh);
	if (error != 0) {
		printf("%s: could not open Rx pipe: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(error));
		goto fail;
	}

	/*
	 * Allocate Tx and Rx xfer queues.
	 */
	error = rum_alloc_tx_list(sc);
	if (error != 0) {
		printf("%s: could not allocate Tx list\n",
		    device_xname(sc->sc_dev));
		goto fail;
	}
	error = rum_alloc_rx_list(sc);
	if (error != 0) {
		printf("%s: could not allocate Rx list\n",
		    device_xname(sc->sc_dev));
		goto fail;
	}

	/*
	 * Start up the receive pipe.
	 */
	for (i = 0; i < RUM_RX_LIST_COUNT; i++) {
		struct rum_rx_data *data;

		data = &sc->rx_data[i];

		usbd_setup_xfer(data->xfer, data, data->buf, MCLBYTES,
		    USBD_SHORT_XFER_OK, USBD_NO_TIMEOUT, rum_rxeof);
		error = usbd_transfer(data->xfer);
		if (error != USBD_NORMAL_COMPLETION &&
		    error != USBD_IN_PROGRESS) {
			printf("%s: could not queue Rx transfer\n",
			    device_xname(sc->sc_dev));
			goto fail;
		}
	}

	/* update Rx filter */
	tmp = rum_read(sc, RT2573_TXRX_CSR0) & 0xffff;

	tmp |= RT2573_DROP_PHY_ERROR | RT2573_DROP_CRC_ERROR;
	if (ic->ic_opmode != IEEE80211_M_MONITOR) {
		tmp |= RT2573_DROP_CTL | RT2573_DROP_VER_ERROR |
		       RT2573_DROP_ACKCTS;
		if (ic->ic_opmode != IEEE80211_M_HOSTAP)
			tmp |= RT2573_DROP_TODS;
		if (!(ifp->if_flags & IFF_PROMISC))
			tmp |= RT2573_DROP_NOT_TO_ME;
	}
	rum_write(sc, RT2573_TXRX_CSR0, tmp);

	ifp->if_flags &= ~IFF_OACTIVE;
	ifp->if_flags |= IFF_RUNNING;

	/* monitor mode needs no association: go straight to RUN */
	if (ic->ic_opmode == IEEE80211_M_MONITOR)
		ieee80211_new_state(ic, IEEE80211_S_RUN, -1);
	else
		ieee80211_new_state(ic, IEEE80211_S_SCAN, -1);

	return 0;

fail:	rum_stop(ifp, 1);
	return error;
}

/*
 * if_stop handler: drop to INIT state, disable and reset the chip, then
 * abort, drain and close both bulk pipes and free the Tx/Rx rings.
 * Pipes are aborted before the rings are freed so no completion callback
 * can touch freed memory.
 */
static void
rum_stop(struct ifnet *ifp, int disable)
{
	struct rum_softc *sc = ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;
	uint32_t tmp;

	ieee80211_new_state(ic, IEEE80211_S_INIT, -1);	/* free all nodes */

	sc->sc_tx_timer = 0;
	ifp->if_timer = 0;
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	/* disable Rx */
	tmp = rum_read(sc, RT2573_TXRX_CSR0);
	rum_write(sc, RT2573_TXRX_CSR0, tmp | RT2573_DISABLE_RX);

	/* reset ASIC */
	rum_write(sc, RT2573_MAC_CSR1, 3);
	rum_write(sc, RT2573_MAC_CSR1, 0);

	if (sc->amrr_xfer != NULL) {
		usbd_destroy_xfer(sc->amrr_xfer);
		sc->amrr_xfer = NULL;
	}

	if (sc->sc_rx_pipeh != NULL) {
		usbd_abort_pipe(sc->sc_rx_pipeh);
	}
	if (sc->sc_tx_pipeh != NULL) {
		usbd_abort_pipe(sc->sc_tx_pipeh);
	}

	rum_free_rx_list(sc);
	rum_free_tx_list(sc);

	if (sc->sc_rx_pipeh != NULL) {
		usbd_close_pipe(sc->sc_rx_pipeh);
		sc->sc_rx_pipeh = NULL;
	}
	if (sc->sc_tx_pipeh != NULL) {
		usbd_close_pipe(sc->sc_tx_pipeh);
		sc->sc_tx_pipeh = NULL;
	}
}
/*
 * rum_load_microcode:
 *
 *	Copy the 8051 microcode image into NIC memory, one 32-bit word
 *	per register write, then issue the vendor MCU_CNTL/MCU_RUN
 *	request to start it.  Returns 0 on success or the usbd_status
 *	error from the control request.
 */
static int
rum_load_microcode(struct rum_softc *sc, const u_char *ucode, size_t size)
{
	usb_device_request_t req;
	uint16_t reg = RT2573_MCU_CODE_BASE;
	usbd_status error;

	/* copy firmware image into NIC */
	for (; size >= 4; reg += 4, ucode += 4, size -= 4)
		rum_write(sc, reg, UGETDW(ucode));

	/* kick the MCU: vendor request, no data stage */
	req.bmRequestType = UT_WRITE_VENDOR_DEVICE;
	req.bRequest = RT2573_MCU_CNTL;
	USETW(req.wValue, RT2573_MCU_RUN);
	USETW(req.wIndex, 0);
	USETW(req.wLength, 0);

	error = usbd_do_request(sc->sc_udev, &req, NULL);
	if (error != 0) {
		printf("%s: could not run firmware: %s\n",
		    device_xname(sc->sc_dev), usbd_errstr(error));
	}
	return error;
}

/*
 * rum_prepare_beacon:
 *
 *	Build a beacon frame for the current BSS and copy the Tx
 *	descriptor plus the frame itself into the NIC's beacon memory.
 *	Returns ENOBUFS if the beacon mbuf cannot be allocated.
 */
static int
rum_prepare_beacon(struct rum_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct rum_tx_desc desc;
	struct mbuf *m0;
	int rate;

	m0 = ieee80211_beacon_alloc(ic, ic->ic_bss, &sc->sc_bo);
	if (m0 == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "could not allocate beacon frame\n");
		return ENOBUFS;
	}

	/* send beacons at the lowest available rate */
	rate = IEEE80211_IS_CHAN_5GHZ(ic->ic_curchan) ? 12 : 2;

	rum_setup_tx_desc(sc, &desc, RT2573_TX_TIMESTAMP, RT2573_TX_HWSEQ,
	    m0->m_pkthdr.len, rate);

	/* copy the first 24 bytes of Tx descriptor into NIC memory */
	rum_write_multi(sc, RT2573_HW_BEACON_BASE0, (uint8_t *)&desc, 24);

	/* copy beacon header and payload into NIC memory */
	rum_write_multi(sc, RT2573_HW_BEACON_BASE0 + 24, mtod(m0, uint8_t *),
	    m0->m_pkthdr.len);

	m_freem(m0);

	return 0;
}

/*
 * rum_newassoc:
 *
 *	802.11 new-association hook; start the station at the lowest
 *	Tx rate and let AMRR ramp it up.
 */
static void
rum_newassoc(struct ieee80211_node *ni, int isnew)
{
	/* start with lowest Tx rate */
	ni->ni_txrate = 0;
}

/*
 * rum_amrr_start:
 *
 *	Begin adaptive multi-rate retry (AMRR) control for a node:
 *	clear the hardware statistics registers, reset the AMRR state,
 *	pick a reasonable starting rate, and arm the 1 Hz timer.
 */
static void
rum_amrr_start(struct rum_softc *sc, struct ieee80211_node *ni)
{
	int i;

	/* clear statistic registers (STA_CSR0 to STA_CSR5) */
	rum_read_multi(sc, RT2573_STA_CSR0, sc->sta, sizeof(sc->sta));

	ieee80211_amrr_node_init(&sc->amrr, &sc->amn);

	/*
	 * Set rate to some reasonable initial value: the highest
	 * negotiated rate not above 36 Mbps (rate code 72 = 36 Mb/s
	 * in 500 kb/s units).
	 */
	for (i = ni->ni_rates.rs_nrates - 1;
	    i > 0 && (ni->ni_rates.rs_rates[i] & IEEE80211_RATE_VAL) > 72;
	    i--);
	ni->ni_txrate = i;

	callout_reset(&sc->sc_amrr_ch, hz, rum_amrr_timeout, sc);
}

/*
 * rum_amrr_timeout:
 *
 *	Periodic AMRR callout.  Queues an asynchronous vendor read of
 *	the STA_CSR statistics block; rum_amrr_update() runs on
 *	completion.
 */
static void
rum_amrr_timeout(void *arg)
{
	struct rum_softc *sc = arg;
	usb_device_request_t req;

	/*
	 * Asynchronously read statistic registers (cleared by read).
	 */
	req.bmRequestType = UT_READ_VENDOR_DEVICE;
	req.bRequest = RT2573_READ_MULTI_MAC;
	USETW(req.wValue, 0);
	USETW(req.wIndex, RT2573_STA_CSR0);
	USETW(req.wLength, sizeof(sc->sta));

	usbd_setup_default_xfer(sc->amrr_xfer, sc->sc_udev, sc,
	    USBD_DEFAULT_TIMEOUT, &req, sc->sta, sizeof(sc->sta), 0,
	    rum_amrr_update);
	(void)usbd_transfer(sc->amrr_xfer);
}

/*
 * rum_amrr_update:
 *
 *	Completion handler for the statistics read.  Feeds the retry
 *	counters to ieee80211_amrr_choose() and re-arms the timer.
 *	NOTE(review): on transfer error the callout is NOT re-armed,
 *	permanently stopping rate control until the next association —
 *	this matches the message text and appears intentional.
 */
static void
rum_amrr_update(struct usbd_xfer *xfer, void *priv, usbd_status status)
{
	struct rum_softc *sc = (struct rum_softc *)priv;
	struct ifnet *ifp = sc->sc_ic.ic_ifp;

	if (status != USBD_NORMAL_COMPLETION) {
		printf("%s: could not retrieve Tx statistics - cancelling "
		    "automatic rate control\n", device_xname(sc->sc_dev));
		return;
	}

	/* count TX retry-fail as Tx errors */
	if_statadd(ifp, if_oerrors, le32toh(sc->sta[5]) >> 16);

	sc->amn.amn_retrycnt =
	    (le32toh(sc->sta[4]) >> 16) +	/* TX one-retry ok count */
	    (le32toh(sc->sta[5]) & 0xffff) +	/* TX more-retry ok count */
	    (le32toh(sc->sta[5]) >> 16);	/* TX retry-fail count */

	sc->amn.amn_txcnt =
	    sc->amn.amn_retrycnt +
	    (le32toh(sc->sta[4]) & 0xffff);	/* TX no-retry ok count */

	ieee80211_amrr_choose(&sc->amrr, sc->sc_ic.ic_bss, &sc->amn);

	callout_reset(&sc->sc_amrr_ch, hz, rum_amrr_timeout, sc);
}

/*
 * rum_activate:
 *
 *	autoconf activation hook.  Deactivation is effectively a no-op
 *	here (the interface-deactivate call is left commented out
 *	upstream); always returns 0.
 */
static int
rum_activate(device_t self, enum devact act)
{
	switch (act) {
	case DVACT_DEACTIVATE:
		/*if_deactivate(&sc->sc_ic.ic_if);*/
		return 0;
	default:
		return 0;
	}
}

MODULE(MODULE_CLASS_DRIVER, if_rum, NULL);

#ifdef _MODULE
#include "ioconf.c"
#endif

/*
 * if_rum_modcmd:
 *
 *	Module command handler: attach/detach the driver's autoconf
 *	glue when built as a loadable module; no-op when built in.
 */
static int
if_rum_modcmd(modcmd_t cmd, void *aux)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_init_component(cfdriver_ioconf_rum,
		    cfattach_ioconf_rum, cfdata_ioconf_rum);
#endif
		return error;
	case MODULE_CMD_FINI:
#ifdef _MODULE
		error = config_fini_component(cfdriver_ioconf_rum,
		    cfattach_ioconf_rum, cfdata_ioconf_rum);
#endif
		return error;
	default:
		return ENOTTY;
	}
}
| 2 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 | /* $NetBSD: pseye.c,v 1.29 2022/03/03 06:23:25 riastradh Exp $ */ /*- * Copyright (c) 2008 Jared D. McNeill <jmcneill@invisible.ca> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Sony PlayStation Eye Driver * * The only documentation we have for this part is based on a series * of forum postings by Jim Paris on ps2dev.org. Many thanks for * figuring this one out. 
* * URL: http://forums.ps2dev.org/viewtopic.php?t=9238 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: pseye.c,v 1.29 2022/03/03 06:23:25 riastradh Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/device.h> #include <sys/fcntl.h> #include <sys/conf.h> #include <sys/poll.h> #include <sys/bus.h> #include <sys/mutex.h> #include <sys/kthread.h> #include <sys/condvar.h> #include <sys/module.h> #include <dev/usb/usb.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdivar.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/uvideoreg.h> #include <dev/video_if.h> #define PRI_PSEYE PRI_BIO /* Bulk-in buffer length -- make room for payload + UVC headers */ #define PSEYE_BULKIN_BUFLEN ((640 * 480 * 2) + 4096) #define PSEYE_BULKIN_BLKLEN 2048 /* SCCB/sensor interface */ #define PSEYE_SCCB_ADDRESS 0xf1 #define PSEYE_SCCB_SUBADDR 0xf2 #define PSEYE_SCCB_WRITE 0xf3 #define PSEYE_SCCB_READ 0xf4 #define PSEYE_SCCB_OPERATION 0xf5 #define PSEYE_SCCB_STATUS 0xf6 #define PSEYE_SCCB_OP_WRITE_3 0x37 #define PSEYE_SCCB_OP_WRITE_2 0x33 #define PSEYE_SCCB_OP_READ_2 0xf9 struct pseye_softc { device_t sc_dev; struct usbd_device * sc_udev; struct usbd_interface * sc_iface; device_t sc_videodev; char sc_running; kcondvar_t sc_cv; kmutex_t sc_mtx; struct usbd_pipe * sc_bulkin_pipe; struct usbd_xfer * sc_bulkin_xfer; int sc_bulkin; uint8_t *sc_bulkin_buffer; int sc_bulkin_bufferlen; char sc_dying; char sc_businfo[32]; }; static int pseye_match(device_t, cfdata_t, void *); static void pseye_attach(device_t, device_t, void *); static int pseye_detach(device_t, int); static void pseye_childdet(device_t, device_t); static int pseye_activate(device_t, enum devact); static void pseye_init(struct pseye_softc *); static void pseye_sccb_init(struct pseye_softc *); static void pseye_stop(struct pseye_softc *); static void pseye_start(struct pseye_softc *); static void pseye_led(struct pseye_softc *, bool); static uint8_t pseye_getreg(struct 
pseye_softc *, uint16_t); static void pseye_setreg(struct pseye_softc *, uint16_t, uint8_t); static void pseye_setregv(struct pseye_softc *, uint16_t, uint8_t); static void pseye_sccb_setreg(struct pseye_softc *, uint8_t, uint8_t); static bool pseye_sccb_status(struct pseye_softc *); static int pseye_init_pipes(struct pseye_softc *); static int pseye_close_pipes(struct pseye_softc *); static usbd_status pseye_get_frame(struct pseye_softc *, uint32_t *); static void pseye_submit_payload(struct pseye_softc *, uint32_t); /* video(9) API */ static int pseye_open(void *, int); static void pseye_close(void *); static const char * pseye_get_devname(void *); static const char * pseye_get_businfo(void *); static int pseye_enum_format(void *, uint32_t, struct video_format *); static int pseye_get_format(void *, struct video_format *); static int pseye_set_format(void *, struct video_format *); static int pseye_try_format(void *, struct video_format *); static int pseye_get_framerate(void *, struct video_fract *); static int pseye_set_framerate(void *, struct video_fract *); static int pseye_start_transfer(void *); static int pseye_stop_transfer(void *); CFATTACH_DECL2_NEW(pseye, sizeof(struct pseye_softc), pseye_match, pseye_attach, pseye_detach, pseye_activate, NULL, pseye_childdet); static const struct video_hw_if pseye_hw_if = { .open = pseye_open, .close = pseye_close, .get_devname = pseye_get_devname, .get_businfo = pseye_get_businfo, .enum_format = pseye_enum_format, .get_format = pseye_get_format, .set_format = pseye_set_format, .try_format = pseye_try_format, .get_framerate = pseye_get_framerate, .set_framerate = pseye_set_framerate, .start_transfer = pseye_start_transfer, .stop_transfer = pseye_stop_transfer, .control_iter_init = NULL, .control_iter_next = NULL, .get_control_desc_group = NULL, .get_control_group = NULL, .set_control_group = NULL, }; static int pseye_match(device_t parent, cfdata_t match, void *opaque) { struct usbif_attach_arg *uiaa = opaque; if 
(uiaa->uiaa_class != UICLASS_VENDOR) return UMATCH_NONE; if (uiaa->uiaa_vendor == USB_VENDOR_OMNIVISION2) { switch (uiaa->uiaa_product) { case USB_PRODUCT_OMNIVISION2_PSEYE: if (uiaa->uiaa_ifaceno != 0) return UMATCH_NONE; return UMATCH_VENDOR_PRODUCT; } } return UMATCH_NONE; } static void pseye_attach(device_t parent, device_t self, void *opaque) { struct pseye_softc *sc = device_private(self); struct usbif_attach_arg *uiaa = opaque; struct usbd_device *dev = uiaa->uiaa_device; usb_interface_descriptor_t *id = NULL; usb_endpoint_descriptor_t *ed = NULL, *ed_bulkin = NULL; char *devinfop; int i; aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(dev, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); sc->sc_dev = self; sc->sc_udev = dev; sc->sc_iface = uiaa->uiaa_iface; snprintf(sc->sc_businfo, sizeof(sc->sc_businfo), "usb:%08x", sc->sc_udev->ud_cookie.cookie); sc->sc_bulkin_bufferlen = PSEYE_BULKIN_BUFLEN; sc->sc_dying = sc->sc_running = 0; cv_init(&sc->sc_cv, device_xname(self)); mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE); id = usbd_get_interface_descriptor(sc->sc_iface); if (id == NULL) { aprint_error_dev(self, "failed to get interface descriptor\n"); sc->sc_dying = 1; return; } for (i = 0; i < id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(sc->sc_iface, i); if (ed == NULL) { aprint_error_dev(self, "couldn't get ep %d\n", i); sc->sc_dying = 1; return; } if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN && UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) { ed_bulkin = ed; break; } } if (ed_bulkin == NULL) { aprint_error_dev(self, "no bulk-in endpoint found\n"); sc->sc_dying = 1; return; } sc->sc_bulkin = ed_bulkin->bEndpointAddress; int error = pseye_init_pipes(sc); if (error) { aprint_error_dev(self, "couldn't open pipes\n"); return; } error = usbd_create_xfer(sc->sc_bulkin_pipe, sc->sc_bulkin_bufferlen, 0, 0, &sc->sc_bulkin_xfer); if (error) { aprint_error_dev(self, "couldn't create 
transfer\n"); pseye_close_pipes(sc); return; } sc->sc_bulkin_buffer = usbd_get_buffer(sc->sc_bulkin_xfer); pseye_init(sc); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); sc->sc_videodev = video_attach_mi(&pseye_hw_if, self, sc); if (sc->sc_videodev == NULL) { aprint_error_dev(self, "couldn't attach video layer\n"); sc->sc_dying = 1; return; } usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, self); } static int pseye_detach(device_t self, int flags) { struct pseye_softc *sc = device_private(self); sc->sc_dying = 1; pmf_device_deregister(self); if (sc->sc_videodev != NULL) { config_detach(sc->sc_videodev, flags); sc->sc_videodev = NULL; } if (sc->sc_bulkin_pipe != NULL) { usbd_abort_pipe(sc->sc_bulkin_pipe); } if (sc->sc_bulkin_xfer != NULL) { usbd_destroy_xfer(sc->sc_bulkin_xfer); sc->sc_bulkin_xfer = NULL; } if (sc->sc_bulkin_pipe != NULL) { usbd_close_pipe(sc->sc_bulkin_pipe); sc->sc_bulkin_pipe = NULL; } mutex_enter(&sc->sc_mtx); if (sc->sc_running) { sc->sc_running = 0; cv_wait_sig(&sc->sc_cv, &sc->sc_mtx); } mutex_exit(&sc->sc_mtx); cv_destroy(&sc->sc_cv); mutex_destroy(&sc->sc_mtx); usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev); return 0; } int pseye_activate(device_t self, enum devact act) { struct pseye_softc *sc = device_private(self); switch (act) { case DVACT_DEACTIVATE: sc->sc_dying = 1; return 0; default: return EOPNOTSUPP; } } static void pseye_childdet(device_t self, device_t child) { struct pseye_softc *sc = device_private(self); if (sc->sc_videodev) { KASSERT(sc->sc_videodev == child); sc->sc_videodev = NULL; } } /* * Device access */ static void pseye_init(struct pseye_softc *sc) { pseye_sccb_init(sc); pseye_setregv(sc, 0xc2, 0x0c); pseye_setregv(sc, 0x88, 0xf8); pseye_setregv(sc, 0xc3, 0x69); pseye_setregv(sc, 0x89, 0xff); pseye_setregv(sc, 0x76, 0x03); pseye_setregv(sc, 0x92, 0x01); pseye_setregv(sc, 0x93, 0x18); pseye_setregv(sc, 0x94, 0x10); 
pseye_setregv(sc, 0x95, 0x10); pseye_setregv(sc, 0xe2, 0x00); pseye_setregv(sc, 0xe7, 0x3e); pseye_setregv(sc, 0x96, 0x00); pseye_setreg(sc, 0x97, 0x20); pseye_setreg(sc, 0x97, 0x20); pseye_setreg(sc, 0x97, 0x20); pseye_setreg(sc, 0x97, 0x0a); pseye_setreg(sc, 0x97, 0x3f); pseye_setreg(sc, 0x97, 0x4a); pseye_setreg(sc, 0x97, 0x20); pseye_setreg(sc, 0x97, 0x15); pseye_setreg(sc, 0x97, 0x0b); pseye_setregv(sc, 0x8e, 0x40); pseye_setregv(sc, 0x1f, 0x81); pseye_setregv(sc, 0x34, 0x05); pseye_setregv(sc, 0xe3, 0x04); pseye_setregv(sc, 0x88, 0x00); pseye_setregv(sc, 0x89, 0x00); pseye_setregv(sc, 0x76, 0x00); pseye_setregv(sc, 0xe7, 0x2e); pseye_setregv(sc, 0x31, 0xf9); pseye_setregv(sc, 0x25, 0x42); pseye_setregv(sc, 0x21, 0xf0); pseye_setreg(sc, 0x1c, 0x00); pseye_setreg(sc, 0x1d, 0x40); pseye_setreg(sc, 0x1d, 0x02); /* payload size 0x0200 * 4 == 2048 */ pseye_setreg(sc, 0x1d, 0x00); pseye_setreg(sc, 0x1d, 0x02); /* frame size 0x025800 * 4 == 614400 */ pseye_setreg(sc, 0x1d, 0x58); pseye_setreg(sc, 0x1d, 0x00); pseye_setreg(sc, 0x1c, 0x0a); pseye_setreg(sc, 0x1d, 0x08); /* enable UVC header */ pseye_setreg(sc, 0x1d, 0x0e); pseye_setregv(sc, 0x8d, 0x1c); pseye_setregv(sc, 0x8e, 0x80); pseye_setregv(sc, 0xe5, 0x04); pseye_sccb_setreg(sc, 0x12, 0x80); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x3d, 0x03); pseye_sccb_setreg(sc, 0x17, 0x26); pseye_sccb_setreg(sc, 0x18, 0xa0); pseye_sccb_setreg(sc, 0x19, 0x07); pseye_sccb_setreg(sc, 0x1a, 0xf0); pseye_sccb_setreg(sc, 0x32, 0x00); pseye_sccb_setreg(sc, 0x29, 0xa0); pseye_sccb_setreg(sc, 0x2c, 0xf0); pseye_sccb_setreg(sc, 0x65, 0x20); pseye_sccb_setreg(sc, 0x11, 0x01); 
pseye_sccb_setreg(sc, 0x42, 0x7f); pseye_sccb_setreg(sc, 0x63, 0xe0); pseye_sccb_setreg(sc, 0x64, 0xff); pseye_sccb_setreg(sc, 0x66, 0x00); pseye_sccb_setreg(sc, 0x13, 0xf0); pseye_sccb_setreg(sc, 0x0d, 0x41); pseye_sccb_setreg(sc, 0x0f, 0xc5); pseye_sccb_setreg(sc, 0x14, 0x11); pseye_sccb_setreg(sc, 0x22, 0x7f); pseye_sccb_setreg(sc, 0x23, 0x03); pseye_sccb_setreg(sc, 0x24, 0x40); pseye_sccb_setreg(sc, 0x25, 0x30); pseye_sccb_setreg(sc, 0x26, 0xa1); pseye_sccb_setreg(sc, 0x2a, 0x00); pseye_sccb_setreg(sc, 0x2b, 0x00); pseye_sccb_setreg(sc, 0x6b, 0xaa); pseye_sccb_setreg(sc, 0x13, 0xff); pseye_sccb_setreg(sc, 0x90, 0x05); pseye_sccb_setreg(sc, 0x91, 0x01); pseye_sccb_setreg(sc, 0x92, 0x03); pseye_sccb_setreg(sc, 0x93, 0x00); pseye_sccb_setreg(sc, 0x94, 0x60); pseye_sccb_setreg(sc, 0x95, 0x3c); pseye_sccb_setreg(sc, 0x96, 0x24); pseye_sccb_setreg(sc, 0x97, 0x1e); pseye_sccb_setreg(sc, 0x98, 0x62); pseye_sccb_setreg(sc, 0x99, 0x80); pseye_sccb_setreg(sc, 0x9a, 0x1e); pseye_sccb_setreg(sc, 0x9b, 0x08); pseye_sccb_setreg(sc, 0x9c, 0x20); pseye_sccb_setreg(sc, 0x9e, 0x81); pseye_sccb_setreg(sc, 0xa6, 0x04); pseye_sccb_setreg(sc, 0x7e, 0x0c); pseye_sccb_setreg(sc, 0x7f, 0x16); pseye_sccb_setreg(sc, 0x80, 0x2a); pseye_sccb_setreg(sc, 0x81, 0x4e); pseye_sccb_setreg(sc, 0x82, 0x61); pseye_sccb_setreg(sc, 0x83, 0x6f); pseye_sccb_setreg(sc, 0x84, 0x7b); pseye_sccb_setreg(sc, 0x85, 0x86); pseye_sccb_setreg(sc, 0x86, 0x8e); pseye_sccb_setreg(sc, 0x87, 0x97); pseye_sccb_setreg(sc, 0x88, 0xa4); pseye_sccb_setreg(sc, 0x89, 0xaf); pseye_sccb_setreg(sc, 0x8a, 0xc5); pseye_sccb_setreg(sc, 0x8b, 0xd7); pseye_sccb_setreg(sc, 0x8c, 0xe8); pseye_sccb_setreg(sc, 0x8d, 0x20); pseye_sccb_setreg(sc, 0x0c, 0x90); pseye_setregv(sc, 0xc0, 0x50); pseye_setregv(sc, 0xc1, 0x3c); pseye_setregv(sc, 0xc2, 0x0c); pseye_sccb_setreg(sc, 0x2b, 0x00); pseye_sccb_setreg(sc, 0x22, 0x7f); pseye_sccb_setreg(sc, 0x23, 0x03); pseye_sccb_setreg(sc, 0x11, 0x01); pseye_sccb_setreg(sc, 0x0c, 0xd0); 
pseye_sccb_setreg(sc, 0x64, 0xff); pseye_sccb_setreg(sc, 0x0d, 0x41); pseye_sccb_setreg(sc, 0x14, 0x41); pseye_sccb_setreg(sc, 0x0e, 0xcd); pseye_sccb_setreg(sc, 0xac, 0xbf); pseye_sccb_setreg(sc, 0x8e, 0x00); pseye_sccb_setreg(sc, 0x0c, 0xd0); pseye_stop(sc); } static void pseye_sccb_init(struct pseye_softc *sc) { pseye_setregv(sc, 0xe7, 0x3a); pseye_setreg(sc, PSEYE_SCCB_ADDRESS, 0x60); pseye_setreg(sc, PSEYE_SCCB_ADDRESS, 0x60); pseye_setreg(sc, PSEYE_SCCB_ADDRESS, 0x60); pseye_setreg(sc, PSEYE_SCCB_ADDRESS, 0x42); } static void pseye_stop(struct pseye_softc *sc) { pseye_led(sc, false); pseye_setreg(sc, 0xe0, 0x09); } static void pseye_start(struct pseye_softc *sc) { pseye_led(sc, true); pseye_setreg(sc, 0xe0, 0x00); } static void pseye_led(struct pseye_softc *sc, bool enabled) { uint8_t val; val = pseye_getreg(sc, 0x21); pseye_setreg(sc, 0x21, val | 0x80); val = pseye_getreg(sc, 0x23); if (enabled == true) val |= 0x80; else val &= ~0x80; pseye_setreg(sc, 0x23, val); } static uint8_t pseye_getreg(struct pseye_softc *sc, uint16_t reg) { usb_device_request_t req; usbd_status err; uint8_t buf; req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = 1; USETW(req.wValue, 0x0000); USETW(req.wIndex, reg); USETW(req.wLength, 1); err = usbd_do_request(sc->sc_udev, &req, &buf); if (err) { aprint_error_dev(sc->sc_dev, "couldn't read reg 0x%04x: %s\n", reg, usbd_errstr(err)); return 0xff; } return buf; } static void pseye_setreg(struct pseye_softc *sc, uint16_t reg, uint8_t val) { usb_device_request_t req; usbd_status err; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = 1; USETW(req.wValue, 0x0000); USETW(req.wIndex, reg); USETW(req.wLength, 1); err = usbd_do_request(sc->sc_udev, &req, &val); if (err) aprint_error_dev(sc->sc_dev, "couldn't write reg 0x%04x: %s\n", reg, usbd_errstr(err)); } static void pseye_setregv(struct pseye_softc *sc, uint16_t reg, uint8_t val) { pseye_setreg(sc, reg, val); if (pseye_getreg(sc, reg) != val) aprint_error_dev(sc->sc_dev, 
"couldn't verify reg 0x%04x\n", reg); } static void pseye_sccb_setreg(struct pseye_softc *sc, uint8_t reg, uint8_t val) { pseye_setreg(sc, PSEYE_SCCB_SUBADDR, reg); pseye_setreg(sc, PSEYE_SCCB_WRITE, val); pseye_setreg(sc, PSEYE_SCCB_OPERATION, PSEYE_SCCB_OP_WRITE_3); if (pseye_sccb_status(sc) == false) aprint_error_dev(sc->sc_dev, "couldn't write sccb reg 0x%04x\n", reg); } static bool pseye_sccb_status(struct pseye_softc *sc) { int retry = 5; uint8_t reg; while (retry-- >= 0) { reg = pseye_getreg(sc, PSEYE_SCCB_STATUS); if (reg == 0x00) return true; else if (reg == 0x04) return false; } aprint_error_dev(sc->sc_dev, "timeout reading sccb status\n"); return false; } static usbd_status pseye_get_frame(struct pseye_softc *sc, uint32_t *plen) { if (sc->sc_dying) return USBD_IOERROR; return usbd_bulk_transfer(sc->sc_bulkin_xfer, sc->sc_bulkin_pipe, USBD_SHORT_XFER_OK, 1000, sc->sc_bulkin_buffer, plen); } static int pseye_init_pipes(struct pseye_softc *sc) { usbd_status err; if (sc->sc_dying) return EIO; err = usbd_open_pipe(sc->sc_iface, sc->sc_bulkin, 0, &sc->sc_bulkin_pipe); if (err) { aprint_error_dev(sc->sc_dev, "couldn't open bulk-in pipe: %s\n", usbd_errstr(err)); return ENOMEM; } return 0; } int pseye_close_pipes(struct pseye_softc *sc) { if (sc->sc_bulkin_pipe != NULL) { usbd_abort_pipe(sc->sc_bulkin_pipe); usbd_close_pipe(sc->sc_bulkin_pipe); sc->sc_bulkin_pipe = NULL; } return 0; } static void pseye_submit_payload(struct pseye_softc *sc, uint32_t tlen) { struct video_payload payload; uvideo_payload_header_t *uvchdr; uint8_t *buf = sc->sc_bulkin_buffer; uint32_t len; uint32_t brem = (640*480*2); while (brem > 0 && tlen > 0) { len = uimin(tlen, PSEYE_BULKIN_BLKLEN); if (len < UVIDEO_PAYLOAD_HEADER_SIZE) { printf("pseye_submit_payload: len=%u\n", len); return; } uvchdr = (uvideo_payload_header_t *)buf; if (uvchdr->bHeaderLength != UVIDEO_PAYLOAD_HEADER_SIZE) goto next; if (uvchdr->bHeaderLength == len && !(uvchdr->bmHeaderInfo & UV_END_OF_FRAME)) goto next; if 
(uvchdr->bmHeaderInfo & UV_ERROR) return; if ((uvchdr->bmHeaderInfo & UV_PRES_TIME) == 0) goto next; payload.data = buf + uvchdr->bHeaderLength; payload.size = uimin(brem, len - uvchdr->bHeaderLength); payload.frameno = UGETDW(&buf[2]); payload.end_of_frame = uvchdr->bmHeaderInfo & UV_END_OF_FRAME; video_submit_payload(sc->sc_videodev, &payload); next: tlen -= len; buf += len; brem -= payload.size; } } static void pseye_transfer_thread(void *opaque) { struct pseye_softc *sc = opaque; uint32_t len; int error; while (sc->sc_running) { len = sc->sc_bulkin_bufferlen; error = pseye_get_frame(sc, &len); if (error == USBD_NORMAL_COMPLETION) pseye_submit_payload(sc, len); } mutex_enter(&sc->sc_mtx); cv_broadcast(&sc->sc_cv); mutex_exit(&sc->sc_mtx); kthread_exit(0); } /* video(9) API implementations */ static int pseye_open(void *opaque, int flags) { struct pseye_softc *sc = opaque; if (sc->sc_dying) return EIO; pseye_start(sc); return 0; } static void pseye_close(void *opaque) { struct pseye_softc *sc = opaque; pseye_stop(sc); } static const char * pseye_get_devname(void *opaque) { return "PlayStation Eye"; } static const char * pseye_get_businfo(void *opaque) { struct pseye_softc *sc = opaque; return sc->sc_businfo; } static int pseye_enum_format(void *opaque, uint32_t index, struct video_format *format) { if (index != 0) return EINVAL; return pseye_get_format(opaque, format); } static int pseye_get_format(void *opaque, struct video_format *format) { format->pixel_format = VIDEO_FORMAT_YUY2; /* XXX actually YUYV */ format->width = 640; format->height = 480; format->aspect_x = 4; format->aspect_y = 3; format->sample_size = format->width * format->height * 2; format->stride = format->width * 2; format->color.primaries = VIDEO_COLOR_PRIMARIES_UNSPECIFIED; format->color.gamma_function = VIDEO_GAMMA_FUNCTION_UNSPECIFIED; format->color.matrix_coeff = VIDEO_MATRIX_COEFF_UNSPECIFIED; format->interlace_flags = VIDEO_INTERLACE_ON; format->priv = 0; return 0; } static int 
pseye_set_format(void *opaque, struct video_format *format) { #if notyet if (format->pixel_format != VIDEO_FORMAT_YUYV) return EINVAL; if (format->width != 640 || format->height != 480) return EINVAL; #endif /* XXX */ return pseye_get_format(opaque, format); } static int pseye_try_format(void *opaque, struct video_format *format) { return pseye_get_format(opaque, format); } static int pseye_get_framerate(void *opaque, struct video_fract *fract) { /* Driver only supports 60fps */ fract->numerator = 1; fract->denominator = 60; return 0; } static int pseye_set_framerate(void *opaque, struct video_fract *fract) { /* Driver only supports one framerate. Return actual rate. */ return pseye_get_framerate(opaque, fract); } static int pseye_start_transfer(void *opaque) { struct pseye_softc *sc = opaque; int err = 0; mutex_enter(&sc->sc_mtx); if (sc->sc_running == 0) { sc->sc_running = 1; err = kthread_create(PRI_PSEYE, 0, NULL, pseye_transfer_thread, opaque, NULL, "%s", device_xname(sc->sc_dev)); } else aprint_error_dev(sc->sc_dev, "transfer already in progress\n"); mutex_exit(&sc->sc_mtx); return err; } static int pseye_stop_transfer(void *opaque) { struct pseye_softc *sc = opaque; mutex_enter(&sc->sc_mtx); if (sc->sc_running) { sc->sc_running = 0; cv_wait_sig(&sc->sc_cv, &sc->sc_mtx); } mutex_exit(&sc->sc_mtx); return 0; } MODULE(MODULE_CLASS_DRIVER, pseye, NULL); #ifdef _MODULE #include "ioconf.c" #endif static int pseye_modcmd(modcmd_t cmd, void *opaque) { switch (cmd) { case MODULE_CMD_INIT: #ifdef _MODULE return config_init_component(cfdriver_ioconf_pseye, cfattach_ioconf_pseye, cfdata_ioconf_pseye); #else return 0; #endif case MODULE_CMD_FINI: #ifdef _MODULE return config_fini_component(cfdriver_ioconf_pseye, cfattach_ioconf_pseye, cfdata_ioconf_pseye); #else return 0; #endif default: return ENOTTY; } } |
| 2 11 514 515 1 1 1 1 5 5 2 2 5 1 1 8 2 8 8 2 8 8 7 2 2 2 2 30 8 8 23 23 23 12 11 19 30 12 12 7 7 12 2 7 3 2 3 2 2 3 19 19 19 1 1 19 19 8 6 6 6 5 5 5 5 3 5 5 13 13 12 10 9 9 8 7 2 3 3 2 2 1 1 1 1 1 3 1 8 8 9 2 5 2 2 1 5 2 1 1 1 2 5 4 2 2 2 6 6 4 4 3 3 4 13 12 8 8 7 5 3 4 1 3 3 7 4 10 8 5 10 4 6 5 4 4 6 5 5 5 5 3 2 2 2 2 5 2 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 | /* $NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $ */ /*- * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Mindaugas Rasiukevicius and Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Implementation of POSIX semaphore. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $"); #include <sys/param.h> #include <sys/kernel.h> #include <sys/atomic.h> #include <sys/proc.h> #include <sys/lwp.h> #include <sys/ksem.h> #include <sys/syscall.h> #include <sys/stat.h> #include <sys/kmem.h> #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> #include <sys/kauth.h> #include <sys/module.h> #include <sys/mount.h> #include <sys/mutex.h> #include <sys/rwlock.h> #include <sys/semaphore.h> #include <sys/syscall.h> #include <sys/syscallargs.h> #include <sys/syscallvar.h> #include <sys/sysctl.h> #include <sys/uidinfo.h> #include <sys/cprng.h> MODULE(MODULE_CLASS_MISC, ksem, NULL); #define SEM_MAX_NAMELEN NAME_MAX #define KS_UNLINKED 0x01 static kmutex_t ksem_lock __cacheline_aligned; static LIST_HEAD(,ksem) ksem_head __cacheline_aligned; static u_int nsems_total __cacheline_aligned; static u_int nsems __cacheline_aligned; static krwlock_t ksem_pshared_lock __cacheline_aligned; static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned; static u_long ksem_pshared_hashmask __read_mostly; #define KSEM_PSHARED_HASHSIZE 32 static kauth_listener_t ksem_listener; static int ksem_sysinit(void); static int ksem_sysfini(bool); static int ksem_modcmd(modcmd_t, void *); static void ksem_release(ksem_t *, int); static int ksem_close_fop(file_t *); static int ksem_stat_fop(file_t *, struct stat *); static int ksem_read_fop(file_t *, off_t *, struct uio *, kauth_cred_t, int); static const struct fileops semops = { .fo_name = "sem", .fo_read = ksem_read_fop, .fo_write = fbadop_write, .fo_ioctl = fbadop_ioctl, .fo_fcntl = fnullop_fcntl, .fo_poll = fnullop_poll, .fo_stat = ksem_stat_fop, .fo_close = ksem_close_fop, .fo_kqfilter = fnullop_kqfilter, .fo_restart = fnullop_restart, }; static const struct syscall_package ksem_syscalls[] = { { SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init }, { SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open }, { 
SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink }, { SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close }, { SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post }, { SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait }, { SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait }, { SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue }, { SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy }, { SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait }, { 0, 0, NULL }, }; struct sysctllog *ksem_clog; int ksem_max = KSEM_MAX; static int name_copyin(const char *uname, char **name) { *name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP); int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL); if (error) kmem_free(*name, SEM_MAX_NAMELEN); return error; } static void name_destroy(char **name) { if (!*name) return; kmem_free(*name, SEM_MAX_NAMELEN); *name = NULL; } static int ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, void *arg0, void *arg1, void *arg2, void *arg3) { ksem_t *ks; mode_t mode; if (action != KAUTH_SYSTEM_SEMAPHORE) return KAUTH_RESULT_DEFER; ks = arg1; mode = ks->ks_mode; if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) || (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) || (mode & S_IWOTH) != 0) return KAUTH_RESULT_ALLOW; return KAUTH_RESULT_DEFER; } static int ksem_sysinit(void) { int error; const struct sysctlnode *rnode; mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE); LIST_INIT(&ksem_head); nsems_total = 0; nsems = 0; rw_init(&ksem_pshared_lock); ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST, true, &ksem_pshared_hashmask); KASSERT(ksem_pshared_hashtab != NULL); ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, ksem_listener_cb, NULL); /* Define module-specific sysctl tree */ ksem_clog = NULL; sysctl_createv(&ksem_clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "posix", SYSCTL_DESCR("POSIX options"), NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL); 
sysctl_createv(&ksem_clog, 0, &rnode, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "semmax", SYSCTL_DESCR("Maximal number of semaphores"), NULL, 0, &ksem_max, 0, CTL_CREATE, CTL_EOL); sysctl_createv(&ksem_clog, 0, &rnode, NULL, CTLFLAG_PERMANENT | CTLFLAG_READONLY, CTLTYPE_INT, "semcnt", SYSCTL_DESCR("Current number of semaphores"), NULL, 0, &nsems, 0, CTL_CREATE, CTL_EOL); error = syscall_establish(NULL, ksem_syscalls); if (error) { (void)ksem_sysfini(false); } return error; } static int ksem_sysfini(bool interface) { int error; if (interface) { error = syscall_disestablish(NULL, ksem_syscalls); if (error != 0) { return error; } /* * Make sure that no semaphores are in use. Note: semops * must be unused at this point. */ if (nsems_total) { error = syscall_establish(NULL, ksem_syscalls); KASSERT(error == 0); return EBUSY; } } kauth_unlisten_scope(ksem_listener); hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask); rw_destroy(&ksem_pshared_lock); mutex_destroy(&ksem_lock); sysctl_teardown(&ksem_clog); return 0; } static int ksem_modcmd(modcmd_t cmd, void *arg) { switch (cmd) { case MODULE_CMD_INIT: return ksem_sysinit(); case MODULE_CMD_FINI: return ksem_sysfini(true); default: return ENOTTY; } } static ksem_t * ksem_lookup(const char *name) { ksem_t *ks; KASSERT(mutex_owned(&ksem_lock)); LIST_FOREACH(ks, &ksem_head, ks_entry) { if (strcmp(ks->ks_name, name) == 0) { mutex_enter(&ks->ks_lock); return ks; } } return NULL; } static int ksem_perm(lwp_t *l, ksem_t *ks) { kauth_cred_t uc = l->l_cred; KASSERT(mutex_owned(&ks->ks_lock)); if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0) return EACCES; return 0; } /* * Bits 1..23 are random, just pluck a few of those and assume the * distribution is going to be pretty good. 
*/
#define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)

/*
 * ksem_remove_pshared: unhook a pshared semaphore from the global
 * hash table.  Takes the table's writer lock.
 */
static void
ksem_remove_pshared(ksem_t *ksem)
{
	rw_enter(&ksem_pshared_lock, RW_WRITER);
	LIST_REMOVE(ksem, ks_entry);
	rw_exit(&ksem_pshared_lock);
}

/*
 * ksem_lookup_pshared_locked: find a pshared semaphore by global ID.
 *
 * => ksem_pshared_lock must be held (reader or writer).
 * => on success the ksem_t is returned locked with an extra reference.
 * => entries whose ks_pshared_proc is NULL are dead and skipped.
 */
static ksem_t *
ksem_lookup_pshared_locked(intptr_t id)
{
	u_long bucket = KSEM_PSHARED_HASH(id);
	ksem_t *ksem = NULL;

	/* ksem_t is locked and referenced upon return. */
	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
		if (ksem->ks_pshared_id == id) {
			mutex_enter(&ksem->ks_lock);
			if (ksem->ks_pshared_proc == NULL) {
				/*
				 * This entry is dead, and in the process
				 * of being torn down; skip it.
				 */
				mutex_exit(&ksem->ks_lock);
				continue;
			}
			ksem->ks_ref++;
			KASSERT(ksem->ks_ref != 0);
			return ksem;
		}
	}
	return NULL;
}

/* Lookup wrapper that takes the table lock around the search. */
static ksem_t *
ksem_lookup_pshared(intptr_t id)
{
	rw_enter(&ksem_pshared_lock, RW_READER);
	ksem_t *ksem = ksem_lookup_pshared_locked(id);
	rw_exit(&ksem_pshared_lock);
	return ksem;
}

/*
 * ksem_alloc_pshared_id: assign a new globally-unique pshared ID to
 * the semaphore and insert it into the hash table.  Retries on the
 * (unlikely) event of a random-ID collision.
 */
static void
ksem_alloc_pshared_id(ksem_t *ksem)
{
	ksem_t *ksem0;
	uint32_t try;

	KASSERT(ksem->ks_pshared_proc != NULL);

	rw_enter(&ksem_pshared_lock, RW_WRITER);
	for (;;) {
		/* Random candidate carrying the pshared marker bits. */
		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
		    KSEM_PSHARED_MARKER;
		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
			/* Got it! */
			break;
		}
		/* Collision: drop the lookup's lock+reference and retry. */
		ksem_release(ksem0, -1);
	}
	ksem->ks_pshared_id = try;
	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
	rw_exit(&ksem_pshared_lock);
}

/*
 * ksem_get: get the semaphore from the descriptor.
 *
 * => locks the semaphore, if found, and holds an extra reference.
 * => holds a reference on the file descriptor.
 */
static int
ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
{
	ksem_t *ks;
	int fd;

	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
		/*
		 * ksem_lookup_pshared() returns the ksem_t *
		 * locked and referenced.
		 */
		ks = ksem_lookup_pshared(id);
		if (ks == NULL)
			return EINVAL;
		KASSERT(ks->ks_pshared_id == id);
		KASSERT(ks->ks_pshared_proc != NULL);
		fd = -1;	/* no file descriptor to put later */
	} else if (id <= INT_MAX) {
		fd = (int)id;
		file_t *fp = fd_getfile(fd);
		if (__predict_false(fp == NULL))
			return EINVAL;
		if (__predict_false(fp->f_type != DTYPE_SEM)) {
			fd_putfile(fd);
			return EINVAL;
		}
		ks = fp->f_ksem;
		mutex_enter(&ks->ks_lock);
		ks->ks_ref++;
	} else {
		return EINVAL;
	}
	*ksret = ks;
	*fdp = fd;
	return 0;
}

/*
 * ksem_create: allocate and setup a new semaphore structure.
 *
 * => name may be NULL for an anonymous semaphore; otherwise it must
 *    be "/..." with no further '/' and is copied into kmem.
 * => returns the new semaphore with one reference, not yet on any list.
 */
static int
ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
{
	ksem_t *ks;
	kauth_cred_t uc;
	char *kname;
	size_t len;

	/* Pre-check for the limit. */
	if (nsems >= ksem_max) {
		return ENFILE;
	}

	if (val > SEM_VALUE_MAX) {
		return EINVAL;
	}

	if (name != NULL) {
		len = strlen(name);
		if (len > SEM_MAX_NAMELEN) {
			return ENAMETOOLONG;
		}
		/* Name must start with a '/' but not contain one. */
		if (*name != '/' || len < 2 ||
		    strchr(name + 1, '/') != NULL) {
			return EINVAL;
		}
		kname = kmem_alloc(++len, KM_SLEEP);
		strlcpy(kname, name, len);
	} else {
		kname = NULL;
		len = 0;
	}

	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&ks->ks_cv, "psem");
	ks->ks_name = kname;
	ks->ks_namelen = len;
	ks->ks_mode = mode;
	ks->ks_value = val;
	ks->ks_ref = 1;

	uc = l->l_cred;
	ks->ks_uid = kauth_cred_geteuid(uc);
	ks->ks_gid = kauth_cred_getegid(uc);
	chgsemcnt(ks->ks_uid, 1);
	atomic_inc_uint(&nsems_total);

	*ksret = ks;
	return 0;
}

/*
 * ksem_free: destroy a semaphore whose last reference is gone.
 * Unhooks a dead pshared entry from the hash and releases the name.
 */
static void
ksem_free(ksem_t *ks)
{

	KASSERT(!cv_has_waiters(&ks->ks_cv));

	chgsemcnt(ks->ks_uid, -1);
	atomic_dec_uint(&nsems_total);

	if (ks->ks_pshared_id) {
		KASSERT(ks->ks_pshared_proc == NULL);
		ksem_remove_pshared(ks);
	}
	if (ks->ks_name) {
		KASSERT(ks->ks_namelen > 0);
		kmem_free(ks->ks_name, ks->ks_namelen);
	}
	mutex_destroy(&ks->ks_lock);
	cv_destroy(&ks->ks_cv);
	kmem_free(ks, sizeof(ksem_t));
}

#define	KSEM_ID_IS_PSHARED(id)	\
	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
static void ksem_release(ksem_t *ksem, int fd) { bool destroy = false; KASSERT(mutex_owned(&ksem->ks_lock)); KASSERT(ksem->ks_ref > 0); if (--ksem->ks_ref == 0) { /* * Destroy if the last reference and semaphore is unnamed, * or unlinked (for named semaphore). */ destroy = (ksem->ks_flags & KS_UNLINKED) || (ksem->ks_name == NULL); } mutex_exit(&ksem->ks_lock); if (destroy) { ksem_free(ksem); } if (fd != -1) { fd_putfile(fd); } } int sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap, register_t *retval) { /* { unsigned int value; intptr_t *idp; } */ return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp), copyin, copyout); } int do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin, copyout_t docopyout) { proc_t *p = l->l_proc; ksem_t *ks; file_t *fp; intptr_t id, arg; int fd, error; /* * Newer versions of librt / libpthread pass us 'PSRD' in *idp to * indicate that a pshared semaphore is wanted. In that case we * allocate globally unique ID and return that, rather than the * process-scoped file descriptor ID. */ error = (*docopyin)(idp, &arg, sizeof(*idp)); if (error) { return error; } error = fd_allocfile(&fp, &fd); if (error) { return error; } fp->f_type = DTYPE_SEM; fp->f_flag = FREAD | FWRITE; fp->f_ops = &semops; if (fd >= KSEM_MARKER_MIN) { /* * This is super-unlikely, but we check for it anyway * because potential collisions with the pshared marker * would be bad. */ fd_abort(p, fp, fd); return EMFILE; } /* Note the mode does not matter for anonymous semaphores. 
*/ error = ksem_create(l, NULL, &ks, 0, val); if (error) { fd_abort(p, fp, fd); return error; } if (arg == KSEM_PSHARED) { ks->ks_pshared_proc = curproc; ks->ks_pshared_fd = fd; ksem_alloc_pshared_id(ks); id = ks->ks_pshared_id; } else { id = (intptr_t)fd; } error = (*docopyout)(&id, idp, sizeof(*idp)); if (error) { ksem_free(ks); fd_abort(p, fp, fd); return error; } fp->f_ksem = ks; fd_affix(p, fp, fd); return error; } int sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap, register_t *retval) { /* { const char *name; int oflag; mode_t mode; unsigned int value; intptr_t *idp; } */ return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag), SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout); } int do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode, unsigned int value, intptr_t *idp, copyout_t docopyout) { char *name; proc_t *p = l->l_proc; ksem_t *ksnew = NULL, *ks; file_t *fp; intptr_t id; int fd, error; error = name_copyin(semname, &name); if (error) { return error; } error = fd_allocfile(&fp, &fd); if (error) { name_destroy(&name); return error; } fp->f_type = DTYPE_SEM; fp->f_flag = FREAD | FWRITE; fp->f_ops = &semops; if (fd >= KSEM_MARKER_MIN) { /* * This is super-unlikely, but we check for it anyway * because potential collisions with the pshared marker * would be bad. */ fd_abort(p, fp, fd); return EMFILE; } /* * The ID (file descriptor number) can be stored early. * Note that zero is a special value for libpthread. */ id = (intptr_t)fd; error = (*docopyout)(&id, idp, sizeof(*idp)); if (error) { goto err; } if (oflag & O_CREAT) { /* Create a new semaphore. */ error = ksem_create(l, name, &ksnew, mode, value); if (error) { goto err; } KASSERT(ksnew != NULL); } /* Lookup for a semaphore with such name. */ mutex_enter(&ksem_lock); ks = ksem_lookup(name); name_destroy(&name); if (ks) { KASSERT(mutex_owned(&ks->ks_lock)); mutex_exit(&ksem_lock); /* Check for exclusive create. 
 */
		if (oflag & O_EXCL) {
			mutex_exit(&ks->ks_lock);
			error = EEXIST;
			goto err;
		}

		/*
		 * Verify permissions.  If we can access it,
		 * add the reference of this thread.
		 */
		error = ksem_perm(l, ks);
		if (error == 0) {
			ks->ks_ref++;
		}
		mutex_exit(&ks->ks_lock);
		if (error) {
			goto err;
		}
	} else {
		/* Fail if not found and not creating. */
		if ((oflag & O_CREAT) == 0) {
			mutex_exit(&ksem_lock);
			KASSERT(ksnew == NULL);
			error = ENOENT;
			goto err;
		}

		/* Check for the limit locked. */
		if (nsems >= ksem_max) {
			mutex_exit(&ksem_lock);
			error = ENFILE;
			goto err;
		}

		/*
		 * Finally, insert semaphore into the list.
		 * Note: it already has the initial reference.
		 */
		ks = ksnew;
		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
		nsems++;
		mutex_exit(&ksem_lock);

		ksnew = NULL;
	}
	KASSERT(ks != NULL);
	fp->f_ksem = ks;
	fd_affix(p, fp, fd);
err:
	/* name_destroy() is a no-op if the name was already consumed. */
	name_destroy(&name);
	if (error) {
		fd_abort(p, fp, fd);
	}
	/* Unused pre-created semaphore (lost the creation race). */
	if (ksnew) {
		ksem_free(ksnew);
	}
	return error;
}

int
sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
    register_t *retval)
{
	/* {
		intptr_t id;
	} */
	intptr_t id = SCARG(uap, id);
	int fd, error;
	ksem_t *ks;

	error = ksem_get(id, &ks, &fd);
	if (error) {
		return error;
	}

	/* This is only for named semaphores. */
	if (ks->ks_name == NULL) {
		error = EINVAL;
	}
	ksem_release(ks, -1);
	if (error) {
		if (fd != -1)
			fd_putfile(fd);
		return error;
	}
	return fd_close(fd);
}

/*
 * ksem_read_fop: reading a semaphore descriptor returns its name
 * (used for introspection; anonymous semaphores yield nothing).
 */
static int
ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	size_t len;
	char *name;
	ksem_t *ks = fp->f_ksem;

	mutex_enter(&ks->ks_lock);
	len = ks->ks_namelen;
	name = ks->ks_name;
	mutex_exit(&ks->ks_lock);
	if (name == NULL || len == 0)
		return 0;
	return uiomove(name, len, uio);
}

/*
 * ksem_stat_fop: fill in a synthetic struct stat for the semaphore;
 * st_size reflects the current semaphore value.
 */
static int
ksem_stat_fop(file_t *fp, struct stat *ub)
{
	ksem_t *ks = fp->f_ksem;

	mutex_enter(&ks->ks_lock);

	memset(ub, 0, sizeof(*ub));

	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
	    ? _S_IFLNK : _S_IFREG);
	ub->st_uid = ks->ks_uid;
	ub->st_gid = ks->ks_gid;
	ub->st_size = ks->ks_value;
	ub->st_blocks = (ub->st_size) ? 1 : 0;
	ub->st_nlink = ks->ks_ref;
	ub->st_blksize = 4096;

	nanotime(&ub->st_atimespec);
	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
	    ub->st_atimespec;

	/*
	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	mutex_exit(&ks->ks_lock);
	return 0;
}

/*
 * ksem_close_fop: file close hook.  For a pshared semaphore, only
 * the creating process tears the semaphore down; everyone else's
 * close is a no-op.
 */
static int
ksem_close_fop(file_t *fp)
{
	ksem_t *ks = fp->f_ksem;

	mutex_enter(&ks->ks_lock);
	if (ks->ks_pshared_id) {
		if (ks->ks_pshared_proc != curproc) {
			/* Do nothing if this is not the creator. */
			mutex_exit(&ks->ks_lock);
			return 0;
		}
		/* Mark this semaphore as dead. */
		ks->ks_pshared_proc = NULL;
	}
	ksem_release(ks, -1);
	return 0;
}

int
sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
    register_t *retval)
{
	/* {
		const char *name;
	} */
	char *name;
	ksem_t *ks;
	u_int refcnt;
	int error;

	error = name_copyin(SCARG(uap, name), &name);
	if (error)
		return error;

	mutex_enter(&ksem_lock);
	ks = ksem_lookup(name);
	name_destroy(&name);
	if (ks == NULL) {
		mutex_exit(&ksem_lock);
		return ENOENT;
	}
	KASSERT(mutex_owned(&ks->ks_lock));

	/* Verify permissions. */
	error = ksem_perm(l, ks);
	if (error) {
		mutex_exit(&ks->ks_lock);
		mutex_exit(&ksem_lock);
		return error;
	}

	/* Remove from the global list. */
	LIST_REMOVE(ks, ks_entry);
	nsems--;
	mutex_exit(&ksem_lock);

	refcnt = ks->ks_ref;
	if (refcnt) {
		/* Mark as unlinked, if there are references.
*/ ks->ks_flags |= KS_UNLINKED; } mutex_exit(&ks->ks_lock); if (refcnt == 0) { ksem_free(ks); } return 0; } int sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap, register_t *retval) { /* { intptr_t id; } */ int fd, error; ksem_t *ks; error = ksem_get(SCARG(uap, id), &ks, &fd); if (error) { return error; } KASSERT(mutex_owned(&ks->ks_lock)); if (ks->ks_value == SEM_VALUE_MAX) { error = EOVERFLOW; goto out; } ks->ks_value++; if (ks->ks_waiters) { cv_broadcast(&ks->ks_cv); } out: ksem_release(ks, fd); return error; } int do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime) { int fd, error, timeo; ksem_t *ks; error = ksem_get(id, &ks, &fd); if (error) { return error; } KASSERT(mutex_owned(&ks->ks_lock)); while (ks->ks_value == 0) { ks->ks_waiters++; if (!try_p && abstime != NULL) { error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime, &timeo, NULL); if (error != 0) goto out; } else { timeo = 0; } error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv, &ks->ks_lock, timeo); ks->ks_waiters--; if (error) goto out; } ks->ks_value--; out: ksem_release(ks, fd); return error; } int sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap, register_t *retval) { /* { intptr_t id; } */ return do_ksem_wait(l, SCARG(uap, id), false, NULL); } int sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap, register_t *retval) { /* { intptr_t id; const struct timespec *abstime; } */ struct timespec ts; int error; error = copyin(SCARG(uap, abstime), &ts, sizeof(ts)); if (error != 0) return error; if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) return EINVAL; error = do_ksem_wait(l, SCARG(uap, id), false, &ts); if (error == EWOULDBLOCK) error = ETIMEDOUT; return error; } int sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap, register_t *retval) { /* { intptr_t id; } */ return do_ksem_wait(l, SCARG(uap, id), true, NULL); } int sys__ksem_getvalue(struct lwp *l, const struct 
sys__ksem_getvalue_args *uap, register_t *retval) { /* { intptr_t id; unsigned int *value; } */ int fd, error; ksem_t *ks; unsigned int val; error = ksem_get(SCARG(uap, id), &ks, &fd); if (error) { return error; } KASSERT(mutex_owned(&ks->ks_lock)); val = ks->ks_value; ksem_release(ks, fd); return copyout(&val, SCARG(uap, value), sizeof(val)); } int sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap, register_t *retval) { /* { intptr_t id; } */ int fd, error; ksem_t *ks; intptr_t id = SCARG(uap, id); error = ksem_get(id, &ks, &fd); if (error) { return error; } KASSERT(mutex_owned(&ks->ks_lock)); /* Operation is only for unnamed semaphores. */ if (ks->ks_name != NULL) { error = EINVAL; goto out; } /* Cannot destroy if there are waiters. */ if (ks->ks_waiters) { error = EBUSY; goto out; } if (KSEM_ID_IS_PSHARED(id)) { /* Cannot destroy if we did't create it. */ KASSERT(fd == -1); KASSERT(ks->ks_pshared_proc != NULL); if (ks->ks_pshared_proc != curproc) { error = EINVAL; goto out; } fd = ks->ks_pshared_fd; /* Mark it dead so subsequent lookups fail. */ ks->ks_pshared_proc = NULL; /* Do an fd_getfile() to for the benefit of fd_close(). */ file_t *fp __diagused = fd_getfile(fd); KASSERT(fp != NULL); KASSERT(fp->f_ksem == ks); } out: ksem_release(ks, -1); if (error) { if (!KSEM_ID_IS_PSHARED(id)) fd_putfile(fd); return error; } return fd_close(fd); } |
| 5 5 5 5 4 3 3 1 1 1 1 3 3 2 1 1 1 1 1 1 1 6 6 6 4 3 1 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 | /* $NetBSD: 
sco_socket.c,v 1.38 2019/01/28 12:53:01 martin Exp $ */ /*- * Copyright (c) 2006 Itronix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of Itronix Inc. may not be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: sco_socket.c,v 1.38 2019/01/28 12:53:01 martin Exp $"); /* load symbolic names */ #ifdef BLUETOOTH_DEBUG #define PRUREQUESTS #define PRCOREQUESTS #endif #include <sys/param.h> #include <sys/domain.h> #include <sys/kernel.h> #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/systm.h> #include <netbt/bluetooth.h> #include <netbt/hci.h> #include <netbt/sco.h> /******************************************************************************* * * SCO SOCK_SEQPACKET sockets - low latency audio data */ static void sco_connecting(void *); static void sco_connected(void *); static void sco_disconnected(void *, int); static void *sco_newconn(void *, struct sockaddr_bt *, struct sockaddr_bt *); static void sco_complete(void *, int); static void sco_linkmode(void *, int); static void sco_input(void *, struct mbuf *); static const struct btproto sco_proto = { sco_connecting, sco_connected, sco_disconnected, sco_newconn, sco_complete, sco_linkmode, sco_input, }; int sco_sendspace = 4096; int sco_recvspace = 4096; static int sco_attach(struct socket *so, int proto) { int error; KASSERT(so->so_pcb == NULL); if (so->so_lock == NULL) { mutex_obj_hold(bt_lock); so->so_lock = bt_lock; solock(so); } KASSERT(solocked(so)); error = soreserve(so, sco_sendspace, sco_recvspace); if (error) { return error; } return sco_attach_pcb((struct sco_pcb **)&so->so_pcb, &sco_proto, so); } static void sco_detach(struct socket *so) { KASSERT(so->so_pcb != NULL); sco_detach_pcb((struct sco_pcb **)&so->so_pcb); KASSERT(so->so_pcb == NULL); } static int sco_accept(struct socket *so, struct sockaddr *nam) { struct sco_pcb *pcb = so->so_pcb; KASSERT(solocked(so)); KASSERT(nam != NULL); if (pcb == NULL) return EINVAL; return sco_peeraddr_pcb(pcb, (struct sockaddr_bt *)nam); } static int sco_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) { struct sco_pcb *pcb = 
so->so_pcb;
	struct sockaddr_bt *sa = (struct sockaddr_bt *)nam;

	KASSERT(solocked(so));
	KASSERT(nam != NULL);

	if (pcb == NULL)
		return EINVAL;

	if (sa->bt_len != sizeof(struct sockaddr_bt))
		return EINVAL;

	if (sa->bt_family != AF_BLUETOOTH)
		return EAFNOSUPPORT;

	return sco_bind_pcb(pcb, sa);
}

static int
sco_listen(struct socket *so, struct lwp *l)
{
	struct sco_pcb *pcb = so->so_pcb;

	KASSERT(solocked(so));

	if (pcb == NULL)
		return EINVAL;

	return sco_listen_pcb(pcb);
}

static int
sco_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct sco_pcb *pcb = so->so_pcb;
	struct sockaddr_bt *sa = (struct sockaddr_bt *)nam;

	KASSERT(solocked(so));
	KASSERT(nam != NULL);

	if (pcb == NULL)
		return EINVAL;

	if (sa->bt_len != sizeof(struct sockaddr_bt))
		return EINVAL;

	if (sa->bt_family != AF_BLUETOOTH)
		return EAFNOSUPPORT;

	soisconnecting(so);
	return sco_connect_pcb(pcb, sa);
}

/* socketpair(2)-style connect is not supported for SCO. */
static int
sco_connect2(struct socket *so, struct socket *so2)
{
	struct sco_pcb *pcb = so->so_pcb;

	KASSERT(solocked(so));

	if (pcb == NULL)
		return EINVAL;

	return EOPNOTSUPP;
}

static int
sco_disconnect(struct socket *so)
{
	struct sco_pcb *pcb = so->so_pcb;

	KASSERT(solocked(so));

	if (pcb == NULL)
		return EINVAL;

	soisdisconnecting(so);
	return sco_disconnect_pcb(pcb, so->so_linger);
}

static int
sco_shutdown(struct socket *so)
{
	KASSERT(solocked(so));

	socantsendmore(so);
	return 0;
}

static int
sco_abort(struct socket *so)
{
	struct sco_pcb *pcb = so->so_pcb;

	KASSERT(solocked(so));

	if (pcb == NULL)
		return EINVAL;

	sco_disconnect_pcb(pcb, 0);
	soisdisconnected(so);
	sco_detach(so);
	return 0;
}

static int
sco_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
{
	return EOPNOTSUPP;
}

static int
sco_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));
	return 0;
}

static int
sco_peeraddr(struct socket *so, struct sockaddr *nam)
{
	struct sco_pcb *pcb = (struct sco_pcb *)so->so_pcb;

	KASSERT(solocked(so));
	KASSERT(pcb != NULL);
	KASSERT(nam != NULL);

	return sco_peeraddr_pcb(pcb, (struct sockaddr_bt *)nam);
}

static int
sco_sockaddr(struct socket *so, struct sockaddr *nam)
{
	struct sco_pcb *pcb = (struct sco_pcb *)so->so_pcb;

	KASSERT(solocked(so));
	KASSERT(pcb != NULL);
	KASSERT(nam != NULL);

	return sco_sockaddr_pcb(pcb, (struct sockaddr_bt *)nam);
}

static int
sco_rcvd(struct socket *so, int flags, struct lwp *l)
{
	KASSERT(solocked(so));
	return EOPNOTSUPP;
}

static int
sco_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	KASSERT(solocked(so));
	return EOPNOTSUPP;
}

/*
 * sco_send: queue one record for transmission.  The record is kept
 * on so_snd until the transmit-complete callback drops it; a copy
 * is handed to the link layer.
 */
static int
sco_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct lwp *l)
{
	struct sco_pcb *pcb = so->so_pcb;
	int err = 0;
	struct mbuf *m0;

	KASSERT(solocked(so));
	KASSERT(m != NULL);

	if (control)	/* no use for that */
		m_freem(control);

	if (pcb == NULL) {
		err = EINVAL;
		goto release;
	}

	if (m->m_pkthdr.len == 0)
		goto release;

	if (m->m_pkthdr.len > pcb->sp_mtu) {
		err = EMSGSIZE;
		goto release;
	}

	m0 = m_copypacket(m, M_DONTWAIT);
	if (m0 == NULL) {
		err = ENOMEM;
		goto release;
	}

	sbappendrecord(&so->so_snd, m);
	return sco_send_pcb(pcb, m0);

release:
	m_freem(m);
	return err;
}

static int
sco_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	m_freem(m);
	m_freem(control);
	return EOPNOTSUPP;
}

static int
sco_purgeif(struct socket *so, struct ifnet *ifp)
{
	return EOPNOTSUPP;
}

/*
 * get/set socket options
 */
int
sco_ctloutput(int req, struct socket *so, struct sockopt *sopt)
{
	struct sco_pcb *pcb = (struct sco_pcb *)so->so_pcb;
	int err = 0;

	DPRINTFN(2, "req %s\n", prcorequests[req]);

	if (pcb == NULL)
		return EINVAL;

	if (sopt->sopt_level != BTPROTO_SCO)
		return ENOPROTOOPT;

	switch(req) {
	case PRCO_GETOPT:
		err = sco_getopt(pcb, sopt);
		break;

	case PRCO_SETOPT:
		err = sco_setopt(pcb, sopt);
		break;

	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}

/*****************************************************************************
 *
 *	SCO Protocol socket callbacks
 *
 */
static void
sco_connecting(void *arg)
{
	struct socket *so =
arg; DPRINTF("Connecting\n"); soisconnecting(so); } static void sco_connected(void *arg) { struct socket *so = arg; DPRINTF("Connected\n"); soisconnected(so); } static void sco_disconnected(void *arg, int err) { struct socket *so = arg; DPRINTF("Disconnected (%d)\n", err); so->so_error = err; soisdisconnected(so); } static void * sco_newconn(void *arg, struct sockaddr_bt *laddr, struct sockaddr_bt *raddr) { struct socket *so = arg; DPRINTF("New Connection\n"); so = sonewconn(so, false); if (so == NULL) return NULL; soisconnecting(so); return so->so_pcb; } static void sco_complete(void *arg, int num) { struct socket *so = arg; while (num-- > 0) sbdroprecord(&so->so_snd); sowwakeup(so); } static void sco_linkmode(void *arg, int mode) { } static void sco_input(void *arg, struct mbuf *m) { struct socket *so = arg; /* * since this data is time sensitive, if the buffer * is full we just dump data until the latest one * will fit. */ while (m->m_pkthdr.len > sbspace(&so->so_rcv)) sbdroprecord(&so->so_rcv); DPRINTFN(10, "received %d bytes\n", m->m_pkthdr.len); sbappendrecord(&so->so_rcv, m); sorwakeup(so); } PR_WRAP_USRREQS(sco) #define sco_attach sco_attach_wrapper #define sco_detach sco_detach_wrapper #define sco_accept sco_accept_wrapper #define sco_bind sco_bind_wrapper #define sco_listen sco_listen_wrapper #define sco_connect sco_connect_wrapper #define sco_connect2 sco_connect2_wrapper #define sco_disconnect sco_disconnect_wrapper #define sco_shutdown sco_shutdown_wrapper #define sco_abort sco_abort_wrapper #define sco_ioctl sco_ioctl_wrapper #define sco_stat sco_stat_wrapper #define sco_peeraddr sco_peeraddr_wrapper #define sco_sockaddr sco_sockaddr_wrapper #define sco_rcvd sco_rcvd_wrapper #define sco_recvoob sco_recvoob_wrapper #define sco_send sco_send_wrapper #define sco_sendoob sco_sendoob_wrapper #define sco_purgeif sco_purgeif_wrapper const struct pr_usrreqs sco_usrreqs = { .pr_attach = sco_attach, .pr_detach = sco_detach, .pr_accept = sco_accept, .pr_bind = 
sco_bind, .pr_listen = sco_listen, .pr_connect = sco_connect, .pr_connect2 = sco_connect2, .pr_disconnect = sco_disconnect, .pr_shutdown = sco_shutdown, .pr_abort = sco_abort, .pr_ioctl = sco_ioctl, .pr_stat = sco_stat, .pr_peeraddr = sco_peeraddr, .pr_sockaddr = sco_sockaddr, .pr_rcvd = sco_rcvd, .pr_recvoob = sco_recvoob, .pr_send = sco_send, .pr_sendoob = sco_sendoob, .pr_purgeif = sco_purgeif, }; |
| 1604 1600 1603 1602 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | /* $NetBSD: subr_fault.c,v 1.2 2020/06/30 16:28:17 maxv Exp $ */ /* * Copyright (c) 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Maxime Villard. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_fault.c,v 1.2 2020/06/30 16:28:17 maxv Exp $");

#include <sys/module.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/types.h>
#include <sys/specificdata.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/ioccom.h>
#include <sys/lwp.h>

#include <sys/fault.h>

/*
 * One fault-injection configuration: either the single global one, or a
 * per-LWP one hung off fault_lwp_key.  Fields are accessed with atomics
 * from fault_inject() and under fault_global_lock (global scope only)
 * from the ioctl paths.
 */
typedef struct {
	volatile bool enabled;		/* injection active */
	volatile bool oneshot;		/* stop after the first fault */
	volatile unsigned long nth;	/* fault every nth call */
	volatile unsigned long cnt;	/* calls seen so far */
	volatile unsigned long nfaults;	/* faults delivered so far */
} fault_t;

/* The system-wide configuration; serialized by fault_global_lock. */
static fault_t fault_global __cacheline_aligned = {
	.enabled = false,
	.oneshot = false,
	.nth = FAULT_NTH_MIN,
	.cnt = 0,
	.nfaults = 0
};

static kmutex_t fault_global_lock __cacheline_aligned;
static specificdata_key_t fault_lwp_key;

/* -------------------------------------------------------------------------- */

/*
 * Decide whether the caller should fail now.  Returns true when a fault
 * is to be injected.  Checks the global configuration first (acquire
 * pairs with the release in fault_ioc_enable()), then the calling LWP's
 * private configuration.  Lock-free on the hot path.
 */
bool
fault_inject(void)
{
	volatile unsigned long cnt;
	fault_t *f;

	/* never inject before the scheduler is up */
	if (__predict_false(cold))
		return false;

	if (__predict_false(atomic_load_acquire(&fault_global.enabled))) {
		f = &fault_global;
	} else {
		f = lwp_getspecific(fault_lwp_key);
		if (__predict_true(f == NULL))
			return false;
		if (__predict_false(!f->enabled))
			return false;
	}

	/* oneshot mode: only ever deliver a single fault */
	if (atomic_load_relaxed(&f->oneshot)) {
		if (__predict_true(atomic_load_relaxed(&f->nfaults) > 0))
			return false;
	}

	cnt = atomic_inc_ulong_nv(&f->cnt);
	if (__predict_false(cnt % atomic_load_relaxed(&f->nth) == 0)) {
		atomic_inc_ulong(&f->nfaults);
		return true;
	}

	return false;
}

/* -------------------------------------------------------------------------- */

static int
fault_open(dev_t dev, int flag, int mode, struct lwp *l)
{
	return 0;
}

static int
fault_close(dev_t dev, int flag, int mode, struct lwp *l)
{
	return 0;
}

/*
 * FAULT_IOC_ENABLE: start injecting, either globally or on the calling
 * LWP.  Counters are reset; "enabled" is stored last with release
 * semantics so fault_inject() sees a fully initialized record.
 * Returns EEXIST if injection is already enabled for the scope.
 */
static int
fault_ioc_enable(struct fault_ioc_enable *args)
{
	fault_t *f;

	if (args->mode != FAULT_MODE_NTH_ONESHOT)
		return EINVAL;
	if (args->nth < FAULT_NTH_MIN)
		return EINVAL;

	switch (args->scope) {
	case FAULT_SCOPE_GLOBAL:
		mutex_enter(&fault_global_lock);
		if (fault_global.enabled) {
			mutex_exit(&fault_global_lock);
			return EEXIST;
		}
		fault_global.oneshot = true;
		atomic_store_relaxed(&fault_global.nth, args->nth);
		fault_global.cnt = 0;
		fault_global.nfaults = 0;
		atomic_store_release(&fault_global.enabled, true);
		mutex_exit(&fault_global_lock);
		break;
	case FAULT_SCOPE_LWP:
		/* per-LWP record is private to curlwp; no lock needed */
		f = lwp_getspecific(fault_lwp_key);
		if (f != NULL) {
			if (f->enabled)
				return EEXIST;
		} else {
			f = kmem_zalloc(sizeof(*f), KM_SLEEP);
			lwp_setspecific(fault_lwp_key, f);
		}
		f->oneshot = true;
		atomic_store_relaxed(&f->nth, args->nth);
		f->cnt = 0;
		f->nfaults = 0;
		atomic_store_release(&f->enabled, true);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/*
 * FAULT_IOC_DISABLE: stop injecting for the given scope.
 * Returns ENOENT if injection was not enabled.
 */
static int
fault_ioc_disable(struct fault_ioc_disable *args)
{
	fault_t *f;

	switch (args->scope) {
	case FAULT_SCOPE_GLOBAL:
		mutex_enter(&fault_global_lock);
		if (!fault_global.enabled) {
			mutex_exit(&fault_global_lock);
			return ENOENT;
		}
		atomic_store_release(&fault_global.enabled, false);
		mutex_exit(&fault_global_lock);
		break;
	case FAULT_SCOPE_LWP:
		f = lwp_getspecific(fault_lwp_key);
		if (f == NULL)
			return ENOENT;
		if (!f->enabled)
			return ENOENT;
		atomic_store_release(&f->enabled, false);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/*
 * FAULT_IOC_GETINFO: report the number of faults delivered so far
 * for the given scope.
 */
static int
fault_ioc_getinfo(struct fault_ioc_getinfo *args)
{
	fault_t *f;

	switch (args->scope) {
	case FAULT_SCOPE_GLOBAL:
		args->nfaults = atomic_load_relaxed(&fault_global.nfaults);
		break;
	case FAULT_SCOPE_LWP:
		f = lwp_getspecific(fault_lwp_key);
		if (f == NULL)
			return ENOENT;
		args->nfaults = atomic_load_relaxed(&f->nfaults);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/* /dev/fault ioctl dispatch. */
static int
fault_ioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
{
	switch (cmd) {
	case FAULT_IOC_ENABLE:
		return fault_ioc_enable(addr);
	case FAULT_IOC_DISABLE:
		return fault_ioc_disable(addr);
	case FAULT_IOC_GETINFO:
		return fault_ioc_getinfo(addr);
	default:
		return EINVAL;
	}
}

const struct cdevsw fault_cdevsw = {
	.d_open = fault_open,
	.d_close = fault_close,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = fault_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

/* -------------------------------------------------------------------------- */

MODULE(MODULE_CLASS_MISC, fault, NULL);

/* Destructor for the per-LWP record, run when the LWP exits. */
static void
fault_lwp_free(void *arg)
{
	fault_t *f = (fault_t *)arg;

	if (f == NULL) {
		return;
	}

	kmem_free(f, sizeof(*f));
}

static void
fault_init(void)
{
	mutex_init(&fault_global_lock, MUTEX_DEFAULT, IPL_NONE);
	lwp_specific_key_create(&fault_lwp_key, fault_lwp_free);
}

/*
 * Module control.  Unloading is refused (EINVAL on FINI): outstanding
 * per-LWP records and the device node would be left dangling.
 */
static int
fault_modcmd(modcmd_t cmd, void *arg)
{
	switch (cmd) {
	case MODULE_CMD_INIT:
		fault_init();
		return 0;
	case MODULE_CMD_FINI:
		return EINVAL;
	default:
		return ENOTTY;
	}
}
| 2 1 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | /* $NetBSD: efs_vfsops.c,v 1.30 2022/03/19 13:53:32 hannken Exp $ */ /* * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: efs_vfsops.c,v 1.30 2022/03/19 13:53:32 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/fstypes.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/kauth.h>
#include <sys/proc.h>
#include <sys/module.h>

#include <miscfs/genfs/genfs_node.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <fs/efs/efs.h>
#include <fs/efs/efs_sb.h>
#include <fs/efs/efs_dir.h>
#include <fs/efs/efs_genfs.h>
#include <fs/efs/efs_mount.h>
#include <fs/efs/efs_extent.h>
#include <fs/efs/efs_dinode.h>
#include <fs/efs/efs_inode.h>
#include <fs/efs/efs_subr.h>

MODULE(MODULE_CLASS_VFS, efs, NULL);

MALLOC_JUSTDEFINE(M_EFSMNT, "efsmnt", "efs mount structure");
MALLOC_JUSTDEFINE(M_EFSINO, "efsino", "efs in-core inode structure");
MALLOC_JUSTDEFINE(M_EFSTMP, "efstmp", "efs temporary allocations");

extern int (**efs_vnodeop_p)(void *);	/* for getnewvnode() */
extern int (**efs_specop_p)(void *);	/* for getnewvnode() */
extern int (**efs_fifoop_p)(void *);	/* for getnewvnode() */
static int efs_statvfs(struct mount *, struct statvfs *);

/*
 * efs_mount and efs_mountroot common functions.
 *
 * Reads and validates the superblock (and its replicant, if any),
 * verifies the last filesystem block is readable, and fills in the
 * mount structure.  MNT_FORCE downgrades most validation failures to
 * warnings.  On error the efs_mount structure is freed here.
 */
static int
efs_mount_common(struct mount *mp, const char *path, struct vnode *devvp,
    struct efs_args *args)
{
	int err;
	struct buf *bp;
	const char *why;
	struct efs_mount *emp;
	struct lwp *l = curlwp;

	emp = malloc(sizeof(*emp), M_EFSMNT, M_WAITOK);
	emp->em_dev = devvp->v_rdev;
	emp->em_devvp = devvp;
	emp->em_mnt = mp;

	/* read in the superblock */
	err = efs_bread(emp, EFS_BB_SB, l, &bp);
	if (err) {
		EFS_DPRINTF(("superblock read failed\n"));
		free(emp, M_EFSMNT);
		return (err);
	}
	memcpy(&emp->em_sb, bp->b_data, sizeof(emp->em_sb));
	brelse(bp, 0);

	/* validate the superblock */
	if (efs_sb_validate(&emp->em_sb, &why)) {
		printf("efs: invalid superblock: %s\n", why);
		if (!(mp->mnt_flag & MNT_FORCE)) {
			free(emp, M_EFSMNT);
			return (EIO);
		}
	}

	/* check that it's clean */
	if (be16toh(emp->em_sb.sb_dirty) != EFS_SB_CLEAN) {
		printf("efs: filesystem is dirty (sb_dirty = 0x%x); please "
		    "run fsck_efs(8)\n", be16toh(emp->em_sb.sb_dirty));
		/* XXX - default to readonly unless forced?? */
	}

	/* if the superblock was replicated, verify that it is the same */
	if (be32toh(emp->em_sb.sb_replsb) != 0) {
		struct buf *rbp;
		bool skip = false;

		err = efs_bread(emp, be32toh(emp->em_sb.sb_replsb), l, &rbp);
		if (err) {
			printf("efs: read of superblock replicant failed; "
			    "please run fsck_efs(8)\n");
			if (mp->mnt_flag & MNT_FORCE) {
				skip = true;
			} else {
				free(emp, M_EFSMNT);
				return (err);
			}
		}

		if (!skip) {
			if (memcmp(rbp->b_data, &emp->em_sb,
			    sizeof(emp->em_sb))) {
				printf("efs: superblock differs from "
				    "replicant; please run fsck_efs(8)\n");
				if (!(mp->mnt_flag & MNT_FORCE)) {
					brelse(rbp, 0);
					free(emp, M_EFSMNT);
					return (EIO);
				}
			}
			brelse(rbp, 0);
		}
	}

	/* ensure we can read last block */
	err = efs_bread(emp, be32toh(emp->em_sb.sb_size) - 1, l, &bp);
	if (err) {
		printf("efs: cannot access all filesystem blocks; please run "
		    "fsck_efs(8)\n");
		if (!(mp->mnt_flag & MNT_FORCE)) {
			free(emp, M_EFSMNT);
			return (err);
		}
	} else {
		brelse(bp, 0);
	}

	mp->mnt_data = emp;
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_fs_bshift = EFS_BB_SHFT;
	mp->mnt_dev_bshift = DEV_BSHIFT;
	vfs_getnewfsid(mp);
	efs_statvfs(mp, &mp->mnt_stat);

	err = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
	/*
	 * NOTE(review): on this failure path emp is freed but
	 * mp->mnt_data still points at it — looks like a latent
	 * dangling pointer if the caller touches mnt_data; confirm
	 * against the VFS error handling before changing.
	 */
	if (err)
		free(emp, M_EFSMNT);

	return (err);
}

/*
 * mount syscall vfsop.
 *
 * Looks up and opens the block device (read-only; updates are not
 * supported), performs permission checks, then defers to
 * efs_mount_common().  Also services MNT_GETARGS.
 *
 * Returns 0 on success.
 */
static int
efs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct efs_args *args = data;
	struct pathbuf *pb;
	struct nameidata devnd;
	struct efs_mount *emp;
	struct vnode *devvp;
	int err, mode;

	if (args == NULL)
		return EINVAL;
	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		if ((emp = VFSTOEFS(mp)) == NULL)
			return (EIO);
		args->fspec = NULL;
		args->version = EFS_MNT_VERSION;
		*data_len = sizeof *args;
		return 0;
	}

	if (mp->mnt_flag & MNT_UPDATE)
		return (EOPNOTSUPP);	/* XXX read-only */

	/* look up our device's vnode. it is returned locked */
	err = pathbuf_copyin(args->fspec, &pb);
	if (err) {
		return err;
	}
	NDINIT(&devnd, LOOKUP, FOLLOW | LOCKLEAF, pb);
	if ((err = namei(&devnd))) {
		pathbuf_destroy(pb);
		return (err);
	}

	devvp = devnd.ni_vp;
	pathbuf_destroy(pb);

	if (devvp->v_type != VBLK) {
		vput(devvp);
		return (ENOTBLK);
	}

	/* XXX - rdonly */
	mode = FREAD;

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	err = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(VREAD));
	if (err) {
		vput(devvp);
		return (err);
	}

	if ((err = VOP_OPEN(devvp, mode, l->l_cred))) {
		vput(devvp);
		return (err);
	}

	err = efs_mount_common(mp, path, devvp, args);
	if (err) {
		VOP_CLOSE(devvp, mode, l->l_cred);
		vput(devvp);
		return (err);
	}

	VOP_UNLOCK(devvp);

	return (0);
}

/*
 * Initialisation routine.
 *
 * Returns 0 on success.
 */
static int
efs_start(struct mount *mp, int flags)
{

	return (0);
}

/*
 * unmount syscall vfsop.
 *
 * Returns 0 on success.
 */
static int
efs_unmount(struct mount *mp, int mntflags)
{
	struct efs_mount *emp;
	struct lwp *l = curlwp;
	int err;

	emp = VFSTOEFS(mp);

	/* flush all vnodes; MNT_FORCE revokes busy ones */
	err = vflush(mp, NULL, (mntflags & MNT_FORCE) ? FORCECLOSE : 0);
	if (err)
		return (err);

	cache_purgevfs(mp);

	vn_lock(emp->em_devvp, LK_EXCLUSIVE | LK_RETRY);
	err = VOP_CLOSE(emp->em_devvp, FREAD, l->l_cred);
	vput(emp->em_devvp);	/* unlock + release the device vnode */

	free(mp->mnt_data, M_EFSMNT);
	mp->mnt_data = NULL;
	mp->mnt_flag &= ~MNT_LOCAL;

	return (err);
}

/*
 * Return the root vnode.
 *
 * Returns 0 on success.
 */
static int
efs_root(struct mount *mp, int lktype, struct vnode **vpp)
{
	int err;
	struct vnode *vp;

	if ((err = VFS_VGET(mp, EFS_ROOTINO, lktype, &vp)))
		return (err);

	*vpp = vp;
	return (0);
}

/*
 * statvfs syscall vfsop.
 *
 * Fills *sbp from the in-core superblock (all on-disk fields are
 * big-endian, hence the be{16,32}toh conversions).
 *
 * Returns 0 on success.
 */
static int
efs_statvfs(struct mount *mp, struct statvfs *sbp)
{
	struct efs_mount *emp;

	emp = VFSTOEFS(mp);
	sbp->f_bsize	= EFS_BB_SIZE;
	sbp->f_frsize	= EFS_BB_SIZE;
	sbp->f_iosize	= EFS_BB_SIZE;
	sbp->f_blocks	= be32toh(emp->em_sb.sb_size);
	sbp->f_bfree	= be32toh(emp->em_sb.sb_tfree);
	sbp->f_bavail	= sbp->f_bfree;	// XXX same??
	sbp->f_bresvd	= 0;
	sbp->f_files	= be32toh(emp->em_sb.sb_tinode);
	sbp->f_ffree	= be16toh(emp->em_sb.sb_cgisize) *
	    be16toh(emp->em_sb.sb_ncg) * EFS_DINODES_PER_BB;
	sbp->f_favail	= sbp->f_ffree;	// XXX same??
	sbp->f_fresvd	= 0;
	sbp->f_namemax	= EFS_DIRENT_NAMELEN_MAX;
	copy_statvfs_info(sbp, mp);

	return (0);
}

/*
 * Obtain a locked vnode for the given on-disk inode number.
 *
 * Returns 0 on success.
 */
static int
efs_vget(struct mount *mp, ino_t ino, int lktype, struct vnode **vpp)
{
	int error;

	error = vcache_get(mp, &ino, sizeof(ino), vpp);
	if (error)
		return error;
	error = vn_lock(*vpp, lktype);
	if (error) {
		vrele(*vpp);
		*vpp = NULL;
		return error;
	}
	return 0;
}

/*
 * Initialize this vnode / inode pair.
 * Caller assures no other thread will try to load this inode.
 *
 * Reads the on-disk inode, validates the root inode is a directory,
 * and sets the vnode type/ops from the inode's file mode.
 */
static int
efs_loadvnode(struct mount *mp, struct vnode *vp,
    const void *key, size_t key_len, const void **new_key)
{
	int error;
	ino_t ino;
	struct efs_inode *eip;
	struct efs_mount *emp;

	KASSERT(key_len == sizeof(ino));
	memcpy(&ino, key, key_len);
	emp = VFSTOEFS(mp);

	eip = pool_get(&efs_inode_pool, PR_WAITOK);
	eip->ei_mode = 0;
	eip->ei_lockf = NULL;
	eip->ei_number = ino;
	eip->ei_dev = emp->em_dev;
	eip->ei_vp = vp;

	error = efs_read_inode(emp, ino, NULL, &eip->ei_di);
	if (error) {
		pool_put(&efs_inode_pool, eip);
		return error;
	}

	efs_sync_dinode_to_inode(eip);

	if (ino == EFS_ROOTINO && !S_ISDIR(eip->ei_mode)) {
		printf("efs: root inode (%lu) is not a directory!\n",
		    (ulong)EFS_ROOTINO);
		pool_put(&efs_inode_pool, eip);
		return EIO;
	}

	switch (eip->ei_mode & S_IFMT) {
	case S_IFIFO:
		vp->v_type = VFIFO;
		vp->v_op = efs_fifoop_p;
		break;
	case S_IFCHR:
		vp->v_type = VCHR;
		vp->v_op = efs_specop_p;
		spec_node_init(vp, eip->ei_dev);
		break;
	case S_IFDIR:
		vp->v_type = VDIR;
		vp->v_op = efs_vnodeop_p;
		if (ino == EFS_ROOTINO)
			vp->v_vflag |= VV_ROOT;
		break;
	case S_IFBLK:
		vp->v_type = VBLK;
		vp->v_op = efs_specop_p;
		spec_node_init(vp, eip->ei_dev);
		break;
	case S_IFREG:
		vp->v_type = VREG;
		vp->v_op = efs_vnodeop_p;
		break;
	case S_IFLNK:
		vp->v_type = VLNK;
		vp->v_op = efs_vnodeop_p;
		break;
	case S_IFSOCK:
		vp->v_type = VSOCK;
		vp->v_op = efs_vnodeop_p;
		break;
	default:
		printf("efs: invalid mode 0x%x in inode %lu on mount %s\n",
		    eip->ei_mode, (ulong)ino, mp->mnt_stat.f_mntonname);
		pool_put(&efs_inode_pool, eip);
		return EIO;
	}

	vp->v_tag = VT_EFS;
	vp->v_data = eip;
	genfs_node_init(vp, &efs_genfsops);
	uvm_vnp_setsize(vp, eip->ei_size);
	*new_key = &eip->ei_number;

	return 0;
}

/*
 * Convert the provided opaque, unique file handle into a vnode.
 *
 * The generation number guards against a recycled inode: a stale
 * handle gets ESTALE.
 *
 * Returns 0 on success.
 */
static int
efs_fhtovp(struct mount *mp, struct fid *fhp, int lktype,
    struct vnode **vpp)
{
	int err;
	struct vnode *vp;
	struct efs_fid *efp;
	struct efs_inode *eip;

	if (fhp->fid_len != sizeof(struct efs_fid))
		return (EINVAL);

	efp = (struct efs_fid *)fhp;

	if ((err = VFS_VGET(mp, efp->ef_ino, lktype, &vp))) {
		*vpp = NULL;
		return (err);
	}

	eip = EFS_VTOI(vp);
	if (eip->ei_mode == 0 || eip->ei_gen != efp->ef_gen) {
		vput(vp);
		*vpp = NULL;
		return (ESTALE);
	}

	*vpp = vp;
	return (0);
}

/*
 * Convert the provided vnode into an opaque, unique file handle.
 *
 * Returns 0 on success, or E2BIG (with *fh_size updated) if the
 * caller's buffer is too small.
 */
static int
efs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
{
	struct efs_fid *efp;
	struct efs_inode *eip;

	if (*fh_size < sizeof(struct efs_fid)) {
		*fh_size = sizeof(struct efs_fid);
		return (E2BIG);
	}
	*fh_size = sizeof(struct efs_fid);

	eip = EFS_VTOI(vp);
	efp = (struct efs_fid *)fhp;

	fhp->fid_len = sizeof(struct efs_fid);
	efp->ef_ino = eip->ei_number;
	efp->ef_gen = eip->ei_gen;

	return (0);
}

/*
 * Globally initialise the filesystem.
 */
static void
efs_init(void)
{

	malloc_type_attach(M_EFSMNT);
	malloc_type_attach(M_EFSINO);
	malloc_type_attach(M_EFSTMP);
	pool_init(&efs_inode_pool, sizeof(struct efs_inode), 0, 0, 0,
	    "efsinopl", &pool_allocator_nointr, IPL_NONE);
}

/*
 * Globally reinitialise the filesystem.
 */
static void
efs_reinit(void)
{
}

/*
 * Globally clean up the filesystem.
 */
static void
efs_done(void)
{

	pool_destroy(&efs_inode_pool);
	malloc_type_detach(M_EFSMNT);
	malloc_type_detach(M_EFSINO);
	malloc_type_detach(M_EFSTMP);
}

extern const struct vnodeopv_desc efs_vnodeop_opv_desc;
extern const struct vnodeopv_desc efs_specop_opv_desc;
extern const struct vnodeopv_desc efs_fifoop_opv_desc;

const struct vnodeopv_desc * const efs_vnodeopv_descs[] = {
	&efs_vnodeop_opv_desc,
	&efs_specop_opv_desc,
	&efs_fifoop_opv_desc,
	NULL
};

/* VFS operations vector; unsupported ops stubbed with eopnotsupp/nullop. */
struct vfsops efs_vfsops = {
	.vfs_name = MOUNT_EFS,
	.vfs_min_mount_data = sizeof (struct efs_args),
	.vfs_mount = efs_mount,
	.vfs_start = efs_start,
	.vfs_unmount = efs_unmount,
	.vfs_root = efs_root,
	.vfs_quotactl = (void *)eopnotsupp,
	.vfs_statvfs = efs_statvfs,
	.vfs_sync = (void *)nullop,
	.vfs_vget = efs_vget,
	.vfs_loadvnode = efs_loadvnode,
	.vfs_fhtovp = efs_fhtovp,
	.vfs_vptofh = efs_vptofh,
	.vfs_init = efs_init,
	.vfs_reinit = efs_reinit,
	.vfs_done = efs_done,
	.vfs_mountroot = (void *)eopnotsupp,
	.vfs_snapshot = (void *)eopnotsupp,
	.vfs_extattrctl = vfs_stdextattrctl,
	.vfs_suspendctl = genfs_suspendctl,
	.vfs_opv_descs = efs_vnodeopv_descs
	/* .vfs_refcount */
	/* .vfs_list */
};

/* Module control: attach/detach the efs VFS. */
static int
efs_modcmd(modcmd_t cmd, void *arg)
{
	switch (cmd) {
	case MODULE_CMD_INIT:
		return vfs_attach(&efs_vfsops);
	case MODULE_CMD_FINI:
		return vfs_detach(&efs_vfsops);
	default:
		return ENOTTY;
	}
}
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 | /* $NetBSD: kern_ksyms.c,v 1.107 2022/07/15 06:40:24 mrg Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software developed for The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 2001, 2003 Anders Magnusson (ragge@ludd.luth.se). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * Code to deal with in-kernel symbol table management + /dev/ksyms.
 *
 * For each loaded module the symbol table info is kept track of by a
 * struct, placed in a circular list. The first entry is the kernel
 * symbol table.
 */

/*
 * TODO:
 *
 *	Add support for mmap, poll.
 *	Constify tables.
 *	Constify db_symtab and move it to .rodata.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.107 2022/07/15 06:40:24 mrg Exp $");

#if defined(_KERNEL) && defined(_KERNEL_OPT)
#include "opt_copy_symtab.h"
#include "opt_ddb.h"
#include "opt_dtrace.h"
#endif

#define _KSYMS_PRIVATE

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/ksyms.h>
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/pserialize.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#ifdef DDB
#include <ddb/db_output.h>
#endif

#include "ksyms.h"
#if NKSYMS > 0
#include "ioconf.h"
#endif

/*
 * One immutable snapshot of the concatenated symbol tables, exposed to
 * userland via /dev/ksyms.  Reference-counted (under ksyms_lock) so an
 * open file can outlive module load/unload events that invalidate the
 * cached global snapshot.
 */
struct ksyms_snapshot {
	uint64_t		ks_refcnt;	/* references; protected by ksyms_lock */
	uint64_t		ks_gen;		/* generation number of this snapshot */
	struct uvm_object	*ks_uobj;	/* anonymous UVM object holding the image */
	size_t			ks_size;	/* total image size in bytes */
	dev_t			ks_dev;		/* device the snapshot was taken for */
	int			ks_maxlen;	/* longest symbol name at snapshot time */
};

#define KSYMS_MAX_ID	98304
#ifdef KDTRACE_HOOKS
static uint32_t ksyms_nmap[KSYMS_MAX_ID];	/* sorted symbol table map */
#else
static uint32_t *ksyms_nmap = NULL;
#endif

static int ksyms_maxlen;			/* longest symbol name seen so far */
static bool ksyms_initted;			/* locks/cv/pserialize created */
static bool ksyms_loaded;			/* at least one symtab published */
static kmutex_t ksyms_lock __cacheline_aligned;
static struct ksyms_symtab kernel_symtab;	/* the "netbsd" (main kernel) table */
static kcondvar_t ksyms_cv;
static struct lwp *ksyms_snapshotting;		/* lwp currently taking a snapshot */
static struct ksyms_snapshot *ksyms_snapshot;	/* cached snapshot, or NULL */
static uint64_t ksyms_snapshot_gen;
static pserialize_t ksyms_psz __read_mostly;

static void ksyms_hdr_init(const void *);
static void ksyms_sizes_calc(void);
static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
    uint64_t);
static void ksyms_snapshot_release(struct ksyms_snapshot *);

#ifdef KSYMS_DEBUG
#define FOLLOW_CALLS		1
#define FOLLOW_MORE_CALLS	2
#define FOLLOW_DEVKSYMS		4
static int ksyms_debug;
#endif

#define		SYMTAB_FILLER	"|This is the symbol table!"

#ifdef makeoptions_COPY_SYMTAB
extern char db_symtab[];
extern int db_symtabsize;
#endif

/*
 * used by savecore(8) so non-static
 */
struct ksyms_hdr ksyms_hdr;
int ksyms_symsz;
int ksyms_strsz;
int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
    TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
static struct pslist_head ksyms_symtabs_psz = PSLIST_INITIALIZER;

/*
 * Check that both tables were actually found; returns 1 if usable,
 * 0 otherwise.  Diagnostic kernels also report which one is missing.
 */
static int
ksyms_verify(const void *symstart, const void *strstart)
{
#if defined(DIAGNOSTIC) || defined(DEBUG)
	if (symstart == NULL)
		printf("ksyms: Symbol table not found\n");
	if (strstart == NULL)
		printf("ksyms: String table not found\n");
	if (symstart == NULL || strstart == NULL)
		printf("ksyms: Perhaps the kernel is stripped?\n");
#endif
	if (symstart == NULL || strstart == NULL)
		return 0;
	return 1;
}

/*
 * Finds a certain symbol name in a certain symbol table.
 * Returns a pointer into the table's Elf_Sym array, or NULL if not
 * found.  Note sd_strstart is rebased by sd_usroffset because st_name
 * fields are adjusted for the userland view (see ksyms_sizes_calc).
 */
static Elf_Sym *
findsym(const char *name, struct ksyms_symtab *table, int type)
{
	Elf_Sym *sym, *maxsym;
	int low, mid, high, nglob;
	char *str, *cmp;

	sym = table->sd_symstart;
	str = table->sd_strstart - table->sd_usroffset;
	nglob = table->sd_nglob;
	low = 0;
	high = nglob;

	/*
	 * Start with a binary search of all global symbols in this table.
	 * Global symbols must have unique names.
	 */
	while (low < high) {
		mid = (low + high) >> 1;
		cmp = sym[mid].st_name + str;
		/* first-char comparison is a cheap pre-filter for strcmp */
		if (cmp[0] < name[0] || strcmp(cmp, name) < 0) {
			low = mid + 1;
		} else {
			high = mid;
		}
	}
	KASSERT(low == high);
	if (__predict_true(low < nglob &&
		strcmp(sym[low].st_name + str, name) == 0)) {
		KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL);
		return &sym[low];
	}

	/*
	 * Perform a linear search of local symbols (rare).  Many local
	 * symbols with the same name can exist so are not included in
	 * the binary search.
	 */
	if (type != KSYMS_EXTERN) {
		maxsym = sym + table->sd_symsize / sizeof(Elf_Sym);
		for (sym += nglob; sym < maxsym; sym++) {
			if (strcmp(name, sym->st_name + str) == 0) {
				return sym;
			}
		}
	}
	return NULL;
}

/*
 * The "attach" is in reality done in ksyms_init().
 */

#if NKSYMS > 0

/*
 * ksyms can be loaded even if the kernel has a missing "pseudo-device ksyms"
 * statement because ddb and modules require it. Fixing it properly requires
 * fixing config to warn about required, but missing pseudo-devices. For now,
 * if we don't have the pseudo-device we don't need the attach function; this
 * is fine, as it does nothing.
 */
void
ksymsattach(int arg)
{
}

#endif

/*
 * One-time (idempotent) setup: pull in the compiled-in symtab if
 * present, and create the lock/condvar/pserialize instances.
 */
void
ksyms_init(void)
{

#ifdef makeoptions_COPY_SYMTAB
	/* db_symtab still holding the filler string means no symtab copied */
	if (!ksyms_loaded &&
	    strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) {
		ksyms_addsyms_elf(db_symtabsize, db_symtab,
		    db_symtab + db_symtabsize);
	}
#endif

	if (!ksyms_initted) {
		mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
		cv_init(&ksyms_cv, "ksyms");
		ksyms_psz = pserialize_create();
		ksyms_initted = true;
	}
}

/*
 * Are any symbols available?
 */
bool
ksyms_available(void)
{

	return ksyms_loaded;
}

/*
 * Add a symbol table.
 * This is intended for use when the symbol table and its corresponding
 * string table are easily available.  If they are embedded in an ELF
 * image, use addsymtab_elf() instead.
 *
 * name - Symbol's table name.
 * symstart, symsize - Address and size of the symbol table.
 * strstart, strsize - Address and size of the string table.
 * tab - Symbol table to be updated with this information.
 * newstart - Address to which the symbol table has to be copied during
 *            shrinking.  If NULL, it is not moved.
 */
static const char *addsymtab_strstart;

/*
 * kheapsort comparator: globals sort before locals; within each band,
 * sort lexicographically by name (using addsymtab_strstart as the
 * string table base).
 */
static int
addsymtab_compar(const void *a, const void *b)
{
	const Elf_Sym *sa, *sb;

	sa = a;
	sb = b;

	/*
	 * Split the symbol table into two, with globals at the start
	 * and locals at the end.
	 */
	if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) {
		if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) {
			return -1;
		}
		if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) {
			return 1;
		}
	}

	/* Within each band, sort by name.  */
	return strcmp(sa->st_name + addsymtab_strstart,
	    sb->st_name + addsymtab_strstart);
}

/*
 * Core of table registration: pack the symbol table (dropping useless
 * entries unless DTrace needs them for CTF), sort it globals-first,
 * compute the min/max address range, and publish the table on both the
 * TAILQ (writers) and the pserialize-protected PSLIST (lockless readers).
 * See the parameter description above addsymtab_strstart.
 */
static void
addsymtab(const char *name, void *symstart, size_t symsize,
	  void *strstart, size_t strsize, struct ksyms_symtab *tab,
	  void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap)
{
	Elf_Sym *sym, *nsym, ts;
	int i, j, n, nglob;
	char *str;
	int nsyms = symsize / sizeof(Elf_Sym);
	int s;

	/* Sanity check for pre-allocated map table used during startup. */
	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
		printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n",
		    nsyms, KSYMS_MAX_ID);

		/* truncate for now */
		nsyms = KSYMS_MAX_ID - 1;
	}

	tab->sd_symstart = symstart;
	tab->sd_symsize = symsize;
	tab->sd_strstart = strstart;
	tab->sd_strsize = strsize;
	tab->sd_name = name;
	/* start with an empty [minsym, maxsym] range; filled in below */
	tab->sd_minsym = UINTPTR_MAX;
	tab->sd_maxsym = 0;
	tab->sd_usroffset = 0;
	tab->sd_ctfstart = ctfstart;
	tab->sd_ctfsize = ctfsize;
	tab->sd_nmap = nmap;
	tab->sd_nmapsize = nsyms;
#ifdef KSYMS_DEBUG
	printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n",
	    newstart, symstart, symsize, strstart, strsize,
	    tab->sd_strstart + tab->sd_strsize);
#endif

	if (nmap) {
		memset(nmap, 0, nsyms * sizeof(uint32_t));
	}

	/* Pack symbol table by removing all file name references. */
	sym = tab->sd_symstart;
	nsym = (Elf_Sym *)newstart;
	str = tab->sd_strstart;
	nglob = 0;
	for (i = n = 0; i < nsyms; i++) {
		/*
		 * This breaks CTF mapping, so don't do it when
		 * DTrace is enabled.
		 */
#ifndef KDTRACE_HOOKS
		/*
		 * Remove useless symbols.
		 * Should actually remove all typeless symbols.
		 */
		if (sym[i].st_name == 0)
			continue; /* Skip nameless entries */
		if (sym[i].st_shndx == SHN_UNDEF)
			continue; /* Skip external references */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE)
			continue; /* Skip filenames */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
		    sym[i].st_value == 0 &&
		    strcmp(str + sym[i].st_name, "*ABS*") == 0)
			continue; /* XXX */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
		    strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0)
			continue; /* XXX */
#endif

		/* Save symbol. Set it as an absolute offset */
		nsym[n] = sym[i];

#ifdef KDTRACE_HOOKS
		if (nmap != NULL) {
			/*
			 * Save the size, replace it with the symbol id so
			 * the mapping can be done after the cleanup and sort.
			 */
			nmap[i] = nsym[n].st_size;
			nsym[n].st_size = i + 1;	/* zero is reserved */
		}
#endif

		if (sym[i].st_shndx != SHN_ABS) {
			nsym[n].st_shndx = SHBSS;
		} else {
			/* SHN_ABS is a magic value, don't overwrite it */
		}

		/* track longest name (incl. NUL) for /dev/ksyms consumers */
		j = strlen(nsym[n].st_name + str) + 1;
		if (j > ksyms_maxlen)
			ksyms_maxlen = j;
		nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL);

		/* Compute min and max symbols. */
		if (strcmp(str + sym[i].st_name, "*ABS*") != 0 &&
		    ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) {
			if (nsym[n].st_value < tab->sd_minsym) {
				tab->sd_minsym = nsym[n].st_value;
			}
			if (nsym[n].st_value > tab->sd_maxsym) {
				tab->sd_maxsym = nsym[n].st_value;
			}
		}
		n++;
	}

	/* Fill the rest of the record, and sort the symbols. */
	tab->sd_symstart = nsym;
	tab->sd_symsize = n * sizeof(Elf_Sym);
	tab->sd_nglob = nglob;

	addsymtab_strstart = str;
	if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0)
		panic("addsymtab");

#ifdef KDTRACE_HOOKS
	/*
	 * Build the mapping from original symbol id to new symbol table.
	 * Deleted symbols will have a zero map, indices will be one based
	 * instead of zero based.
	 * Resulting map is sd_nmap[original_index] = new_index + 1
	 */
	if (nmap != NULL) {
		int new;
		for (new = 0; new < n; new++) {
			/* st_size temporarily holds original index + 1 */
			uint32_t orig = nsym[new].st_size - 1;
			uint32_t size = nmap[orig];
			nmap[orig] = new + 1;

			/* restore the size */
			nsym[new].st_size = size;
		}
	}
#endif

	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
	KASSERT(cold || mutex_owned(&ksyms_lock));

	/*
	 * Publish the symtab.  Do this at splhigh to ensure ddb never
	 * witnesses an inconsistent state of the queue, unless memory
	 * is so corrupt that we crash in PSLIST_WRITER_INSERT_AFTER or
	 * TAILQ_INSERT_TAIL.
	 */
	PSLIST_ENTRY_INIT(tab, sd_pslist);
	s = splhigh();
	if (TAILQ_EMPTY(&ksyms_symtabs)) {
		PSLIST_WRITER_INSERT_HEAD(&ksyms_symtabs_psz, tab,
		    sd_pslist);
	} else {
		struct ksyms_symtab *last;

		last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
		PSLIST_WRITER_INSERT_AFTER(last, tab, sd_pslist);
	}
	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
	splx(s);

	ksyms_sizes_calc();
	ksyms_loaded = true;
}

/*
 * Setup the kernel symbol table stuff.
 * Parses the kernel's own ELF image (symsize bytes at start) and
 * registers its .symtab/.strtab (and .SUNW_ctf with DTrace) as the
 * "netbsd" table.
 */
void
ksyms_addsyms_elf(int symsize, void *start, void *end)
{
	int i, j;
	Elf_Shdr *shdr;
	char *symstart = NULL, *strstart = NULL;
	size_t strsize = 0;
	Elf_Ehdr *ehdr;
	char *ctfstart = NULL;
	size_t ctfsize = 0;

	if (symsize <= 0) {
		printf("[ Kernel symbol table missing! ]\n");
		return;
	}

	/* Sanity check */
	if (ALIGNED_POINTER(start, long) == 0) {
		printf("[ Kernel symbol table has bad start address %p ]\n",
		    start);
		return;
	}

	ehdr = (Elf_Ehdr *)start;

	/* check if this is a valid ELF header */
	/* No reason to verify arch type, the kernel is actually running! */
	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
	    ehdr->e_ident[EI_CLASS] != ELFCLASS ||
	    ehdr->e_version > 1) {
		printf("[ Kernel symbol table invalid! ]\n");
		return; /* nothing to do */
	}

	/* Loaded header will be scratched in addsymtab */
	ksyms_hdr_init(start);

	/* Find the symbol table and the corresponding string table. */
	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
	for (i = 1; i < ehdr->e_shnum; i++) {
		if (shdr[i].sh_type != SHT_SYMTAB)
			continue;
		if (shdr[i].sh_offset == 0)
			continue;
		symstart = (uint8_t *)start + shdr[i].sh_offset;
		symsize = shdr[i].sh_size;
		/* sh_link of a SYMTAB section names its string table */
		j = shdr[i].sh_link;
		if (shdr[j].sh_offset == 0)
			continue; /* Can this happen? */
		strstart = (uint8_t *)start + shdr[j].sh_offset;
		strsize = shdr[j].sh_size;
		break;
	}

#ifdef KDTRACE_HOOKS
	/* Find the CTF section */
	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
	if (ehdr->e_shstrndx != 0) {
		char *shstr = (uint8_t *)start +
		    shdr[ehdr->e_shstrndx].sh_offset;
		for (i = 1; i < ehdr->e_shnum; i++) {
#ifdef KSYMS_DEBUG
			printf("ksyms: checking %s\n",
			    &shstr[shdr[i].sh_name]);
#endif
			if (shdr[i].sh_type != SHT_PROGBITS)
				continue;
			if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10)
			    != 0)
				continue;
			ctfstart = (uint8_t *)start + shdr[i].sh_offset;
			ctfsize = shdr[i].sh_size;
			ksyms_ctfsz = ctfsize;
#ifdef DEBUG
			aprint_normal("Found CTF at %p, size 0x%zx\n",
			    ctfstart, ctfsize);
#endif
			break;
		}
#ifdef DEBUG
	} else {
		printf("ksyms: e_shstrndx == 0\n");
#endif
	}
#endif

	if (!ksyms_verify(symstart, strstart))
		return;

	addsymtab("netbsd", symstart, symsize, strstart, strsize,
	    &kernel_symtab, symstart, ctfstart, ctfsize, ksyms_nmap);

#ifdef DEBUG
	aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n",
	    kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
	    (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
#endif

	/* Should be no snapshot to invalidate yet.  */
	KASSERT(ksyms_snapshot == NULL);
}

/*
 * Setup the kernel symbol table stuff.
 * Use this when the address of the symbol and string tables are known;
 * otherwise use ksyms_init with an ELF image.
 * We need to pass a minimal ELF header which will later be completed by
 * ksyms_hdr_init and handed off to userland through /dev/ksyms.  We use
 * a void *rather than a pointer to avoid exposing the Elf_Ehdr type.
 */
void
ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize,
    void *strstart, size_t strsize)
{
	if (!ksyms_verify(symstart, strstart))
		return;

	ksyms_hdr_init(ehdr);
	/* No CTF data in this path (ctfstart = NULL, ctfsize = 0). */
	addsymtab("netbsd", symstart, symsize, strstart, strsize,
	    &kernel_symtab, symstart, NULL, 0, ksyms_nmap);

	/* Should be no snapshot to invalidate yet.  */
	KASSERT(ksyms_snapshot == NULL);
}

/*
 * Get the value associated with a symbol.
 * "mod" is the module name, or null if any module.
 * "sym" is the symbol name.
 * "val" is a pointer to the corresponding value, if call succeeded.
 * Returns 0 if success or ENOENT if no such entry.
 *
 * If symp is nonnull, caller must hold ksyms_lock or module_lock, have
 * ksyms_opencnt nonzero, be in a pserialize read section, or be in ddb
 * with all other CPUs quiescent -- otherwise the returned Elf_Sym
 * pointer could go stale.
 */
int
ksyms_getval_unlocked(const char *mod, const char *sym, Elf_Sym **symp,
    unsigned long *val, int type)
{
	struct ksyms_symtab *st;
	Elf_Sym *es;
	int s, error = ENOENT;

#ifdef KSYMS_DEBUG
	if (ksyms_debug & FOLLOW_CALLS)
		printf("%s: mod %s sym %s valp %p\n", __func__, mod, sym, val);
#endif

	/* Lockless read of the published symtab list. */
	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
	    sd_pslist) {
		if (mod != NULL && strcmp(st->sd_name, mod))
			continue;
		if ((es = findsym(sym, st, type)) != NULL) {
			*val = es->st_value;
			if (symp)
				*symp = es;
			error = 0;
			break;
		}
	}
	pserialize_read_exit(s);
	return error;
}

int
ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type)
{

	if (!ksyms_loaded)
		return ENOENT;

	/* No locking needed -- we read the table pserialized.  */
	return ksyms_getval_unlocked(mod, sym, NULL, val, type);
}

/*
 * ksyms_get_mod(mod)
 *
 *	Return the symtab for the given module name, or NULL if there is
 *	none.  Caller must ensure that the module cannot be unloaded
 *	until after this returns.
 */
struct ksyms_symtab *
ksyms_get_mod(const char *mod)
{
	struct ksyms_symtab *st;
	int s;

	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
	    sd_pslist) {
		if (mod != NULL && strcmp(st->sd_name, mod))
			continue;
		break;
	}
	pserialize_read_exit(s);

	/* st is NULL if the list was exhausted without a match. */
	return st;
}

/*
 * ksyms_mod_foreach()
 *
 *	Iterate over the symbol table of the specified module, calling the callback
 *	handler for each symbol. Stop iterating if the handler return is non-zero.
 *
 */
int
ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
{
	struct ksyms_symtab *st;
	Elf_Sym *sym, *maxsym;
	char *str;
	int symindx;

	if (!ksyms_loaded)
		return ENOENT;

	mutex_enter(&ksyms_lock);

	/* find the module */
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (mod != NULL && strcmp(st->sd_name, mod))
			continue;

		sym = st->sd_symstart;
		str = st->sd_strstart - st->sd_usroffset;

		/* now iterate through the symbols */
		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
		for (symindx = 0; sym < maxsym; sym++, symindx++) {
			if (callback(str + sym->st_name, symindx,
			    (void *)sym->st_value, sym->st_size,
			    sym->st_info, opaque) != 0) {
				break;
			}
		}
	}
	mutex_exit(&ksyms_lock);

	return 0;
}

/*
 * Get "mod" and "symbol" associated with an address.
 * Returns 0 if success or ENOENT if no such entry.
 *
 * Caller must hold ksyms_lock or module_lock, have ksyms_opencnt
 * nonzero, be in a pserialize read section, or be in ddb with all
 * other CPUs quiescent.
 */
int
ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
{
	struct ksyms_symtab *st;
	Elf_Sym *les, *es = NULL;
	vaddr_t laddr = 0;
	const char *lmod = NULL;
	char *stable = NULL;
	int type, i, sz;

	if (!ksyms_loaded)
		return ENOENT;

	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
	    sd_pslist) {
		/* quick reject using the table's address range */
		if (v < st->sd_minsym || v > st->sd_maxsym)
			continue;
		sz = st->sd_symsize/sizeof(Elf_Sym);
		for (i = 0; i < sz; i++) {
			les = st->sd_symstart + i;
			type = ELF_ST_TYPE(les->st_info);

			if ((f & KSYMS_PROC) && (type != STT_FUNC))
				continue;

			if (type == STT_NOTYPE)
				continue;

			if (((f & KSYMS_ANY) == 0) &&
			    (type != STT_FUNC) && (type != STT_OBJECT))
				continue;

			/* keep the closest symbol at or below v */
			if ((les->st_value <= v) && (les->st_value > laddr)) {
				laddr = les->st_value;
				es = les;
				lmod = st->sd_name;
				stable = st->sd_strstart - st->sd_usroffset;
			}
		}
	}
	if (es == NULL)
		return ENOENT;
	if ((f & KSYMS_EXACT) && (v != es->st_value))
		return ENOENT;
	if (mod)
		*mod = lmod;
	if (sym)
		*sym = stable + es->st_name;
	return 0;
}

/*
 * Add a symbol table from a loadable module.
 */
void
ksyms_modload(const char *name, void *symstart, vsize_t symsize,
    char *strstart, vsize_t strsize)
{
	struct ksyms_symtab *st;
	struct ksyms_snapshot *ks;
	void *nmap;

	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
	nmap = kmem_zalloc(symsize / sizeof(Elf_Sym) * sizeof (uint32_t),
	    KM_SLEEP);
	mutex_enter(&ksyms_lock);
	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
	    NULL, 0, nmap);
	/* The cached snapshot no longer reflects the table set; drop it. */
	ks = ksyms_snapshot;
	ksyms_snapshot = NULL;
	mutex_exit(&ksyms_lock);
	if (ks)
		ksyms_snapshot_release(ks);
}

/*
 * Remove a symbol table from a loadable module.
 */
void
ksyms_modunload(const char *name)
{
	struct ksyms_symtab *st;
	struct ksyms_snapshot *ks;
	int s;

	mutex_enter(&ksyms_lock);
	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (strcmp(name, st->sd_name) != 0)
			continue;
		break;
	}
	KASSERT(st != NULL);

	/* Wait for any snapshot in progress to complete.  */
	while (ksyms_snapshotting)
		cv_wait(&ksyms_cv, &ksyms_lock);

	/*
	 * Remove the symtab.  Do this at splhigh to ensure ddb never
	 * witnesses an inconsistent state of the queue, unless memory
	 * is so corrupt that we crash in TAILQ_REMOVE or
	 * PSLIST_WRITER_REMOVE.
	 */
	s = splhigh();
	TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
	PSLIST_WRITER_REMOVE(st, sd_pslist);
	splx(s);

	/*
	 * And wait a grace period, in case there are any pserialized
	 * readers in flight.
	 */
	pserialize_perform(ksyms_psz);
	PSLIST_ENTRY_DESTROY(st, sd_pslist);

	/* Recompute the ksyms sizes now that we've removed st.  */
	ksyms_sizes_calc();

	/* Invalidate the global ksyms snapshot.  */
	ks = ksyms_snapshot;
	ksyms_snapshot = NULL;

	mutex_exit(&ksyms_lock);

	/*
	 * No more references are possible.  Free the name map and the
	 * symtab itself, which we had allocated in ksyms_modload.
	 */
	kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
	kmem_free(st, sizeof(*st));

	/* Release the formerly global ksyms snapshot, if any.  */
	if (ks)
		ksyms_snapshot_release(ks);
}

#ifdef DDB
/*
 * Keep sifting stuff here, to avoid export of ksyms internals.
 *
 * System is expected to be quiescent, so no locking done.
 */
int
ksyms_sift(char *mod, char *sym, int mode)
{
	struct ksyms_symtab *st;
	char *sb;
	int i, sz;

	if (!ksyms_loaded)
		return ENOENT;

	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
		if (mod && strcmp(mod, st->sd_name))
			continue;
		sb = st->sd_strstart - st->sd_usroffset;

		sz = st->sd_symsize/sizeof(Elf_Sym);
		for (i = 0; i < sz; i++) {
			Elf_Sym *les = st->sd_symstart + i;
			char c;

			/* substring match against the symbol name */
			if (strstr(sb + les->st_name, sym) == NULL)
				continue;

			if (mode == 'F') {
				/* 'F' mode: annotate each hit by ELF type */
				switch (ELF_ST_TYPE(les->st_info)) {
				case STT_OBJECT:
					c = '+';
					break;
				case STT_FUNC:
					c = '*';
					break;
				case STT_SECTION:
					c = '&';
					break;
				case STT_FILE:
					c = '/';
					break;
				default:
					c = ' ';
					break;
				}
				db_printf("%s%c ", sb + les->st_name, c);
			} else
				db_printf("%s ", sb + les->st_name);
		}
	}
	return ENOENT;
}
#endif /* DDB */

/*
 * In case we exposing the symbol table to the userland using the pseudo-
 * device /dev/ksyms, it is easier to provide all the tables as one.
* However, it means we have to change all the st_name fields for the * symbols so they match the ELF image that the userland will read * through the device. * * The actual (correct) value of st_name is preserved through a global * offset stored in the symbol table structure. * * Call with ksyms_lock held. */ static void ksyms_sizes_calc(void) { struct ksyms_symtab *st; int i, delta; KASSERT(cold || mutex_owned(&ksyms_lock)); ksyms_symsz = ksyms_strsz = 0; TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { delta = ksyms_strsz - st->sd_usroffset; if (delta != 0) { for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++) st->sd_symstart[i].st_name += delta; st->sd_usroffset = ksyms_strsz; } ksyms_symsz += st->sd_symsize; ksyms_strsz += st->sd_strsize; } } static void ksyms_fill_note(void) { int32_t *note = ksyms_hdr.kh_note; note[0] = ELF_NOTE_NETBSD_NAMESZ; note[1] = ELF_NOTE_NETBSD_DESCSZ; note[2] = ELF_NOTE_TYPE_NETBSD_TAG; memcpy(¬e[3], "NetBSD\0", 8); note[5] = __NetBSD_Version__; } static void ksyms_hdr_init(const void *hdraddr) { /* Copy the loaded elf exec header */ memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr)); /* Set correct program/section header sizes, offsets and numbers */ ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]); ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr); ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR; ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]); ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr); ksyms_hdr.kh_ehdr.e_shnum = NSECHDR; ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB; /* Text/data - fake */ ksyms_hdr.kh_phdr[0].p_type = PT_LOAD; ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L; ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W; #define SHTCOPY(name) strlcpy(&ksyms_hdr.kh_strtab[offs], (name), \ sizeof(ksyms_hdr.kh_strtab) - offs), offs += sizeof(name) uint32_t offs = 1; /* First section header ".note.netbsd.ident" */ ksyms_hdr.kh_shdr[SHNOTE].sh_name = offs; ksyms_hdr.kh_shdr[SHNOTE].sh_type = SHT_NOTE; 
ksyms_hdr.kh_shdr[SHNOTE].sh_offset = offsetof(struct ksyms_hdr, kh_note[0]); ksyms_hdr.kh_shdr[SHNOTE].sh_size = sizeof(ksyms_hdr.kh_note); ksyms_hdr.kh_shdr[SHNOTE].sh_addralign = sizeof(int); SHTCOPY(".note.netbsd.ident"); ksyms_fill_note(); /* Second section header; ".symtab" */ ksyms_hdr.kh_shdr[SYMTAB].sh_name = offs; ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB; ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr); /* ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */ ksyms_hdr.kh_shdr[SYMTAB].sh_link = STRTAB; /* Corresponding strtab */ ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long); ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym); SHTCOPY(".symtab"); /* Third section header; ".strtab" */ ksyms_hdr.kh_shdr[STRTAB].sh_name = offs; ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB; /* ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */ /* ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */ ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char); SHTCOPY(".strtab"); /* Fourth section, ".shstrtab" */ ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = offs; ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB; ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset = offsetof(struct ksyms_hdr, kh_strtab); ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ; ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char); SHTCOPY(".shstrtab"); /* Fifth section, ".bss". All symbols reside here. 
*/ ksyms_hdr.kh_shdr[SHBSS].sh_name = offs; ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS; ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0; ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L; ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE; ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR; SHTCOPY(".bss"); /* Sixth section header; ".SUNW_ctf" */ ksyms_hdr.kh_shdr[SHCTF].sh_name = offs; ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS; /* ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */ /* ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */ ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */ ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char); SHTCOPY(".SUNW_ctf"); } static struct ksyms_snapshot * ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen) { struct ksyms_snapshot *ks; ks = kmem_zalloc(sizeof(*ks), KM_SLEEP); ks->ks_refcnt = 1; ks->ks_gen = gen; ks->ks_uobj = uao_create(size, 0); ks->ks_size = size; ks->ks_dev = dev; ks->ks_maxlen = maxlen; return ks; } static void ksyms_snapshot_release(struct ksyms_snapshot *ks) { uint64_t refcnt; mutex_enter(&ksyms_lock); refcnt = --ks->ks_refcnt; mutex_exit(&ksyms_lock); if (refcnt) return; uao_detach(ks->ks_uobj); kmem_free(ks, sizeof(*ks)); } static int ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf, size_t n) { struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n }; uio->uio_iov = &iov; uio->uio_iovcnt = 1; uio->uio_resid = n; return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE); } static int ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last) { struct uvm_object *uobj = ks->ks_uobj; struct uio uio; struct ksyms_symtab *st; int error; /* Caller must have initiated snapshotting. */ KASSERT(ksyms_snapshotting == curlwp); /* Start a uio transfer to reuse incrementally. 
	 */
	uio.uio_offset = 0;
	uio.uio_rw = UIO_WRITE;	/* write from buffer to uobj */
	UIO_SETUP_SYSSPACE(&uio);

	/*
	 * First: Copy out the ELF header.
	 */
	error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr));
	if (error)
		return error;

	/*
	 * Copy out the symbol table.  The list of symtabs is
	 * guaranteed to be nonempty because we always have an entry
	 * for the main kernel.  We stop at last, not at the end of the
	 * tailq or NULL, because entries beyond last are not included
	 * in this snapshot (and may not be fully initialized memory as
	 * we witness it).
	 */
	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr));
	for (st = TAILQ_FIRST(&ksyms_symtabs); ;
	     st = TAILQ_NEXT(st, sd_queue)) {
		error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart,
		    st->sd_symsize);
		if (error)
			return error;
		if (st == last)
			break;
	}

	/*
	 * Copy out the string table
	 */
	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
	for (st = TAILQ_FIRST(&ksyms_symtabs); ;
	     st = TAILQ_NEXT(st, sd_queue)) {
		error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart,
		    st->sd_strsize);
		if (error)
			return error;
		if (st == last)
			break;
	}

	/*
	 * Copy out the CTF table.
	 */
	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
	    ksyms_hdr.kh_shdr[STRTAB].sh_size);
	st = TAILQ_FIRST(&ksyms_symtabs);
	/* only the main kernel table can carry CTF data */
	if (st->sd_ctfstart != NULL) {
		error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart,
		    st->sd_ctfsize);
		if (error)
			return error;
	}

	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
	    ksyms_hdr.kh_shdr[STRTAB].sh_size +
	    ksyms_hdr.kh_shdr[SHCTF].sh_size);
	KASSERT(uio.uio_offset == ks->ks_size);

	return 0;
}

static const struct fileops ksyms_fileops;

/*
 * Open /dev/ksyms: hand the caller a private file backed by a snapshot
 * of the current symbol tables -- either the cached one, or a freshly
 * taken one (which then becomes the cache).  Uses fd_clone, so the
 * open returns EMOVEFD on success.
 */
static int
ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l)
{
	struct file *fp = NULL;
	int fd = -1;
	struct ksyms_snapshot *ks = NULL;
	size_t size;
	struct ksyms_symtab *last;
	int maxlen;
	uint64_t gen;
	int error;

	if (minor(dev) != 0 || !ksyms_loaded)
		return ENXIO;

	/* Allocate a private file.  */
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	mutex_enter(&ksyms_lock);

	/*
	 * Wait until we have a snapshot, or until there is no snapshot
	 * being taken right now so we can take one.
	 */
	while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) {
		error = cv_wait_sig(&ksyms_cv, &ksyms_lock);
		if (error)
			goto out;
	}

	/*
	 * If there's a usable snapshot, increment its reference count
	 * (can't overflow, 64-bit) and just reuse it.
	 */
	if (ks) {
		ks->ks_refcnt++;
		goto out;
	}

	/* Find the current length of the symtab object. */
	size = sizeof(struct ksyms_hdr);
	size += ksyms_strsz;
	size += ksyms_symsz;
	size += ksyms_ctfsz;

	/* Start a new snapshot.  */
	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
	last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
	maxlen = ksyms_maxlen;
	gen = ksyms_snapshot_gen++;

	/*
	 * Prevent ksyms entries from being removed while we take the
	 * snapshot.
	 */
	KASSERT(ksyms_snapshotting == NULL);
	ksyms_snapshotting = curlwp;
	mutex_exit(&ksyms_lock);

	/* Create a snapshot and write the symtab to it.  */
	ks = ksyms_snapshot_alloc(maxlen, size, dev, gen);
	error = ksyms_take_snapshot(ks, last);

	/*
	 * Snapshot creation is done.  Wake up anyone waiting to remove
	 * entries (module unload).
	 */
	mutex_enter(&ksyms_lock);
	KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot",
	    ksyms_snapshotting);
	ksyms_snapshotting = NULL;
	cv_broadcast(&ksyms_cv);

	/* If we failed, give up.  */
	if (error)
		goto out;

	/* Cache the snapshot for the next reader.  */
	KASSERT(ksyms_snapshot == NULL);
	ksyms_snapshot = ks;
	ks->ks_refcnt++;	/* one ref for the cache, one for this open */
	KASSERT(ks->ks_refcnt == 2);

out:	mutex_exit(&ksyms_lock);

	if (error) {
		if (fp)
			fd_abort(curproc, fp, fd);
		if (ks)
			ksyms_snapshot_release(ks);
	} else {
		KASSERT(fp);
		KASSERT(ks);
		error = fd_clone(fp, fd, flags, &ksyms_fileops, ks);
		KASSERTMSG(error == EMOVEFD, "error=%d", error);
	}

	return error;
}

/*
 * Close a /dev/ksyms file: just drop its snapshot reference.
 */
static int
ksymsclose(struct file *fp)
{
	struct ksyms_snapshot *ks = fp->f_data;

	ksyms_snapshot_release(ks);

	return 0;
}

/*
 * Read from a /dev/ksyms file's snapshot at the given offset.
 */
static int
ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	const struct ksyms_snapshot *ks = fp->f_data;
	size_t count;
	int error;

	/*
	 * Since we don't have a per-object lock, we might as well use
	 * the struct file lock to serialize access to fp->f_offset --
	 * but if the caller isn't relying on or updating fp->f_offset,
	 * there's no need to do even that.  We could use ksyms_lock,
	 * but why bother with a global lock if not needed?  Either
	 * way, the lock we use here must agree with what ksymsseek
	 * takes (nothing else in ksyms uses fp->f_offset).
	 */
	if (offp == &fp->f_offset)
		mutex_enter(&fp->f_lock);

	/* Refuse negative offsets.  */
	if (*offp < 0) {
		error = EINVAL;
		goto out;
	}

	/* Return nothing at or past end of file.  */
	if (*offp >= ks->ks_size) {
		error = 0;
		goto out;
	}

	/*
	 * 1. Set up the uio to transfer from offset *offp.
	 * 2. Transfer as many bytes as we can (at most uio->uio_resid
	 *    or what's left in the ksyms).
	 * 3. If requested, update *offp to reflect the number of bytes
	 *    transferred.
*/ uio->uio_offset = *offp; count = uio->uio_resid; error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp), UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK); if (flags & FOF_UPDATE_OFFSET) *offp += count - uio->uio_resid; out: if (offp == &fp->f_offset) mutex_exit(&fp->f_lock); return error; } static int ksymsstat(struct file *fp, struct stat *st) { const struct ksyms_snapshot *ks = fp->f_data; memset(st, 0, sizeof(*st)); st->st_dev = NODEV; st->st_ino = 0; st->st_mode = S_IFCHR; st->st_nlink = 1; st->st_uid = kauth_cred_geteuid(fp->f_cred); st->st_gid = kauth_cred_getegid(fp->f_cred); st->st_rdev = ks->ks_dev; st->st_size = ks->ks_size; /* zero time */ st->st_blksize = MAXPHYS; /* XXX arbitrary */ st->st_blocks = 0; st->st_gen = ks->ks_gen; return 0; } static int ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp, int *advicep, struct uvm_object **uobjp, int *maxprotp) { const struct ksyms_snapshot *ks = fp->f_data; /* uvm_mmap guarantees page-aligned offset and size. */ KASSERT(*offp == round_page(*offp)); KASSERT(nbytes == round_page(nbytes)); KASSERT(nbytes > 0); /* Refuse negative offsets. */ if (*offp < 0) return EINVAL; /* Refuse mappings that pass the end of file. */ if (nbytes > round_page(ks->ks_size) || *offp > round_page(ks->ks_size) - nbytes) return EINVAL; /* XXX ??? */ /* Success! */ uao_reference(ks->ks_uobj); *advicep = UVM_ADV_SEQUENTIAL; *uobjp = ks->ks_uobj; *maxprotp = prot & VM_PROT_READ; return 0; } static int ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags) { struct ksyms_snapshot *ks = fp->f_data; off_t base, newoff; int error; mutex_enter(&fp->f_lock); switch (whence) { case SEEK_CUR: base = fp->f_offset; break; case SEEK_END: base = ks->ks_size; break; case SEEK_SET: base = 0; break; default: error = EINVAL; goto out; } /* Compute the new offset and validate it. */ newoff = base + delta; /* XXX arithmetic overflow */ if (newoff < 0) { error = EINVAL; goto out; } /* Success! 
*/ if (newoffp) *newoffp = newoff; if (flags & FOF_UPDATE_OFFSET) fp->f_offset = newoff; error = 0; out: mutex_exit(&fp->f_lock); return error; } __CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name)); __CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name)); static int ksymsioctl(struct file *fp, u_long cmd, void *data) { struct ksyms_snapshot *ks = fp->f_data; struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data; struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data; struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data; struct ksyms_symtab *st; Elf_Sym *sym = NULL, copy; unsigned long val; int error = 0; char *str = NULL; int len, s; /* Read cached ksyms_maxlen. */ len = ks->ks_maxlen; if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL || cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) { str = kmem_alloc(len, KM_SLEEP); if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) { kmem_free(str, len); return error; } } switch (cmd) { case OKIOCGVALUE: /* * Use the in-kernel symbol lookup code for fast * retreival of a value. */ error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN); if (error == 0) error = copyout(&val, okg->kg_value, sizeof(long)); kmem_free(str, len); break; case OKIOCGSYMBOL: /* * Use the in-kernel symbol lookup code for fast * retreival of a symbol. */ s = pserialize_read_enter(); PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab, sd_pslist) { if ((sym = findsym(str, st, KSYMS_ANY)) == NULL) continue; #ifdef notdef /* Skip if bad binding */ if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) { sym = NULL; continue; } #endif break; } if (sym != NULL) { memcpy(©, sym, sizeof(copy)); pserialize_read_exit(s); error = copyout(©, okg->kg_sym, sizeof(Elf_Sym)); } else { pserialize_read_exit(s); error = ENOENT; } kmem_free(str, len); break; case KIOCGVALUE: /* * Use the in-kernel symbol lookup code for fast * retreival of a value. 
*/ error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN); if (error == 0) kv->kv_value = val; kmem_free(str, len); break; case KIOCGSYMBOL: /* * Use the in-kernel symbol lookup code for fast * retreival of a symbol. */ s = pserialize_read_enter(); PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab, sd_pslist) { if ((sym = findsym(str, st, KSYMS_ANY)) == NULL) continue; #ifdef notdef /* Skip if bad binding */ if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) { sym = NULL; continue; } #endif break; } if (sym != NULL) { kg->kg_sym = *sym; } else { error = ENOENT; } pserialize_read_exit(s); kmem_free(str, len); break; case KIOCGSIZE: /* * Get total size of symbol table. */ *(int *)data = ks->ks_size; break; default: error = ENOTTY; break; } return error; } const struct cdevsw ksyms_cdevsw = { .d_open = ksymsopen, .d_close = noclose, .d_read = noread, .d_write = nowrite, .d_ioctl = noioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER | D_MPSAFE }; static const struct fileops ksyms_fileops = { .fo_name = "ksyms", .fo_read = ksymsread, .fo_write = fbadop_write, .fo_ioctl = ksymsioctl, .fo_fcntl = fnullop_fcntl, .fo_poll = fnullop_poll, .fo_stat = ksymsstat, .fo_close = ksymsclose, .fo_kqfilter = fnullop_kqfilter, .fo_restart = fnullop_restart, .fo_mmap = ksymsmmap, .fo_seek = ksymsseek, }; |
| 12 52 4 23 12 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 | /* $NetBSD: in_var.h,v 1.102 2021/03/08 22:01:18 christos Exp $ 
*/ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Public Access Networks Corporation ("Panix"). It was developed under * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1985, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_var.h 8.2 (Berkeley) 1/9/95 */ #ifndef _NETINET_IN_VAR_H_ #define _NETINET_IN_VAR_H_ #include <sys/queue.h> #define IN_IFF_TENTATIVE 0x01 /* tentative address */ #define IN_IFF_DUPLICATED 0x02 /* DAD detected duplicate */ #define IN_IFF_DETACHED 0x04 /* may be detached from the link */ #define IN_IFF_TRYTENTATIVE 0x08 /* intent to try DAD */ #define IN_IFFBITS \ "\020\1TENTATIVE\2DUPLICATED\3DETACHED\4TRYTENTATIVE" /* do not input/output */ #define IN_IFF_NOTREADY \ (IN_IFF_TRYTENTATIVE | IN_IFF_TENTATIVE | IN_IFF_DUPLICATED) /* * Interface address, Internet version. One of these structures * is allocated for each interface with an Internet address. * The ifaddr structure contains the protocol-independent part * of the structure and is assumed to be first. 
 */
struct in_ifaddr {
	struct ifaddr ia_ifa;		/* protocol-independent info */
#define	ia_ifp		ia_ifa.ifa_ifp
#define	ia_flags	ia_ifa.ifa_flags
	/* ia_{,sub}net{,mask} in host order */
	u_int32_t ia_net;		/* network number of interface */
	u_int32_t ia_netmask;		/* mask of net part */
	u_int32_t ia_subnet;		/* subnet number, including net */
	u_int32_t ia_subnetmask;	/* mask of subnet part */
	struct in_addr ia_netbroadcast; /* to recognize net broadcasts */
	LIST_ENTRY(in_ifaddr) ia_hash;	/* entry in bucket of inet addresses */
	TAILQ_ENTRY(in_ifaddr) ia_list;	/* list of internet addresses */
	struct sockaddr_in ia_addr;	/* reserve space for interface name */
	struct sockaddr_in ia_dstaddr;	/* reserve space for broadcast addr */
#define	ia_broadaddr	ia_dstaddr
	struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
	LIST_HEAD(, in_multi) ia_multiaddrs; /* list of multicast addresses */
	struct in_multi *ia_allhosts;	/* multicast address record for
					   the allhosts multicast group */
	uint16_t ia_idsalt;		/* ip_id salt for this ia */
	int	ia4_flags;		/* address flags */
	void	(*ia_dad_start) (struct ifaddr *);	/* DAD start function */
	void	(*ia_dad_stop) (struct ifaddr *);	/* DAD stop function */
	time_t	ia_dad_defended;	/* last time of DAD defence */

#ifdef _KERNEL
	/* Lock-free list linkage (pserialize readers; see macros below). */
	struct pslist_entry	ia_hash_pslist_entry;
	struct pslist_entry	ia_pslist_entry;
#endif
};

/* ARP neighbor information, as reported to userland (sysctl/ioctl). */
struct in_nbrinfo {
	char ifname[IFNAMSIZ];	/* if name, e.g. "en0" */
	struct in_addr addr;	/* IPv4 address of the neighbor */
	long	asked;		/* number of queries already sent for this addr */
	int	state;		/* reachability state */
	int	expire;		/* lifetime for NDP state transition */
};

#ifdef _KERNEL
/* Acquire a passive reference on an IPv4 interface address. */
static __inline void
ia4_acquire(struct in_ifaddr *ia, struct psref *psref)
{

	KASSERT(ia != NULL);
	ifa_acquire(&ia->ia_ifa, psref);
}

/* Release a passive reference; NULL ia is tolerated as a no-op. */
static __inline void
ia4_release(struct in_ifaddr *ia, struct psref *psref)
{

	if (ia == NULL)
		return;
	ifa_release(&ia->ia_ifa, psref);
}
#endif

struct in_aliasreq {
	char	ifra_name[IFNAMSIZ];	/* if name, e.g. "en0" */
	struct	sockaddr_in ifra_addr;
	struct	sockaddr_in ifra_dstaddr;
#define	ifra_broadaddr	ifra_dstaddr
	struct	sockaddr_in ifra_mask;
};

/*
 * Given a pointer to an in_ifaddr (ifaddr),
 * return a pointer to the addr as a sockaddr_in.
 */
#define	IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))

#ifdef	_KERNEL
/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
#ifndef IN_IFADDR_HASH_SIZE
#define IN_IFADDR_HASH_SIZE	509
#endif

/*
 * This is a bit unconventional, and wastes a little bit of space, but
 * because we want a very even hash function we don't use & in_ifaddrhash
 * here, but rather % the hash size, which should obviously be prime.
 */
#define IN_IFADDR_HASH(x) in_ifaddrhashtbl[(u_long)(x) % IN_IFADDR_HASH_SIZE]

LIST_HEAD(in_ifaddrhashhead, in_ifaddr);	/* Type of the hash head */
TAILQ_HEAD(in_ifaddrhead, in_ifaddr);		/* Type of the list head */

extern	u_long in_ifaddrhash;			/* size of hash table - 1 */
extern	struct in_ifaddrhashhead *in_ifaddrhashtbl;	/* Hash table head */
extern	struct in_ifaddrhead in_ifaddrhead;	/* List head (in ip_input) */

/* pserialize-protected variants of the hash table and address list. */
extern	pserialize_t in_ifaddrhash_psz;
extern	struct pslist_head *in_ifaddrhashtbl_pslist;
extern	u_long in_ifaddrhash_pslist;
extern	struct pslist_head in_ifaddrhead_pslist;

#define IN_IFADDR_HASH_PSLIST(x) \
	in_ifaddrhashtbl_pslist[(u_long)(x) % IN_IFADDR_HASH_SIZE]

#define IN_ADDRHASH_READER_FOREACH(__ia, __addr) \
	PSLIST_READER_FOREACH((__ia), &IN_IFADDR_HASH_PSLIST(__addr), \
	    struct in_ifaddr, ia_hash_pslist_entry)
#define IN_ADDRHASH_WRITER_INSERT_HEAD(__ia) \
	PSLIST_WRITER_INSERT_HEAD( \
	    &IN_IFADDR_HASH_PSLIST((__ia)->ia_addr.sin_addr.s_addr), \
	    (__ia), ia_hash_pslist_entry)
#define IN_ADDRHASH_WRITER_REMOVE(__ia) \
	PSLIST_WRITER_REMOVE((__ia), ia_hash_pslist_entry)
#define IN_ADDRHASH_ENTRY_INIT(__ia) \
	PSLIST_ENTRY_INIT((__ia), ia_hash_pslist_entry);
#define IN_ADDRHASH_ENTRY_DESTROY(__ia) \
	PSLIST_ENTRY_DESTROY((__ia), ia_hash_pslist_entry);
#define IN_ADDRHASH_READER_NEXT(__ia) \
	PSLIST_READER_NEXT((__ia), struct in_ifaddr, ia_hash_pslist_entry)

#define IN_ADDRLIST_ENTRY_INIT(__ia) \
	PSLIST_ENTRY_INIT((__ia), ia_pslist_entry)
#define IN_ADDRLIST_ENTRY_DESTROY(__ia) \
	PSLIST_ENTRY_DESTROY((__ia), ia_pslist_entry);
#define IN_ADDRLIST_READER_EMPTY() \
	(PSLIST_READER_FIRST(&in_ifaddrhead_pslist, struct in_ifaddr, \
	    ia_pslist_entry) == NULL)
#define IN_ADDRLIST_READER_FIRST() \
	PSLIST_READER_FIRST(&in_ifaddrhead_pslist, struct in_ifaddr, \
	    ia_pslist_entry)
#define IN_ADDRLIST_READER_NEXT(__ia) \
	PSLIST_READER_NEXT((__ia), struct in_ifaddr, ia_pslist_entry)
#define IN_ADDRLIST_READER_FOREACH(__ia) \
	PSLIST_READER_FOREACH((__ia), &in_ifaddrhead_pslist, \
	    struct in_ifaddr, ia_pslist_entry)
#define IN_ADDRLIST_WRITER_INSERT_HEAD(__ia) \
	PSLIST_WRITER_INSERT_HEAD(&in_ifaddrhead_pslist, (__ia), \
	    ia_pslist_entry)
#define IN_ADDRLIST_WRITER_REMOVE(__ia) \
	PSLIST_WRITER_REMOVE((__ia), ia_pslist_entry)
#define IN_ADDRLIST_WRITER_FOREACH(__ia) \
	PSLIST_WRITER_FOREACH((__ia), &in_ifaddrhead_pslist, \
	    struct in_ifaddr, ia_pslist_entry)
#define IN_ADDRLIST_WRITER_FIRST() \
	PSLIST_WRITER_FIRST(&in_ifaddrhead_pslist, struct in_ifaddr, \
	    ia_pslist_entry)
#define IN_ADDRLIST_WRITER_NEXT(__ia) \
	PSLIST_WRITER_NEXT((__ia), struct in_ifaddr, ia_pslist_entry)
#define IN_ADDRLIST_WRITER_INSERT_AFTER(__ia, __new) \
	PSLIST_WRITER_INSERT_AFTER((__ia), (__new), ia_pslist_entry)
#define IN_ADDRLIST_WRITER_EMPTY() \
	(PSLIST_WRITER_FIRST(&in_ifaddrhead_pslist, struct in_ifaddr, \
	    ia_pslist_entry) == NULL)
/* Append by walking to the last element; writers are serialized. */
#define IN_ADDRLIST_WRITER_INSERT_TAIL(__new) \
	do { \
		if (IN_ADDRLIST_WRITER_EMPTY()) { \
			IN_ADDRLIST_WRITER_INSERT_HEAD((__new)); \
		} else { \
			struct in_ifaddr *__ia; \
			IN_ADDRLIST_WRITER_FOREACH(__ia) { \
				if (IN_ADDRLIST_WRITER_NEXT(__ia) == NULL) { \
					IN_ADDRLIST_WRITER_INSERT_AFTER(__ia,\
					    (__new)); \
					break; \
				} \
			} \
		} \
	} while (0)

extern const int inetctlerrmap[];

/*
 * Find whether an internet address (in_addr) belongs to one
 * of our interfaces (in_ifaddr).  NULL if the address isn't ours.
 *
 * Caller must be in a pserialize read section (or hold the writer
 * lock); the returned pointer is only stable within it.
 */
static __inline struct in_ifaddr *
in_get_ia(struct in_addr addr)
{
	struct in_ifaddr *ia;

	IN_ADDRHASH_READER_FOREACH(ia, addr.s_addr) {
		if (in_hosteq(ia->ia_addr.sin_addr, addr))
			break;
	}

	return ia;
}

/* As in_get_ia, but returns with a psref held on the result. */
static __inline struct in_ifaddr *
in_get_ia_psref(struct in_addr addr, struct psref *psref)
{
	struct in_ifaddr *ia;
	int s;

	s = pserialize_read_enter();
	ia = in_get_ia(addr);
	if (ia != NULL)
		ia4_acquire(ia, psref);
	pserialize_read_exit(s);

	return ia;
}

/*
 * Find whether an internet address (in_addr) belongs to a specified
 * interface.  NULL if the address isn't ours.
 */
static __inline struct in_ifaddr *
in_get_ia_on_iface(struct in_addr addr, struct ifnet *ifp)
{
	struct in_ifaddr *ia;

	IN_ADDRHASH_READER_FOREACH(ia, addr.s_addr) {
		if (in_hosteq(ia->ia_addr.sin_addr, addr) &&
		    ia->ia_ifp == ifp)
			break;
	}

	return ia;
}

/* As in_get_ia_on_iface, but returns with a psref held on the result. */
static __inline struct in_ifaddr *
in_get_ia_on_iface_psref(struct in_addr addr, struct ifnet *ifp,
    struct psref *psref)
{
	struct in_ifaddr *ia;
	int s;

	s = pserialize_read_enter();
	ia = in_get_ia_on_iface(addr, ifp);
	if (ia != NULL)
		ia4_acquire(ia, psref);
	pserialize_read_exit(s);

	return ia;
}

/*
 * Find an internet address structure (in_ifaddr) corresponding
 * to a given interface (ifnet structure).
 */
static __inline struct in_ifaddr *
in_get_ia_from_ifp(struct ifnet *ifp)
{
	struct ifaddr *ifa;

	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family == AF_INET)
			break;
	}

	return ifatoia(ifa);
}

/* As in_get_ia_from_ifp, but returns with a psref held on the result. */
static __inline struct in_ifaddr *
in_get_ia_from_ifp_psref(struct ifnet *ifp, struct psref *psref)
{
	struct in_ifaddr *ia;
	int s;

	s = pserialize_read_enter();
	ia = in_get_ia_from_ifp(ifp);
	if (ia != NULL)
		ia4_acquire(ia, psref);
	pserialize_read_exit(s);

	return ia;
}

#include <netinet/in_selsrc.h>
/*
 * IPv4 per-interface state.
 */
struct in_ifinfo {
	struct lltable		*ii_llt;	/* ARP state */
	struct in_ifsysctl	*ii_selsrc;
};

#endif /* _KERNEL */

/*
 * Internet multicast address structure.  There is one of these for each IP
 * multicast group to which this host belongs on a given network interface.
 * They are kept in a linked list, rooted in the interface's in_ifaddr
 * structure.
 */
struct router_info;

struct in_multi {
	LIST_ENTRY(in_multi) inm_list;	/* list of multicast addresses */
	struct	router_info *inm_rti;	/* router version info */
	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
	struct	in_addr inm_addr;	/* IP multicast address */
	u_int	inm_refcount;		/* no. membership claims by sockets */
	u_int	inm_timer;		/* IGMP membership report timer */
	u_int	inm_state;		/* state of membership */
};

#ifdef _KERNEL
#include <net/pktqueue.h>
#include <sys/cprng.h>

extern pktqueue_t *ip_pktq;

extern int ip_dad_count;		/* Duplicate Address Detection probes */

/* DAD is only meaningful when ARP is configured into the kernel. */
static inline bool
ip_dad_enabled(void)
{
#if NARP > 0
	return ip_dad_count > 0;
#else
	return false;
#endif
}

#if defined(INET) && NARP > 0
extern int arp_debug;
#define ARPLOGADDR(a) IN_PRINT(_ipbuf, a)
#define ARPLOG(level, fmt, args...) \
	do { \
		char _ipbuf[INET_ADDRSTRLEN]; \
		(void)_ipbuf; \
		if (arp_debug) \
			log(level, "%s: " fmt, __func__, ##args); \
	} while (/*CONSTCOND*/0)
#else
#define ARPLOG(level, fmt, args...)
#endif

/*
 * Structure used by functions below to remember position when stepping
 * through all of the in_multi records.
 */
struct in_multistep {
	int i_n;
	struct in_multi *i_inm;
};

bool in_multi_group(struct in_addr, struct ifnet *, int);
struct in_multi *in_first_multi(struct in_multistep *);
struct in_multi *in_next_multi(struct in_multistep *);
struct in_multi *in_lookup_multi(struct in_addr, struct ifnet *);
struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
void in_delmulti(struct in_multi *);

void in_multi_lock(int);
void in_multi_unlock(void);
int in_multi_lock_held(void);

struct ifaddr;

int in_ifinit(struct ifnet *, struct in_ifaddr *,
    const struct sockaddr_in *, const struct sockaddr_in *, int);
void in_savemkludge(struct in_ifaddr *);
void in_restoremkludge(struct in_ifaddr *, struct ifnet *);
void in_purgemkludge(struct ifnet *);
void in_setmaxmtu(void);
int in_control(struct socket *, u_long, void *, struct ifnet *);
void in_purgeaddr(struct ifaddr *);
void in_purgeif(struct ifnet *);
void in_addrhash_insert(struct in_ifaddr *);
void in_addrhash_remove(struct in_ifaddr *);
int ipflow_fastforward(struct mbuf *);

extern uint16_t ip_id;
extern int ip_do_randomid;

/* Return a pseudorandom IP ID, mapping 0 to 1 so an ID is never zero. */
static __inline uint16_t
ip_randomid(void)
{
	uint16_t id = (uint16_t)cprng_fast32();
	return id ? id : 1;
}

/*
 * ip_newid_range: "allocate" num contiguous IP IDs.
 *
 * => Return the first ID.
 */
static __inline uint16_t
ip_newid_range(const struct in_ifaddr *ia, u_int num)
{
	uint16_t id;

	if (ip_do_randomid) {
		/* XXX ignore num */
		return ip_randomid();
	}

	/* Never allow an IP ID of 0 (detect wrap). */
	if ((uint16_t)(ip_id + num) < ip_id) {
		ip_id = 1;
	}
	id = htons(ip_id);
	ip_id += num;

	return id;
}

/* Allocate a single IP ID. */
static __inline uint16_t
ip_newid(const struct in_ifaddr *ia)
{

	return ip_newid_range(ia, 1);
}

#ifdef SYSCTLFN_PROTO
int sysctl_inpcblist(SYSCTLFN_PROTO);
#endif

#define LLTABLE(ifp) \
	((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt

#endif /* !_KERNEL */

/* INET6 stuff */
#include <netinet6/in6_var.h>

#endif /* !_NETINET_IN_VAR_H_ */
| 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | /* $NetBSD: uxrcom.c,v 1.2 2020/07/09 13:43:04 simonb Exp $ */ /* $OpenBSD: uxrcom.c,v 1.1 2019/03/27 22:08:51 kettenis Exp $ */ /* * Copyright (c) 1998, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net) at * Carlstedt Research & Technology and Simon Burge. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 2006 Jonathan Gray <jsg@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uxrcom.c,v 1.2 2020/07/09 13:43:04 simonb Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/tty.h> #include <sys/device.h> #include <dev/usb/usb.h> #include <dev/usb/usbcdc.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/usbhist.h> #include <dev/usb/usbdevs.h> #include <dev/usb/ucomvar.h> #include <dev/usb/umodemvar.h> #define UXRCOMBUFSZ 64 /* XXX uxrcomreg.h */ #define XR_SET_REG 0 #define XR_GET_REGN 1 #define XR_FLOW_CONTROL 0x000c #define XR_FLOW_CONTROL_ON 1 #define XR_FLOW_CONTROL_OFF 0 #define XR_TX_BREAK 0x0014 #define XR_TX_BREAK_ON 1 #define XR_TX_BREAK_OFF 0 #define XR_GPIO_SET 0x001d #define XR_GPIO_CLEAR 0x001e #define XR_GPIO3 (1 << 3) #define XR_GPIO5 (1 << 5) /* for XR_SET_REG/XR_GET_REGN specify which uart block to use */ #define XR_UART_BLOCK(sc) (((sc)->sc_ctl_iface_no / 2) << NBBY) #ifdef UXRCOM_DEBUG #define DPRINTF(x) if (uxrcomdebug) printf x int uxrcomdebug = 0; #else #define DPRINTF(x) #endif static void uxrcom_set(void *, int, int, int); static int uxrcom_param(void *, int, struct termios *); static void uxrcom_break(void *, int, int); static const struct ucom_methods uxrcom_methods = { .ucom_get_status = umodem_get_status, .ucom_set = uxrcom_set, .ucom_param = uxrcom_param, .ucom_ioctl = NULL, /* TODO */ .ucom_open = umodem_open, .ucom_close = umodem_close, }; static const struct usb_devno uxrcom_devs[] = { { USB_VENDOR_EXAR, USB_PRODUCT_EXAR_XR21V1410 }, { USB_VENDOR_EXAR, USB_PRODUCT_EXAR_XR21V1412 }, { USB_VENDOR_EXAR, USB_PRODUCT_EXAR_XR21V1414 }, }; #define uxrcom_lookup(v, p) usb_lookup(uxrcom_devs, v, p) static int uxrcom_match(device_t, cfdata_t, void *); static void uxrcom_attach(device_t, device_t, void *); static int uxrcom_detach(device_t, int); CFATTACH_DECL_NEW(uxrcom, sizeof(struct umodem_softc), uxrcom_match, uxrcom_attach, uxrcom_detach, NULL); static int 
uxrcom_match(device_t parent, cfdata_t match, void *aux) { struct usbif_attach_arg *uiaa = aux; if (uiaa->uiaa_class != UICLASS_CDC || uiaa->uiaa_subclass != UISUBCLASS_ABSTRACT_CONTROL_MODEL || !(uiaa->uiaa_proto == UIPROTO_CDC_NOCLASS || uiaa->uiaa_proto == UIPROTO_CDC_AT)) return UMATCH_NONE; return uxrcom_lookup(uiaa->uiaa_vendor, uiaa->uiaa_product) != NULL ? UMATCH_VENDOR_PRODUCT : UMATCH_NONE; } static void uxrcom_attach(device_t parent, device_t self, void *aux) { struct umodem_softc *sc = device_private(self); struct usbif_attach_arg *uiaa = aux; struct ucom_attach_args ucaa; memset(&ucaa, 0, sizeof(ucaa)); ucaa.ucaa_portno = UCOM_UNK_PORTNO; ucaa.ucaa_methods = &uxrcom_methods; ucaa.ucaa_info = NULL; ucaa.ucaa_ibufsize = UXRCOMBUFSZ; ucaa.ucaa_obufsize = UXRCOMBUFSZ; ucaa.ucaa_ibufsizepad = UXRCOMBUFSZ; if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler"); umodem_common_attach(self, sc, uiaa, &ucaa); } static int uxrcom_detach(device_t self, int flags) { struct umodem_softc *sc = device_private(self); pmf_device_deregister(self); return umodem_common_detach(sc, flags); } static void uxrcom_set(void *addr, int portno, int reg, int onoff) { struct umodem_softc *sc = addr; usb_device_request_t req; uint16_t index; uint8_t value; if (sc->sc_dying) return; index = onoff ? 
XR_GPIO_SET : XR_GPIO_CLEAR; switch (reg) { case UCOM_SET_DTR: value = XR_GPIO3; break; case UCOM_SET_RTS: value = XR_GPIO5; break; case UCOM_SET_BREAK: uxrcom_break(sc, portno, onoff); return; default: return; } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = XR_SET_REG; USETW(req.wValue, value); USETW(req.wIndex, index | XR_UART_BLOCK(sc)); USETW(req.wLength, 0); usbd_do_request(sc->sc_udev, &req, NULL); } static usbd_status uxrcom_set_line_coding(struct umodem_softc *sc, usb_cdc_line_state_t *state) { usb_device_request_t req; usbd_status err; DPRINTF(("%s: rate=%d fmt=%d parity=%d bits=%d\n", __func__, UGETDW(state->dwDTERate), state->bCharFormat, state->bParityType, state->bDataBits)); if (memcmp(state, &sc->sc_line_state, UCDC_LINE_STATE_LENGTH) == 0) { DPRINTF(("%s: already set\n", __func__)); return USBD_NORMAL_COMPLETION; } req.bmRequestType = UT_WRITE_CLASS_INTERFACE; req.bRequest = UCDC_SET_LINE_CODING; USETW(req.wValue, 0); USETW(req.wIndex, sc->sc_ctl_iface_no); USETW(req.wLength, UCDC_LINE_STATE_LENGTH); err = usbd_do_request(sc->sc_udev, &req, state); if (err) { DPRINTF(("%s: failed, err=%u\n", __func__, err)); return err; } sc->sc_line_state = *state; return USBD_NORMAL_COMPLETION; } static int uxrcom_param(void *addr, int portno, struct termios *t) { struct umodem_softc *sc = addr; usb_device_request_t req; usbd_status err; usb_cdc_line_state_t ls; uint8_t flowctrl; if (sc->sc_dying) return EIO; /* slowest supported baud rate is 1200 bps, max is 12 Mbps */ if (t->c_ospeed < 1200 || t->c_ospeed > 12000000) return (EINVAL); USETDW(ls.dwDTERate, t->c_ospeed); if (ISSET(t->c_cflag, CSTOPB)) ls.bCharFormat = UCDC_STOP_BIT_2; else ls.bCharFormat = UCDC_STOP_BIT_1; if (ISSET(t->c_cflag, PARENB)) { if (ISSET(t->c_cflag, PARODD)) ls.bParityType = UCDC_PARITY_ODD; else ls.bParityType = UCDC_PARITY_EVEN; } else ls.bParityType = UCDC_PARITY_NONE; switch (ISSET(t->c_cflag, CSIZE)) { case CS5: ls.bDataBits = 5; break; case CS6: ls.bDataBits = 6; 
break; case CS7: ls.bDataBits = 7; break; case CS8: ls.bDataBits = 8; break; } err = uxrcom_set_line_coding(sc, &ls); if (err) { DPRINTF(("%s: err=%u\n", __func__, err)); return EIO; } if (ISSET(t->c_cflag, CRTSCTS)) { /* rts/cts flow ctl */ flowctrl = XR_FLOW_CONTROL_ON; } else { /* disable flow ctl */ flowctrl = XR_FLOW_CONTROL_OFF; } req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = XR_SET_REG; USETW(req.wValue, flowctrl); USETW(req.wIndex, XR_FLOW_CONTROL | XR_UART_BLOCK(sc)); USETW(req.wLength, 0); usbd_do_request(sc->sc_udev, &req, NULL); return (0); } static void uxrcom_break(void *addr, int portno, int onoff) { struct umodem_softc *sc = addr; usb_device_request_t req; uint8_t brk = onoff ? UCDC_BREAK_ON : UCDC_BREAK_OFF; DPRINTF(("%s: port=%d onoff=%d\n", __func__, portno, onoff)); req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = XR_SET_REG; USETW(req.wValue, brk); USETW(req.wIndex, XR_TX_BREAK | XR_UART_BLOCK(sc)); USETW(req.wLength, 0); (void)usbd_do_request(sc->sc_udev, &req, 0); } |
| 2662 2659 2659 2656 2572 2558 490 375 372 191 189 188 2604 2607 2647 969 963 74 2121 2124 2127 2063 2061 303 240 238 119 119 2105 2117 2279 2277 2273 904 895 2026 2030 2026 492 76 76 76 76 17 17 2106 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 | /* $NetBSD: subr_kmem.c,v 1.87 2022/05/30 23:36:26 mrg Exp $ */ /* * Copyright (c) 2009-2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Doran and Maxime Villard. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c)2006 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Allocator of kernel wired memory. This allocator has some debug features * enabled with "option DIAGNOSTIC" and "option DEBUG". 
*/ /* * KMEM_SIZE: detect alloc/free size mismatch bugs. * Append to each allocation a fixed-sized footer and record the exact * user-requested allocation size in it. When freeing, compare it with * kmem_free's "size" argument. * * This option is enabled on DIAGNOSTIC. * * |CHUNK|CHUNK|CHUNK|CHUNK|CHUNK|CHUNK|CHUNK|CHUNK|CHUNK| | * +-----+-----+-----+-----+-----+-----+-----+-----+-----+-+ * | | | | | | | | |/////|U| * | | | | | | | | |/HSZ/|U| * | | | | | | | | |/////|U| * +-----+-----+-----+-----+-----+-----+-----+-----+-----+-+ * | Buffer usable by the caller (requested size) |Size |Unused */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.87 2022/05/30 23:36:26 mrg Exp $"); #ifdef _KERNEL_OPT #include "opt_kmem.h" #endif #include <sys/param.h> #include <sys/callback.h> #include <sys/kmem.h> #include <sys/pool.h> #include <sys/debug.h> #include <sys/lockdebug.h> #include <sys/cpu.h> #include <sys/asan.h> #include <sys/msan.h> #include <sys/sdt.h> #include <uvm/uvm_extern.h> #include <uvm/uvm_map.h> #include <lib/libkern/libkern.h> struct kmem_cache_info { size_t kc_size; const char * kc_name; #ifdef KDTRACE_HOOKS const id_t *kc_alloc_probe_id; const id_t *kc_free_probe_id; #endif }; #define KMEM_CACHE_SIZES(F) \ F(8, kmem-00008, kmem__00008) \ F(16, kmem-00016, kmem__00016) \ F(24, kmem-00024, kmem__00024) \ F(32, kmem-00032, kmem__00032) \ F(40, kmem-00040, kmem__00040) \ F(48, kmem-00048, kmem__00048) \ F(56, kmem-00056, kmem__00056) \ F(64, kmem-00064, kmem__00064) \ F(80, kmem-00080, kmem__00080) \ F(96, kmem-00096, kmem__00096) \ F(112, kmem-00112, kmem__00112) \ F(128, kmem-00128, kmem__00128) \ F(160, kmem-00160, kmem__00160) \ F(192, kmem-00192, kmem__00192) \ F(224, kmem-00224, kmem__00224) \ F(256, kmem-00256, kmem__00256) \ F(320, kmem-00320, kmem__00320) \ F(384, kmem-00384, kmem__00384) \ F(448, kmem-00448, kmem__00448) \ F(512, kmem-00512, kmem__00512) \ F(768, kmem-00768, kmem__00768) \ F(1024, kmem-01024, kmem__01024) \ /* end 
of KMEM_CACHE_SIZES */ #define KMEM_CACHE_BIG_SIZES(F) \ F(2048, kmem-02048, kmem__02048) \ F(4096, kmem-04096, kmem__04096) \ F(8192, kmem-08192, kmem__08192) \ F(16384, kmem-16384, kmem__16384) \ /* end of KMEM_CACHE_BIG_SIZES */ /* sdt:kmem:alloc:kmem-* probes */ #define F(SZ, NAME, PROBENAME) \ SDT_PROBE_DEFINE4(sdt, kmem, alloc, PROBENAME, \ "void *"/*ptr*/, \ "size_t"/*requested_size*/, \ "size_t"/*allocated_size*/, \ "km_flag_t"/*kmflags*/); KMEM_CACHE_SIZES(F); KMEM_CACHE_BIG_SIZES(F); #undef F /* sdt:kmem:free:kmem-* probes */ #define F(SZ, NAME, PROBENAME) \ SDT_PROBE_DEFINE3(sdt, kmem, free, PROBENAME, \ "void *"/*ptr*/, \ "size_t"/*requested_size*/, \ "size_t"/*allocated_size*/); KMEM_CACHE_SIZES(F); KMEM_CACHE_BIG_SIZES(F); #undef F /* sdt:kmem:alloc:large, sdt:kmem:free:large probes */ SDT_PROBE_DEFINE4(sdt, kmem, alloc, large, "void *"/*ptr*/, "size_t"/*requested_size*/, "size_t"/*allocated_size*/, "km_flag_t"/*kmflags*/); SDT_PROBE_DEFINE3(sdt, kmem, free, large, "void *"/*ptr*/, "size_t"/*requested_size*/, "size_t"/*allocated_size*/); #ifdef KDTRACE_HOOKS #define F(SZ, NAME, PROBENAME) \ { SZ, #NAME, \ &sdt_sdt_kmem_alloc_##PROBENAME->id, \ &sdt_sdt_kmem_free_##PROBENAME->id }, #else #define F(SZ, NAME, PROBENAME) { SZ, #NAME }, #endif static const struct kmem_cache_info kmem_cache_sizes[] = { KMEM_CACHE_SIZES(F) { 0 } }; static const struct kmem_cache_info kmem_cache_big_sizes[] = { KMEM_CACHE_BIG_SIZES(F) { 0 } }; #undef F /* * KMEM_ALIGN is the smallest guaranteed alignment and also the * smallest allocateable quantum. * Every cache size >= CACHE_LINE_SIZE gets CACHE_LINE_SIZE alignment. 
*/ #define KMEM_ALIGN 8 #define KMEM_SHIFT 3 #define KMEM_MAXSIZE 1024 #define KMEM_CACHE_COUNT (KMEM_MAXSIZE >> KMEM_SHIFT) static pool_cache_t kmem_cache[KMEM_CACHE_COUNT] __cacheline_aligned; static size_t kmem_cache_maxidx __read_mostly; #define KMEM_BIG_ALIGN 2048 #define KMEM_BIG_SHIFT 11 #define KMEM_BIG_MAXSIZE 16384 #define KMEM_CACHE_BIG_COUNT (KMEM_BIG_MAXSIZE >> KMEM_BIG_SHIFT) static pool_cache_t kmem_cache_big[KMEM_CACHE_BIG_COUNT] __cacheline_aligned; static size_t kmem_cache_big_maxidx __read_mostly; #if defined(DIAGNOSTIC) && defined(_HARDKERNEL) #define KMEM_SIZE #endif #if defined(DEBUG) && defined(_HARDKERNEL) static void *kmem_freecheck; #endif #if defined(KMEM_SIZE) #define SIZE_SIZE sizeof(size_t) static void kmem_size_set(void *, size_t); static void kmem_size_check(void *, size_t); #else #define SIZE_SIZE 0 #define kmem_size_set(p, sz) /* nothing */ #define kmem_size_check(p, sz) /* nothing */ #endif #ifndef KDTRACE_HOOKS static const id_t **const kmem_cache_alloc_probe_id = NULL; static const id_t **const kmem_cache_big_alloc_probe_id = NULL; static const id_t **const kmem_cache_free_probe_id = NULL; static const id_t **const kmem_cache_big_free_probe_id = NULL; #define KMEM_CACHE_PROBE(ARRAY, INDEX, PTR, REQSIZE, ALLOCSIZE, FLAGS) \ __nothing #else static const id_t *kmem_cache_alloc_probe_id[KMEM_CACHE_COUNT]; static const id_t *kmem_cache_big_alloc_probe_id[KMEM_CACHE_COUNT]; static const id_t *kmem_cache_free_probe_id[KMEM_CACHE_COUNT]; static const id_t *kmem_cache_big_free_probe_id[KMEM_CACHE_COUNT]; #define KMEM_CACHE_PROBE(ARRAY, INDEX, PTR, REQSIZE, ALLOCSIZE, FLAGS) do \ { \ id_t id; \ \ KDASSERT((INDEX) < __arraycount(ARRAY)); \ if (__predict_false((id = *(ARRAY)[INDEX]) != 0)) { \ (*sdt_probe_func)(id, \ (uintptr_t)(PTR), \ (uintptr_t)(REQSIZE), \ (uintptr_t)(ALLOCSIZE), \ (uintptr_t)(FLAGS), \ (uintptr_t)0); \ } \ } while (0) #endif /* KDTRACE_HOOKS */ #define KMEM_CACHE_ALLOC_PROBE(I, P, RS, AS, F) \ 
KMEM_CACHE_PROBE(kmem_cache_alloc_probe_id, I, P, RS, AS, F) #define KMEM_CACHE_BIG_ALLOC_PROBE(I, P, RS, AS, F) \ KMEM_CACHE_PROBE(kmem_cache_big_alloc_probe_id, I, P, RS, AS, F) #define KMEM_CACHE_FREE_PROBE(I, P, RS, AS) \ KMEM_CACHE_PROBE(kmem_cache_free_probe_id, I, P, RS, AS, 0) #define KMEM_CACHE_BIG_FREE_PROBE(I, P, RS, AS) \ KMEM_CACHE_PROBE(kmem_cache_big_free_probe_id, I, P, RS, AS, 0) CTASSERT(KM_SLEEP == PR_WAITOK); CTASSERT(KM_NOSLEEP == PR_NOWAIT); /* * kmem_intr_alloc: allocate wired memory. */ void * kmem_intr_alloc(size_t requested_size, km_flag_t kmflags) { #ifdef KASAN const size_t origsize = requested_size; #endif size_t allocsz, index; size_t size; pool_cache_t pc; uint8_t *p; KASSERT(requested_size > 0); KASSERT((kmflags & KM_SLEEP) || (kmflags & KM_NOSLEEP)); KASSERT(!(kmflags & KM_SLEEP) || !(kmflags & KM_NOSLEEP)); kasan_add_redzone(&requested_size); size = kmem_roundup_size(requested_size); allocsz = size + SIZE_SIZE; if ((index = ((allocsz - 1) >> KMEM_SHIFT)) < kmem_cache_maxidx) { pc = kmem_cache[index]; p = pool_cache_get(pc, kmflags); KMEM_CACHE_ALLOC_PROBE(index, p, requested_size, allocsz, kmflags); } else if ((index = ((allocsz - 1) >> KMEM_BIG_SHIFT)) < kmem_cache_big_maxidx) { pc = kmem_cache_big[index]; p = pool_cache_get(pc, kmflags); KMEM_CACHE_BIG_ALLOC_PROBE(index, p, requested_size, allocsz, kmflags); } else { int ret = uvm_km_kmem_alloc(kmem_va_arena, (vsize_t)round_page(size), ((kmflags & KM_SLEEP) ? VM_SLEEP : VM_NOSLEEP) | VM_INSTANTFIT, (vmem_addr_t *)&p); SDT_PROBE4(sdt, kmem, alloc, large, ret ? NULL : p, requested_size, round_page(size), kmflags); if (ret) { return NULL; } FREECHECK_OUT(&kmem_freecheck, p); return p; } if (__predict_true(p != NULL)) { FREECHECK_OUT(&kmem_freecheck, p); kmem_size_set(p, requested_size); kasan_mark(p, origsize, size, KASAN_KMEM_REDZONE); return p; } return p; } /* * kmem_intr_zalloc: allocate zeroed wired memory. 
*/ void * kmem_intr_zalloc(size_t size, km_flag_t kmflags) { void *p; p = kmem_intr_alloc(size, kmflags); if (p != NULL) { memset(p, 0, size); } return p; } /* * kmem_intr_free: free wired memory allocated by kmem_alloc. */ void kmem_intr_free(void *p, size_t requested_size) { size_t allocsz, index; size_t size; pool_cache_t pc; KASSERT(p != NULL); KASSERTMSG(requested_size > 0, "kmem_intr_free(%p, 0)", p); kasan_add_redzone(&requested_size); size = kmem_roundup_size(requested_size); allocsz = size + SIZE_SIZE; if ((index = ((allocsz - 1) >> KMEM_SHIFT)) < kmem_cache_maxidx) { KMEM_CACHE_FREE_PROBE(index, p, requested_size, allocsz); pc = kmem_cache[index]; } else if ((index = ((allocsz - 1) >> KMEM_BIG_SHIFT)) < kmem_cache_big_maxidx) { KMEM_CACHE_BIG_FREE_PROBE(index, p, requested_size, allocsz); pc = kmem_cache_big[index]; } else { FREECHECK_IN(&kmem_freecheck, p); SDT_PROBE3(sdt, kmem, free, large, p, requested_size, round_page(size)); uvm_km_kmem_free(kmem_va_arena, (vaddr_t)p, round_page(size)); return; } kasan_mark(p, size, size, 0); kmem_size_check(p, requested_size); FREECHECK_IN(&kmem_freecheck, p); LOCKDEBUG_MEM_CHECK(p, size); pool_cache_put(pc, p); } /* -------------------------------- Kmem API -------------------------------- */ /* * kmem_alloc: allocate wired memory. * => must not be called from interrupt context. */ void * kmem_alloc(size_t size, km_flag_t kmflags) { void *v; KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()), "kmem(9) should not be used from the interrupt context"); v = kmem_intr_alloc(size, kmflags); if (__predict_true(v != NULL)) { kmsan_mark(v, size, KMSAN_STATE_UNINIT); kmsan_orig(v, size, KMSAN_TYPE_KMEM, __RET_ADDR); } KASSERT(v || (kmflags & KM_NOSLEEP) != 0); return v; } /* * kmem_zalloc: allocate zeroed wired memory. * => must not be called from interrupt context. 
*/ void * kmem_zalloc(size_t size, km_flag_t kmflags) { void *v; KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()), "kmem(9) should not be used from the interrupt context"); v = kmem_intr_zalloc(size, kmflags); KASSERT(v || (kmflags & KM_NOSLEEP) != 0); return v; } /* * kmem_free: free wired memory allocated by kmem_alloc. * => must not be called from interrupt context. */ void kmem_free(void *p, size_t size) { KASSERT(!cpu_intr_p()); KASSERT(!cpu_softintr_p()); kmem_intr_free(p, size); kmsan_mark(p, size, KMSAN_STATE_INITED); } static size_t kmem_create_caches(const struct kmem_cache_info *array, const id_t *alloc_probe_table[], const id_t *free_probe_table[], pool_cache_t alloc_table[], size_t maxsize, int shift, int ipl) { size_t maxidx = 0; size_t table_unit = (1 << shift); size_t size = table_unit; int i; for (i = 0; array[i].kc_size != 0 ; i++) { const char *name = array[i].kc_name; size_t cache_size = array[i].kc_size; struct pool_allocator *pa; int flags = 0; pool_cache_t pc; size_t align; /* check if we reached the requested size */ if (cache_size > maxsize || cache_size > PAGE_SIZE) { break; } /* * Exclude caches with size not a factor or multiple of the * coherency unit. 
*/ if (cache_size < COHERENCY_UNIT) { if (COHERENCY_UNIT % cache_size > 0) { continue; } flags |= PR_NOTOUCH; align = KMEM_ALIGN; } else if ((cache_size & (PAGE_SIZE - 1)) == 0) { align = PAGE_SIZE; } else { if ((cache_size % COHERENCY_UNIT) > 0) { continue; } align = COHERENCY_UNIT; } if ((cache_size >> shift) > maxidx) { maxidx = cache_size >> shift; } pa = &pool_allocator_kmem; pc = pool_cache_init(cache_size, align, 0, flags, name, pa, ipl, NULL, NULL, NULL); while (size <= cache_size) { alloc_table[(size - 1) >> shift] = pc; #ifdef KDTRACE_HOOKS if (alloc_probe_table) { alloc_probe_table[(size - 1) >> shift] = array[i].kc_alloc_probe_id; } if (free_probe_table) { free_probe_table[(size - 1) >> shift] = array[i].kc_free_probe_id; } #endif size += table_unit; } } return maxidx; } void kmem_init(void) { kmem_cache_maxidx = kmem_create_caches(kmem_cache_sizes, kmem_cache_alloc_probe_id, kmem_cache_free_probe_id, kmem_cache, KMEM_MAXSIZE, KMEM_SHIFT, IPL_VM); kmem_cache_big_maxidx = kmem_create_caches(kmem_cache_big_sizes, kmem_cache_big_alloc_probe_id, kmem_cache_big_free_probe_id, kmem_cache_big, PAGE_SIZE, KMEM_BIG_SHIFT, IPL_VM); } size_t kmem_roundup_size(size_t size) { return (size + (KMEM_ALIGN - 1)) & ~(KMEM_ALIGN - 1); } /* * Used to dynamically allocate string with kmem accordingly to format. */ char * kmem_asprintf(const char *fmt, ...) 
{ int size __diagused, len; va_list va; char *str; va_start(va, fmt); len = vsnprintf(NULL, 0, fmt, va); va_end(va); str = kmem_alloc(len + 1, KM_SLEEP); va_start(va, fmt); size = vsnprintf(str, len + 1, fmt, va); va_end(va); KASSERT(size == len); return str; } char * kmem_strdupsize(const char *str, size_t *lenp, km_flag_t flags) { size_t len = strlen(str) + 1; char *ptr = kmem_alloc(len, flags); if (ptr == NULL) return NULL; if (lenp) *lenp = len; memcpy(ptr, str, len); return ptr; } char * kmem_strndup(const char *str, size_t maxlen, km_flag_t flags) { KASSERT(str != NULL); KASSERT(maxlen != 0); size_t len = strnlen(str, maxlen); char *ptr = kmem_alloc(len + 1, flags); if (ptr == NULL) return NULL; memcpy(ptr, str, len); ptr[len] = '\0'; return ptr; } void kmem_strfree(char *str) { if (str == NULL) return; kmem_free(str, strlen(str) + 1); } /* * Utility routine to maybe-allocate a temporary buffer if the size * is larger than we're willing to put on the stack. */ void * kmem_tmpbuf_alloc(size_t size, void *stackbuf, size_t stackbufsize, km_flag_t flags) { if (size <= stackbufsize) { return stackbuf; } return kmem_alloc(size, flags); } void kmem_tmpbuf_free(void *buf, size_t size, void *stackbuf) { if (buf != stackbuf) { kmem_free(buf, size); } } /* --------------------------- DEBUG / DIAGNOSTIC --------------------------- */ #if defined(KMEM_SIZE) static void kmem_size_set(void *p, size_t sz) { memcpy((char *)p + sz, &sz, sizeof(size_t)); } static void kmem_size_check(void *p, size_t sz) { size_t hsz; memcpy(&hsz, (char *)p + sz, sizeof(size_t)); if (hsz != sz) { panic("kmem_free(%p, %zu) != allocated size %zu; overwrote?", p, sz, hsz); } memset((char *)p + sz, 0xff, sizeof(size_t)); } #endif /* defined(KMEM_SIZE) */ |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | /* $NetBSD: joy.c,v 1.21 2017/10/28 04:53:55 riastradh Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software developed for The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1995 Jean-Marc Zucconi * All rights reserved. * * Ported to NetBSD by Matthieu Herrb <matthieu@laas.fr> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: joy.c,v 1.21 2017/10/28 04:53:55 riastradh Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/errno.h> #include <sys/conf.h> #include <sys/event.h> #include <sys/vnode.h> #include <sys/bus.h> #include <sys/joystick.h> #include <dev/ic/joyvar.h> #include "ioconf.h" /* * The game port can manage 4 buttons and 4 variable resistors (usually 2 * joysticks, each with 2 buttons and 2 pots.) via the port at address 0x201. * Getting the state of the buttons is done by reading the game port; * buttons 1-4 correspond to bits 4-7 and resistors 1-4 (X1, Y1, X2, Y2) * to bits 0-3. If button 1 (resp 2, 3, 4) is pressed, the bit 4 (resp 5, * 6, 7) is set to 0 to get the value of a resistor, write the value 0xff * at port and wait until the corresponding bit returns to 0. 
*/ #define JOYPART(d) (minor(d) & 1) #define JOYUNIT(d) (minor(d) >> 1) #ifndef JOY_TIMEOUT #define JOY_TIMEOUT 2000 /* 2 milliseconds */ #endif static dev_type_open(joyopen); static dev_type_close(joyclose); static dev_type_read(joyread); static dev_type_ioctl(joyioctl); const struct cdevsw joy_cdevsw = { .d_open = joyopen, .d_close = joyclose, .d_read = joyread, .d_write = nowrite, .d_ioctl = joyioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER | D_MPSAFE }; void joyattach(struct joy_softc *sc) { if (sc->sc_lock == NULL) { panic("joyattach: no lock"); } sc->timeout[0] = 0; sc->timeout[1] = 0; mutex_enter(sc->sc_lock); bus_space_write_1(sc->sc_iot, sc->sc_ioh, 0, 0xff); DELAY(10000); /* 10 ms delay */ aprint_normal_dev(sc->sc_dev, "joystick %sconnected\n", (bus_space_read_1(sc->sc_iot, sc->sc_ioh, 0) & 0x0f) == 0x0f ? "not " : ""); mutex_exit(sc->sc_lock); } int joydetach(struct joy_softc *sc, int flags) { int maj, mn; maj = cdevsw_lookup_major(&joy_cdevsw); mn = device_unit(sc->sc_dev) << 1; vdevgone(maj, mn, mn, VCHR); vdevgone(maj, mn + 1, mn + 1, VCHR); return 0; } static int joyopen(dev_t dev, int flag, int mode, struct lwp *l) { int unit = JOYUNIT(dev); int i = JOYPART(dev); struct joy_softc *sc; sc = device_lookup_private(&joy_cd, unit); if (sc == NULL) return ENXIO; mutex_enter(sc->sc_lock); if (sc->timeout[i]) { mutex_exit(sc->sc_lock); return EBUSY; } sc->x_off[i] = sc->y_off[i] = 0; sc->timeout[i] = JOY_TIMEOUT; mutex_exit(sc->sc_lock); return 0; } static int joyclose(dev_t dev, int flag, int mode, struct lwp *l) { int unit = JOYUNIT(dev); int i = JOYPART(dev); struct joy_softc *sc = device_lookup_private(&joy_cd, unit); mutex_enter(sc->sc_lock); sc->timeout[i] = 0; mutex_exit(sc->sc_lock); return 0; } static int joyread(dev_t dev, struct uio *uio, int flag) { int unit = JOYUNIT(dev); struct joy_softc *sc = device_lookup_private(&joy_cd, unit); bus_space_tag_t 
iot = sc->sc_iot; bus_space_handle_t ioh = sc->sc_ioh; struct joystick c; struct timeval start, now, diff; int state = 0, x = 0, y = 0, i; mutex_enter(sc->sc_lock); bus_space_write_1(iot, ioh, 0, 0xff); microtime(&start); now = start; /* structure assignment */ i = sc->timeout[JOYPART(dev)]; for (;;) { timersub(&now, &start, &diff); if (diff.tv_sec > 0 || diff.tv_usec > i) break; state = bus_space_read_1(iot, ioh, 0); if (JOYPART(dev) == 1) state >>= 2; if (!x && !(state & 0x01)) x = diff.tv_usec; if (!y && !(state & 0x02)) y = diff.tv_usec; if (x && y) break; microtime(&now); } mutex_exit(sc->sc_lock); c.x = x ? sc->x_off[JOYPART(dev)] + x : 0x80000000; c.y = y ? sc->y_off[JOYPART(dev)] + y : 0x80000000; state >>= 4; c.b1 = ~state & 1; c.b2 = ~(state >> 1) & 1; return uiomove(&c, sizeof(struct joystick), uio); } static int joyioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { int unit = JOYUNIT(dev); struct joy_softc *sc = device_lookup_private(&joy_cd, unit); int i = JOYPART(dev), x, error; mutex_enter(sc->sc_lock); error = 0; switch (cmd) { case JOY_SETTIMEOUT: x = *(int *)data; if (x < 1 || x > 10000) { /* 10ms maximum! */ error = EINVAL; break; } sc->timeout[i] = x; break; case JOY_GETTIMEOUT: *(int *)data = sc->timeout[i]; break; case JOY_SET_X_OFFSET: sc->x_off[i] = *(int *)data; break; case JOY_SET_Y_OFFSET: sc->y_off[i] = *(int *)data; break; case JOY_GET_X_OFFSET: *(int *)data = sc->x_off[i]; break; case JOY_GET_Y_OFFSET: *(int *)data = sc->y_off[i]; break; default: error = ENXIO; break; } mutex_exit(sc->sc_lock); return error; } |
| 2028 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | /* $NetBSD: cpu.h,v 1.51 2020/06/15 18:04:42 ad Exp $ */ /*- * Copyright (c) 2007 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _SYS_CPU_H_ #define _SYS_CPU_H_ #ifndef _LOCORE #include <machine/cpu.h> #include <sys/lwp.h> struct cpu_info; #ifdef _KERNEL #ifndef cpu_idle void cpu_idle(void); #endif #ifdef CPU_UCODE #include <sys/cpuio.h> #include <dev/firmload.h> #ifdef COMPAT_60 #include <compat/sys/cpuio.h> #endif #endif #ifndef cpu_need_resched void cpu_need_resched(struct cpu_info *, struct lwp *, int); #endif /* * CPU_INFO_ITERATOR() may be supplied by machine dependent code as it * controls how the cpu_info structures are allocated. * * This macro must always iterate just the boot-CPU when the system has * not attached any cpus via mi_cpu_attach() yet, and the "ncpu" variable * is zero. */ #ifndef CPU_INFO_ITERATOR #define CPU_INFO_ITERATOR int #define CPU_INFO_FOREACH(cii, ci) \ (void)cii, ci = curcpu(); ci != NULL; ci = NULL #endif #ifndef CPU_IS_PRIMARY #define CPU_IS_PRIMARY(ci) ((void)ci, 1) #endif #ifdef __HAVE_MD_CPU_OFFLINE void cpu_offline_md(void); #endif struct lwp *cpu_switchto(struct lwp *, struct lwp *, bool); struct cpu_info *cpu_lookup(u_int); int cpu_setmodel(const char *fmt, ...) 
__printflike(1, 2); const char *cpu_getmodel(void); int cpu_setstate(struct cpu_info *, bool); int cpu_setintr(struct cpu_info *, bool); bool cpu_intr_p(void); bool cpu_softintr_p(void); bool cpu_kpreempt_enter(uintptr_t, int); void cpu_kpreempt_exit(uintptr_t); bool cpu_kpreempt_disabled(void); int cpu_lwp_setprivate(struct lwp *, void *); void cpu_intr_redistribute(void); u_int cpu_intr_count(struct cpu_info *); void cpu_topology_set(struct cpu_info *, u_int, u_int, u_int, u_int); void cpu_topology_setspeed(struct cpu_info *, bool); void cpu_topology_init(void); #endif #ifdef _KERNEL extern kmutex_t cpu_lock; extern u_int maxcpus; extern struct cpu_info **cpu_infos; extern kcpuset_t *kcpuset_attached; extern kcpuset_t *kcpuset_running; static __inline u_int cpu_index(const struct cpu_info *ci) { return ci->ci_index; } static __inline char * cpu_name(struct cpu_info *ci) { return ci->ci_data.cpu_name; } #ifdef CPU_UCODE struct cpu_ucode_softc { int loader_version; char *sc_blob; off_t sc_blobsize; }; int cpu_ucode_get_version(struct cpu_ucode_version *); int cpu_ucode_apply(const struct cpu_ucode *); #ifdef COMPAT_60 int compat6_cpu_ucode_get_version(struct compat6_cpu_ucode *); int compat6_cpu_ucode_apply(const struct compat6_cpu_ucode *); #endif int cpu_ucode_load(struct cpu_ucode_softc *, const char *); int cpu_ucode_md_open(firmware_handle_t *, int, const char *); #endif #endif #endif /* !_LOCORE */ /* * Flags for cpu_need_resched. RESCHED_KPREEMPT must be greater than * RESCHED_UPREEMPT; see sched_resched_cpu(). */ #define RESCHED_REMOTE 0x01 /* request is for a remote CPU */ #define RESCHED_IDLE 0x02 /* idle LWP observed */ #define RESCHED_UPREEMPT 0x04 /* immediate user ctx switch */ #define RESCHED_KPREEMPT 0x08 /* immediate kernel ctx switch */ #endif /* !_SYS_CPU_H_ */ |
| 1267 1268 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | /* $NetBSD: uvm_pdpolicy.h,v 1.9 2022/08/20 23:26:02 riastradh Exp $ */ /*- * Copyright (c)2005, 2006 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _UVM_PDPOLICY_H_ #define _UVM_PDPOLICY_H_ #include <sys/mutex.h> #include <sys/stdint.h> #include <uvm/uvm_page.h> struct krwlock; struct uvm_cpu; struct vm_anon; struct vm_page; /* * these API is for uvm internal use only. * don't use them directly from outside of /sys/uvm. 
 */

/* Pagedaemon policy lifecycle and per-CPU hooks. */
void		uvmpdpol_idle(struct uvm_cpu *);
void		uvmpdpol_init(void);
void		uvmpdpol_init_cpu(struct uvm_cpu *);
void		uvmpdpol_reinit(void);
void		uvmpdpol_estimatepageable(int *, int *);
bool		uvmpdpol_needsscan_p(void);

/* Per-page queue state transitions driven by the policy. */
void		uvmpdpol_pageactivate(struct vm_page *);
void		uvmpdpol_pagedeactivate(struct vm_page *);
void		uvmpdpol_pagedequeue(struct vm_page *);
void		uvmpdpol_pageenqueue(struct vm_page *);
bool		uvmpdpol_pageactivate_p(struct vm_page *);
bool		uvmpdpol_pageisqueued_p(struct vm_page *);
void		uvmpdpol_pagerealize(struct vm_page *);
void		uvmpdpol_anfree(struct vm_anon *);

/* Scan-loop interface used by the pagedaemon. */
void		uvmpdpol_tune(void);
void		uvmpdpol_scaninit(void);
void		uvmpdpol_scanfini(void);
struct vm_page	*uvmpdpol_selectvictim(struct krwlock **);
void		uvmpdpol_balancequeue(int);
void		uvmpdpol_sysctlsetup(void);

/*
 * uvmpdpol_set_intent: set an intended state for the page, taking care not
 * to overwrite any of the other flags.
 *
 * Caller must hold pg->interlock (asserted below).  Only the PQ_INTENT_MASK
 * bits are replaced; all other pqflags bits are preserved.
 */
static inline void
uvmpdpol_set_intent(struct vm_page *pg, uint32_t i)
{

	KASSERT(mutex_owned(&pg->interlock));
	pg->pqflags = PQ_INTENT_SET | (pg->pqflags & ~PQ_INTENT_MASK) | i;
}

#endif /* !_UVM_PDPOLICY_H_ */
| 4341 4342 85 4342 10 4339 1115 4343 4342 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 | /* $NetBSD: userret.h,v 1.33 2020/03/26 20:19:06 ad Exp $ */ /*- * Copyright (c) 1998, 2000, 2003, 2006, 2008, 2019, 2020 * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum, and Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef _SYS_USERRET_H_ #define _SYS_USERRET_H_ #include <sys/lockdebug.h> #include <sys/intr.h> #include <sys/psref.h> /* * Define the MI code needed before returning to user mode, for trap and * syscall. * * We handle "exceptional" events: pending signals, stop/exit actions, etc. 
 * Note that the event must be flagged BEFORE any AST is posted as we are
 * reading unlocked.
 */
static __inline void
mi_userret(struct lwp *l)
{
	struct cpu_info *ci;

	KPREEMPT_DISABLE(l);
	ci = l->l_cpu;
	/* Returning to user mode while still holding kernel_lock is a bug. */
	KASSERTMSG(ci->ci_biglock_count == 0, "kernel_lock leaked");
	KASSERT(l->l_blcnt == 0);
	/* Yield if a reschedule was requested; we may migrate, so reload ci. */
	if (__predict_false(ci->ci_want_resched)) {
		preempt();
		ci = l->l_cpu;
	}
	/* Pending per-LWP work (lwp_userret may block, so reenable preemption). */
	if (__predict_false(l->l_flag & LW_USERRET)) {
		KPREEMPT_ENABLE(l);
		lwp_userret(l);
		KPREEMPT_DISABLE(l);
		ci = l->l_cpu;
	}
	/*
	 * lwp_eprio() is too involved to use here unlocked.  At this point
	 * it only matters for PTHREAD_PRIO_PROTECT; setting a too low value
	 * is OK because the scheduler will find out the true value if we
	 * end up in mi_switch().
	 *
	 * This is being called on every syscall and trap, and remote CPUs
	 * regularly look at ci_schedstate.  Keep the cache line in the
	 * SHARED state by only updating spc_curpriority if it has changed.
	 */
	l->l_kpriority = false;
	if (ci->ci_schedstate.spc_curpriority != l->l_priority) {
		ci->ci_schedstate.spc_curpriority = l->l_priority;
	}
	KPREEMPT_ENABLE(l);

	/* Debug-only sanity: no locks, preemption or psrefs carried to userland. */
	LOCKDEBUG_BARRIER(NULL, 0);
	KASSERT(l->l_nopreempt == 0);
	PSREF_DEBUG_BARRIER();
	KASSERT(l->l_psrefs == 0);
}

#endif	/* !_SYS_USERRET_H_ */
| 44 45 38 45 38 10 38 38 9 1 2 1 45 47 22 21 20 5 22 96 92 87 17 66 71 48 47 48 48 47 47 30 20 8 8 8 46 8 5 7 3 5 38 6 3 5 3 5 32 15 46 14 40 35 35 12 18 12 11 14 39 21 21 21 21 21 21 12 12 11 12 12 2 10 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 | /* $NetBSD: subr_time.c,v 1.35 2022/06/28 02:04:51 riastradh Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_time.c,v 1.35 2022/06/28 02:04:51 riastradh Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/lwp.h>
#include <sys/timex.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/intr.h>

#ifdef DEBUG_STICKS
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/*
 * Compute number of hz until specified time.  Used to compute second
 * argument to callout_reset() from an absolute time.
 */
int
tvhzto(const struct timeval *tvp)
{
	struct timeval now, tv;

	tv = *tvp;	/* Don't modify original tvp. */
	getmicrotime(&now);
	timersub(&tv, &now, &tv);
	return tvtohz(&tv);
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(const struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case, but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints are 32-bit, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;

	KASSERT(usec >= 0 && usec < 1000000);

	/* catch overflows in conversion time_t->int */
	if (tv->tv_sec > INT_MAX)
		return INT_MAX;
	if (tv->tv_sec < 0)
		return 0;

	if (sec < 0 || (sec == 0 && usec == 0)) {
		/*
		 * Would expire now or in the past.  Return 0 ticks.
		 * This is different from the legacy tvhzto() interface,
		 * and callers need to check for it.
		 */
		ticks = 0;
	} else if (sec <= (LONG_MAX / 1000000))
		ticks = (((sec * 1000000) + (unsigned long)usec + (tick - 1))
		    / tick) + 1;
	else if (sec <= (LONG_MAX / hz))
		ticks = (sec * hz) +
		    (((unsigned long)usec + (tick - 1)) / tick) + 1;
	else
		ticks = LONG_MAX;

	if (ticks > INT_MAX)
		ticks = INT_MAX;

	return ((int)ticks);
}

/* As tvhzto(), but for a timespec measured against CLOCK_REALTIME. */
int
tshzto(const struct timespec *tsp)
{
	struct timespec now, ts;

	ts = *tsp;	/* Don't modify original tsp. */
	getnanotime(&now);
	timespecsub(&ts, &now, &ts);
	return tstohz(&ts);
}

/* As tshzto(), but measured against the monotonic uptime clock. */
int
tshztoup(const struct timespec *tsp)
{
	struct timespec now, ts;

	ts = *tsp;	/* Don't modify original tsp. */
	getnanouptime(&now);
	timespecsub(&ts, &now, &ts);
	return tstohz(&ts);
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tstohz(const struct timespec *ts)
{
	struct timeval tv;

	/*
	 * usec has great enough resolution for hz, so convert to a
	 * timeval and use tvtohz() above.
	 */
	TIMESPEC_TO_TIMEVAL(&tv, ts);
	return tvtohz(&tv);
}

/*
 * Check that a proposed value to load into the .it_value or
 * .it_interval part of an interval timer is acceptable, and
 * fix it to have at least minimal value (i.e. if it is less
 * than the resolution of the clock, round it up.).  We don't
 * timeout the 0,0 value because this means to disable the
 * timer or the interval.
 */
int
itimerfix(struct timeval *tv)
{

	if (tv->tv_usec < 0 || tv->tv_usec >= 1000000)
		return EINVAL;
	if (tv->tv_sec < 0)
		return ETIMEDOUT;
	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
		tv->tv_usec = tick;
	return 0;
}

/* Timespec flavour of itimerfix(); tick is in usec, hence "tick * 1000" nsec. */
int
itimespecfix(struct timespec *ts)
{

	if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
		return EINVAL;
	if (ts->tv_sec < 0)
		return ETIMEDOUT;
	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000)
		ts->tv_nsec = tick * 1000;
	return 0;
}

/*
 * Validate a relative timeout and record the monotonic start time in
 * *sleepts for later use by gettimeleft().  Returns -1 on invalid input.
 */
int
inittimeleft(struct timespec *ts, struct timespec *sleepts)
{

	if (itimespecfix(ts)) {
		return -1;
	}
	KASSERT(ts->tv_sec >= 0);
	getnanouptime(sleepts);
	return 0;
}

/*
 * Decrement *ts by the time slept since *sleepts and return the remaining
 * timeout in ticks; 0 means the timeout has expired.  *sleepts is updated
 * to "now" so the call can be repeated.
 */
int
gettimeleft(struct timespec *ts, struct timespec *sleepts)
{
	struct timespec now, sleptts;

	KASSERT(ts->tv_sec >= 0);

	/*
	 * Reduce ts by elapsed time based on monotonic time scale.
	 */
	getnanouptime(&now);
	KASSERT(timespeccmp(sleepts, &now, <=));
	timespecsub(&now, sleepts, &sleptts);
	*sleepts = now;

	if (timespeccmp(ts, &sleptts, <=)) { /* timed out */
		timespecclear(ts);
		return 0;
	}
	timespecsub(ts, &sleptts, ts);

	return tstohz(ts);
}

/* As gettimeleft(), but against an arbitrary clockid; no tick conversion. */
void
clock_timeleft(clockid_t clockid, struct timespec *ts, struct timespec *sleepts)
{
	struct timespec sleptts;

	clock_gettime1(clockid, &sleptts);
	timespecadd(ts, sleepts, ts);
	timespecsub(ts, &sleptts, ts);
	*sleepts = sleptts;
}

/*
 * Convert a count of clock ticks to a timespec, choosing the largest
 * intermediate scale that cannot overflow the 64-bit multiplication.
 */
static void
ticks2ts(uint64_t ticks, struct timespec *ts)
{

	ts->tv_sec = ticks / hz;
	uint64_t sticks = ticks - ts->tv_sec * hz;
	if (sticks > BINTIME_SCALE_MS)		/* floor(2^64 / 1000) */
		ts->tv_nsec = sticks / hz * 1000000000LL;
	else if (sticks > BINTIME_SCALE_US)	/* floor(2^64 / 1000000) */
		ts->tv_nsec = sticks * 1000LL / hz * 1000000LL;
	else
		ts->tv_nsec = sticks * 1000000000LL / hz;
	DPRINTF(("%s: %ju/%ju -> %ju.%ju\n", __func__,
	    (uintmax_t)ticks, (uintmax_t)sticks,
	    (uintmax_t)ts->tv_sec, (uintmax_t)ts->tv_nsec));
}

/*
 * Kernel backend of clock_gettime(2): handles CLOCK_REALTIME and
 * CLOCK_MONOTONIC, plus per-process/per-thread CPU-time clock ids whose
 * low bits encode a pid or lwpid.  Returns 0 or an errno.
 */
int
clock_gettime1(clockid_t clock_id, struct timespec *ts)
{
	int error;
	uint64_t ticks;
	struct proc *p;

#define CPUCLOCK_ID_MASK (~(CLOCK_THREAD_CPUTIME_ID|CLOCK_PROCESS_CPUTIME_ID))
	if (clock_id & CLOCK_PROCESS_CPUTIME_ID) {
		pid_t pid = clock_id & CPUCLOCK_ID_MASK;

		mutex_enter(&proc_lock);
		p = pid == 0 ? curproc : proc_find(pid);
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		ticks = p->p_uticks + p->p_sticks + p->p_iticks;
		DPRINTF(("%s: u=%ju, s=%ju, i=%ju\n", __func__,
		    (uintmax_t)p->p_uticks, (uintmax_t)p->p_sticks,
		    (uintmax_t)p->p_iticks));
		mutex_exit(&proc_lock);

		// XXX: Perhaps create a special kauth type
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_PTRACE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error)
			return error;
	} else if (clock_id & CLOCK_THREAD_CPUTIME_ID) {
		struct lwp *l;
		lwpid_t lid = clock_id & CPUCLOCK_ID_MASK;

		p = curproc;
		mutex_enter(p->p_lock);
		l = lid == 0 ? curlwp : lwp_find(p, lid);
		if (l == NULL) {
			mutex_exit(p->p_lock);
			return ESRCH;
		}
		ticks = l->l_rticksum + l->l_slpticksum;
		DPRINTF(("%s: r=%ju, s=%ju\n", __func__,
		    (uintmax_t)l->l_rticksum, (uintmax_t)l->l_slpticksum));
		mutex_exit(p->p_lock);
	} else
		ticks = (uint64_t)-1;	/* sentinel: not a CPU-time clock */

	if (ticks != (uint64_t)-1) {
		ticks2ts(ticks, ts);
		return 0;
	}

	switch (clock_id) {
	case CLOCK_REALTIME:
		nanotime(ts);
		break;
	case CLOCK_MONOTONIC:
		nanouptime(ts);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/*
 * Calculate delta and convert from struct timespec to the ticks.
 */
int
ts2timo(clockid_t clock_id, int flags, struct timespec *ts, int *timo,
    struct timespec *start)
{
	int error;
	struct timespec tsd;

	if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000L)
		return EINVAL;

	if ((flags & TIMER_ABSTIME) != 0 || start != NULL) {
		error = clock_gettime1(clock_id, &tsd);
		if (error != 0)
			return error;
		if (start != NULL)
			*start = tsd;
	}

	if ((flags & TIMER_ABSTIME) != 0) {
		/* Convert the absolute deadline to a relative timeout. */
		if (!timespecsubok(ts, &tsd))
			return EINVAL;
		timespecsub(ts, &tsd, ts);
	}

	error = itimespecfix(ts);
	if (error != 0)
		return error;

	if (ts->tv_sec == 0 && ts->tv_nsec == 0)
		return ETIMEDOUT;

	*timo = tstohz(ts);
	KASSERT(*timo > 0);

	return 0;
}

/*
 * Return true iff timespecadd(tsp, usp, ...) cannot overflow time_t.
 */
bool
timespecaddok(const struct timespec *tsp, const struct timespec *usp)
{
	enum { TIME_MIN = __type_min(time_t), TIME_MAX = __type_max(time_t) };
	time_t a = tsp->tv_sec;
	time_t b = usp->tv_sec;
	bool carry;

	/*
	 * Caller is responsible for guaranteeing valid timespec
	 * inputs.  Any user-controlled inputs must be validated or
	 * adjusted.
	 */
	KASSERT(tsp->tv_nsec >= 0);
	KASSERT(usp->tv_nsec >= 0);
	KASSERT(tsp->tv_nsec < 1000000000L);
	KASSERT(usp->tv_nsec < 1000000000L);
	CTASSERT(1000000000L <= __type_max(long) - 1000000000L);

	/*
	 * Fail if a + b + carry overflows TIME_MAX, or if a + b
	 * overflows TIME_MIN because timespecadd adds the carry after
	 * computing a + b.
	 *
	 * Break it into two mutually exclusive and exhaustive cases:
	 * I. a >= 0
	 * II. a < 0
	 */
	carry = (tsp->tv_nsec + usp->tv_nsec >= 1000000000L);
	if (a >= 0) {
		/*
		 * Case I: a >= 0.  If b < 0, then b + 1 <= 0, so
		 *
		 *	a + b + 1 <= a + 0 <= TIME_MAX,
		 *
		 * and
		 *
		 *	a + b >= 0 + b = b >= TIME_MIN,
		 *
		 * so this can't overflow.
		 *
		 * If b >= 0, then a + b + carry >= a + b >= 0, so
		 * negative results and thus results below TIME_MIN are
		 * impossible; we need only avoid
		 *
		 *	a + b + carry > TIME_MAX,
		 *
		 * which we will do by rejecting if
		 *
		 *	b > TIME_MAX - a - carry,
		 *
		 * which in turn is incidentally always false if b < 0
		 * so we don't need extra logic to discriminate on the
		 * b >= 0 and b < 0 cases.
		 *
		 * Since 0 <= a <= TIME_MAX, we know
		 *
		 *	0 <= TIME_MAX - a <= TIME_MAX,
		 *
		 * and hence
		 *
		 *	-1 <= TIME_MAX - a - 1 < TIME_MAX.
		 *
		 * So we can compute TIME_MAX - a - carry (i.e., either
		 * TIME_MAX - a or TIME_MAX - a - 1) safely without
		 * overflow.
		 */
		if (b > TIME_MAX - a - carry)
			return false;
	} else {
		/*
		 * Case II: a < 0.  If b >= 0, then since a + 1 <= 0,
		 * we have
		 *
		 *	a + b + 1 <= b <= TIME_MAX,
		 *
		 * and
		 *
		 *	a + b >= a >= TIME_MIN,
		 *
		 * so this can't overflow.
		 *
		 * If b < 0, then the intermediate a + b is negative
		 * and the outcome a + b + 1 is nonpositive, so we need
		 * only avoid
		 *
		 *	a + b < TIME_MIN,
		 *
		 * which we will do by rejecting if
		 *
		 *	a < TIME_MIN - b.
		 *
		 * (Reminder: The carry is added afterward in
		 * timespecadd, so to avoid overflow it is not enough
		 * to merely reject a + b + carry < TIME_MIN.)
		 *
		 * It is safe to compute the difference TIME_MIN - b
		 * because b is negative, so the result lies in
		 * (TIME_MIN, 0].
		 */
		if (b < 0 && a < TIME_MIN - b)
			return false;
	}

	return true;
}

/*
 * Return true iff timespecsub(tsp, usp, ...) cannot overflow time_t.
 */
bool
timespecsubok(const struct timespec *tsp, const struct timespec *usp)
{
	enum { TIME_MIN = __type_min(time_t), TIME_MAX = __type_max(time_t) };
	time_t a = tsp->tv_sec, b = usp->tv_sec;
	bool borrow;

	/*
	 * Caller is responsible for guaranteeing valid timespec
	 * inputs.  Any user-controlled inputs must be validated or
	 * adjusted.
	 */
	KASSERT(tsp->tv_nsec >= 0);
	KASSERT(usp->tv_nsec >= 0);
	KASSERT(tsp->tv_nsec < 1000000000L);
	KASSERT(usp->tv_nsec < 1000000000L);
	CTASSERT(1000000000L <= __type_max(long) - 1000000000L);

	/*
	 * Fail if a - b - borrow overflows TIME_MIN, or if a - b
	 * overflows TIME_MAX because timespecsub subtracts the borrow
	 * after computing a - b.
	 *
	 * Break it into two mutually exclusive and exhaustive cases:
	 * I. a < 0
	 * II. a >= 0
	 */
	borrow = (tsp->tv_nsec - usp->tv_nsec < 0);
	if (a < 0) {
		/*
		 * Case I: a < 0.  If b < 0, then -b - 1 >= 0, so
		 *
		 *	a - b - 1 >= a + 0 >= TIME_MIN,
		 *
		 * and, since a <= -1, provided that TIME_MIN <=
		 * -TIME_MAX - 1 so that TIME_MAX <= -TIME_MIN - 1 (in
		 * fact, equality holds, under the assumption of
		 * two's-complement arithmetic),
		 *
		 *	a - b <= -1 - b = -b - 1 <= TIME_MAX,
		 *
		 * so this can't overflow.
		 */
		CTASSERT(TIME_MIN <= -TIME_MAX - 1);

		/*
		 * If b >= 0, then a - b - borrow <= a - b < 0, so
		 * positive results and thus results above TIME_MAX are
		 * impossible; we need only avoid
		 *
		 *	a - b - borrow < TIME_MIN,
		 *
		 * which we will do by rejecting if
		 *
		 *	a < TIME_MIN + b + borrow.
		 *
		 * The right-hand side is safe to evaluate for any
		 * values of b and borrow as long as TIME_MIN +
		 * TIME_MAX + 1 <= TIME_MAX, i.e., TIME_MIN <= -1.
		 * (Note: If time_t were unsigned, this would fail!)
		 *
		 * Note: Unlike Case I in timespecaddok, this criterion
		 * does not work for b < 0, nor can the roles of a and
		 * b in the inequality be reversed (e.g., -b < TIME_MIN
		 * - a + borrow) without extra cases like checking for
		 * b = TEST_MIN.
		 */
		CTASSERT(TIME_MIN < -1);
		if (b >= 0 && a < TIME_MIN + b + borrow)
			return false;
	} else {
		/*
		 * Case II: a >= 0.  If b >= 0, then
		 *
		 *	a - b <= a <= TIME_MAX,
		 *
		 * and, provided TIME_MIN <= -TIME_MAX - 1 (in fact,
		 * equality holds, under the assumption of
		 * two's-complement arithmetic)
		 *
		 *	a - b - 1 >= -b - 1 >= -TIME_MAX - 1 >= TIME_MIN,
		 *
		 * so this can't overflow.
		 */
		CTASSERT(TIME_MIN <= -TIME_MAX - 1);

		/*
		 * If b < 0, then a - b >= a >= 0, so negative results
		 * and thus results below TIME_MIN are impossible; we
		 * need only avoid
		 *
		 *	a - b > TIME_MAX,
		 *
		 * which we will do by rejecting if
		 *
		 *	a > TIME_MAX + b.
		 *
		 * (Reminder: The borrow is subtracted afterward in
		 * timespecsub, so to avoid overflow it is not enough
		 * to merely reject a - b - borrow > TIME_MAX.)
		 *
		 * It is safe to compute the sum TIME_MAX + b because b
		 * is negative, so the result lies in [0, TIME_MAX).
		 */
		if (b < 0 && a > TIME_MAX + b)
			return false;
	}

	return true;
}
| 6 6 5 6 3 1 1 1 1 4 3 3 1 1 1 1 4 4 3 1 1 1 1 4 4 3 1 1 1 1 6 5 1 1 1 1 1 1 1 1 1 1 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 
1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 | /* $NetBSD: cgd.c,v 1.146 2022/04/02 09:53:20 riastradh Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Roland C. Dowdeswell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.146 2022/04/02 09:53:20 riastradh Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/namei.h> /* for pathbuf */
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/workqueue.h>

#include <dev/cgd_crypto.h>
#include <dev/cgdvar.h>
#include <dev/dkvar.h>

#include <miscfs/specfs/specdev.h> /* for v_rdev */

#include "ioconf.h"

/*
 * One known-answer self-test case: encrypt txtlen bytes of ptxt with
 * alg/keylen at sector blkno and compare the result against ctxt
 * (and decrypt back again).  Consumed by cgd_selftest().
 */
struct selftest_params {
	const char *alg;
	int encblkno8;		/* nonzero: use legacy encblkno8 IV mode */
	int blocksize;		/* cipher block size, number of bytes */
	int secsize;		/* disk sector size, number of bytes */
	daddr_t blkno;		/* sector number used for the IV */
	int keylen;		/* key length, number of bits */
	int txtlen;		/* length of ptxt/ctxt, number of bytes */
	const uint8_t *key;
	const uint8_t *ptxt;	/* plaintext */
	const uint8_t *ctxt;	/* expected ciphertext */
};

/* Entry Point Functions */

static dev_type_open(cgdopen);
static dev_type_close(cgdclose);
static dev_type_read(cgdread);
static dev_type_write(cgdwrite);
static dev_type_ioctl(cgdioctl);
static dev_type_strategy(cgdstrategy);
static dev_type_dump(cgddump);
static dev_type_size(cgdsize);

/* Block-device switch: cgd is a disk and is MP-safe. */
const struct bdevsw cgd_bdevsw = {
	.d_open = cgdopen,
	.d_close = cgdclose,
	.d_strategy = cgdstrategy,
	.d_ioctl = cgdioctl,
	.d_dump = cgddump,
	.d_psize = cgdsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};

/* Character-device switch: raw I/O goes through physio (see cgdread/write). */
const struct cdevsw cgd_cdevsw = {
	.d_open = cgdopen,
	.d_close = cgdclose,
	.d_read = cgdread,
	.d_write = cgdwrite,
	.d_ioctl = cgdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};

/*
 * Vector 5 from IEEE 1619/D16 truncated to 64 bytes, blkno 1.
*/
static const uint8_t selftest_aes_xts_256_ptxt[64] = {
	0x27, 0xa7, 0x47, 0x9b, 0xef, 0xa1, 0xd4, 0x76,
	0x48, 0x9f, 0x30, 0x8c, 0xd4, 0xcf, 0xa6, 0xe2,
	0xa9, 0x6e, 0x4b, 0xbe, 0x32, 0x08, 0xff, 0x25,
	0x28, 0x7d, 0xd3, 0x81, 0x96, 0x16, 0xe8, 0x9c,
	0xc7, 0x8c, 0xf7, 0xf5, 0xe5, 0x43, 0x44, 0x5f,
	0x83, 0x33, 0xd8, 0xfa, 0x7f, 0x56, 0x00, 0x00,
	0x05, 0x27, 0x9f, 0xa5, 0xd8, 0xb5, 0xe4, 0xad,
	0x40, 0xe7, 0x36, 0xdd, 0xb4, 0xd3, 0x54, 0x12,
};

/*
 * NOTE(review): declared [512] although only 64 bytes are initialized
 * (the remainder is implicitly zero).  Only txtlen = sizeof(ptxt) = 64
 * bytes are ever compared, so this is harmless, but it looks like it
 * was meant to be [64] like the other ctxt vectors -- confirm upstream.
 */
static const uint8_t selftest_aes_xts_256_ctxt[512] = {
	0x26, 0x4d, 0x3c, 0xa8, 0x51, 0x21, 0x94, 0xfe,
	0xc3, 0x12, 0xc8, 0xc9, 0x89, 0x1f, 0x27, 0x9f,
	0xef, 0xdd, 0x60, 0x8d, 0x0c, 0x02, 0x7b, 0x60,
	0x48, 0x3a, 0x3f, 0xa8, 0x11, 0xd6, 0x5e, 0xe5,
	0x9d, 0x52, 0xd9, 0xe4, 0x0e, 0xc5, 0x67, 0x2d,
	0x81, 0x53, 0x2b, 0x38, 0xb6, 0xb0, 0x89, 0xce,
	0x95, 0x1f, 0x0f, 0x9c, 0x35, 0x59, 0x0b, 0x8b,
	0x97, 0x8d, 0x17, 0x52, 0x13, 0xf3, 0x29, 0xbb,
};

/* 32 key bytes (256 bits) plus one trailing zero byte. */
static const uint8_t selftest_aes_xts_256_key[33] = {
	0x27, 0x18, 0x28, 0x18, 0x28, 0x45, 0x90, 0x45,
	0x23, 0x53, 0x60, 0x28, 0x74, 0x71, 0x35, 0x26,
	0x31, 0x41, 0x59, 0x26, 0x53, 0x58, 0x97, 0x93,
	0x23, 0x84, 0x62, 0x64, 0x33, 0x83, 0x27, 0x95,
	0
};

/*
 * Vector 11 from IEEE 1619/D16 truncated to 64 bytes, blkno 0xffff.
*/
static const uint8_t selftest_aes_xts_512_ptxt[64] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
};

static const uint8_t selftest_aes_xts_512_ctxt[64] = {
	0x77, 0xa3, 0x12, 0x51, 0x61, 0x8a, 0x15, 0xe6,
	0xb9, 0x2d, 0x1d, 0x66, 0xdf, 0xfe, 0x7b, 0x50,
	0xb5, 0x0b, 0xad, 0x55, 0x23, 0x05, 0xba, 0x02,
	0x17, 0xa6, 0x10, 0x68, 0x8e, 0xff, 0x7e, 0x11,
	0xe1, 0xd0, 0x22, 0x54, 0x38, 0xe0, 0x93, 0x24,
	0x2d, 0x6d, 0xb2, 0x74, 0xfd, 0xe8, 0x01, 0xd4,
	0xca, 0xe0, 0x6f, 0x20, 0x92, 0xc7, 0x28, 0xb2,
	0x47, 0x85, 0x59, 0xdf, 0x58, 0xe8, 0x37, 0xc2,
};

/* 64 key bytes (512 bits) plus one trailing zero byte. */
static const uint8_t selftest_aes_xts_512_key[65] = {
	0x27, 0x18, 0x28, 0x18, 0x28, 0x45, 0x90, 0x45,
	0x23, 0x53, 0x60, 0x28, 0x74, 0x71, 0x35, 0x26,
	0x62, 0x49, 0x77, 0x57, 0x24, 0x70, 0x93, 0x69,
	0x99, 0x59, 0x57, 0x49, 0x66, 0x96, 0x76, 0x27,
	0x31, 0x41, 0x59, 0x26, 0x53, 0x58, 0x97, 0x93,
	0x23, 0x84, 0x62, 0x64, 0x33, 0x83, 0x27, 0x95,
	0x02, 0x88, 0x41, 0x97, 0x16, 0x93, 0x99, 0x37,
	0x51, 0x05, 0x82, 0x09, 0x74, 0x94, 0x45, 0x92,
	0
};

/* Shared key for the aes-cbc self-tests (first 128 or all 256 bits used). */
static const uint8_t selftest_aes_cbc_key[32] = {
	0x27, 0x18, 0x28, 0x18, 0x28, 0x45, 0x90, 0x45,
	0x23, 0x53, 0x60, 0x28, 0x74, 0x71, 0x35, 0x26,
	0x62, 0x49, 0x77, 0x57, 0x24, 0x70, 0x93, 0x69,
	0x99, 0x59, 0x57, 0x49, 0x66, 0x96, 0x76, 0x27,
};

static const uint8_t selftest_aes_cbc_128_ptxt[64] = {
	0x27, 0xa7, 0x47, 0x9b, 0xef, 0xa1, 0xd4, 0x76,
	0x48, 0x9f, 0x30, 0x8c, 0xd4, 0xcf, 0xa6, 0xe2,
	0xa9, 0x6e, 0x4b, 0xbe, 0x32, 0x08, 0xff, 0x25,
	0x28, 0x7d, 0xd3, 0x81, 0x96, 0x16, 0xe8, 0x9c,
	0xc7, 0x8c, 0xf7, 0xf5, 0xe5, 0x43, 0x44, 0x5f,
	0x83, 0x33, 0xd8, 0xfa, 0x7f, 0x56, 0x00, 0x00,
	0x05, 0x27, 0x9f, 0xa5, 0xd8, 0xb5, 0xe4, 0xad,
	0x40, 0xe7, 0x36, 0xdd, 0xb4,
	0xd3, 0x54, 0x12,
};

static const uint8_t selftest_aes_cbc_128_ctxt[64] = { /* blkno=1 */
	0x93, 0x94, 0x56, 0x36, 0x83, 0xbc, 0xff, 0xa4,
	0xe0, 0x24, 0x34, 0x12, 0xbe, 0xfa, 0xb0, 0x7d,
	0x88, 0x1e, 0xc5, 0x57, 0x55, 0x23, 0x05, 0x0c,
	0x69, 0xa5, 0xc1, 0xda, 0x64, 0xee, 0x74, 0x10,
	0xc2, 0xc5, 0xe6, 0x66, 0xd6, 0xa7, 0x49, 0x1c,
	0x9d, 0x40, 0xb5, 0x0c, 0x9b, 0x6e, 0x1c, 0xe6,
	0xb1, 0x7a, 0x1c, 0xe7, 0x5a, 0xfe, 0xf9, 0x2a,
	0x78, 0xfa, 0xb7, 0x7b, 0x08, 0xdf, 0x8e, 0x51,
};

static const uint8_t selftest_aes_cbc_256_ptxt[64] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
};

static const uint8_t selftest_aes_cbc_256_ctxt[64] = { /* blkno=0xffff */
	0x6c, 0xa3, 0x15, 0x17, 0x51, 0x90, 0xe9, 0x69,
	0x08, 0x36, 0x7b, 0xa6, 0xbb, 0xd1, 0x0b, 0x9e,
	0xcd, 0x6b, 0x1e, 0xaf, 0xb6, 0x2e, 0x62, 0x7d,
	0x8e, 0xde, 0xf0, 0xed, 0x0d, 0x44, 0xe7, 0x31,
	0x26, 0xcf, 0xd5, 0x0b, 0x3e, 0x95, 0x59, 0x89,
	0xdf, 0x5d, 0xd6, 0x9a, 0x00, 0x66, 0xcc, 0x7f,
	0x45, 0xd3, 0x06, 0x58, 0xed, 0xef, 0x49, 0x47,
	0x87, 0x89, 0x17, 0x7d, 0x08, 0x56, 0x50, 0xe1,
};

/* 24 key bytes = 192 bits (168 effective + 24 parity bits for 3DES). */
static const uint8_t selftest_3des_cbc_key[24] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
};

static const uint8_t selftest_3des_cbc_ptxt[64] = {
	0x27, 0xa7, 0x47, 0x9b, 0xef, 0xa1, 0xd4, 0x76,
	0x48, 0x9f, 0x30, 0x8c, 0xd4, 0xcf, 0xa6, 0xe2,
	0xa9, 0x6e, 0x4b, 0xbe, 0x32, 0x08, 0xff, 0x25,
	0x28, 0x7d, 0xd3, 0x81, 0x96, 0x16, 0xe8, 0x9c,
	0xc7, 0x8c, 0xf7, 0xf5, 0xe5, 0x43, 0x44, 0x5f,
	0x83, 0x33, 0xd8, 0xfa, 0x7f, 0x56, 0x00, 0x00,
	0x05, 0x27, 0x9f, 0xa5, 0xd8, 0xb5, 0xe4, 0xad,
	0x40, 0xe7, 0x36, 0xdd, 0xb4,
	0xd3, 0x54, 0x12,
};

static const uint8_t selftest_3des_cbc_ctxt[64] = {
	0xa2, 0xfe, 0x81, 0xaa, 0x10, 0x6c, 0xea, 0xb9,
	0x11, 0x58, 0x1f, 0x29, 0xb5, 0x86, 0x71, 0x56,
	0xe9, 0x25, 0x1d, 0x07, 0xb1, 0x69, 0x59, 0x6c,
	0x96, 0x80, 0xf7, 0x54, 0x38, 0xaa, 0xa7, 0xe4,
	0xe8, 0x81, 0xf5, 0x00, 0xbb, 0x1c, 0x00, 0x3c,
	0xba, 0x38, 0x45, 0x97, 0x4c, 0xcf, 0x84, 0x14,
	0x46, 0x86, 0xd9, 0xf4, 0xc5, 0xe2, 0xf0, 0x54,
	0xde, 0x41, 0xf6, 0xa1, 0xef, 0x1b, 0x0a, 0xea,
};

/* 56 key bytes = 448 bits, Blowfish's maximum key length. */
static const uint8_t selftest_bf_cbc_key[56] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
};

static const uint8_t selftest_bf_cbc_ptxt[64] = {
	0x27, 0xa7, 0x47, 0x9b, 0xef, 0xa1, 0xd4, 0x76,
	0x48, 0x9f, 0x30, 0x8c, 0xd4, 0xcf, 0xa6, 0xe2,
	0xa9, 0x6e, 0x4b, 0xbe, 0x32, 0x08, 0xff, 0x25,
	0x28, 0x7d, 0xd3, 0x81, 0x96, 0x16, 0xe8, 0x9c,
	0xc7, 0x8c, 0xf7, 0xf5, 0xe5, 0x43, 0x44, 0x5f,
	0x83, 0x33, 0xd8, 0xfa, 0x7f, 0x56, 0x00, 0x00,
	0x05, 0x27, 0x9f, 0xa5, 0xd8, 0xb5, 0xe4, 0xad,
	0x40, 0xe7, 0x36, 0xdd, 0xb4, 0xd3, 0x54, 0x12,
};

static const uint8_t selftest_bf_cbc_ctxt[64] = {
	0xec, 0xa2, 0xc0, 0x0e, 0xa9, 0x7f, 0x04, 0x1e,
	0x2e, 0x4f, 0x64, 0x07, 0x67, 0x3e, 0xf4, 0x58,
	0x61, 0x5f, 0xd3, 0x50, 0x5e, 0xd3, 0x4d, 0x34,
	0xa0, 0x53, 0xbe, 0x47, 0x75, 0x69, 0x3b, 0x1f,
	0x86, 0xf2, 0xae, 0x8b, 0xb7, 0x91, 0xda, 0xd4,
	0x2b, 0xa5, 0x47, 0x9b, 0x7d, 0x13, 0x30, 0xdd,
	0x7b, 0xad, 0x86, 0x57, 0x51, 0x11, 0x74, 0x42,
	0xb8, 0xbf, 0x69, 0x17, 0x20, 0x0a, 0xf7, 0xda,
};

/* All-zero buffer, used as key, plaintext, and IV source below. */
static const uint8_t selftest_aes_cbc_encblkno8_zero64[64];

static const uint8_t selftest_aes_cbc_encblkno8_ctxt[64] = {
	0xa2, 0x06, 0x26, 0x26, 0xac, 0xdc, 0xe7, 0xcf,
	0x47, 0x68, 0x24, 0x0e, 0xfa, 0x40, 0x44, 0x83,
	0x07, 0xe1, 0xf4, 0x5d, 0x53, 0x47, 0xa0, 0xfe,
	0xc0, 0x6e,
	0x4e, 0xf8, 0x9d, 0x98, 0x63, 0xb8,
	0x2c, 0x27, 0xfa, 0x3a, 0xd5, 0x40, 0xda, 0xdb,
	0xe6, 0xc3, 0xe4, 0xfb, 0x85, 0x53, 0xfb, 0x78,
	0x5d, 0xbd, 0x8f, 0x4c, 0x1a, 0x04, 0x9c, 0x88,
	0x85, 0xec, 0x3c, 0x56, 0x46, 0x1a, 0x6e, 0xf5,
};

/* Known-answer tests run by cgd_selftest() at attach time. */
const struct selftest_params selftests[] = {
	{
		.alg = "aes-xts",
		.blocksize = 16,
		.secsize = 512,
		.blkno = 1,
		.keylen = 256,
		.txtlen = sizeof(selftest_aes_xts_256_ptxt),
		.key = selftest_aes_xts_256_key,
		.ptxt = selftest_aes_xts_256_ptxt,
		.ctxt = selftest_aes_xts_256_ctxt
	},
	{
		.alg = "aes-xts",
		.blocksize = 16,
		.secsize = 512,
		.blkno = 0xffff,
		.keylen = 512,
		.txtlen = sizeof(selftest_aes_xts_512_ptxt),
		.key = selftest_aes_xts_512_key,
		.ptxt = selftest_aes_xts_512_ptxt,
		.ctxt = selftest_aes_xts_512_ctxt
	},
	{
		.alg = "aes-cbc",
		.blocksize = 16,
		.secsize = 512,
		.blkno = 1,
		.keylen = 128,
		.txtlen = sizeof(selftest_aes_cbc_128_ptxt),
		.key = selftest_aes_cbc_key,
		.ptxt = selftest_aes_cbc_128_ptxt,
		.ctxt = selftest_aes_cbc_128_ctxt,
	},
	{
		.alg = "aes-cbc",
		.blocksize = 16,
		.secsize = 512,
		.blkno = 0xffff,
		.keylen = 256,
		.txtlen = sizeof(selftest_aes_cbc_256_ptxt),
		.key = selftest_aes_cbc_key,
		.ptxt = selftest_aes_cbc_256_ptxt,
		.ctxt = selftest_aes_cbc_256_ctxt,
	},
	{
		.alg = "3des-cbc",
		.blocksize = 8,
		.secsize = 512,
		.blkno = 1,
		.keylen = 192, /* 168 + 3*8 parity bits */
		.txtlen = sizeof(selftest_3des_cbc_ptxt),
		.key = selftest_3des_cbc_key,
		.ptxt = selftest_3des_cbc_ptxt,
		.ctxt = selftest_3des_cbc_ctxt,
	},
	{
		.alg = "blowfish-cbc",
		.blocksize = 8,
		.secsize = 512,
		.blkno = 1,
		.keylen = 448,
		.txtlen = sizeof(selftest_bf_cbc_ptxt),
		.key = selftest_bf_cbc_key,
		.ptxt = selftest_bf_cbc_ptxt,
		.ctxt = selftest_bf_cbc_ctxt,
	},
	{
		.alg = "aes-cbc",
		.encblkno8 = 1,
		.blocksize = 16,
		.secsize = 512,
		.blkno = 0,
		.keylen = 128,
		.txtlen = sizeof(selftest_aes_cbc_encblkno8_zero64),
		.key = selftest_aes_cbc_encblkno8_zero64,
		.ptxt = selftest_aes_cbc_encblkno8_zero64,
		.ctxt = selftest_aes_cbc_encblkno8_ctxt,
	},
};

/* Autoconf glue */

static int	cgd_match(device_t, cfdata_t, void *);
static
void	cgd_attach(device_t, device_t, void *);
static int	cgd_detach(device_t, int);
static struct cgd_softc	*cgd_spawn(int);
static struct cgd_worker *cgd_create_one_worker(void);
static void	cgd_destroy_one_worker(struct cgd_worker *);
static struct cgd_worker *cgd_create_worker(void);
static void	cgd_destroy_worker(struct cgd_worker *);
static int	cgd_destroy(device_t);

/* Internal Functions */

static int	cgd_diskstart(device_t, struct buf *);
static void	cgd_diskstart2(struct cgd_softc *, struct cgd_xfer *);
static void	cgdiodone(struct buf *);
static void	cgd_iodone2(struct cgd_softc *, struct cgd_xfer *);
static void	cgd_enqueue(struct cgd_softc *, struct cgd_xfer *);
static void	cgd_process(struct work *, void *);
static int	cgd_dumpblocks(device_t, void *, daddr_t, int);

static int	cgd_ioctl_set(struct cgd_softc *, void *, struct lwp *);
static int	cgd_ioctl_clr(struct cgd_softc *, struct lwp *);
static int	cgd_ioctl_get(dev_t, void *, struct lwp *);
static int	cgdinit(struct cgd_softc *, const char *, struct vnode *,
			struct lwp *);
static void	cgd_cipher(struct cgd_softc *, void *, const void *,
			   size_t, daddr_t, size_t, int);

static void	cgd_selftest(void);

/* Hooks into the generic disk (dk) framework. */
static const struct dkdriver cgddkdriver = {
        .d_minphys  = minphys,
        .d_open = cgdopen,
        .d_close = cgdclose,
        .d_strategy = cgdstrategy,
        .d_iosize = NULL,
        .d_diskstart = cgd_diskstart,
        .d_dumpblocks = cgd_dumpblocks,
        .d_lastclose = NULL
};

CFATTACH_DECL3_NEW(cgd, sizeof(struct cgd_softc),
    cgd_match, cgd_attach, cgd_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* DIAGNOSTIC and DEBUG definitions */

#if defined(CGDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int cgddebug = 0;

#define CGDB_FOLLOW	0x1
#define CGDB_IO	0x2
#define CGDB_CRYPTO	0x4

#define IFDEBUG(x,y)		if (cgddebug & (x)) y
#define DPRINTF(x,y)		IFDEBUG(x, printf y)
#define DPRINTF_FOLLOW(y)	DPRINTF(CGDB_FOLLOW, y)

static void	hexprint(const char *, void *, int);

#else
#define IFDEBUG(x,y)
#define DPRINTF(x,y)
#define DPRINTF_FOLLOW(y)
#endif

/* Global variables */

/* Serializes pseudo-device creation/destruction (see cgd_lock). */
static kmutex_t cgd_spawning_mtx;
static kcondvar_t cgd_spawning_cv;
static bool cgd_spawning;

/* Single worker shared by all cgd instances, reference-counted. */
static struct cgd_worker *cgd_worker;
static u_int cgd_refcnt;	/* number of users of cgd_worker */

/* Utility Functions */

#define CGDUNIT(x)		DISKUNIT(x)

/* The code */

/*
 * Acquire the spawning lock; if `intr' is true the wait is
 * interruptible by signals and may return an error.
 */
static int
cgd_lock(bool intr)
{
	int error = 0;

	mutex_enter(&cgd_spawning_mtx);
	while (cgd_spawning) {
		if (intr)
			error = cv_wait_sig(&cgd_spawning_cv,
			    &cgd_spawning_mtx);
		else
			cv_wait(&cgd_spawning_cv, &cgd_spawning_mtx);
	}
	if (error == 0)
		cgd_spawning = true;
	mutex_exit(&cgd_spawning_mtx);
	return error;
}

/* Release the spawning lock and wake any waiters. */
static void
cgd_unlock(void)
{

	mutex_enter(&cgd_spawning_mtx);
	cgd_spawning = false;
	cv_broadcast(&cgd_spawning_cv);
	mutex_exit(&cgd_spawning_mtx);
}

/* Map a dev_t to its softc; NULL if the unit is not configured. */
static struct cgd_softc *
getcgd_softc(dev_t dev)
{

	return device_lookup_private(&cgd_cd, CGDUNIT(dev));
}

/* Pseudo-device: always matches. */
static int
cgd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

/* Initialize per-instance locks and register with the dk framework. */
static void
cgd_attach(device_t parent, device_t self, void *aux)
{
	struct cgd_softc *sc = device_private(self);

	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO);
	cv_init(&sc->sc_cv, "cgdcv");
	dk_init(&sc->sc_dksc, self, DKTYPE_CGD);
	disk_init(&sc->sc_dksc.sc_dkdev, sc->sc_dksc.sc_xname, &cgddkdriver);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self,
		    "unable to register power management hooks\n");
}

/* Tear down an instance; refuses while the disk is busy or attached. */
static int
cgd_detach(device_t self, int flags)
{
	int ret;
	struct cgd_softc *sc = device_private(self);
	struct dk_softc *dksc = &sc->sc_dksc;

	if (DK_BUSY(dksc, 0))
		return EBUSY;

	if (DK_ATTACHED(dksc) &&
	    (ret = cgd_ioctl_clr(sc, curlwp)) != 0)
		return ret;

	disk_destroy(&dksc->sc_dkdev);
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_lock);

	return 0;
}

/* Driver entry point from autoconf; also runs the crypto self-tests. */
void
cgdattach(int num)
{
#ifndef _MODULE
	int error;

	mutex_init(&cgd_spawning_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&cgd_spawning_cv, "cgspwn");

	error = config_cfattach_attach(cgd_cd.cd_name, &cgd_ca);
	if (error != 0)
		aprint_error("%s: unable to register cfattach\n",
		    cgd_cd.cd_name);
#endif

	cgd_selftest();
}

/*
 * Create a pseudo-device instance for `unit' on first open, together
 * with a reference on the shared worker.  Caller holds the spawning
 * lock (cgd_lock).
 */
static struct cgd_softc *
cgd_spawn(int unit)
{
	cfdata_t cf;
	struct cgd_worker *cw;
	struct cgd_softc *sc;

	cf = kmem_alloc(sizeof(*cf), KM_SLEEP);
	cf->cf_name = cgd_cd.cd_name;
	cf->cf_atname = cgd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	cw = cgd_create_one_worker();
	if (cw == NULL) {
		kmem_free(cf, sizeof(*cf));
		return NULL;
	}

	sc = device_private(config_attach_pseudo(cf));
	if (sc == NULL) {
		cgd_destroy_one_worker(cw);
		return NULL;
	}

	sc->sc_worker = cw;

	return sc;
}

/* Detach and free a spawned instance, dropping its worker reference. */
static int
cgd_destroy(device_t dev)
{
	struct cgd_softc *sc = device_private(dev);
	struct cgd_worker *cw = sc->sc_worker;
	cfdata_t cf;
	int error;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;

	cgd_destroy_one_worker(cw);

	kmem_free(cf, sizeof(*cf));
	return 0;
}

/* Mark the instance busy, waiting for any current holder to finish. */
static void
cgd_busy(struct cgd_softc *sc)
{

	mutex_enter(&sc->sc_lock);
	while (sc->sc_busy)
		cv_wait(&sc->sc_cv, &sc->sc_lock);
	sc->sc_busy = true;
	mutex_exit(&sc->sc_lock);
}

/* Clear the busy flag and wake waiters. */
static void
cgd_unbusy(struct cgd_softc *sc)
{

	mutex_enter(&sc->sc_lock);
	sc->sc_busy = false;
	cv_broadcast(&sc->sc_cv);
	mutex_exit(&sc->sc_lock);
}

/*
 * Take a reference on the shared worker, creating it on first use.
 * Serialized by the spawning lock.
 */
static struct cgd_worker *
cgd_create_one_worker(void)
{

	KASSERT(cgd_spawning);

	if (cgd_refcnt++ == 0) {
		KASSERT(cgd_worker == NULL);
		cgd_worker = cgd_create_worker();
	}
	KASSERT(cgd_worker != NULL);

	return cgd_worker;
}

/* Drop a reference on the shared worker; destroy it on last use. */
static void
cgd_destroy_one_worker(struct cgd_worker *cw)
{

	KASSERT(cgd_spawning);
	KASSERT(cw == cgd_worker);

	if (--cgd_refcnt == 0) {
		cgd_destroy_worker(cgd_worker);
		cgd_worker = NULL;
	}
}

/*
 * Allocate the worker: a per-CPU, FPU-capable workqueue plus a pool
 * of cgd_xfer contexts.
 */
static struct cgd_worker *
cgd_create_worker(void)
{
	struct cgd_worker *cw;
	struct workqueue *wq;
	struct pool *cp;
	int error;

	cw = kmem_alloc(sizeof(struct cgd_worker), KM_SLEEP);
	cp = kmem_alloc(sizeof(struct pool), KM_SLEEP);

	error = workqueue_create(&wq, "cgd", cgd_process, NULL,
	    PRI_BIO, IPL_BIO, WQ_FPU|WQ_MPSAFE|WQ_PERCPU);
	if (error) {
		kmem_free(cp, sizeof(struct pool));
		kmem_free(cw, sizeof(struct cgd_worker));
		return
		    NULL;
	}

	cw->cw_cpool = cp;
	cw->cw_wq = wq;
	pool_init(cw->cw_cpool, sizeof(struct cgd_xfer), 0, 0, 0,
	    "cgdcpl", NULL, IPL_BIO);

	mutex_init(&cw->cw_lock, MUTEX_DEFAULT, IPL_BIO);

	return cw;
}

/* Free the worker's workqueue, xfer pool, and lock. */
static void
cgd_destroy_worker(struct cgd_worker *cw)
{

	/*
	 * Wait for all worker threads to complete before destroying
	 * the rest of the cgd_worker.
	 */
	if (cw->cw_wq)
		workqueue_destroy(cw->cw_wq);

	mutex_destroy(&cw->cw_lock);

	if (cw->cw_cpool) {
		pool_destroy(cw->cw_cpool);
		kmem_free(cw->cw_cpool, sizeof(struct pool));
	}

	kmem_free(cw, sizeof(struct cgd_worker));
}

/* Open entry point: spawn the unit on first open, then hand off to dk. */
static int
cgdopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct cgd_softc *sc;
	int error;

	DPRINTF_FOLLOW(("cgdopen(0x%"PRIx64", %d)\n", dev, flags));

	error = cgd_lock(true);
	if (error)
		return error;
	sc = getcgd_softc(dev);
	if (sc == NULL)
		sc = cgd_spawn(CGDUNIT(dev));
	cgd_unlock();
	if (sc == NULL)
		return ENXIO;

	return dk_open(&sc->sc_dksc, dev, flags, fmt, l);
}

/* Close entry point: on last close of an unconfigured unit, destroy it. */
static int
cgdclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct cgd_softc *sc;
	struct dk_softc *dksc;
	int error;

	DPRINTF_FOLLOW(("cgdclose(0x%"PRIx64", %d)\n", dev, flags));

	error = cgd_lock(false);
	if (error)
		return error;
	sc = getcgd_softc(dev);
	if (sc == NULL) {
		error = ENXIO;
		goto done;
	}

	dksc = &sc->sc_dksc;
	if ((error = dk_close(dksc, dev, flags, fmt, l)) != 0)
		goto done;

	if (!DK_ATTACHED(dksc)) {
		if ((error = cgd_destroy(sc->sc_dksc.sc_dev)) != 0) {
			device_printf(dksc->sc_dev,
			    "unable to detach instance\n");
			goto done;
		}
	}

done:
	cgd_unlock();
	return error;
}

/* Strategy entry point: sanity-check the buffer and queue it on dk. */
static void
cgdstrategy(struct buf *bp)
{
	struct cgd_softc *sc = getcgd_softc(bp->b_dev);

	DPRINTF_FOLLOW(("cgdstrategy(%p): b_bcount = %ld\n", bp,
	    (long)bp->b_bcount));

	/*
	 * Reject unaligned writes.
	 */
	/*
	 * NOTE(review): sc is not checked for NULL here before
	 * dk_strategy() below -- presumably an I/O request cannot
	 * arrive for an unconfigured unit; confirm against callers.
	 */
	if (((uintptr_t)bp->b_data & 3) != 0) {
		bp->b_error = EINVAL;
		goto bail;
	}

	dk_strategy(&sc->sc_dksc, bp);
	return;

bail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	return;
}

/* Size entry point; -1 if the unit does not exist. */
static int
cgdsize(dev_t dev)
{
	struct cgd_softc *sc = getcgd_softc(dev);

	DPRINTF_FOLLOW(("cgdsize(0x%"PRIx64")\n", dev));
	if (!sc)
		return -1;
	return dk_size(&sc->sc_dksc, dev);
}

/*
 * cgd_{get,put}data are functions that deal with getting a buffer
 * for the new encrypted data.
 * We can no longer have a buffer per device, we need a buffer per
 * work queue...
 */

/*
 * Hand out the instance's preallocated MAXPHYS buffer if free,
 * otherwise fall back to a non-sleeping allocation (may return NULL).
 */
static void *
cgd_getdata(struct cgd_softc *sc, unsigned long size)
{
	void *data = NULL;

	mutex_enter(&sc->sc_lock);
	if (!sc->sc_data_used) {
		sc->sc_data_used = true;
		data = sc->sc_data;
	}
	mutex_exit(&sc->sc_lock);

	if (data)
		return data;

	return kmem_intr_alloc(size, KM_NOSLEEP);
}

/* Return a buffer obtained from cgd_getdata(). */
static void
cgd_putdata(struct cgd_softc *sc, void *data, unsigned long size)
{

	if (data == sc->sc_data) {
		mutex_enter(&sc->sc_lock);
		sc->sc_data_used = false;
		mutex_exit(&sc->sc_lock);
	} else
		kmem_intr_free(data, size);
}

/*
 * Start one I/O request: allocate the shadow buf and xfer context;
 * writes are queued for encryption first, reads go straight down.
 * Returns EAGAIN (without consuming bp) when resources are exhausted.
 */
static int
cgd_diskstart(device_t dev, struct buf *bp)
{
	struct cgd_softc *sc = device_private(dev);
	struct cgd_worker *cw = sc->sc_worker;
	struct dk_softc *dksc = &sc->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	struct cgd_xfer *cx;
	struct buf *nbp;
	void * newaddr;
	daddr_t bn;

	DPRINTF_FOLLOW(("cgd_diskstart(%p, %p)\n", dksc, bp));

	bn = bp->b_rawblkno;

	/*
	 * We attempt to allocate all of our resources up front, so that
	 * we can fail quickly if they are unavailable.
	 */
	nbp = getiobuf(sc->sc_tvn, false);
	if (nbp == NULL)
		return EAGAIN;

	cx = pool_get(cw->cw_cpool, PR_NOWAIT);
	if (cx == NULL) {
		putiobuf(nbp);
		return EAGAIN;
	}

	cx->cx_sc = sc;
	cx->cx_obp = bp;
	cx->cx_nbp = nbp;
	cx->cx_srcv = cx->cx_dstv = bp->b_data;
	cx->cx_blkno = bn;
	cx->cx_secsize = dg->dg_secsize;

	/*
	 * If we are writing, then we need to encrypt the outgoing
	 * block into a new block of memory.
	 */
	if ((bp->b_flags & B_READ) == 0) {
		newaddr = cgd_getdata(sc, bp->b_bcount);
		if (!newaddr) {
			pool_put(cw->cw_cpool, cx);
			putiobuf(nbp);
			return EAGAIN;
		}

		cx->cx_dstv = newaddr;
		cx->cx_len = bp->b_bcount;
		cx->cx_dir = CGD_CIPHER_ENCRYPT;

		/* Encrypt on the worker; cgd_diskstart2 runs afterwards. */
		cgd_enqueue(sc, cx);
		return 0;
	}

	/* Read: no cipher work yet; issue the I/O immediately. */
	cgd_diskstart2(sc, cx);
	return 0;
}

/*
 * Second half of request start: fill in the shadow buf and pass it
 * to the underlying device.  For writes, cx_dstv already holds the
 * ciphertext.
 */
static void
cgd_diskstart2(struct cgd_softc *sc, struct cgd_xfer *cx)
{
	struct vnode	*vp;
	struct buf	*bp;
	struct buf	*nbp;

	bp = cx->cx_obp;
	nbp = cx->cx_nbp;

	nbp->b_data = cx->cx_dstv;
	nbp->b_flags = bp->b_flags;
	nbp->b_oflags = bp->b_oflags;
	nbp->b_cflags = bp->b_cflags;
	nbp->b_iodone = cgdiodone;
	nbp->b_proc = bp->b_proc;
	nbp->b_blkno = btodb(cx->cx_blkno * cx->cx_secsize);
	nbp->b_bcount = bp->b_bcount;
	nbp->b_private = cx;

	BIO_COPYPRIO(nbp, bp);

	if ((nbp->b_flags & B_READ) == 0) {
		/* Writes in flight are accounted on the target vnode. */
		vp = nbp->b_vp;
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	VOP_STRATEGY(sc->sc_tvn, nbp);
}

/*
 * Completion handler for the shadow buf: propagate errors, then
 * queue decryption for reads or finish the request directly.
 */
static void
cgdiodone(struct buf *nbp)
{
	struct	cgd_xfer *cx = nbp->b_private;
	struct	buf *obp = cx->cx_obp;
	struct	cgd_softc *sc = getcgd_softc(obp->b_dev);
	struct	dk_softc *dksc = &sc->sc_dksc;
	struct	disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	daddr_t	bn;

	KDASSERT(sc);

	DPRINTF_FOLLOW(("cgdiodone(%p)\n", nbp));
	DPRINTF(CGDB_IO, ("cgdiodone: bp %p bcount %d resid %d\n",
	    obp, obp->b_bcount, obp->b_resid));
	DPRINTF(CGDB_IO, (" dev 0x%"PRIx64", nbp %p bn %" PRId64
	    " addr %p bcnt %d\n", nbp->b_dev, nbp, nbp->b_blkno, nbp->b_data,
	    nbp->b_bcount));
	if (nbp->b_error != 0) {
		obp->b_error = nbp->b_error;
		DPRINTF(CGDB_IO, ("%s: error %d\n", dksc->sc_xname,
		    obp->b_error));
	}

	/* Perform the decryption if we are reading.
	 *
	 * Note: use the blocknumber from nbp, since it is what
	 * we used to encrypt the blocks.
	 */
	if (nbp->b_flags & B_READ) {
		bn = dbtob(nbp->b_blkno) / dg->dg_secsize;

		cx->cx_obp     = obp;
		cx->cx_nbp     = nbp;
		cx->cx_dstv    = obp->b_data;
		cx->cx_srcv    = obp->b_data;
		cx->cx_len     = obp->b_bcount;
		cx->cx_blkno   = bn;
		cx->cx_secsize = dg->dg_secsize;
		cx->cx_dir     = CGD_CIPHER_DECRYPT;

		/* Decrypt on the worker; cgd_iodone2 runs afterwards. */
		cgd_enqueue(sc, cx);
		return;
	}

	cgd_iodone2(sc, cx);
}

/*
 * Final completion: release the xfer context, the ciphertext buffer
 * (if one was allocated for a write) and the shadow buf, then hand
 * the original buf back to dk and restart the queue.
 */
static void
cgd_iodone2(struct cgd_softc *sc, struct cgd_xfer *cx)
{
	struct cgd_worker *cw = sc->sc_worker;
	struct buf *obp = cx->cx_obp;
	struct buf *nbp = cx->cx_nbp;
	struct dk_softc *dksc = &sc->sc_dksc;

	pool_put(cw->cw_cpool, cx);

	/* If we allocated memory, free it now... */
	if (nbp->b_data != obp->b_data)
		cgd_putdata(sc, nbp->b_data, nbp->b_bcount);

	putiobuf(nbp);

	/* Request is complete for whatever reason */
	obp->b_resid = 0;
	if (obp->b_error != 0)
		obp->b_resid = obp->b_bcount;

	dk_done(dksc, obp);
	dk_start(dksc, NULL);
}

/*
 * Crash-dump path: encrypt the caller's pages into a scratch buffer
 * and pass them to the underlying device's dump routine.
 */
static int
cgd_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct cgd_softc *sc = device_private(dev);
	struct dk_softc *dksc = &sc->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	size_t nbytes, blksize;
	void *buf;
	int error;

	/*
	 * dk_dump gives us units of disklabel sectors.  Everything
	 * else in cgd uses units of diskgeom sectors.  These had
	 * better agree; otherwise we need to figure out how to convert
	 * between them.
	 */
	KASSERTMSG((dg->dg_secsize == dksc->sc_dkdev.dk_label->d_secsize),
	    "diskgeom secsize %"PRIu32" != disklabel secsize %"PRIu32,
	    dg->dg_secsize, dksc->sc_dkdev.dk_label->d_secsize);
	blksize = dg->dg_secsize;

	/*
	 * Compute the number of bytes in this request, which dk_dump
	 * has `helpfully' converted to a number of blocks for us.
	 */
	nbytes = nblk*blksize;

	/* Try to acquire a buffer to store the ciphertext. */
	buf = cgd_getdata(sc, nbytes);
	if (buf == NULL)
		/* Out of memory: give up.  */
		return ENOMEM;

	/* Encrypt the caller's data into the temporary buffer. */
	cgd_cipher(sc, buf, va, nbytes, blkno, blksize, CGD_CIPHER_ENCRYPT);

	/* Pass it on to the underlying disk device.
	 */
	error = bdev_dump(sc->sc_tdev, blkno, buf, nbytes);

	/* Release the buffer.  */
	cgd_putdata(sc, buf, nbytes);

	/* Return any error from the underlying disk device.  */
	return error;
}

/* XXX: we should probably put these into dksubr.c, mostly */

/* Raw read entry point via physio. */
static int
cgdread(dev_t dev, struct uio *uio, int flags)
{
	struct	cgd_softc *sc;
	struct	dk_softc *dksc;

	DPRINTF_FOLLOW(("cgdread(0x%llx, %p, %d)\n",
	    (unsigned long long)dev, uio, flags));
	sc = getcgd_softc(dev);
	if (sc == NULL)
		return ENXIO;
	dksc = &sc->sc_dksc;
	if (!DK_ATTACHED(dksc))
		return ENXIO;
	return physio(cgdstrategy, NULL, dev, B_READ, minphys, uio);
}

/* XXX: we should probably put these into dksubr.c, mostly */

/* Raw write entry point via physio. */
static int
cgdwrite(dev_t dev, struct uio *uio, int flags)
{
	struct	cgd_softc *sc;
	struct	dk_softc *dksc;

	DPRINTF_FOLLOW(("cgdwrite(0x%"PRIx64", %p, %d)\n", dev, uio, flags));
	sc = getcgd_softc(dev);
	if (sc == NULL)
		return ENXIO;
	dksc = &sc->sc_dksc;
	if (!DK_ATTACHED(dksc))
		return ENXIO;
	return physio(cgdstrategy, NULL, dev, B_WRITE, minphys, uio);
}

/*
 * Ioctl entry point.  The first switch validates permissions and
 * resolves the softc (CGDIOCGET works even without a configured
 * unit); the second dispatches the actual command under the
 * per-instance busy protocol.
 */
static int
cgdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct	cgd_softc *sc;
	struct	dk_softc *dksc;
	int	part = DISKPART(dev);
	int	pmask = 1 << part;
	int	error;

	DPRINTF_FOLLOW(("cgdioctl(0x%"PRIx64", %ld, %p, %d, %p)\n",
	    dev, cmd, data, flag, l));

	switch (cmd) {
	case CGDIOCGET:
		return cgd_ioctl_get(dev, data, l);
	case CGDIOCSET:
	case CGDIOCCLR:
		if ((flag & FWRITE) == 0)
			return EBADF;
		/* FALLTHROUGH */
	default:
		sc = getcgd_softc(dev);
		if (sc == NULL)
			return ENXIO;
		dksc = &sc->sc_dksc;
		break;
	}

	switch (cmd) {
	case CGDIOCSET:
		cgd_busy(sc);
		if (DK_ATTACHED(dksc))
			error = EBUSY;
		else
			error = cgd_ioctl_set(sc, data, l);
		cgd_unbusy(sc);
		break;
	case CGDIOCCLR:
		cgd_busy(sc);
		if (DK_BUSY(&sc->sc_dksc, pmask))
			error = EBUSY;
		else
			error = cgd_ioctl_clr(sc, l);
		cgd_unbusy(sc);
		break;
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		cgd_busy(sc);
		if (!DK_ATTACHED(dksc)) {
			cgd_unbusy(sc);
			error = ENOENT;
			break;
		}
		/*
		 * We pass this call down to the underlying
		 * disk.
		 */
		error = VOP_IOCTL(sc->sc_tvn, cmd, data, flag, l->l_cred);
		cgd_unbusy(sc);
		break;
	case DIOCGSECTORALIGN: {
		struct disk_sectoralign *dsa = data;

		cgd_busy(sc);
		if (!DK_ATTACHED(dksc)) {
			cgd_unbusy(sc);
			error = ENOENT;
			break;
		}

		/* Get the underlying disk's sector alignment.  */
		error = VOP_IOCTL(sc->sc_tvn, cmd, data, flag, l->l_cred);
		if (error) {
			cgd_unbusy(sc);
			break;
		}

		/* Adjust for the disklabel partition if necessary.  */
		if (part != RAW_PART) {
			struct disklabel *lp = dksc->sc_dkdev.dk_label;
			daddr_t offset = lp->d_partitions[part].p_offset;
			uint32_t r = offset % dsa->dsa_alignment;

			if (r < dsa->dsa_firstaligned)
				dsa->dsa_firstaligned = dsa->dsa_firstaligned
				    - r;
			else
				dsa->dsa_firstaligned = (dsa->dsa_firstaligned
				    + dsa->dsa_alignment) - r;
		}
		cgd_unbusy(sc);
		break;
	}
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if (!DK_ATTACHED(dksc)) {
			error = ENOENT;
			break;
		}
		/*FALLTHROUGH*/
	default:
		error = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	case CGDIOCGET:
		/* Already handled in the first switch; unreachable. */
		KASSERT(0);
		error = EINVAL;
	}

	return error;
}

/* Dump entry point; dk_dump re-enters us via cgd_dumpblocks. */
static int
cgddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct	cgd_softc *sc;

	DPRINTF_FOLLOW(("cgddump(0x%"PRIx64", %" PRId64 ", %p, %lu)\n",
	    dev, blkno, va, (unsigned long)size));
	sc = getcgd_softc(dev);
	if (sc == NULL)
		return ENXIO;
	return dk_dump(&sc->sc_dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}

/*
 * XXXrcd:
 *  for now we hardcode the maximum key length.
 */
#define MAX_KEYSIZE	1024

/*
 * Recognized IV-method names.  `v' is the cipher mode selected;
 * `d' is a divisor later applied to the blocksize for backward
 * compatibility (the blocksize used to be expressed in bits).
 * Note that plain "encblkno" historically means encblkno8.
 */
static const struct {
	const char *n;
	int v;
	int d;
} encblkno[] = {
	{ "encblkno",  CGD_CIPHER_CBC_ENCBLKNO8, 1 },
	{ "encblkno8", CGD_CIPHER_CBC_ENCBLKNO8, 1 },
	{ "encblkno1", CGD_CIPHER_CBC_ENCBLKNO1, 8 },
};

/*
 * CGDIOCSET handler: open the target device, copy in the algorithm,
 * IV method and key from userland, initialize the cipher, and attach
 * the disk.  On failure everything is unwound and the vnode closed.
 */
/* ARGSUSED */
static int
cgd_ioctl_set(struct cgd_softc *sc, void *data, struct lwp *l)
{
	struct	 cgd_ioctl *ci = data;
	struct	 vnode *vp;
	int	 ret;
	size_t	 i;
	size_t	 keybytes;			/* key length in bytes */
	const char *cp;
	struct pathbuf *pb;
	char	 *inbuf;
	struct dk_softc *dksc = &sc->sc_dksc;

	cp = ci->ci_disk;

	ret = pathbuf_copyin(ci->ci_disk, &pb);
	if (ret != 0) {
		return ret;
	}
	ret = vn_bdev_openpath(pb, &vp, l);
	pathbuf_destroy(pb);
	if (ret != 0) {
		return ret;
	}

	inbuf = kmem_alloc(MAX_KEYSIZE, KM_SLEEP);

	if ((ret = cgdinit(sc, cp, vp, l)) != 0)
		goto bail;

	(void)memset(inbuf, 0, MAX_KEYSIZE);
	ret = copyinstr(ci->ci_alg, inbuf, 256, NULL);
	if (ret)
		goto bail;
	sc->sc_cfuncs = cryptfuncs_find(inbuf);
	if (!sc->sc_cfuncs) {
		ret = EINVAL;
		goto bail;
	}

	(void)memset(inbuf, 0, MAX_KEYSIZE);
	ret = copyinstr(ci->ci_ivmethod, inbuf, MAX_KEYSIZE, NULL);
	if (ret)
		goto bail;

	for (i = 0; i < __arraycount(encblkno); i++)
		if (strcmp(encblkno[i].n, inbuf) == 0)
			break;

	if (i == __arraycount(encblkno)) {
		ret = EINVAL;
		goto bail;
	}

	keybytes = ci->ci_keylen / 8 + 1;
	if (keybytes > MAX_KEYSIZE) {
		ret = EINVAL;
		goto bail;
	}

	(void)memset(inbuf, 0, MAX_KEYSIZE);
	ret = copyin(ci->ci_key, inbuf, keybytes);
	if (ret)
		goto bail;

	sc->sc_cdata.cf_blocksize = ci->ci_blocksize;
	sc->sc_cdata.cf_mode = encblkno[i].v;

	/*
	 * Print a warning if the user selected the legacy encblkno8
	 * mistake, and reject it altogether for ciphers that it
	 * doesn't apply to.
	 */
	if (encblkno[i].v != CGD_CIPHER_CBC_ENCBLKNO1) {
		if (strcmp(sc->sc_cfuncs->cf_name, "aes-cbc") &&
		    strcmp(sc->sc_cfuncs->cf_name, "3des-cbc") &&
		    strcmp(sc->sc_cfuncs->cf_name, "blowfish-cbc")) {
			log(LOG_WARNING,
			    "cgd: %s only makes sense for cbc,"
			    " not for %s; ignoring\n",
			    encblkno[i].n, sc->sc_cfuncs->cf_name);
			sc->sc_cdata.cf_mode = CGD_CIPHER_CBC_ENCBLKNO1;
		} else {
			log(LOG_WARNING,
			    "cgd: enabling legacy encblkno8\n");
		}
	}

	sc->sc_cdata.cf_keylen = ci->ci_keylen;
	sc->sc_cdata.cf_priv = sc->sc_cfuncs->cf_init(ci->ci_keylen, inbuf,
	    &sc->sc_cdata.cf_blocksize);
	if (sc->sc_cdata.cf_blocksize > CGD_MAXBLOCKSIZE) {
		log(LOG_WARNING, "cgd: Disallowed cipher with blocksize %zu > %u\n",
		    sc->sc_cdata.cf_blocksize, CGD_MAXBLOCKSIZE);
		/*
		 * NOTE(review): cf_priv is overwritten with NULL without
		 * calling cf_destroy on it first -- looks like a leak of
		 * the cipher context on this error path; confirm against
		 * the cf_init contract in dev/cgd_crypto.c.
		 */
		sc->sc_cdata.cf_priv = NULL;
	}

	/*
	 * The blocksize is supposed to be in bytes. Unfortunately originally
	 * it was expressed in bits. For compatibility we maintain encblkno
	 * and encblkno8.
	 */
	sc->sc_cdata.cf_blocksize /= encblkno[i].d;
	(void)explicit_memset(inbuf, 0, MAX_KEYSIZE);
	if (!sc->sc_cdata.cf_priv) {
		ret = EINVAL;		/* XXX is this the right error? */
		goto bail;
	}
	kmem_free(inbuf, MAX_KEYSIZE);

	bufq_alloc(&dksc->sc_bufq, "fcfs", 0);

	sc->sc_data = kmem_alloc(MAXPHYS, KM_SLEEP);
	sc->sc_data_used = false;

	/* Attach the disk. */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);

	/* Discover wedges on this disk. */
	dkwedge_discover(&dksc->sc_dkdev);

	return 0;

bail:
	kmem_free(inbuf, MAX_KEYSIZE);
	(void)vn_close(vp, FREAD|FWRITE, l->l_cred);
	return ret;
}

/*
 * CGDIOCCLR handler: detach the disk, destroy the cipher context and
 * release all per-configuration resources.
 */
/* ARGSUSED */
static int
cgd_ioctl_clr(struct cgd_softc *sc, struct lwp *l)
{
	struct	dk_softc *dksc = &sc->sc_dksc;

	if (!DK_ATTACHED(dksc))
		return ENXIO;

	/* Delete all of our wedges. */
	dkwedge_delall(&dksc->sc_dkdev);

	/* Kill off any queued buffers.
*/ dk_drain(dksc); bufq_free(dksc->sc_bufq); (void)vn_close(sc->sc_tvn, FREAD|FWRITE, l->l_cred); sc->sc_cfuncs->cf_destroy(sc->sc_cdata.cf_priv); kmem_free(sc->sc_tpath, sc->sc_tpathlen); kmem_free(sc->sc_data, MAXPHYS); sc->sc_data_used = false; dk_detach(dksc); disk_detach(&dksc->sc_dkdev); return 0; } static int cgd_ioctl_get(dev_t dev, void *data, struct lwp *l) { struct cgd_softc *sc; struct cgd_user *cgu; int unit, error; unit = CGDUNIT(dev); cgu = (struct cgd_user *)data; DPRINTF_FOLLOW(("cgd_ioctl_get(0x%"PRIx64", %d, %p, %p)\n", dev, unit, data, l)); /* XXX, we always return this units data, so if cgu_unit is * not -1, that field doesn't match the rest */ if (cgu->cgu_unit == -1) cgu->cgu_unit = unit; if (cgu->cgu_unit < 0) return EINVAL; /* XXX: should this be ENXIO? */ error = cgd_lock(false); if (error) return error; sc = device_lookup_private(&cgd_cd, unit); if (sc == NULL || !DK_ATTACHED(&sc->sc_dksc)) { cgu->cgu_dev = 0; cgu->cgu_alg[0] = '\0'; cgu->cgu_blocksize = 0; cgu->cgu_mode = 0; cgu->cgu_keylen = 0; } else { mutex_enter(&sc->sc_lock); cgu->cgu_dev = sc->sc_tdev; strncpy(cgu->cgu_alg, sc->sc_cfuncs->cf_name, sizeof(cgu->cgu_alg)); cgu->cgu_blocksize = sc->sc_cdata.cf_blocksize; cgu->cgu_mode = sc->sc_cdata.cf_mode; cgu->cgu_keylen = sc->sc_cdata.cf_keylen; mutex_exit(&sc->sc_lock); } cgd_unlock(); return 0; } static int cgdinit(struct cgd_softc *sc, const char *cpath, struct vnode *vp, struct lwp *l) { struct disk_geom *dg; int ret; char *tmppath; uint64_t psize; unsigned secsize; struct dk_softc *dksc = &sc->sc_dksc; sc->sc_tvn = vp; sc->sc_tpath = NULL; tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); ret = copyinstr(cpath, tmppath, MAXPATHLEN, &sc->sc_tpathlen); if (ret) goto bail; sc->sc_tpath = kmem_alloc(sc->sc_tpathlen, KM_SLEEP); memcpy(sc->sc_tpath, tmppath, sc->sc_tpathlen); sc->sc_tdev = vp->v_rdev; if ((ret = getdisksize(vp, &psize, &secsize)) != 0) goto bail; if (psize == 0) { ret = ENODEV; goto bail; } /* * XXX here we should probe 
the underlying device. If we * are accessing a partition of type RAW_PART, then * we should populate our initial geometry with the * geometry that we discover from the device. */ dg = &dksc->sc_dkdev.dk_geom; memset(dg, 0, sizeof(*dg)); dg->dg_secperunit = psize; dg->dg_secsize = secsize; dg->dg_ntracks = 1; dg->dg_nsectors = 1024 * 1024 / dg->dg_secsize; dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors; bail: kmem_free(tmppath, MAXPATHLEN); if (ret && sc->sc_tpath) kmem_free(sc->sc_tpath, sc->sc_tpathlen); return ret; } /* * Our generic cipher entry point. This takes care of the * IV mode and passes off the work to the specific cipher. * We implement here the IV method ``encrypted block * number''. * * XXXrcd: for now we rely on our own crypto framework defined * in dev/cgd_crypto.c. This will change when we * get a generic kernel crypto framework. */ static void blkno2blkno_buf(char *sbuf, daddr_t blkno) { int i; /* Set up the blkno in blkno_buf, here we do not care much * about the final layout of the information as long as we * can guarantee that each sector will have a different IV * and that the endianness of the machine will not affect * the representation that we have chosen. * * We choose this representation, because it does not rely * on the size of buf (which is the blocksize of the cipher), * but allows daddr_t to grow without breaking existing * disks. * * Note that blkno2blkno_buf does not take a size as input, * and hence must be called on a pre-zeroed buffer of length * greater than or equal to sizeof(daddr_t). 
*/ for (i=0; i < sizeof(daddr_t); i++) { *sbuf++ = blkno & 0xff; blkno >>= 8; } } static struct cpu_info * cgd_cpu(struct cgd_softc *sc) { struct cgd_worker *cw = sc->sc_worker; struct cpu_info *ci = NULL; u_int cidx, i; if (cw->cw_busy == 0) { cw->cw_last = cpu_index(curcpu()); return NULL; } for (i=0, cidx = cw->cw_last+1; i<maxcpus; ++i, ++cidx) { if (cidx >= maxcpus) cidx = 0; ci = cpu_lookup(cidx); if (ci) { cw->cw_last = cidx; break; } } return ci; } static void cgd_enqueue(struct cgd_softc *sc, struct cgd_xfer *cx) { struct cgd_worker *cw = sc->sc_worker; struct cpu_info *ci; mutex_enter(&cw->cw_lock); ci = cgd_cpu(sc); cw->cw_busy++; mutex_exit(&cw->cw_lock); workqueue_enqueue(cw->cw_wq, &cx->cx_work, ci); } static void cgd_process(struct work *wk, void *arg) { struct cgd_xfer *cx = (struct cgd_xfer *)wk; struct cgd_softc *sc = cx->cx_sc; struct cgd_worker *cw = sc->sc_worker; cgd_cipher(sc, cx->cx_dstv, cx->cx_srcv, cx->cx_len, cx->cx_blkno, cx->cx_secsize, cx->cx_dir); if (cx->cx_dir == CGD_CIPHER_ENCRYPT) { cgd_diskstart2(sc, cx); } else { cgd_iodone2(sc, cx); } mutex_enter(&cw->cw_lock); if (cw->cw_busy > 0) cw->cw_busy--; mutex_exit(&cw->cw_lock); } static void cgd_cipher(struct cgd_softc *sc, void *dstv, const void *srcv, size_t len, daddr_t blkno, size_t secsize, int dir) { char *dst = dstv; const char *src = srcv; cfunc_cipher *cipher = sc->sc_cfuncs->cf_cipher; size_t blocksize = sc->sc_cdata.cf_blocksize; size_t todo; char blkno_buf[CGD_MAXBLOCKSIZE] __aligned(CGD_BLOCKALIGN); DPRINTF_FOLLOW(("cgd_cipher() dir=%d\n", dir)); if (sc->sc_cdata.cf_mode == CGD_CIPHER_CBC_ENCBLKNO8) blocksize /= 8; KASSERT(len % blocksize == 0); /* ensure that sizeof(daddr_t) <= blocksize (for encblkno IVing) */ KASSERT(sizeof(daddr_t) <= blocksize); KASSERT(blocksize <= CGD_MAXBLOCKSIZE); for (; len > 0; len -= todo) { todo = MIN(len, secsize); memset(blkno_buf, 0x0, blocksize); blkno2blkno_buf(blkno_buf, blkno); IFDEBUG(CGDB_CRYPTO, hexprint("step 1: blkno_buf", 
blkno_buf, blocksize)); /* * Handle bollocksed up encblkno8 mistake. We used to * compute the encryption of a zero block with blkno as * the CBC IV -- except in an early mistake arising * from bit/byte confusion, we actually computed the * encryption of the last of _eight_ zero blocks under * CBC as the CBC IV. * * Encrypting the block number is handled inside the * cipher dispatch now (even though in practice, both * CBC and XTS will do the same thing), so we have to * simulate the block number that would yield the same * result. So we encrypt _six_ zero blocks -- the * first one and the last one are handled inside the * cipher dispatch. */ if (sc->sc_cdata.cf_mode == CGD_CIPHER_CBC_ENCBLKNO8) { static const uint8_t zero[CGD_MAXBLOCKSIZE]; uint8_t iv[CGD_MAXBLOCKSIZE]; memcpy(iv, blkno_buf, blocksize); cipher(sc->sc_cdata.cf_priv, blkno_buf, zero, 6*blocksize, iv, CGD_CIPHER_ENCRYPT); memmove(blkno_buf, blkno_buf + 5*blocksize, blocksize); } cipher(sc->sc_cdata.cf_priv, dst, src, todo, blkno_buf, dir); dst += todo; src += todo; blkno++; } } #ifdef DEBUG static void hexprint(const char *start, void *buf, int len) { char *c = buf; KASSERTMSG(len >= 0, "hexprint: called with len < 0"); printf("%s: len=%06d 0x", start, len); while (len--) printf("%02x", (unsigned char) *c++); } #endif static void cgd_selftest(void) { struct cgd_softc sc; void *buf; for (size_t i = 0; i < __arraycount(selftests); i++) { const char *alg = selftests[i].alg; int encblkno8 = selftests[i].encblkno8; const uint8_t *key = selftests[i].key; int keylen = selftests[i].keylen; int txtlen = selftests[i].txtlen; aprint_debug("cgd: self-test %s-%d%s\n", alg, keylen, encblkno8 ? " (encblkno8)" : ""); memset(&sc, 0, sizeof(sc)); sc.sc_cfuncs = cryptfuncs_find(alg); if (sc.sc_cfuncs == NULL) panic("%s not implemented", alg); sc.sc_cdata.cf_blocksize = 8 * selftests[i].blocksize; sc.sc_cdata.cf_mode = encblkno8 ? 
CGD_CIPHER_CBC_ENCBLKNO8 : CGD_CIPHER_CBC_ENCBLKNO1; sc.sc_cdata.cf_keylen = keylen; sc.sc_cdata.cf_priv = sc.sc_cfuncs->cf_init(keylen, key, &sc.sc_cdata.cf_blocksize); if (sc.sc_cdata.cf_priv == NULL) panic("cf_priv is NULL"); if (sc.sc_cdata.cf_blocksize > CGD_MAXBLOCKSIZE) panic("bad block size %zu", sc.sc_cdata.cf_blocksize); if (!encblkno8) sc.sc_cdata.cf_blocksize /= 8; buf = kmem_alloc(txtlen, KM_SLEEP); memcpy(buf, selftests[i].ptxt, txtlen); cgd_cipher(&sc, buf, buf, txtlen, selftests[i].blkno, selftests[i].secsize, CGD_CIPHER_ENCRYPT); if (memcmp(buf, selftests[i].ctxt, txtlen) != 0) { hexdump(printf, "was", buf, txtlen); hexdump(printf, "exp", selftests[i].ctxt, txtlen); panic("cgd %s-%d encryption is broken [%zu]", selftests[i].alg, keylen, i); } cgd_cipher(&sc, buf, buf, txtlen, selftests[i].blkno, selftests[i].secsize, CGD_CIPHER_DECRYPT); if (memcmp(buf, selftests[i].ptxt, txtlen) != 0) { hexdump(printf, "was", buf, txtlen); hexdump(printf, "exp", selftests[i].ptxt, txtlen); panic("cgd %s-%d decryption is broken [%zu]", selftests[i].alg, keylen, i); } kmem_free(buf, txtlen); sc.sc_cfuncs->cf_destroy(sc.sc_cdata.cf_priv); } aprint_debug("cgd: self-tests passed\n"); } MODULE(MODULE_CLASS_DRIVER, cgd, "blowfish,des,dk_subr,bufq_fcfs"); #ifdef _MODULE CFDRIVER_DECL(cgd, DV_DISK, NULL); devmajor_t cgd_bmajor = -1, cgd_cmajor = -1; #endif static int cgd_modcmd(modcmd_t cmd, void *arg) { int error = 0; switch (cmd) { case MODULE_CMD_INIT: #ifdef _MODULE mutex_init(&cgd_spawning_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&cgd_spawning_cv, "cgspwn"); /* * Attach the {b,c}devsw's */ error = devsw_attach("cgd", &cgd_bdevsw, &cgd_bmajor, &cgd_cdevsw, &cgd_cmajor); if (error) { aprint_error("%s: unable to attach %s devsw, " "error %d", __func__, cgd_cd.cd_name, error); break; } /* * Attach to autoconf database */ error = config_cfdriver_attach(&cgd_cd); if (error) { devsw_detach(&cgd_bdevsw, &cgd_cdevsw); aprint_error("%s: unable to register cfdriver for" "%s, error 
%d\n", __func__, cgd_cd.cd_name, error); break; } error = config_cfattach_attach(cgd_cd.cd_name, &cgd_ca); if (error) { config_cfdriver_detach(&cgd_cd); devsw_detach(&cgd_bdevsw, &cgd_cdevsw); aprint_error("%s: unable to register cfattach for" "%s, error %d\n", __func__, cgd_cd.cd_name, error); break; } #endif break; case MODULE_CMD_FINI: #ifdef _MODULE /* * Remove device from autoconf database */ error = config_cfattach_detach(cgd_cd.cd_name, &cgd_ca); if (error) { aprint_error("%s: failed to detach %s cfattach, " "error %d\n", __func__, cgd_cd.cd_name, error); break; } error = config_cfdriver_detach(&cgd_cd); if (error) { (void)config_cfattach_attach(cgd_cd.cd_name, &cgd_ca); aprint_error("%s: failed to detach %s cfdriver, " "error %d\n", __func__, cgd_cd.cd_name, error); break; } /* * Remove {b,c}devsw's */ devsw_detach(&cgd_bdevsw, &cgd_cdevsw); cv_destroy(&cgd_spawning_cv); mutex_destroy(&cgd_spawning_mtx); #endif break; case MODULE_CMD_STAT: error = ENOTTY; break; default: error = ENOTTY; break; } return error; } |
/*	$NetBSD: if_media_80.c,v 1.5 2022/08/03 01:38:51 riastradh Exp $	*/

/*-
 * Copyright (c) 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997
 *	Jonathan Stone and Jason R. Thorpe.  All rights reserved.
 *
 * This software is derived from information provided by Matt Thomas.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Jonathan Stone
 *	and Jason R. Thorpe for the NetBSD Project.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_media_80.c,v 1.5 2022/08/03 01:38:51 riastradh Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/syscallargs.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/compat_stub.h>

#include <net/if.h>
#include <net/if_media.h>

#include <compat/sys/sockio.h>
#include <compat/common/compat_mod.h>

/*
 * Translate a new-format ifmedia word (*newwd) into the old (NetBSD<=8)
 * format, storing the result in *oldwd.  Subtypes beyond the old range
 * collapse to IFM_OTHER with the extended-type bits masked off.
 */
static void
ifmword_n2o(int *oldwd, int *newwd)
{

	if (IFM_SUBTYPE(*newwd) > IFM_OTHER)
		*oldwd = (*newwd & ~(_IFM_ETH_XTMASK | IFM_TMASK)) | IFM_OTHER;
	else
		*oldwd = *newwd;
}

/*
 * Pre-ioctl hook: map the old SIOC[SG]IFMEDIA_80 command codes to the
 * current ones.  For the "get" case, request post-processing so each
 * returned media word can be converted back to the old format.
 */
/*ARGSUSED*/
static int
compat_ifmediareq_pre(struct ifreq *ifr, u_long *cmd, bool *do_post)
{
	struct ifmediareq *ifmr = (struct ifmediareq *)ifr;

	switch (*cmd) {
	case SIOCSIFMEDIA_80:
		*cmd = SIOCSIFMEDIA; /* Convert to new one */
		if ((IFM_TYPE(ifr->ifr_media) == IFM_ETHER) &&
		    IFM_SUBTYPE(ifr->ifr_media) > IFM_OTHER) {
			/* Clear unused bits to not to change to wrong media */
			ifr->ifr_media &= ~_IFM_ETH_XTMASK;
		}
		return 0;
	case SIOCGIFMEDIA_80:
		*cmd = SIOCGIFMEDIA; /* Convert to new one */
		if (ifmr->ifm_count != 0) {
			/*
			 * Tell the upper layer to try to convert each ifmedia
			 * entry in the post process.
			 */
			*do_post = true;
		}
		return 0;
	default:
		return 0;
	}
}

/*
 * Post-ioctl hook for SIOCGIFMEDIA: rewrite ifm_current/ifm_active and
 * every entry of the user-supplied ifm_ulist array into the old media
 * word format before the caller sees them.
 */
/*ARGSUSED*/
static int
compat_ifmediareq_post(struct ifreq *ifr, u_long cmd)
{
	struct ifmediareq *ifmr = (struct ifmediareq *)ifr;
	size_t minwords;
	size_t count;
	int error, *kptr;

	switch (cmd) {
	case SIOCSIFMEDIA:
		return 0;
	case SIOCGIFMEDIA:
		if (ifmr->ifm_count < 0)
			return EINVAL;

		/*
		 * ifmr->ifm_count was already adjusted in ifmedia_ioctl(), so
		 * there is no problem to trust ifm_count.
		 */
		minwords = ifmr->ifm_count;
		kptr = malloc(minwords * sizeof(*kptr), M_TEMP,
		    M_WAITOK|M_ZERO);
		/* NOTE(review): with M_WAITOK malloc does not return NULL;
		 * this check is belt-and-braces only. */
		if (kptr == NULL)
			return ENOMEM;

		/*
		 * Convert ifm_current and ifm_active.
		 * It's not required to convert ifm_mask.
		 */
		ifmword_n2o(&ifmr->ifm_current, &ifmr->ifm_current);
		ifmword_n2o(&ifmr->ifm_active, &ifmr->ifm_active);

		/* Convert ifm_ulist array */
		for (count = 0; count < minwords; count++) {
			int oldmwd;

			/* ifm_ulist points into userland: fetch safely. */
			error = ufetch_int(&ifmr->ifm_ulist[count], &oldmwd);
			if (error != 0)
				goto out;
			ifmword_n2o(&kptr[count], &oldmwd);
		}

		/* Copy to userland in old format */
		error = copyout(kptr, ifmr->ifm_ulist,
		    minwords * sizeof(*kptr));
out:
		free(kptr, M_TEMP);
		return error;
	default:
		return 0;
	}
}

/* Install the compat hooks used by the ifmedia ioctl path. */
void
ifmedia_80_init(void)
{

	MODULE_HOOK_SET(ifmedia_80_pre_hook, compat_ifmediareq_pre);
	MODULE_HOOK_SET(ifmedia_80_post_hook, compat_ifmediareq_post);
}

/* Remove the compat hooks installed by ifmedia_80_init(). */
void
ifmedia_80_fini(void)
{

	MODULE_HOOK_UNSET(ifmedia_80_post_hook);
	MODULE_HOOK_UNSET(ifmedia_80_pre_hook);
}
| 3 2 4 2 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 | /* $NetBSD: umap_vfsops.c,v 1.103 2020/04/13 19:23:19 ad Exp $ */ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * the UCLA Ficus project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)null_vfsops.c 1.5 (Berkeley) 7/10/92 * @(#)umap_vfsops.c 8.8 (Berkeley) 5/14/95 */ /* * Umap Layer * (See mount_umap(8) for a description of this layer.) 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: umap_vfsops.c,v 1.103 2020/04/13 19:23:19 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/proc.h> #include <sys/time.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/namei.h> #include <sys/syslog.h> #include <sys/kauth.h> #include <sys/module.h> #include <miscfs/umapfs/umap.h> #include <miscfs/genfs/layer_extern.h> MODULE(MODULE_CLASS_VFS, umap, "layerfs"); VFS_PROTOS(umapfs); /* * Mount umap layer */ int umapfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) { struct lwp *l = curlwp; struct pathbuf *pb; struct nameidata nd; struct umap_args *args = data; struct vnode *lowerrootvp, *vp; struct umap_mount *amp; int error; #ifdef UMAPFS_DIAGNOSTIC int i; #endif fsid_t tfsid; if (args == NULL) return EINVAL; if (*data_len < sizeof *args) { #ifdef UMAPFS_DIAGNOSTIC printf("mount_umap: data len %d < args %d\n", (int)*data_len, (int)(sizeof *args)); #endif return EINVAL; } if (mp->mnt_flag & MNT_GETARGS) { amp = MOUNTTOUMAPMOUNT(mp); if (amp == NULL) return EIO; args->la.target = NULL; args->nentries = amp->info_nentries; args->gnentries = amp->info_gnentries; *data_len = sizeof *args; return 0; } /* only for root */ error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, KAUTH_REQ_SYSTEM_MOUNT_UMAP, NULL, NULL, NULL); if (error) return error; #ifdef UMAPFS_DIAGNOSTIC printf("umapfs_mount(mp = %p)\n", mp); #endif /* * Update is not supported */ if (mp->mnt_flag & MNT_UPDATE) return EOPNOTSUPP; /* * Find lower node */ error = pathbuf_copyin(args->umap_target, &pb); if (error) { return error; } NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, pb); if ((error = namei(&nd)) != 0) { pathbuf_destroy(pb); return error; } /* * Sanity check on lower vnode */ lowerrootvp = nd.ni_vp; pathbuf_destroy(pb); #ifdef UMAPFS_DIAGNOSTIC printf("vp = %p, check for VDIR...\n", lowerrootvp); #endif if (lowerrootvp->v_type != VDIR) { vput(lowerrootvp); return 
(EINVAL); } #ifdef UMAPFS_DIAGNOSTIC printf("mp = %p\n", mp); #endif amp = kmem_zalloc(sizeof(struct umap_mount), KM_SLEEP); mp->mnt_data = amp; /* * Now copy in the number of entries and maps for umap mapping. */ if (args->nentries < 0 || args->nentries > MAPFILEENTRIES || args->gnentries < 0 || args->gnentries > GMAPFILEENTRIES) { vput(lowerrootvp); return (EINVAL); } amp->info_nentries = args->nentries; amp->info_gnentries = args->gnentries; error = copyin(args->mapdata, amp->info_mapdata, 2*sizeof(u_long)*args->nentries); if (error) { vput(lowerrootvp); return (error); } #ifdef UMAPFS_DIAGNOSTIC printf("umap_mount:nentries %d\n",args->nentries); for (i = 0; i < args->nentries; i++) printf(" %ld maps to %ld\n", amp->info_mapdata[i][0], amp->info_mapdata[i][1]); #endif error = copyin(args->gmapdata, amp->info_gmapdata, 2*sizeof(u_long)*args->gnentries); if (error) { vput(lowerrootvp); return (error); } #ifdef UMAPFS_DIAGNOSTIC printf("umap_mount:gnentries %d\n",args->gnentries); for (i = 0; i < args->gnentries; i++) printf("\tgroup %ld maps to %ld\n", amp->info_gmapdata[i][0], amp->info_gmapdata[i][1]); #endif /* * Make sure the mount point's sufficiently initialized * that the node create call will work. 
*/ tfsid.__fsid_val[0] = (int32_t)args->fsid; tfsid.__fsid_val[1] = makefstype(MOUNT_UMAP); if (tfsid.__fsid_val[0] == 0) { log(LOG_WARNING, "umapfs: fsid given as 0, ignoring\n"); vfs_getnewfsid(mp); } else if (vfs_getvfs(&tfsid)) { log(LOG_WARNING, "umapfs: fsid %x already mounted\n", tfsid.__fsid_val[0]); vfs_getnewfsid(mp); } else { mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; mp->mnt_stat.f_fsid = tfsid.__fsid_val[0]; } log(LOG_DEBUG, "umapfs: using fsid %x/%x\n", mp->mnt_stat.f_fsidx.__fsid_val[0], mp->mnt_stat.f_fsidx.__fsid_val[1]); mp->mnt_lower = lowerrootvp->v_mount; amp->umapm_size = sizeof(struct umap_node); amp->umapm_tag = VT_UMAP; amp->umapm_bypass = umap_bypass; amp->umapm_vnodeop_p = umap_vnodeop_p; /* * fix up umap node for root vnode. */ VOP_UNLOCK(lowerrootvp); error = layer_node_create(mp, lowerrootvp, &vp); /* * Make sure the node alias worked */ if (error) { vrele(lowerrootvp); kmem_free(amp, sizeof(struct umap_mount)); return error; } /* * Keep a held reference to the root vnode. * It is vrele'd in umapfs_unmount. 
*/ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_ROOT; amp->umapm_rootvp = vp; VOP_UNLOCK(vp); error = set_statvfs_info(path, UIO_USERSPACE, args->umap_target, UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); if (error) return error; if (mp->mnt_lower->mnt_flag & MNT_LOCAL) mp->mnt_flag |= MNT_LOCAL; #ifdef UMAPFS_DIAGNOSTIC printf("umapfs_mount: lower %s, alias at %s\n", mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); #endif return 0; } /* * Free reference to umap layer */ int umapfs_unmount(struct mount *mp, int mntflags) { struct umap_mount *amp = MOUNTTOUMAPMOUNT(mp); struct vnode *rtvp = amp->umapm_rootvp; int error; int flags = 0; #ifdef UMAPFS_DIAGNOSTIC printf("umapfs_unmount(mp = %p)\n", mp); #endif if (mntflags & MNT_FORCE) flags |= FORCECLOSE; if (vrefcnt(rtvp) > 1 && (mntflags & MNT_FORCE) == 0) return (EBUSY); if ((error = vflush(mp, rtvp, flags)) != 0) return (error); #ifdef UMAPFS_DIAGNOSTIC vprint("alias root of lower", rtvp); #endif /* * Blow it away for future re-use */ vgone(rtvp); /* * Finally, throw away the umap_mount structure */ kmem_free(amp, sizeof(struct umap_mount)); mp->mnt_data = NULL; return 0; } extern const struct vnodeopv_desc umapfs_vnodeop_opv_desc; const struct vnodeopv_desc * const umapfs_vnodeopv_descs[] = { &umapfs_vnodeop_opv_desc, NULL, }; struct vfsops umapfs_vfsops = { .vfs_name = MOUNT_UMAP, .vfs_min_mount_data = sizeof (struct umap_args), .vfs_mount = umapfs_mount, .vfs_start = layerfs_start, .vfs_unmount = umapfs_unmount, .vfs_root = layerfs_root, .vfs_quotactl = layerfs_quotactl, .vfs_statvfs = layerfs_statvfs, .vfs_sync = layerfs_sync, .vfs_loadvnode = layerfs_loadvnode, .vfs_vget = layerfs_vget, .vfs_fhtovp = layerfs_fhtovp, .vfs_vptofh = layerfs_vptofh, .vfs_init = layerfs_init, .vfs_done = layerfs_done, .vfs_snapshot = layerfs_snapshot, .vfs_extattrctl = vfs_stdextattrctl, .vfs_suspendctl = layerfs_suspendctl, .vfs_renamelock_enter = layerfs_renamelock_enter, .vfs_renamelock_exit = 
layerfs_renamelock_exit, .vfs_fsync = (void *)eopnotsupp, .vfs_opv_descs = umapfs_vnodeopv_descs }; SYSCTL_SETUP(umapfs_sysctl_setup, "umapfs sysctl") { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "umap", SYSCTL_DESCR("UID/GID remapping file system"), NULL, 0, NULL, 0, CTL_VFS, 10, CTL_EOL); /* * XXX the "10" above could be dynamic, thereby eliminating * one more instance of the "number to vfs" mapping problem, * but "10" is the order as taken from sys/mount.h */ } static int umap_modcmd(modcmd_t cmd, void *arg) { int error; switch (cmd) { case MODULE_CMD_INIT: error = vfs_attach(&umapfs_vfsops); if (error != 0) break; break; case MODULE_CMD_FINI: error = vfs_detach(&umapfs_vfsops); if (error != 0) break; break; default: error = ENOTTY; break; } return (error); } |
| 3298 3300 3298 3302 3301 3304 3300 3300 247 1761 1766 1752 1754 1739 1740 222 222 221 203 205 205 205 203 2111 936 936 937 1680 107 1631 1684 1682 1670 294 234 1671 108 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 | /* $NetBSD: kern_lock.c,v 1.178 2022/08/20 23:37:12 riastradh Exp $ */ /*- * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center, and by Andrew Doran. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.178 2022/08/20 23:37:12 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_lockdebug.h" #endif #include <sys/param.h> #include <sys/proc.h> #include <sys/lock.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/lockdebug.h> #include <sys/cpu.h> #include <sys/syslog.h> #include <sys/atomic.h> #include <sys/lwp.h> #include <sys/pserialize.h> #if defined(DIAGNOSTIC) && !defined(LOCKDEBUG) #include <sys/ksyms.h> #endif #include <machine/lock.h> #include <dev/lockstat.h> #define RETURN_ADDRESS (uintptr_t)__builtin_return_address(0) bool kernel_lock_dodebug; __cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)] __cacheline_aligned; void assert_sleepable(void) { const char *reason; uint64_t pctr; bool idle; if (panicstr != NULL) { return; } LOCKDEBUG_BARRIER(kernel_lock, 1); /* * Avoid disabling/re-enabling preemption here since this * routine may be called in delicate situations. */ do { pctr = lwp_pctr(); __insn_barrier(); idle = CURCPU_IDLE_P(); __insn_barrier(); } while (pctr != lwp_pctr()); reason = NULL; if (idle && !cold) { reason = "idle"; } if (cpu_intr_p()) { reason = "interrupt"; } if (cpu_softintr_p()) { reason = "softint"; } if (!pserialize_not_in_read_section()) { reason = "pserialize"; } if (reason) { panic("%s: %s caller=%p", __func__, reason, (void *)RETURN_ADDRESS); } } /* * Functions for manipulating the kernel_lock. We put them here * so that they show up in profiles. 
*/

/* Abort through the lockdebug machinery, identifying the kernel lock. */
#define	_KERNEL_LOCK_ABORT(msg)						\
    LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)

/* Assertion that reports failures via _KERNEL_LOCK_ABORT under LOCKDEBUG. */
#ifdef LOCKDEBUG
#define	_KERNEL_LOCK_ASSERT(cond)					\
do {									\
	if (!(cond))							\
		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
#else
#define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
#endif

static void	_kernel_lock_dump(const volatile void *, lockop_printer_t);

/* lockdebug operations vector for the kernel lock (a spin lock). */
lockops_t _kernel_lock_ops = {
	.lo_name = "Kernel lock",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = _kernel_lock_dump,
};

#ifdef LOCKDEBUG

#include <ddb/ddb.h>

/*
 * kernel_lock_trace_ipi:
 *
 *	IPI handler run on the CPU suspected of hogging the kernel lock;
 *	prints the LWP running there and a DDB stack trace.
 */
static void
kernel_lock_trace_ipi(void *cookie)
{

	printf("%s[%d %s]: hogging kernel lock\n", cpu_name(curcpu()),
	    curlwp->l_lid,
	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
	db_stacktrace();
}
#endif

/*
 * Initialize the kernel lock.
 */
void
kernel_lock_init(void)
{

	__cpu_simple_lock_init(kernel_lock);
	kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
	    RETURN_ADDRESS);
}
/* The padded lock array must hold at least one __cpu_simple_lock_t. */
CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));

/*
 * Print debugging information about the kernel lock.
 */
static void
_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
{
	struct cpu_info *ci = curcpu();

	(void)junk;

	pr("curcpu holds : %18d wanted by: %#018lx\n",
	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
}

/*
 * Acquire 'nlocks' holds on the kernel lock.
 *
 * Although it may not look it, this is one of the most central, intricate
 * routines in the kernel, and tons of code elsewhere depends on its exact
 * behaviour.  If you change something in here, expect it to bite you in the
 * rear.
*/
void
_kernel_lock(int nlocks)
{
	struct cpu_info *ci;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
	struct lwp *owant;
#ifdef LOCKDEBUG
	static struct cpu_info *kernel_lock_holder;
	u_int spins = 0;
#endif
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks > 0);

	s = splvm();
	ci = curcpu();

	/* Recursive acquisition: just bump the per-CPU/per-LWP counts. */
	if (ci->ci_biglock_count != 0) {
		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
		ci->ci_biglock_count += nlocks;
		l->l_blcnt += nlocks;
		splx(s);
		return;
	}

	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
	    0);

	/* Fast path: uncontended acquisition. */
	if (__predict_true(__cpu_simple_lock_try(kernel_lock))) {
#ifdef LOCKDEBUG
		kernel_lock_holder = curcpu();
#endif
		ci->ci_biglock_count = nlocks;
		l->l_blcnt = nlocks;
		LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
		    RETURN_ADDRESS, 0);
		splx(s);
		return;
	}

	/*
	 * To remove the ordering constraint between adaptive mutexes
	 * and kernel_lock we must make it appear as if this thread is
	 * blocking.  For non-interlocked mutex release, a store fence
	 * is required to ensure that the result of any mutex_exit()
	 * by the current LWP becomes visible on the bus before the set
	 * of ci->ci_biglock_wanted becomes visible.
	 */
	membar_producer();
	owant = ci->ci_biglock_wanted;
	ci->ci_biglock_wanted = l;
#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
	l->l_ld_wanted = __builtin_return_address(0);
#endif

	/*
	 * Spin until we acquire the lock.  Once we have it, record the
	 * time spent with lockstat.
	 */
	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);

	do {
		/* Drop the IPL while spinning so interrupts can run. */
		splx(s);
		while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
#ifdef LOCKDEBUG
			extern int start_init_exec;
			if (SPINLOCK_SPINOUT(spins) && start_init_exec) {
				/*
				 * Spun out: ask the holder's CPU to print
				 * a stack trace, then abort.
				 */
				ipi_msg_t msg = {
					.func = kernel_lock_trace_ipi,
				};
				kpreempt_disable();
				ipi_unicast(&msg, kernel_lock_holder);
				ipi_wait(&msg);
				kpreempt_enable();
				_KERNEL_LOCK_ABORT("spinout");
			}
			SPINLOCK_BACKOFF_HOOK;
			SPINLOCK_SPIN_HOOK;
#endif
		}
		s = splvm();
	} while (!__cpu_simple_lock_try(kernel_lock));

	ci->ci_biglock_count = nlocks;
	l->l_blcnt = nlocks;
	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
	    RETURN_ADDRESS, 0);
	if (owant == NULL) {
		LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
		    LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
	}
	LOCKSTAT_EXIT(lsflag);
	splx(s);

	/*
	 * Now that we have kernel_lock, reset ci_biglock_wanted.  This
	 * store must be unbuffered (immediately visible on the bus) in
	 * order for non-interlocked mutex release to work correctly.
	 * It must be visible before a mutex_exit() can execute on this
	 * processor.
	 *
	 * Note: only where CAS is available in hardware will this be
	 * an unbuffered write, but non-interlocked release cannot be
	 * done on CPUs without CAS in hardware.
	 */
	(void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);

	/*
	 * Issue a memory barrier as we have acquired a lock.  This also
	 * prevents stores from a following mutex_exit() being reordered
	 * to occur before our store to ci_biglock_wanted above.
	 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif

#ifdef LOCKDEBUG
	kernel_lock_holder = curcpu();
#endif
}

/*
 * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero, release
 * all holds.
*/
void
_kernel_unlock(int nlocks, int *countp)
{
	struct cpu_info *ci;
	u_int olocks;
	int s;
	struct lwp *l = curlwp;

	/*
	 * nlocks semantics (per the comment above): 0 releases all holds,
	 * -1 releases exactly one (the caller must then hold exactly one),
	 * and 1 releases one hold.  Values >= 2 are disallowed.
	 */
	_KERNEL_LOCK_ASSERT(nlocks < 2);

	olocks = l->l_blcnt;

	if (olocks == 0) {
		/* Nothing held; only the "release all/none" forms are ok. */
		_KERNEL_LOCK_ASSERT(nlocks <= 0);
		if (countp != NULL)
			*countp = 0;
		return;
	}

	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));

	if (nlocks == 0)
		nlocks = olocks;
	else if (nlocks == -1) {
		nlocks = 1;
		_KERNEL_LOCK_ASSERT(olocks == 1);
	}
	s = splvm();
	ci = curcpu();
	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
	if (ci->ci_biglock_count == nlocks) {
		/* Dropping the last hold: actually release the spin lock. */
		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
		    RETURN_ADDRESS, 0);
		ci->ci_biglock_count = 0;
		__cpu_simple_unlock(kernel_lock);
		l->l_blcnt -= nlocks;
		splx(s);
		/* A deferred preemption may now be serviced. */
		if (l->l_dopreempt)
			kpreempt(0);
	} else {
		ci->ci_biglock_count -= nlocks;
		l->l_blcnt -= nlocks;
		splx(s);
	}

	/* Report the number of holds the LWP had on entry, if requested. */
	if (countp != NULL)
		*countp = olocks;
}

/*
 * Return true if the kernel lock is currently held by anyone.
 */
bool
_kernel_locked_p(void)
{
	return __SIMPLELOCK_LOCKED_P(kernel_lock);
}
| 4 4 4 4 4 1 440 440 440 438 437 438 438 438 422 438 438 438 438 438 438 436 438 438 438 438 433 4 4 1 4 6 6 2 1 39 29 38 39 1 1 2 4 3 3 2 2 1 9 2 1 1 1 1 1 1 4 4 3 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 2 1 1 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 
1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 | /* $NetBSD: sd.c,v 1.334 2022/03/28 12:39:46 riastradh Exp $ */ /*- * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Originally written by Julian Elischer (julian@dialix.oz.au) * for TRW Financial Systems for use under the MACH(2.5) operating system. * * TRW Financial Systems, in accordance with their agreement with Carnegie * Mellon University, makes this software available to CMU to distribute * or use in any manner that they see fit as long as this message is kept with * the software. For this reason TFS also grants any other persons or * organisations permission to use or modify this software. * * TFS supplies this software to be publicly redistributed * on the understanding that TFS is not responsible for the correct * functioning of this software in any circumstances. 
*
 * Ported to run under 386BSD by Julian Elischer (julian@dialix.oz.au) Sept 1992
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.334 2022/03/28 12:39:46 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_scsi.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/scsiio.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>

#include <dev/scsipi/scsi_spc.h>
#include <dev/scsipi/scsipi_all.h>
#include <dev/scsipi/scsi_all.h>
#include <dev/scsipi/scsipi_disk.h>
#include <dev/scsipi/scsi_disk.h>
#include <dev/scsipi/scsiconf.h>
#include <dev/scsipi/scsipi_base.h>
#include <dev/scsipi/sdvar.h>

#include <prop/proplib.h>

/* dev_t <-> unit/partition helpers, in terms of the generic disk macros. */
#define	SDUNIT(dev)			DISKUNIT(dev)
#define	SDPART(dev)			DISKPART(dev)
#define	SDMINOR(unit, part)		DISKMINOR(unit, part)
#define	MAKESDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)

/* The raw-partition device used for label access. */
#define	SDLABELDEV(dev)	(MAKESDDEV(major(dev), SDUNIT(dev), RAW_PART))

#define	SD_DEFAULT_BLKSIZE	512

static void	sdminphys(struct buf *);
static void	sdstart(struct scsipi_periph *);
static void	sdrestart(void *);
static void	sddone(struct scsipi_xfer *, int);
static bool	sd_suspend(device_t, const pmf_qual_t *);
static bool	sd_shutdown(device_t, int);
static int	sd_interpret_sense(struct scsipi_xfer *);
static int	sd_diskstart(device_t, struct buf *);
static int	sd_dumpblocks(device_t, void *, daddr_t, int);
static void	sd_iosize(device_t, int *);
static int	sd_lastclose(device_t);
static int	sd_firstopen(device_t, dev_t, int, int);
static void	sd_label(device_t, struct disklabel *);

static int	sd_mode_sense(struct sd_softc *, u_int8_t, void *, size_t,
		    int, int, int *);
static int	sd_mode_select(struct sd_softc *, u_int8_t, void *, size_t,
		    int, int);
static int	sd_validate_blksize(struct scsipi_periph *, int);
static u_int64_t sd_read_capacity(struct scsipi_periph *, int *, int flags);
static int	sd_get_simplifiedparms(struct sd_softc *, struct disk_parms *,
		    int);
static int	sd_get_capacity(struct sd_softc *, struct disk_parms *, int);
static int	sd_get_parms(struct sd_softc *, struct disk_parms *, int);
static int	sd_get_parms_page4(struct sd_softc *, struct disk_parms *,
		    int);
static int	sd_get_parms_page5(struct sd_softc *, struct disk_parms *,
		    int);
static int	sd_flush(struct sd_softc *, int);
static int	sd_getcache(struct sd_softc *, int *);
static int	sd_setcache(struct sd_softc *, int);

static int	sdmatch(device_t, cfdata_t, void *);
static void	sdattach(device_t, device_t, void *);
static int	sddetach(device_t, int);
static void	sd_set_geometry(struct sd_softc *);

CFATTACH_DECL3_NEW(sd, sizeof(struct sd_softc), sdmatch, sdattach, sddetach,
    NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

extern struct cfdriver sd_cd;

/*
 * Inquiry patterns we attach to: any direct-access, optical, or
 * simplified direct-access device, fixed or removable; vendor,
 * product, and revision are wildcards.
 */
static const struct scsipi_inquiry_pattern sd_patterns[] = {
	{T_DIRECT, T_FIXED, "", "", ""},
	{T_DIRECT, T_REMOV, "", "", ""},
	{T_OPTICAL, T_FIXED, "", "", ""},
	{T_OPTICAL, T_REMOV, "", "", ""},
	{T_SIMPLE_DIRECT, T_FIXED, "", "", ""},
	{T_SIMPLE_DIRECT, T_REMOV, "", "", ""},
};

static dev_type_open(sdopen);
static dev_type_close(sdclose);
static dev_type_read(sdread);
static dev_type_write(sdwrite);
static dev_type_ioctl(sdioctl);
static dev_type_strategy(sdstrategy);
static dev_type_dump(sddump);
static dev_type_size(sdsize);

/* Block-device switch entry. */
const struct bdevsw sd_bdevsw = {
	.d_open = sdopen,
	.d_close = sdclose,
	.d_strategy = sdstrategy,
	.d_ioctl = sdioctl,
	.d_dump = sddump,
	.d_psize = sdsize,
	.d_discard = nodiscard,
	.d_cfdriver = &sd_cd,
	.d_devtounit = disklabel_dev_unit,
	.d_flag = D_DISK | D_MPSAFE
};

/* Character-device switch entry. */
const struct cdevsw sd_cdevsw = {
	.d_open = sdopen,
	.d_close = sdclose,
	.d_read = sdread,
	.d_write = sdwrite,
	.d_ioctl = sdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_cfdriver = &sd_cd,
	.d_devtounit = disklabel_dev_unit,
	.d_flag = D_DISK | D_MPSAFE
};

/* Hooks handed to the generic dk(9)/disk(9) framework. */
static const struct dkdriver sddkdriver = {
	.d_open = sdopen,
	.d_close = sdclose,
	.d_strategy = sdstrategy,
	.d_minphys = sdminphys,
	.d_diskstart = sd_diskstart,
	.d_dumpblocks = sd_dumpblocks,
	.d_iosize = sd_iosize,
	.d_firstopen = sd_firstopen,
	.d_lastclose = sd_lastclose,
	.d_label = sd_label,
};

/* Callbacks registered with the scsipi mid-layer for this periph. */
static const struct scsipi_periphsw sd_switch = {
	sd_interpret_sense,	/* check our error handler first */
	sdstart,		/* have a queue, served by this */
	NULL,			/* have no async handler */
	sddone,			/* deal with stats at interrupt time */
};

struct sd_mode_sense_data {
	/*
	 * XXX
	 * We are not going to parse this as-is -- it just has to be large
	 * enough.
	 */
	union {
		struct scsi_mode_parameter_header_6 small;
		struct scsi_mode_parameter_header_10 big;
	} header;
	struct scsi_general_block_descriptor blk_desc;
	union scsi_disk_pages pages;
};

/*
 * The routine called by the low level scsi routine when it discovers
 * A device suitable for this driver
 */
static int
sdmatch(device_t parent, cfdata_t match, void *aux)
{
	struct scsipibus_attach_args *sa = aux;
	int priority;

	/* Match the inquiry data against sd_patterns; priority is out. */
	(void)scsipi_inqmatch(&sa->sa_inqbuf,
	    sd_patterns, sizeof(sd_patterns) / sizeof(sd_patterns[0]),
	    sizeof(sd_patterns[0]), &priority);

	return (priority);
}

/*
 * Attach routine common to atapi & scsi.
*/
static void
sdattach(device_t parent, device_t self, void *aux)
{
	struct sd_softc *sd = device_private(self);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipibus_attach_args *sa = aux;
	struct scsipi_periph *periph = sa->sa_periph;
	int error, result, dtype;
	struct disk_parms *dp = &sd->params;
	char pbuf[9];

	SC_DEBUG(periph, SCSIPI_DB2, ("sdattach: "));

	/* Record device type and product name from the inquiry data. */
	sd->type = (sa->sa_inqbuf.type & SID_TYPE);
	strncpy(sd->name, sa->sa_inqbuf.product, sizeof(sd->name));
	strncpy(sd->typename, sa->sa_inqbuf.product, sizeof(sd->typename));

	if (sd->type == T_SIMPLE_DIRECT)
		periph->periph_quirks |= PQUIRK_ONLYBIG | PQUIRK_NOBIGMODESENSE;

	switch (SCSIPI_BUSTYPE_TYPE(scsipi_periph_bustype(sa->sa_periph))) {
	case SCSIPI_BUSTYPE_SCSI:
		dtype = DKTYPE_SCSI;
		if (periph->periph_version == 0)
			sd->flags |= SDF_ANCIENT;
		break;
	case SCSIPI_BUSTYPE_ATAPI:
		dtype = DKTYPE_ATAPI;
		break;
	default:
		dtype = DKTYPE_UNKNOWN;
		break;
	}

	/* Initialize dk and disk structure. */
	dk_init(dksc, self, dtype);
	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &sddkdriver);

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);

	bufq_alloc(&dksc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

	/* Used by sd_diskstart() to retry when xfer allocation fails. */
	callout_init(&sd->sc_callout, 0);

	/*
	 * Store information needed to contact our base driver
	 */
	sd->sc_periph = periph;

	periph->periph_dev = dksc->sc_dev;
	periph->periph_switch = &sd_switch;

	/*
	 * Increase our openings to the maximum-per-periph
	 * supported by the adapter.  This will either be
	 * clamped down or grown by the adapter if necessary.
	 */
	periph->periph_openings =
	    SCSIPI_CHAN_MAX_PERIPH(periph->periph_channel);
	periph->periph_flags |= PERIPH_GROW_OPENINGS;

	/*
	 * Use the subdriver to request information regarding the drive.
	 */
	aprint_naive("\n");
	aprint_normal("\n");

	if (periph->periph_quirks & PQUIRK_START)
		(void)scsipi_start(periph, SSS_START, XS_CTL_SILENT);

	error = scsipi_test_unit_ready(periph,
	    XS_CTL_DISCOVERY | XS_CTL_IGNORE_ILLEGAL_REQUEST |
	    XS_CTL_IGNORE_MEDIA_CHANGE | XS_CTL_SILENT_NODEV);

	if (error)
		result = SDGP_RESULT_OFFLINE;
	else
		result = sd_get_parms(sd, &sd->params, XS_CTL_DISCOVERY);

	/* Announce the geometry (or lack thereof) on the console. */
	aprint_normal_dev(dksc->sc_dev, "");
	switch (result) {
	case SDGP_RESULT_OK:
		format_bytes(pbuf, sizeof(pbuf),
		    (u_int64_t)dp->disksize * dp->blksize);
		aprint_normal(
		"%s, %ld cyl, %ld head, %ld sec, %ld bytes/sect x %llu sectors",
		    pbuf, dp->cyls, dp->heads, dp->sectors, dp->blksize,
		    (unsigned long long)dp->disksize);
		break;
	case SDGP_RESULT_OFFLINE:
		aprint_normal("drive offline");
		break;
	case SDGP_RESULT_UNFORMATTED:
		aprint_normal("unformatted media");
		break;
#ifdef DIAGNOSTIC
	default:
		panic("sdattach: unknown result from get_parms");
		break;
#endif
	}
	aprint_normal("\n");

	/* Discover wedges on this disk. */
	dkwedge_discover(&dksc->sc_dkdev);

	/*
	 * Establish a shutdown hook so that we can ensure that
	 * our data has actually made it onto the platter at
	 * shutdown time.  Note that this relies on the fact
	 * that the shutdown hooks at the "leaves" of the device tree
	 * are run, first (thus guaranteeing that our hook runs before
	 * our ancestors').
	 */
	if (!pmf_device_register1(self, sd_suspend, NULL, sd_shutdown))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

/*
 * Detach the device: tear down vnodes, pending I/O, wedges, and the
 * dk/disk state in the reverse order of sdattach().
 */
static int
sddetach(device_t self, int flags)
{
	struct sd_softc *sd = device_private(self);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipi_periph *periph = sd->sc_periph;
	struct scsipi_channel *chan = periph->periph_channel;
	int bmaj, cmaj, i, mn, rc;

	if ((rc = disk_begindetach(&dksc->sc_dkdev, sd_lastclose, self,
	    flags)) != 0)
		return rc;

	/* locate the major number */
	bmaj = bdevsw_lookup_major(&sd_bdevsw);
	cmaj = cdevsw_lookup_major(&sd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = SDMINOR(device_unit(self), i);
		vdevgone(bmaj, mn, mn, VBLK);
		vdevgone(cmaj, mn, mn, VCHR);
	}

	/* kill any pending restart */
	callout_halt(&sd->sc_callout, NULL);

	dk_drain(dksc);

	/* Kill off any pending commands. */
	mutex_enter(chan_mtx(chan));
	scsipi_kill_pending(periph);
	mutex_exit(chan_mtx(chan));

	bufq_free(dksc->sc_bufq);

	/* Delete all of our wedges. */
	dkwedge_delall(&dksc->sc_dkdev);

	/* Detach from the disk list. */
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);

	dk_detach(dksc);

	callout_destroy(&sd->sc_callout);

	pmf_device_deregister(self);

	return (0);
}

/*
 * Serialized by caller
 *
 * First open of the device: take an adapter reference, verify that the
 * unit responds, spin it up and lock removable media in, then load the
 * device parameters if no media was previously loaded.
 */
static int
sd_firstopen(device_t self, dev_t dev, int flag, int fmt)
{
	struct sd_softc *sd = device_private(self);
	struct scsipi_periph *periph = sd->sc_periph;
	struct scsipi_adapter *adapt = periph->periph_channel->chan_adapter;
	int error, silent;
	int part, removable;

	part = SDPART(dev);

	error = scsipi_adapter_addref(adapt);
	if (error)
		return error;

	/* Raw-character opens (and FSILENT opens) suppress error noise. */
	if ((part == RAW_PART && fmt == S_IFCHR) || (flag & FSILENT))
		silent = XS_CTL_SILENT;
	else
		silent = 0;

	/* Check that it is still responding and ok. */
	error = scsipi_test_unit_ready(periph,
	    XS_CTL_IGNORE_ILLEGAL_REQUEST | XS_CTL_IGNORE_MEDIA_CHANGE |
	    silent);

	/*
	 * Start the pack spinning if necessary.  Always allow the
	 * raw partition to be opened, for raw IOCTLs.  Data transfers
	 * will check for SDEV_MEDIA_LOADED.
	 */
	if (error == EIO) {
		error = scsipi_start(periph, SSS_START, silent);
		if (error == EINVAL)
			error = EIO;
	}
	if (error)
		goto bad;

	removable = (periph->periph_flags & PERIPH_REMOVABLE) != 0;
	if (removable) {
		/* Lock the pack in. */
		error = scsipi_prevent(periph, SPAMR_PREVENT_DT,
		    XS_CTL_IGNORE_ILLEGAL_REQUEST |
		    XS_CTL_IGNORE_MEDIA_CHANGE |
		    XS_CTL_SILENT);
		if (error)
			goto bad;
	}

	if ((periph->periph_flags & PERIPH_MEDIA_LOADED) == 0) {
		int param_error;

		/*
		 * Load the physical device parameters.
		 *
		 * Note that if media is present but unformatted,
		 * we allow the open (so that it can be formatted!).
		 * The drive should refuse real I/O, if the media is
		 * unformatted.
		 */
		param_error = sd_get_parms(sd, &sd->params, 0);
		if (param_error == SDGP_RESULT_OFFLINE) {
			error = ENXIO;
			goto bad2;
		}
		periph->periph_flags |= PERIPH_MEDIA_LOADED;
		SC_DEBUG(periph, SCSIPI_DB3, ("Params loaded "));
	}

	periph->periph_flags |= PERIPH_OPEN;
	return 0;

bad2:
	/* Undo the media lock taken above before releasing the adapter. */
	if (removable)
		scsipi_prevent(periph, SPAMR_ALLOW,
		    XS_CTL_IGNORE_ILLEGAL_REQUEST |
		    XS_CTL_IGNORE_MEDIA_CHANGE |
		    XS_CTL_SILENT);

bad:
	scsipi_adapter_delref(adapt);
	return error;
}

/*
 * open the device. Make sure the partition info is as up-to-date as can be.
*/
static int
sdopen(dev_t dev, int flag, int fmt, struct lwp *l)
{
	struct sd_softc *sd;
	struct dk_softc *dksc;
	struct scsipi_periph *periph;
	int unit, part;
	int error;

	unit = SDUNIT(dev);
	sd = device_lookup_private(&sd_cd, unit);
	if (sd == NULL)
		return (ENXIO);
	dksc = &sd->sc_dksc;

	if (!device_is_active(dksc->sc_dev))
		return (ENODEV);

	periph = sd->sc_periph;
	part = SDPART(dev);

	SC_DEBUG(periph, SCSIPI_DB1,
	    ("sdopen: dev=0x%"PRIx64" (unit %d (of %d), partition %d)\n", dev,
	    unit, sd_cd.cd_ndevs, SDPART(dev)));

	/*
	 * If any partition is open, but the disk has been invalidated,
	 * disallow further opens of non-raw partition
	 */
	if ((periph->periph_flags & (PERIPH_OPEN | PERIPH_MEDIA_LOADED)) ==
	    PERIPH_OPEN) {
		if (part != RAW_PART || fmt != S_IFCHR)
			return EIO;
	}

	/* The generic dk(9) open path handles labels and bookkeeping. */
	error = dk_open(dksc, dev, flag, fmt, l);

	SC_DEBUG(periph, SCSIPI_DB3, ("open complete\n"));

	return error;
}

/*
 * Serialized by caller
 *
 * Last close of the device: flush the write cache if dirty, allow
 * removable media out again, and drop our adapter reference.
 */
static int
sd_lastclose(device_t self)
{
	struct sd_softc *sd = device_private(self);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipi_periph *periph = sd->sc_periph;
	struct scsipi_adapter *adapt = periph->periph_channel->chan_adapter;

	/*
	 * If the disk cache needs flushing, and the disk supports
	 * it, do it now.
	 */
	if ((sd->flags & SDF_DIRTY) != 0) {
		if (sd_flush(sd, 0)) {
			aprint_error_dev(dksc->sc_dev,
			    "cache synchronization failed\n");
			sd->flags &= ~SDF_FLUSHING;
		} else
			sd->flags &= ~(SDF_FLUSHING|SDF_DIRTY);
	}

	scsipi_wait_drain(periph);

	if (periph->periph_flags & PERIPH_REMOVABLE)
		scsipi_prevent(periph, SPAMR_ALLOW,
		    XS_CTL_IGNORE_ILLEGAL_REQUEST |
		    XS_CTL_IGNORE_NOT_READY |
		    XS_CTL_SILENT);
	periph->periph_flags &= ~PERIPH_OPEN;

	scsipi_wait_drain(periph);

	scsipi_adapter_delref(adapt);

	return 0;
}

/*
 * close the device.. only called if we are the LAST occurrence of an open
 * device.  Convenient now but usually a pain.
 */
/*
 * sdclose: close a partition; all the real work is in dk_close().
 * NOTE(review): no NULL check on the softc here, unlike sdopen() —
 * presumably a successful open guarantees it exists; confirm.
 */
static int
sdclose(dev_t dev, int flag, int fmt, struct lwp *l)
{
	struct sd_softc *sd;
	struct dk_softc *dksc;
	int unit;

	unit = SDUNIT(dev);
	sd = device_lookup_private(&sd_cd, unit);
	dksc = &sd->sc_dksc;

	return dk_close(dksc, dev, flag, fmt, l);
}

/*
 * Actually translate the requested transfer into one the physical driver
 * can understand. The transfer is described by a buf and will include
 * only one physical transfer.
 */
static void
sdstrategy(struct buf *bp)
{
	struct sd_softc *sd = device_lookup_private(&sd_cd, SDUNIT(bp->b_dev));
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipi_periph *periph = sd->sc_periph;

	SC_DEBUG(sd->sc_periph, SCSIPI_DB2, ("sdstrategy "));
	SC_DEBUG(sd->sc_periph, SCSIPI_DB1,
	    ("%d bytes @ blk %" PRId64 "\n", bp->b_bcount, bp->b_blkno));

	/*
	 * If the device has been made invalid, error out
	 */
	if ((periph->periph_flags & PERIPH_MEDIA_LOADED) == 0 ||
	    !device_is_active(dksc->sc_dev)) {
		/* EIO while still open (media yanked), ENODEV otherwise. */
		if (periph->periph_flags & PERIPH_OPEN)
			bp->b_error = EIO;
		else
			bp->b_error = ENODEV;

		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}

	dk_strategy(dksc, bp);
}

/*
 * Issue single I/O command
 *
 * Called from dk_start and implicitly from dk_strategy
 *
 * Builds the smallest CDB that can express the transfer (6-, 10- or
 * 16-byte READ/WRITE), queues it on the channel without sleeping, and
 * returns EAGAIN when the request must be retried later.
 */
static int
sd_diskstart(device_t dev, struct buf *bp)
{
	struct sd_softc *sd = device_private(dev);
	struct scsipi_periph *periph = sd->sc_periph;
	struct scsipi_channel *chan = periph->periph_channel;
	struct scsipi_rw_16 cmd16;
	struct scsipi_rw_10 cmd_big;
	struct scsi_rw_6 cmd_small;
	struct scsipi_generic *cmdp;
	struct scsipi_xfer *xs;
	int error, flags, nblks, cmdlen;
	int cdb_flags;
	bool havefua = !(periph->periph_quirks & PQUIRK_NOFUA);

	mutex_enter(chan_mtx(chan));

	/* All command openings in use: come back later. */
	if (periph->periph_active >= periph->periph_openings) {
		error = EAGAIN;
		goto out;
	}

	/*
	 * there is excess capacity, but a special waits
	 * It'll need the adapter as soon as we clear out of the
	 * way and let it run (user level wait).
	 */
	if (periph->periph_flags & PERIPH_WAITING) {
		periph->periph_flags &= ~PERIPH_WAITING;
		cv_broadcast(periph_cv_periph(periph));
		error = EAGAIN;
		goto out;
	}

	/*
	 * If the device has become invalid, abort all the
	 * reads and writes until all files have been closed and
	 * re-opened.
	 */
	if (__predict_false(
	    (periph->periph_flags & PERIPH_MEDIA_LOADED) == 0)) {
		error = EIO;
		goto out;
	}

	/*
	 * Mark the disk dirty so that the cache will be
	 * flushed on close.
	 */
	if ((bp->b_flags & B_READ) == 0)
		sd->flags |= SDF_DIRTY;

	if (sd->params.blksize == DEV_BSIZE)
		nblks = bp->b_bcount >> DEV_BSHIFT;
	else
		nblks = howmany(bp->b_bcount, sd->params.blksize);

	/*
	 * Pass FUA and/or DPO if requested. Must be done before CDB
	 * selection, as 6-byte CDB doesn't support the flags.
	 */
	cdb_flags = 0;
	if (havefua) {
		if (bp->b_flags & B_MEDIA_FUA)
			cdb_flags |= SRWB_FUA;

		if (bp->b_flags & B_MEDIA_DPO)
			cdb_flags |= SRWB_DPO;
	}

	/*
	 * Fill out the scsi command. Use the smallest CDB possible
	 * (6-byte, 10-byte, or 16-byte). If we need FUA or DPO,
	 * need to use 10-byte or bigger, as the 6-byte doesn't support
	 * the flags.
	 */
	if (((bp->b_rawblkno & 0x1fffff) == bp->b_rawblkno) &&
	    ((nblks & 0xff) == nblks) &&
	    !(periph->periph_quirks & PQUIRK_ONLYBIG) &&
	    !cdb_flags) {
		/* 6-byte CDB */
		memset(&cmd_small, 0, sizeof(cmd_small));
		cmd_small.opcode = (bp->b_flags & B_READ) ?
		    SCSI_READ_6_COMMAND : SCSI_WRITE_6_COMMAND;
		_lto3b(bp->b_rawblkno, cmd_small.addr);
		cmd_small.length = nblks & 0xff;
		cmdlen = sizeof(cmd_small);
		cmdp = (struct scsipi_generic *)&cmd_small;
	} else if ((bp->b_rawblkno & 0xffffffff) == bp->b_rawblkno) {
		/* 10-byte CDB */
		memset(&cmd_big, 0, sizeof(cmd_big));
		cmd_big.opcode = (bp->b_flags & B_READ) ?
		    READ_10 : WRITE_10;
		_lto4b(bp->b_rawblkno, cmd_big.addr);
		_lto2b(nblks, cmd_big.length);
		cmdlen = sizeof(cmd_big);
		cmdp = (struct scsipi_generic *)&cmd_big;
	} else {
		/* 16-byte CDB */
		memset(&cmd16, 0, sizeof(cmd16));
		cmd16.opcode = (bp->b_flags & B_READ) ?
		    READ_16 : WRITE_16;
		_lto8b(bp->b_rawblkno, cmd16.addr);
		_lto4b(nblks, cmd16.length);
		cmdlen = sizeof(cmd16);
		cmdp = (struct scsipi_generic *)&cmd16;
	}

	/*
	 * FUA/DPO live in byte 1 of the 10/16-byte CDBs; byte 0 of the
	 * scsipi_generic "bytes" array is the byte after the opcode.
	 */
	if (cdb_flags)
		cmdp->bytes[0] = cdb_flags;

	/*
	 * Figure out what flags to use.
	 */
	flags = XS_CTL_NOSLEEP|XS_CTL_ASYNC|XS_CTL_SIMPLE_TAG;
	if (bp->b_flags & B_READ)
		flags |= XS_CTL_DATA_IN;
	else
		flags |= XS_CTL_DATA_OUT;

	/*
	 * Call the routine that chats with the adapter.
	 * Note: we cannot sleep as we may be an interrupt
	 */
	xs = scsipi_make_xs_locked(periph, cmdp, cmdlen,
	    (u_char *)bp->b_data, bp->b_bcount,
	    SDRETRIES, SD_IO_TIMEOUT, bp, flags);
	if (__predict_false(xs == NULL)) {
		/*
		 * out of memory. Keep this buffer in the queue, and
		 * retry later.
		 */
		callout_reset(&sd->sc_callout, hz / 2, sdrestart, sd);
		error = EAGAIN;
		goto out;
	}

	error = scsipi_execute_xs(xs);
	/* with a scsipi_xfer preallocated, scsipi_command can't fail */
	KASSERT(error == 0);

out:
	mutex_exit(chan_mtx(chan));

	return error;
}

/*
 * Recover I/O request after memory shortage
 *
 * Called from callout
 */
static void
sdrestart(void *v)
{
	struct sd_softc *sd = v;
	struct dk_softc *dksc = &sd->sc_dksc;

	dk_start(dksc, NULL);
}

/*
 * Recover I/O request after memory shortage
 *
 * Called from scsipi midlayer when resources have been freed
 * with channel lock held
 */
static void
sdstart(struct scsipi_periph *periph)
{
	struct sd_softc *sd = device_private(periph->periph_dev);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipi_channel *chan = periph->periph_channel;

	/*
	 * release channel lock as dk_start may need to acquire
	 * other locks
	 *
	 * sdstart is called from scsipi_put_xs and all its callers
	 * release the lock afterwards. So releasing it here
	 * doesn't matter.
	 */
	mutex_exit(chan_mtx(chan));
	dk_start(dksc, NULL);
	mutex_enter(chan_mtx(chan));
}

/*
 * sddone: per-transfer completion callback from the scsipi midlayer.
 * Propagates status into the buf and hands it back to the dk layer.
 */
static void
sddone(struct scsipi_xfer *xs, int error)
{
	struct sd_softc *sd = device_private(xs->xs_periph->periph_dev);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct buf *bp = xs->bp;

	if (sd->flags & SDF_FLUSHING) {
		/* Flush completed, no longer dirty. */
		sd->flags &= ~(SDF_FLUSHING|SDF_DIRTY);
	}

	if (bp) {
		bp->b_error = error;
		bp->b_resid = xs->resid;
		if (error) {
			/* on a read/write error bp->b_resid is zero, so fix */
			bp->b_resid = bp->b_bcount;
		}

		dk_done(dksc, bp);
		/* dk_start is called from scsipi_complete */
	}
}

/*
 * sdminphys: clamp a transfer so it fits the device, then let the
 * adapter apply its own limits.
 */
static void
sdminphys(struct buf *bp)
{
	struct sd_softc *sd = device_lookup_private(&sd_cd, SDUNIT(bp->b_dev));
	struct dk_softc *dksc = &sd->sc_dksc;
	long xmax;

	/*
	 * If the device is ancient, we want to make sure that
	 * the transfer fits into a 6-byte cdb.
	 *
	 * XXX Note that the SCSI-I spec says that 256-block transfers
	 * are allowed in a 6-byte read/write, and are specified
	 * by setting the "length" to 0. However, we're conservative
	 * here, allowing only 255-block transfers in case an
	 * ancient device gets confused by length == 0. A length of 0
	 * in a 10-byte read/write actually means 0 blocks.
	 */
	if ((sd->flags & SDF_ANCIENT) &&
	    ((sd->sc_periph->periph_flags &
	    (PERIPH_REMOVABLE | PERIPH_MEDIA_LOADED)) != PERIPH_REMOVABLE)) {
		xmax = dksc->sc_dkdev.dk_geom.dg_secsize * 0xff;

		if (bp->b_bcount > xmax)
			bp->b_bcount = xmax;
	}

	scsipi_adapter_minphys(sd->sc_periph->periph_channel, bp);
}

/*
 * sd_iosize: report the maximum I/O size for this unit by running a
 * throwaway buf through sdminphys().
 */
static void
sd_iosize(device_t dev, int *count)
{
	struct buf B;
	int bmaj;

	bmaj       = bdevsw_lookup_major(&sd_bdevsw);
	B.b_dev    = MAKESDDEV(bmaj,device_unit(dev),RAW_PART);
	B.b_bcount = *count;

	sdminphys(&B);

	*count     = B.b_bcount;
}

/* sdread: raw character device read via physio. */
static int
sdread(dev_t dev, struct uio *uio, int ioflag)
{

	return (physio(sdstrategy, NULL, dev, B_READ, sdminphys, uio));
}

/* sdwrite: raw character device write via physio. */
static int
sdwrite(dev_t dev, struct uio *uio, int ioflag)
{

	return (physio(sdstrategy, NULL, dev, B_WRITE, sdminphys, uio));
}

/*
 * Perform special action on behalf of the user
 * Knows about the internals of this device
 */
static int
sdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
{
	struct sd_softc *sd = device_lookup_private(&sd_cd, SDUNIT(dev));
	struct dk_softc *dksc = &sd->sc_dksc;
	struct scsipi_periph *periph = sd->sc_periph;

	int part = SDPART(dev);
	int error;

	SC_DEBUG(sd->sc_periph, SCSIPI_DB2, ("sdioctl 0x%lx ", cmd));

	/*
	 * If the device is not valid, some IOCTLs can still be
	 * handled on the raw partition. Check this here.
	 */
	if ((periph->periph_flags & PERIPH_MEDIA_LOADED) == 0 &&
	    part != RAW_PART)
		return (EIO);

	switch (cmd) {
	case DIOCLOCK:
		if (periph->periph_flags & PERIPH_REMOVABLE)
			return (scsipi_prevent(periph,
			    (*(int *)addr) ?
			    SPAMR_PREVENT_DT : SPAMR_ALLOW, 0));
		else
			return (ENOTTY);

	case DIOCEJECT:
		if ((periph->periph_flags & PERIPH_REMOVABLE) == 0)
			return (ENOTTY);
		if (*(int *)addr == 0) {
			int pmask = __BIT(part);
			/*
			 * Don't force eject: check that we are the only
			 * partition open. If so, unlock it.
			 */
			if (DK_BUSY(dksc, pmask) == 0) {
				error = scsipi_prevent(periph, SPAMR_ALLOW,
				    XS_CTL_IGNORE_NOT_READY);
				if (error)
					return (error);
			} else {
				return (EBUSY);
			}
		}
		/* FALLTHROUGH */
	case ODIOCEJECT:
		return ((periph->periph_flags & PERIPH_REMOVABLE) == 0 ?
		    ENOTTY : scsipi_start(periph, SSS_STOP|SSS_LOEJ, 0));

	case DIOCGCACHE:
		return (sd_getcache(sd, (int *) addr));

	case DIOCSCACHE:
		if ((flag & FWRITE) == 0)
			return (EBADF);
		return (sd_setcache(sd, *(int *) addr));

	case DIOCCACHESYNC:
		/*
		 * XXX Do we really need to care about having a writable
		 * file descriptor here?
		 */
		if ((flag & FWRITE) == 0)
			return (EBADF);

		if (((sd->flags & SDF_DIRTY) != 0 || *(int *)addr != 0)) {
			error = sd_flush(sd, 0);
			if (error) {
				sd->flags &= ~SDF_FLUSHING;
				return (error);
			}
			sd->flags &= ~(SDF_FLUSHING|SDF_DIRTY);
		}
		return (0);

	default:
		/* Fall back to common disk ioctls, then raw scsipi ioctls. */
		error = dk_ioctl(dksc, dev, cmd, addr, flag, l);
		if (error == ENOTTY)
			error = scsipi_do_ioctl(periph, dev, cmd, addr, flag, l);
		return (error);
	}

	/* Every case above returns; this is intentionally unreachable. */
#ifdef DIAGNOSTIC
	panic("sdioctl: impossible");
#endif
}

/*
 * sd_label: fill in the drive-specific fields of a disklabel.
 */
static void
sd_label(device_t self, struct disklabel *lp)
{
	struct sd_softc *sd = device_private(self);

	strncpy(lp->d_typename, sd->name, 16);
	lp->d_rpm = sd->params.rot_rate;
	if (sd->sc_periph->periph_flags & PERIPH_REMOVABLE)
		lp->d_flags |= D_REMOVABLE;
}

/*
 * sd_shutdown: system shutdown/suspend hook; synchronize the write
 * cache with polling since interrupts may be unavailable.
 */
static bool
sd_shutdown(device_t self, int how)
{
	struct sd_softc *sd = device_private(self);
	struct dk_softc *dksc = &sd->sc_dksc;

	/*
	 * If the disk cache needs to be flushed, and the disk supports
	 * it, flush it.  We're cold at this point, so we poll for
	 * completion.
	 */
	if ((sd->flags & SDF_DIRTY) != 0) {
		if (sd_flush(sd, XS_CTL_NOSLEEP|XS_CTL_POLL)) {
			aprint_error_dev(dksc->sc_dev,
				"cache synchronization failed\n");
			sd->flags &= ~SDF_FLUSHING;
		} else
			sd->flags &= ~(SDF_FLUSHING|SDF_DIRTY);
	}
	return true;
}

/* sd_suspend: pmf suspend hook; reuses the shutdown cache flush. */
static bool
sd_suspend(device_t dv, const pmf_qual_t *qual)
{
	return sd_shutdown(dv, boothowto); /* XXX no need to poll */
}

/*
 * Check Errors
 */
static int
sd_interpret_sense(struct scsipi_xfer *xs)
{
	struct scsipi_periph *periph = xs->xs_periph;
	struct scsipi_channel *chan = periph->periph_channel;
	struct scsi_sense_data *sense = &xs->sense.scsi_sense;
	struct sd_softc *sd = device_private(periph->periph_dev);
	struct dk_softc *dksc = &sd->sc_dksc;
	int error, retval = EJUSTRETURN;

	/*
	 * If the periph is already recovering, just do the normal
	 * error processing.
	 */
	if (periph->periph_flags & PERIPH_RECOVERING)
		return (retval);

	/*
	 * Ignore errors from accessing illegal fields (e.g. trying to
	 * lock the door of a digicam, which doesn't have a door that
	 * can be locked) for the SCSI_PREVENT_ALLOW_MEDIUM_REMOVAL command.
	 */
	if (xs->cmd->opcode == SCSI_PREVENT_ALLOW_MEDIUM_REMOVAL &&
	    SSD_SENSE_KEY(sense->flags) == SKEY_ILLEGAL_REQUEST &&
	    sense->asc == 0x24 &&
	    sense->ascq == 0x00) { /* Illegal field in CDB */
		if (!(xs->xs_control & XS_CTL_SILENT)) {
			scsipi_printaddr(periph);
			printf("no door lock\n");
		}
		xs->xs_control |= XS_CTL_IGNORE_ILLEGAL_REQUEST;
		return (retval);
	}

	/*
	 * If the device is not open yet, let the generic code handle it.
	 */
	if ((periph->periph_flags & PERIPH_MEDIA_LOADED) == 0)
		return (retval);

	/*
	 * If it isn't a extended or extended/deferred error, let
	 * the generic code handle it.
	 */
	if (SSD_RCODE(sense->response_code) != SSD_RCODE_CURRENT &&
	    SSD_RCODE(sense->response_code) != SSD_RCODE_DEFERRED)
		return (retval);

	/* NOT READY / Logical unit not ready (asc 0x04). */
	if (SSD_SENSE_KEY(sense->flags) == SKEY_NOT_READY &&
	    sense->asc == 0x4) {
		if (sense->ascq == 0x01)	{
			/*
			 * Unit In The Process Of Becoming Ready.
			 */
			printf("%s: waiting for pack to spin up...\n",
			    dksc->sc_xname);
			if (!callout_pending(&periph->periph_callout))
				scsipi_periph_freeze(periph, 1);
			callout_reset(&periph->periph_callout,
			    5 * hz, scsipi_periph_timed_thaw, periph);
			retval = ERESTART;
		} else if (sense->ascq == 0x02) {
			printf("%s: pack is stopped, restarting...\n",
			    dksc->sc_xname);
			mutex_enter(chan_mtx(chan));
			periph->periph_flags |= PERIPH_RECOVERING;
			mutex_exit(chan_mtx(chan));
			error = scsipi_start(periph, SSS_START,
			    XS_CTL_URGENT|XS_CTL_HEAD_TAG|
			    XS_CTL_THAW_PERIPH|XS_CTL_FREEZE_PERIPH);
			if (error) {
				aprint_error_dev(dksc->sc_dev,
					"unable to restart pack\n");
				retval = error;
			} else
				retval = ERESTART;
			mutex_enter(chan_mtx(chan));
			periph->periph_flags &= ~PERIPH_RECOVERING;
			mutex_exit(chan_mtx(chan));
		}
	}
	if (SSD_SENSE_KEY(sense->flags) == SKEY_MEDIUM_ERROR &&
	    sense->asc == 0x31 &&
	    sense->ascq == 0x00)	{ /* maybe for any asq ? */
		/* Medium Format Corrupted */
		retval = EFTYPE;
	}
	return (retval);
}

/* sdsize: number of DEV_BSIZE blocks in the given partition, or -1. */
static int
sdsize(dev_t dev)
{
	struct sd_softc *sd;
	struct dk_softc *dksc;
	int unit;

	unit = SDUNIT(dev);
	sd = device_lookup_private(&sd_cd, unit);
	if (sd == NULL)
		return (-1);
	dksc = &sd->sc_dksc;

	if (!device_is_active(dksc->sc_dev))
		return (-1);

	return dk_size(dksc, dev);
}

/* #define SD_DUMP_NOT_TRUSTED if you just want to watch */
/* Static transfer used by the dump path: no allocation at dump time. */
static struct scsipi_xfer sx;

/*
 * dump all of physical memory into the partition specified, starting
 * at offset 'dumplo' into the partition.
 */
static int
sddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct sd_softc *sd;
	struct dk_softc *dksc;
	struct scsipi_periph *periph;
	int unit;

	unit = SDUNIT(dev);
	if ((sd = device_lookup_private(&sd_cd, unit)) == NULL)
		return (ENXIO);
	dksc = &sd->sc_dksc;

	if (!device_is_active(dksc->sc_dev))
		return (ENODEV);

	periph = sd->sc_periph;

	/* Make sure it was initialized. */
	if ((periph->periph_flags & PERIPH_MEDIA_LOADED) == 0)
		return (ENXIO);

	return dk_dump(dksc, dev, blkno, va, size, 0);
}

/*
 * sd_dumpblocks: write nblk device blocks at blkno during a crash
 * dump, using a polled, statically-allocated xfer (no interrupts,
 * no memory allocation).
 */
static int
sd_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct sd_softc *sd = device_private(dev);
	struct dk_softc *dksc = &sd->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	struct scsipi_rw_10 cmd;	/* write command */
	struct scsipi_xfer *xs;	/* ... convenience */
	struct scsipi_periph *periph;
	struct scsipi_channel *chan;
	size_t sectorsize;

	periph = sd->sc_periph;
	chan = periph->periph_channel;

	sectorsize = dg->dg_secsize;

	xs = &sx;

#ifndef	SD_DUMP_NOT_TRUSTED
	/*
	 * Fill out the scsi command
	 */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = WRITE_10;
	_lto4b(blkno, cmd.addr);
	_lto2b(nblk, cmd.length);
	/*
	 * Fill out the scsipi_xfer structure
	 *    Note: we cannot sleep as we may be an interrupt
	 * don't use scsipi_command() as it may want to wait
	 * for an xs.
	 */
	memset(xs, 0, sizeof(sx));
	xs->xs_control |= XS_CTL_NOSLEEP | XS_CTL_POLL |
	    XS_CTL_DATA_OUT;
	xs->xs_status = 0;
	xs->xs_periph = periph;
	xs->xs_retries = SDRETRIES;
	xs->timeout = 10000;	/* 10000 millisecs for a disk ! */
	xs->cmd = (struct scsipi_generic *)&cmd;
	xs->cmdlen = sizeof(cmd);
	xs->resid = nblk * sectorsize;
	xs->error = XS_NOERROR;
	xs->bp = 0;
	xs->data = va;
	xs->datalen = nblk * sectorsize;
	callout_init(&xs->xs_callout, 0);

	/*
	 * Pass all this info to the scsi driver.
	 */
	scsipi_adapter_request(chan, ADAPTER_REQ_RUN_XFER, xs);
	if ((xs->xs_status & XS_STS_DONE) == 0 ||
	    xs->error != XS_NOERROR)
		return (EIO);
#else	/* SD_DUMP_NOT_TRUSTED */
	/* Let's just talk about this first... */
	printf("sd%d: dump addr 0x%x, blk %d\n", unit, va, blkno);
	delay(500 * 1000);	/* half a second */
#endif	/* SD_DUMP_NOT_TRUSTED */

	return (0);
}

/*
 * sd_mode_sense: MODE SENSE wrapper that picks the 6- or 10-byte form
 * based on device quirks; *big reports which header precedes the data.
 */
static int
sd_mode_sense(struct sd_softc *sd, u_int8_t byte2, void *sense, size_t size,
    int page, int flags, int *big)
{

	if ((sd->sc_periph->periph_quirks & PQUIRK_ONLYBIG) &&
	    !(sd->sc_periph->periph_quirks & PQUIRK_NOBIGMODESENSE)) {
		*big = 1;
		return scsipi_mode_sense_big(sd->sc_periph, byte2, page, sense,
		    size + sizeof(struct scsi_mode_parameter_header_10),
		    flags, SDRETRIES, 6000);
	} else {
		*big = 0;
		return scsipi_mode_sense(sd->sc_periph, byte2, page, sense,
		    size + sizeof(struct scsi_mode_parameter_header_6),
		    flags, SDRETRIES, 6000);
	}
}

/*
 * sd_mode_select: MODE SELECT wrapper matching sd_mode_sense; zeroes
 * the reserved data-length field in the header as required.
 */
static int
sd_mode_select(struct sd_softc *sd, u_int8_t byte2, void *sense, size_t size,
    int flags, int big)
{

	if (big) {
		struct scsi_mode_parameter_header_10 *header = sense;

		_lto2b(0, header->data_length);
		return scsipi_mode_select_big(sd->sc_periph, byte2, sense,
		    size + sizeof(struct scsi_mode_parameter_header_10),
		    flags, SDRETRIES, 6000);
	} else {
		struct scsi_mode_parameter_header_6 *header = sense;

		header->data_length = 0;
		return scsipi_mode_select(sd->sc_periph, byte2, sense,
		    size + sizeof(struct scsi_mode_parameter_header_6),
		    flags, SDRETRIES, 6000);
	}
}

/*
 * sd_validate_blksize:
 *
 *	Validate the block size.  Print error if periph is specified,
 */
static int
sd_validate_blksize(struct scsipi_periph *periph, int len)
{

	/* Accept power-of-two sizes in [256, 4096]. */
	if (len >= 256 && powerof2(len) && len <= 4096) {
		return 1;
	}

	if (periph) {
		scsipi_printaddr(periph);
		printf("%s sector size: 0x%x.  Defaulting to %d bytes.\n",
		    !powerof2(len) ?
		    "preposterous" : "unsupported",
		    len, SD_DEFAULT_BLKSIZE);
	}

	return 0;
}

/*
 * sd_read_capacity:
 *
 *	Find out from the device what its capacity is.
 */
static u_int64_t
sd_read_capacity(struct scsipi_periph *periph, int *blksize, int flags)
{
	union {
		struct scsipi_read_capacity_10 cmd;
		struct scsipi_read_capacity_16 cmd16;
	} cmd;
	union {
		struct scsipi_read_capacity_10_data data;
		struct scsipi_read_capacity_16_data data16;
	} *datap;
	uint64_t rv;

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd.opcode = READ_CAPACITY_10;

	/*
	 * Don't allocate data buffer on stack;
	 * The lower driver layer might use the same stack and
	 * if it uses region which is in the same cacheline,
	 * cache flush ops against the data buffer won't work properly.
	 */
	datap = malloc(sizeof(*datap), M_TEMP, M_WAITOK);
	/* NOTE(review): M_WAITOK never returns NULL; check is redundant. */
	if (datap == NULL)
		return 0;

	/*
	 * If the command works, interpret the result as a 4 byte
	 * number of blocks
	 */
	rv = 0;
	memset(datap, 0, sizeof(datap->data));
	if (scsipi_command(periph, (void *)&cmd.cmd, sizeof(cmd.cmd),
	    (void *)datap, sizeof(datap->data), SCSIPIRETRIES, 20000, NULL,
	    flags | XS_CTL_DATA_IN | XS_CTL_SILENT) != 0)
		goto out;

	if (_4btol(datap->data.addr) != 0xffffffff) {
		*blksize = _4btol(datap->data.length);
		rv = _4btol(datap->data.addr) + 1;
		goto out;
	}

	/*
	 * Device is larger than can be reflected by READ CAPACITY (10).
	 * Try READ CAPACITY (16).
	 */

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd16.opcode = READ_CAPACITY_16;
	cmd.cmd16.byte2 = SRC16_SERVICE_ACTION;
	_lto4b(sizeof(datap->data16), cmd.cmd16.len);

	memset(datap, 0, sizeof(datap->data16));
	if (scsipi_command(periph, (void *)&cmd.cmd16, sizeof(cmd.cmd16),
	    (void *)datap, sizeof(datap->data16), SCSIPIRETRIES, 20000, NULL,
	    flags | XS_CTL_DATA_IN | XS_CTL_SILENT) != 0)
		goto out;

	*blksize = _4btol(datap->data16.length);
	rv = _8btol(datap->data16.addr) + 1;

 out:
	free(datap, M_TEMP);
	return rv;
}

/*
 * sd_get_simplifiedparms: derive disk parameters for RBC
 * (simplified direct-access) devices from READ CAPACITY plus mode
 * page 6, cross-checking the two.
 */
static int
sd_get_simplifiedparms(struct sd_softc *sd, struct disk_parms *dp, int flags)
{
	struct {
		struct scsi_mode_parameter_header_6 header;
		/* no block descriptor */
		u_int8_t pg_code; /* page code (should be 6) */
		u_int8_t pg_length; /* page length (should be 11) */
		u_int8_t wcd; /* bit0: cache disable */
		u_int8_t lbs[2]; /* logical block size */
		u_int8_t size[5]; /* number of log. blocks */
		u_int8_t pp; /* power/performance */
		u_int8_t flags;
		u_int8_t resvd;
	} scsipi_sense;
	u_int64_t blocks;
	int error, blksize;

	/*
	 * sd_read_capacity (ie "read capacity") and mode sense page 6
	 * give the same information. Do both for now, and check
	 * for consistency.
	 * XXX probably differs for removable media
	 */
	dp->blksize = SD_DEFAULT_BLKSIZE;
	if ((blocks = sd_read_capacity(sd->sc_periph, &blksize, flags)) == 0)
		return (SDGP_RESULT_OFFLINE);		/* XXX? */

	error = scsipi_mode_sense(sd->sc_periph, SMS_DBD, 6,
	    &scsipi_sense.header, sizeof(scsipi_sense),
	    flags, SDRETRIES, 6000);

	if (error != 0)
		return (SDGP_RESULT_OFFLINE);		/* XXX? */

	/* Prefer READ CAPACITY's size; fall back to the mode page. */
	dp->blksize = blksize;
	if (!sd_validate_blksize(NULL, dp->blksize))
		dp->blksize = _2btol(scsipi_sense.lbs);
	if (!sd_validate_blksize(sd->sc_periph, dp->blksize))
		dp->blksize = SD_DEFAULT_BLKSIZE;

	/*
	 * Create a pseudo-geometry.
	 */
	dp->heads = 64;
	dp->sectors = 32;
	dp->cyls = blocks / (dp->heads * dp->sectors);
	dp->disksize = _5btol(scsipi_sense.size);
	if (dp->disksize <= UINT32_MAX && dp->disksize != blocks) {
		printf("RBC size: mode sense=%llu, get cap=%llu\n",
		       (unsigned long long)dp->disksize,
		       (unsigned long long)blocks);
		dp->disksize = blocks;
	}
	dp->disksize512 = (dp->disksize * dp->blksize) / DEV_BSIZE;

	return (SDGP_RESULT_OK);
}

/*
 * Get the scsi driver to send a full inquiry to the * device and use the
 * results to fill out the disk parameter structure.
 */
static int
sd_get_capacity(struct sd_softc *sd, struct disk_parms *dp, int flags)
{
	u_int64_t blocks;
	int error, blksize;
#if 0
	int i;
	u_int8_t *p;
#endif

	dp->disksize = blocks = sd_read_capacity(sd->sc_periph, &blksize,
	    flags);
	if (blocks == 0) {
		/*
		 * READ CAPACITY failed; fall back to READ FORMAT
		 * CAPACITIES to distinguish unformatted from offline.
		 */
		struct scsipi_read_format_capacities cmd;
		struct {
			struct scsipi_capacity_list_header header;
			struct scsipi_capacity_descriptor desc;
		} __packed data;

		memset(&cmd, 0, sizeof(cmd));
		memset(&data, 0, sizeof(data));
		cmd.opcode = READ_FORMAT_CAPACITIES;
		_lto2b(sizeof(data), cmd.length);

		error = scsipi_command(sd->sc_periph,
		    (void *)&cmd, sizeof(cmd), (void *)&data, sizeof(data),
		    SDRETRIES, 20000, NULL,
		    flags | XS_CTL_DATA_IN);
		if (error == EFTYPE) {
			/* Medium Format Corrupted, handle as not formatted */
			return (SDGP_RESULT_UNFORMATTED);
		}
		if (error || data.header.length == 0)
			return (SDGP_RESULT_OFFLINE);

#if 0
printf("rfc: length=%d\n", data.header.length);
printf("rfc result:"); for (i = sizeof(struct scsipi_capacity_list_header) + data.header.length, p = (void *)&data; i; i--, p++) printf(" %02x", *p); printf("\n");
#endif
		switch (data.desc.byte5 & SCSIPI_CAP_DESC_CODE_MASK) {
		case SCSIPI_CAP_DESC_CODE_RESERVED:
		case SCSIPI_CAP_DESC_CODE_FORMATTED:
			break;

		case SCSIPI_CAP_DESC_CODE_UNFORMATTED:
			return (SDGP_RESULT_UNFORMATTED);

		case SCSIPI_CAP_DESC_CODE_NONE:
			return (SDGP_RESULT_OFFLINE);
		}

		dp->disksize = blocks = _4btol(data.desc.nblks);
		if (blocks == 0)
			return (SDGP_RESULT_OFFLINE);		/* XXX? */

		blksize = _3btol(data.desc.blklen);

	} else if (!sd_validate_blksize(NULL, blksize)) {
		/*
		 * Capacity came back but the block size looks bogus;
		 * try the block descriptor from a page-0 MODE SENSE.
		 */
		struct sd_mode_sense_data scsipi_sense;
		int big, bsize;
		struct scsi_general_block_descriptor *bdesc;

		memset(&scsipi_sense, 0, sizeof(scsipi_sense));
		error = sd_mode_sense(sd, 0, &scsipi_sense,
		    sizeof(scsipi_sense.blk_desc), 0, flags | XS_CTL_SILENT, &big);
		if (!error) {
			if (big) {
				bdesc = (void *)(&scsipi_sense.header.big + 1);
				bsize = _2btol(scsipi_sense.header.big.blk_desc_len);
			} else {
				bdesc = (void *)(&scsipi_sense.header.small + 1);
				bsize = scsipi_sense.header.small.blk_desc_len;
			}

#if 0
printf("page 0 sense:"); for (i = sizeof(scsipi_sense), p = (void *)&scsipi_sense; i; i--, p++) printf(" %02x", *p); printf("\n");
printf("page 0 bsize=%d\n", bsize);
printf("page 0 ok\n");
#endif

			if (bsize >= 8) {
				blksize = _3btol(bdesc->blklen);
			}
		}
	}

	if (!sd_validate_blksize(sd->sc_periph, blksize))
		blksize = SD_DEFAULT_BLKSIZE;

	dp->blksize = blksize;
	dp->disksize512 = (blocks * dp->blksize) / DEV_BSIZE;
	return (0);
}

/*
 * sd_get_parms_page4: read the rigid-disk geometry mode page (4) and
 * derive cylinders/heads/sectors/rotation rate; retries once without
 * DBD if the first attempt fails.
 */
static int
sd_get_parms_page4(struct sd_softc *sd, struct disk_parms *dp, int flags)
{
	struct sd_mode_sense_data scsipi_sense;
	int error;
	int big, byte2;
	size_t poffset;
	union scsi_disk_pages *pages;

	byte2 = SMS_DBD;
again:
	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
	error = sd_mode_sense(sd, byte2, &scsipi_sense,
	    (byte2 ? 0 : sizeof(scsipi_sense.blk_desc)) +
	    sizeof(scsipi_sense.pages.rigid_geometry), 4,
	    flags | XS_CTL_SILENT, &big);
	if (error) {
		if (byte2 == SMS_DBD) {
			/* No result; try once more with DBD off */
			byte2 = 0;
			goto again;
		}
		return (error);
	}

	if (big) {
		poffset = sizeof scsipi_sense.header.big;
		poffset += _2btol(scsipi_sense.header.big.blk_desc_len);
	} else {
		poffset = sizeof scsipi_sense.header.small;
		poffset += scsipi_sense.header.small.blk_desc_len;
	}

	/* Guard against a descriptor length that overruns our buffer. */
	if (poffset > sizeof(scsipi_sense) - sizeof(pages->rigid_geometry))
		return ERESTART;

	pages = (void *)((u_long)&scsipi_sense + poffset);
#if 0
	{
		size_t i;
		u_int8_t *p;

		printf("page 4 sense:");
		for (i = sizeof(scsipi_sense), p = (void *)&scsipi_sense; i;
		    i--, p++)
			printf(" %02x", *p);
		printf("\n");
		printf("page 4 pg_code=%d sense=%p/%p\n",
		    pages->rigid_geometry.pg_code, &scsipi_sense, pages);
	}
#endif

	if ((pages->rigid_geometry.pg_code & PGCODE_MASK) != 4)
		return (ERESTART);

	SC_DEBUG(sd->sc_periph, SCSIPI_DB3,
	    ("%d cyls, %d heads, %d precomp, %d red_write, %d land_zone\n",
	    _3btol(pages->rigid_geometry.ncyl),
	    pages->rigid_geometry.nheads,
	    _2btol(pages->rigid_geometry.st_cyl_wp),
	    _2btol(pages->rigid_geometry.st_cyl_rwc),
	    _2btol(pages->rigid_geometry.land_zone)));

	/*
	 * KLUDGE!! (for zone recorded disks)
	 * give a number of sectors so that sec * trks * cyls
	 * is <= disk_size
	 * can lead to wasted space! THINK ABOUT THIS !
	 */
	dp->heads = pages->rigid_geometry.nheads;
	dp->cyls = _3btol(pages->rigid_geometry.ncyl);
	if (dp->heads == 0 || dp->cyls == 0)
		return (ERESTART);
	dp->sectors = dp->disksize / (dp->heads * dp->cyls);	/* XXX */

	dp->rot_rate = _2btol(pages->rigid_geometry.rpm);
	if (dp->rot_rate == 0)
		dp->rot_rate = 3600;

#if 0
printf("page 4 ok\n");
#endif
	return (0);
}

/*
 * sd_get_parms_page5: read the flexible-disk geometry mode page (5),
 * same retry scheme as page 4.
 */
static int
sd_get_parms_page5(struct sd_softc *sd, struct disk_parms *dp, int flags)
{
	struct sd_mode_sense_data scsipi_sense;
	int error;
	int big, byte2;
	size_t poffset;
	union scsi_disk_pages *pages;

	byte2 = SMS_DBD;
again:
	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
	/*
	 * NOTE(review): unlike page 4, this passes a literal 0 instead
	 * of byte2, so the DBD-off retry below never changes the actual
	 * request — looks like a long-standing inconsistency; confirm.
	 */
	error = sd_mode_sense(sd, 0, &scsipi_sense,
	    (byte2 ? 0 : sizeof(scsipi_sense.blk_desc)) +
	    sizeof(scsipi_sense.pages.flex_geometry), 5,
	    flags | XS_CTL_SILENT, &big);
	if (error) {
		if (byte2 == SMS_DBD) {
			/* No result; try once more with DBD off */
			byte2 = 0;
			goto again;
		}
		return (error);
	}

	if (big) {
		poffset = sizeof scsipi_sense.header.big;
		poffset += _2btol(scsipi_sense.header.big.blk_desc_len);
	} else {
		poffset = sizeof scsipi_sense.header.small;
		poffset += scsipi_sense.header.small.blk_desc_len;
	}

	/* Guard against a descriptor length that overruns our buffer. */
	if (poffset > sizeof(scsipi_sense) - sizeof(pages->flex_geometry))
		return ERESTART;

	pages = (void *)((u_long)&scsipi_sense + poffset);
#if 0
	{
		size_t i;
		u_int8_t *p;

		printf("page 5 sense:");
		for (i = sizeof(scsipi_sense), p = (void *)&scsipi_sense; i;
		    i--, p++)
			printf(" %02x", *p);
		printf("\n");
		printf("page 5 pg_code=%d sense=%p/%p\n",
		    pages->flex_geometry.pg_code, &scsipi_sense, pages);
	}
#endif

	if ((pages->flex_geometry.pg_code & PGCODE_MASK) != 5)
		return (ERESTART);

	SC_DEBUG(sd->sc_periph, SCSIPI_DB3,
	    ("%d cyls, %d heads, %d sec, %d bytes/sec\n",
	    _3btol(pages->flex_geometry.ncyl),
	    pages->flex_geometry.nheads,
	    pages->flex_geometry.ph_sec_tr,
	    _2btol(pages->flex_geometry.bytes_s)));

	dp->heads = pages->flex_geometry.nheads;
	dp->cyls = _2btol(pages->flex_geometry.ncyl);
	dp->sectors = pages->flex_geometry.ph_sec_tr;
	if (dp->heads == 0 || dp->cyls == 0 || dp->sectors == 0)
		return (ERESTART);

	/*
	 * NOTE(review): reads rpm via rigid_geometry while parsing the
	 * flex page — union members may not overlap at the same offset;
	 * verify against scsi_disk.h.
	 */
	dp->rot_rate = _2btol(pages->rigid_geometry.rpm);
	if (dp->rot_rate == 0)
		dp->rot_rate = 3600;

#if 0
printf("page 5 ok\n");
#endif
	return (0);
}

/*
 * sd_get_parms: top-level parameter discovery.  Picks the RBC path,
 * the page-4/page-5 path (order depending on removability), or a
 * fabricated geometry, then pushes the result to the disk layer.
 */
static int
sd_get_parms(struct sd_softc *sd, struct disk_parms *dp, int flags)
{
	struct dk_softc *dksc = &sd->sc_dksc;
	int error;

	/*
	 * If offline, the SDEV_MEDIA_LOADED flag will be
	 * cleared by the caller if necessary.
	 */
	if (sd->type == T_SIMPLE_DIRECT) {
		error = sd_get_simplifiedparms(sd, dp, flags);
		if (!error)
			goto setprops;
		return (error);
	}

	error = sd_get_capacity(sd, dp, flags);
	if (error)
		return (error);

	if (sd->type == T_OPTICAL)
		goto page0;

	if (sd->sc_periph->periph_flags & PERIPH_REMOVABLE) {
		if (!sd_get_parms_page5(sd, dp, flags) ||
		    !sd_get_parms_page4(sd, dp, flags))
			goto setprops;
	} else {
		if (!sd_get_parms_page4(sd, dp, flags) ||
		    !sd_get_parms_page5(sd, dp, flags))
			goto setprops;
	}

page0:
	printf("%s: fabricating a geometry\n", dksc->sc_xname);
	/* Try calling driver's method for figuring out geometry. */
	if (!sd->sc_periph->periph_channel->chan_adapter->adapt_getgeom ||
	    !(*sd->sc_periph->periph_channel->chan_adapter->adapt_getgeom)
		(sd->sc_periph, dp, dp->disksize)) {
		/*
		 * Use adaptec standard fictitious geometry
		 * this depends on which controller (e.g. 1542C is
		 * different. but we have to put SOMETHING here..)
		 */
		dp->heads = 64;
		dp->sectors = 32;
		dp->cyls = dp->disksize / (64 * 32);
	}
	dp->rot_rate = 3600;

setprops:
	sd_set_geometry(sd);

	return (SDGP_RESULT_OK);
}

/*
 * sd_flush: issue SYNCHRONIZE CACHE (10) on SCSI-2+ devices that are
 * not quirked against it; marks the softc as flushing first so sddone
 * can clear the dirty state on completion.
 */
static int
sd_flush(struct sd_softc *sd, int flags)
{
	struct scsipi_periph *periph = sd->sc_periph;
	struct scsi_synchronize_cache_10 cmd;

	/*
	 * If the device is SCSI-2, issue a SYNCHRONIZE CACHE.
	 * We issue with address 0 length 0, which should be
	 * interpreted by the device as "all remaining blocks
	 * starting at address 0".  We ignore ILLEGAL REQUEST
	 * in the event that the command is not supported by
	 * the device, and poll for completion so that we know
	 * that the cache has actually been flushed.
	 *
	 * Unless, that is, the device can't handle the SYNCHRONIZE CACHE
	 * command, as indicated by our quirks flags.
	 *
	 * XXX What about older devices?
	 */
	if (periph->periph_version < 2 ||
	    (periph->periph_quirks & PQUIRK_NOSYNCCACHE))
		return (0);

	sd->flags |= SDF_FLUSHING;
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = SCSI_SYNCHRONIZE_CACHE_10;

	return (scsipi_command(periph, (void *)&cmd, sizeof(cmd), 0, 0,
	    SDRETRIES, 100000, NULL, flags | XS_CTL_IGNORE_ILLEGAL_REQUEST));
}

/*
 * sd_getcache: report the cache configuration (DKCACHE_* bits) from
 * the caching mode page (8), including which bits are changeable.
 */
static int
sd_getcache(struct sd_softc *sd, int *bitsp)
{
	struct scsipi_periph *periph = sd->sc_periph;
	struct sd_mode_sense_data scsipi_sense;
	int error, bits = 0;
	int big;
	union scsi_disk_pages *pages;
	uint8_t dev_spec;

	/* only SCSI-2 and later supported */
	if (periph->periph_version < 2)
		return (EOPNOTSUPP);

	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
	error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense,
	    sizeof(scsipi_sense.pages.caching_params), 8, XS_CTL_SILENT, &big);
	if (error)
		return (error);

	if (big) {
		pages = (void *)(&scsipi_sense.header.big + 1);
		dev_spec = scsipi_sense.header.big.dev_spec;
	} else {
		pages = (void *)(&scsipi_sense.header.small + 1);
		dev_spec = scsipi_sense.header.small.dev_spec;
	}

	/* RCD set means read cache *disabled*. */
	if ((pages->caching_params.flags & CACHING_RCD) == 0)
		bits |= DKCACHE_READ;
	if (pages->caching_params.flags & CACHING_WCE)
		bits |= DKCACHE_WRITE;
	if (pages->caching_params.pg_code & PGCODE_PS)
		bits |= DKCACHE_SAVE;

	/*
	 * Support for FUA/DPO, defined starting with SCSI-2. Use only
	 * if device claims to support it, according to the MODE SENSE.
	 */
	if (!(periph->periph_quirks & PQUIRK_NOFUA) &&
	    ISSET(dev_spec, SMH_DSP_DPOFUA))
		bits |= DKCACHE_FUA | DKCACHE_DPO;

	/* Second sense with PCTRL=changeable to learn what can be set. */
	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
	error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense,
	    sizeof(scsipi_sense.pages.caching_params),
	    SMS_PCTRL_CHANGEABLE|8, XS_CTL_SILENT, &big);
	if (error == 0) {
		if (big)
			pages = (void *)(&scsipi_sense.header.big + 1);
		else
			pages = (void *)(&scsipi_sense.header.small + 1);

		if (pages->caching_params.flags & CACHING_RCD)
			bits |= DKCACHE_RCHANGE;
		if (pages->caching_params.flags & CACHING_WCE)
			bits |= DKCACHE_WCHANGE;
	}

	*bitsp = bits;

	return (0);
}

/*
 * sd_setcache: rewrite the caching mode page (8) to enable/disable
 * read and write caching per the DKCACHE_* bits; no-op if the page
 * already matches.
 */
static int
sd_setcache(struct sd_softc *sd, int bits)
{
	struct scsipi_periph *periph = sd->sc_periph;
	struct sd_mode_sense_data scsipi_sense;
	int error;
	uint8_t oflags, byte2 = 0;
	int big;
	union scsi_disk_pages *pages;

	if (periph->periph_version < 2)
		return (EOPNOTSUPP);

	memset(&scsipi_sense, 0, sizeof(scsipi_sense));
	error = sd_mode_sense(sd, SMS_DBD, &scsipi_sense,
	    sizeof(scsipi_sense.pages.caching_params), 8, 0, &big);
	if (error)
		return (error);

	if (big)
		pages = (void *)(&scsipi_sense.header.big + 1);
	else
		pages = (void *)(&scsipi_sense.header.small + 1);

	oflags = pages->caching_params.flags;

	/* RCD is inverted: set it to *disable* the read cache. */
	if (bits & DKCACHE_READ)
		pages->caching_params.flags &= ~CACHING_RCD;
	else
		pages->caching_params.flags |= CACHING_RCD;

	if (bits & DKCACHE_WRITE)
		pages->caching_params.flags |= CACHING_WCE;
	else
		pages->caching_params.flags &= ~CACHING_WCE;

	if (oflags == pages->caching_params.flags)
		return (0);

	pages->caching_params.pg_code &= PGCODE_MASK;

	if (bits & DKCACHE_SAVE)
		byte2 |= SMS_SP;

	return (sd_mode_select(sd, byte2|SMS_PF, &scsipi_sense,
	    sizeof(struct scsi_mode_page_header) +
	    pages->caching_params.pg_length, 0, big));
}

/*
 * sd_set_geometry: publish the discovered parameters to the generic
 * disk layer via disk_set_info().
 */
static void
sd_set_geometry(struct sd_softc *sd)
{
	struct dk_softc *dksc = &sd->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = sd->params.disksize;
	dg->dg_secsize = sd->params.blksize;
	dg->dg_nsectors = sd->params.sectors;
	dg->dg_ntracks = sd->params.heads;
	dg->dg_ncylinders = sd->params.cyls;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, sd->typename);
}
| 591 651 214 214 450 40 41 41 40 165 165 164 2 164 164 71 153 71 50 50 50 120 120 94 94 23 93 94 121 120 123 122 29 122 62 99 121 62 61 196 195 196 27 27 322 320 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 | /* $NetBSD: kern_timeout.c,v 1.70 2022/06/29 22:27:01 riastradh Exp $ */ /*- * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe, and by Andrew Doran. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org> * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.70 2022/06/29 22:27:01 riastradh Exp $"); /* * Timeouts are kept in a hierarchical timing wheel. The c_time is the * value of c_cpu->cc_ticks when the timeout should be called. There are * four levels with 256 buckets each. See 'Scheme 7' in "Hashed and * Hierarchical Timing Wheels: Efficient Data Structures for Implementing * a Timer Facility" by George Varghese and Tony Lauck. * * Some of the "math" in here is a bit tricky. We have to beware of * wrapping ints. * * We use the fact that any element added to the queue must be added with * a positive time. That means that any element `to' on the queue cannot * be scheduled to timeout further in time than INT_MAX, but c->c_time can * be positive or negative so comparing it with anything is dangerous. * The only way we can use the c->c_time value in any predictable way is * when we calculate how far in the future `to' will timeout - "c->c_time * - c->c_cpu->cc_ticks". The result will always be positive for future * timeouts and 0 or negative for due timeouts. 
 */

#define _CALLOUT_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/kmem.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_cpu.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Four wheels of 256 buckets each (4 * 256 == 1024 total buckets).
 * Wheel N covers bits [N*8, N*8+7] of the absolute expiry time.
 */
#define BUCKETS		1024
#define WHEELSIZE	256
#define WHEELMASK	255
#define WHEELBITS	8

/* Extract the bucket index within wheel `wheel' for absolute time `time'. */
#define MASKWHEEL(wheel, time)	(((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

/*
 * Select the bucket for a timeout: `rel' is how far in the future it
 * expires (relative ticks), `abs' is the absolute expiry time.  Nearer
 * deadlines land on lower (finer-grained) wheels.
 */
#define BUCKET(cc, rel, abs)						\
    (((rel) <= (1 << (2*WHEELBITS)))					\
	? ((rel) <= (1 << WHEELBITS))					\
		? &(cc)->cc_wheel[MASKWHEEL(0, (abs))]			\
		: &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE]	\
	: ((rel) <= (1 << (3*WHEELBITS)))				\
		? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE]	\
		: &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

/*
 * Cascade: dump one bucket of wheel `wheel' onto the todo list, where
 * callout_softclock() will either run its entries or re-bucket them.
 */
#define MOVEBUCKET(cc, wheel, time)					\
    CIRCQ_APPEND(&(cc)->cc_todo,					\
        &(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

/* Make `list' an empty queue (head points at itself). */
#define CIRCQ_INIT(list)						\
do {									\
        (list)->cq_next_l = (list);					\
        (list)->cq_prev_l = (list);					\
} while (/*CONSTCOND*/0)

/* Insert `elem' at the tail of `list'. */
#define CIRCQ_INSERT(elem, list)					\
do {									\
        (elem)->cq_prev_e = (list)->cq_prev_e;				\
        (elem)->cq_next_l = (list);					\
        (list)->cq_prev_l->cq_next_l = (elem);				\
        (list)->cq_prev_l = (elem);					\
} while (/*CONSTCOND*/0)

/* Splice the whole of queue `snd' onto the tail of `fst'; `snd' is emptied. */
#define CIRCQ_APPEND(fst, snd)						\
do {									\
        if (!CIRCQ_EMPTY(snd)) {					\
                (fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l;		\
                (snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l;		\
                (snd)->cq_prev_l->cq_next_l = (fst);			\
                (fst)->cq_prev_l = (snd)->cq_prev_l;			\
                CIRCQ_INIT(snd);					\
        }								\
} while (/*CONSTCOND*/0)

/* Unlink `elem'; its own pointers are left dangling (caller re-inserts). */
#define CIRCQ_REMOVE(elem)						\
do {									\
        (elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e;		\
        (elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e;		\
} while (/*CONSTCOND*/0)

#define CIRCQ_FIRST(list)	((list)->cq_next_e)
#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
#define CIRCQ_LAST(elem,list)	((elem)->cq_next_l == (list))
#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))

/*
 * Per-CPU callout state.  cc_lock serializes access to this structure;
 * all mutations in this file take it (spin mutex at IPL_SCHED).
 */
struct callout_cpu {
	kmutex_t	*cc_lock;	/* serializes this structure */
	sleepq_t	cc_sleepq;	/* LWPs waiting in callout_halt() */
	u_int		cc_nwait;	/* # LWPs asleep on cc_sleepq */
	u_int		cc_ticks;	/* per-CPU tick counter */
	lwp_t		*cc_lwp;	/* LWP running callout_softclock() */
	callout_impl_t	*cc_active;	/* callout currently being invoked */
	callout_impl_t	*cc_cancel;	/* set by callout_stop() for
					   non-MPSAFE cancellation */
	struct evcnt	cc_ev_late;	/* stats: callouts that fired late */
	struct evcnt	cc_ev_block;	/* stats: halts that had to sleep */
	struct callout_circq cc_todo;		/* Worklist */
	struct callout_circq cc_wheel[BUCKETS];	/* Queues of timeouts */
	char		cc_name1[12];	/* evcnt name: "late/<cpu>" */
	char		cc_name2[12];	/* evcnt name: "wait/<cpu>" */
};

#ifdef DDB
static struct callout_cpu ccb;
#endif

#ifndef CRASH /* _KERNEL */
static void	callout_softclock(void *);
static void	callout_wait(callout_impl_t *, void *, kmutex_t *);

static struct callout_cpu callout_cpu0 __cacheline_aligned;
static void *callout_sih __read_mostly;

/*
 * Lock the CPU state that currently owns callout `c'.  The callout can
 * migrate between CPUs (see callout_schedule_locked), so re-check
 * c->c_cpu after acquiring the lock and retry on a race.  Returns the
 * lock held.
 */
static inline kmutex_t *
callout_lock(callout_impl_t *c)
{
	struct callout_cpu *cc;
	kmutex_t *lock;

	for (;;) {
		cc = c->c_cpu;
		lock = cc->cc_lock;
		mutex_spin_enter(lock);
		if (__predict_true(cc == c->c_cpu))
			return lock;
		mutex_spin_exit(lock);
	}
}

/*
 * callout_startup:
 *
 *	Initialize the callout facility, called at system startup time.
 *	Do just enough to allow callouts to be safely registered.
 */
void
callout_startup(void)
{
	struct callout_cpu *cc;
	int b;

	/* Must run before callout_init_cpu() has attached the boot CPU. */
	KASSERT(curcpu()->ci_data.cpu_callout == NULL);

	cc = &callout_cpu0;
	cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	CIRCQ_INIT(&cc->cc_todo);
	for (b = 0; b < BUCKETS; b++)
		CIRCQ_INIT(&cc->cc_wheel[b]);
	curcpu()->ci_data.cpu_callout = cc;
}

/*
 * callout_init_cpu:
 *
 *	Per-CPU initialization.  Allocates and initializes the wheel for
 *	a secondary CPU; for the boot CPU (whose state was set up in
 *	callout_startup()) it instead establishes the soft interrupt
 *	handler, then attaches the sleep queue and event counters.
 */
CTASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));

void
callout_init_cpu(struct cpu_info *ci)
{
	struct callout_cpu *cc;
	int b;

	if ((cc = ci->ci_data.cpu_callout) == NULL) {
		cc = kmem_zalloc(sizeof(*cc), KM_SLEEP);
		cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		CIRCQ_INIT(&cc->cc_todo);
		for (b = 0; b < BUCKETS; b++)
			CIRCQ_INIT(&cc->cc_wheel[b]);
	} else {
		/* Boot CPU, one time only. */
		callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
		    callout_softclock, NULL);
		if (callout_sih == NULL)
			panic("callout_init_cpu (2)");
	}

	sleepq_init(&cc->cc_sleepq);

	snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name1);

	snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name2);

	ci->ci_data.cpu_callout = cc;
}

/*
 * callout_init:
 *
 *	Initialize a callout structure.  This must be quick, so we fill
 *	only the minimum number of fields.  MPSAFE callouts are homed on
 *	the initializing CPU (and may later migrate); non-MPSAFE ones are
 *	bound to the boot CPU's wheel.
 */
void
callout_init(callout_t *cs, u_int flags)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;

	KASSERT((flags & ~CALLOUT_FLAGMASK) == 0);

	cc = curcpu()->ci_data.cpu_callout;
	c->c_func = NULL;
	c->c_magic = CALLOUT_MAGIC;
	if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) {
		c->c_flags = flags;
		c->c_cpu = cc;
		return;
	}
	/* Non-MPSAFE (or very early): bind to the boot CPU. */
	c->c_flags = flags | CALLOUT_BOUND;
	c->c_cpu = &callout_cpu0;
}

/*
 * callout_destroy:
 *
 *	Destroy a callout structure.  The callout must be stopped.
 */
void
callout_destroy(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;

	KASSERTMSG(c->c_magic == CALLOUT_MAGIC,
	    "callout %p: c_magic (%#x) != CALLOUT_MAGIC (%#x)",
	    c, c->c_magic, CALLOUT_MAGIC);

	/*
	 * It's not necessary to lock in order to see the correct value
	 * of c->c_flags.  If the callout could potentially have been
	 * running, the current thread should have stopped it.
	 */
	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	KASSERTMSG(c->c_cpu->cc_lwp == curlwp || c->c_cpu->cc_active != c,
	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	/* Clearing the magic makes any further use trip the asserts above. */
	c->c_magic = 0;
}

/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.  Always releases `lock' before returning.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		/*
		 * Subtract-then-compare is deliberate: tick counters
		 * wrap, so only the difference is meaningful (see the
		 * file-head comment).
		 */
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	/*
	 * Not pending: prefer to home the callout on the current CPU,
	 * but only if it is unbound and we can take the local CPU's
	 * lock without spinning (mutex_tryenter avoids lock-order
	 * problems between two CPUs' callout locks).
	 */
	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
	}
	mutex_spin_exit(lock);
}

/*
 * callout_reset:
 *
 *	Reset a callout structure with a new function and argument, and
 *	schedule it to run.
 */
void
callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	c->c_func = func;
	c->c_arg = arg;
	/* callout_schedule_locked() drops the lock for us. */
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_schedule:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.
 */
void
callout_schedule(callout_t *cs, int to_ticks)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_stop:
 *
 *	Try to cancel a pending callout.  It may be too late: the callout
 *	could be running on another CPU.  If called from interrupt context,
 *	the callout could already be in progress at a lower priority.
 *	Returns true if the callout had already fired.
 */
bool
callout_stop(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);

	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	cc = c->c_cpu;
	if (cc->cc_active == c) {
		/*
		 * This is for non-MPSAFE callouts only.  To synchronize
		 * effectively we must be called with kernel_lock held.
		 * It's also taken in callout_softclock.
		 */
		cc->cc_cancel = c;
	}

	mutex_spin_exit(lock);

	return expired;
}

/*
 * callout_halt:
 *
 *	Cancel a pending callout.  If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler.
 *	If the callout
 *	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock.  If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	int flags;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());
	KASSERT(interlock == NULL || mutex_owned(interlock));

	/* Fast path. */
	lock = callout_lock(c);
	flags = c->c_flags;
	if ((flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags = flags & ~(CALLOUT_PENDING|CALLOUT_FIRED);
	if (__predict_false(flags & CALLOUT_FIRED)) {
		/* May be in flight: take the slow path and wait. */
		callout_wait(c, interlock, lock);
		return true;
	}
	mutex_spin_exit(lock);
	return false;
}

/*
 * callout_wait:
 *
 *	Slow path for callout_halt().  Deliberately marked __noinline to
 *	prevent unneeded overhead in the caller.  Called with the callout
 *	lock held; releases it before returning.
 */
static void __noinline
callout_wait(callout_impl_t *c, void *interlock, kmutex_t *lock)
{
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *relock;

	l = curlwp;
	relock = NULL;
	for (;;) {
		/*
		 * At this point we know the callout is not pending, but it
		 * could be running on a CPU somewhere.  That can be curcpu
		 * in a few cases:
		 *
		 * - curlwp is a higher priority soft interrupt
		 * - the callout blocked on a lock and is currently asleep
		 * - the callout itself has called callout_halt() (nice!)
		 */
		cc = c->c_cpu;
		if (__predict_true(cc->cc_active != c || cc->cc_lwp == l))
			break;

		/* It's running - need to wait for it to complete. */
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held; then retry.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			l->l_kpriority = true;
			/* sleepq_block() releases cc_lock for us. */
			sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &sleep_syncobj, false);
			sleepq_block(0, false, &sleep_syncobj);
		}

		/*
		 * Re-lock the callout and check the state of play again.
		 * It's a common design pattern for callouts to re-schedule
		 * themselves so put a stop to it again if needed.
		 */
		lock = callout_lock(c);
		if ((c->c_flags & CALLOUT_PENDING) != 0)
			CIRCQ_REMOVE(&c->c_list);
		c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	}

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);
}

#ifdef notyet
/*
 * callout_bind:
 *
 *	Bind a callout so that it will only execute on one CPU.
 *	The callout must be stopped, and must be MPSAFE.
 *
 *	XXX Disabled for now until it is decided how to handle
 *	offlined CPUs.  We may want weak+strong binding.
 */
void
callout_bind(callout_t *cs, struct cpu_info *ci)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;

	KASSERT((c->c_flags & CALLOUT_PENDING) == 0);
	KASSERT(c->c_cpu->cc_active != c);
	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0);

	lock = callout_lock(c);
	cc = ci->ci_data.cpu_callout;
	c->c_flags |= CALLOUT_BOUND;
	if (c->c_cpu != cc) {
		/*
		 * Assigning c_cpu effectively unlocks the callout
		 * structure, as we don't hold the new CPU's lock.
		 * Issue memory barrier to prevent accesses being
		 * reordered.
		 */
		membar_exit();
		c->c_cpu = cc;
	}
	mutex_spin_exit(lock);
}
#endif

/*
 * callout_setfunc:
 *
 *	Set the function and argument without scheduling the callout.
 */
void
callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	c->c_func = func;
	c->c_arg = arg;
	mutex_spin_exit(lock);
}

/* True if the callout has fired (CALLOUT_FIRED set). */
bool
callout_expired(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_FIRED) != 0);
	mutex_spin_exit(lock);

	return rv;
}

/* True if the callout is pending or has fired. */
bool
callout_active(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0);
	mutex_spin_exit(lock);

	return rv;
}

/* True if the callout is scheduled but has not yet fired. */
bool
callout_pending(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_PENDING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

/* True if the callout is being invoked and has not yet been ack'd. */
bool
callout_invoking(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_INVOKING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

/* Acknowledge invocation: clear CALLOUT_INVOKING. */
void
callout_ack(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	c->c_flags &= ~CALLOUT_INVOKING;
	mutex_spin_exit(lock);
}

/*
 * callout_hardclock:
 *
 *	Called from hardclock() once every tick.  We schedule a soft
 *	interrupt if there is work to be done.
 */
void
callout_hardclock(void)
{
	struct callout_cpu *cc;
	int needsoftclock, ticks;

	cc = curcpu()->ci_data.cpu_callout;
	mutex_spin_enter(cc->cc_lock);

	ticks = ++cc->cc_ticks;

	/*
	 * Cascade: always dump wheel 0's current bucket onto the todo
	 * list; each time a wheel's 8 bits roll over to zero, cascade
	 * one bucket down from the next-higher wheel.
	 */
	MOVEBUCKET(cc, 0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(cc, 1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(cc, 2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(cc, 3, ticks);
		}
	}

	needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo);
	mutex_spin_exit(cc->cc_lock);

	if (needsoftclock)
		softint_schedule(callout_sih);
}

/*
 * callout_softclock:
 *
 *	Soft interrupt handler, scheduled above if there is work to
 *	be done.  Callouts are made in soft interrupt context.
 */
static void
callout_softclock(void *v)
{
	callout_impl_t *c;
	struct callout_cpu *cc;
	void (*func)(void *);
	void *arg;
	int mpsafe, count, ticks, delta;
	lwp_t *l;

	l = curlwp;
	KASSERT(l->l_cpu == curcpu());
	cc = l->l_cpu->ci_data.cpu_callout;

	mutex_spin_enter(cc->cc_lock);
	cc->cc_lwp = l;
	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
		c = CIRCQ_FIRST(&cc->cc_todo);
		KASSERT(c->c_magic == CALLOUT_MAGIC);
		KASSERT(c->c_func != NULL);
		KASSERT(c->c_cpu == cc);
		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
		CIRCQ_REMOVE(&c->c_list);

		/* If due run it, otherwise insert it into the right bucket. */
		ticks = cc->cc_ticks;
		/*
		 * Unsigned subtraction, then cast: wrap-safe distance to
		 * the deadline without signed-overflow UB.
		 */
		delta = (int)((unsigned)c->c_time - (unsigned)ticks);
		if (delta > 0) {
			CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time));
			continue;
		}
		if (delta < 0)
			cc->cc_ev_late.ev_count++;

		c->c_flags = (c->c_flags & ~CALLOUT_PENDING) |
		    (CALLOUT_FIRED | CALLOUT_INVOKING);
		/* Snapshot before dropping the lock: c may be freed below. */
		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
		func = c->c_func;
		arg = c->c_arg;
		cc->cc_active = c;
		mutex_spin_exit(cc->cc_lock);
		KASSERT(func != NULL);
		if (__predict_false(!mpsafe)) {
			KERNEL_LOCK(1, NULL);
			(*func)(arg);
			KERNEL_UNLOCK_ONE(NULL);
		} else
			(*func)(arg);
		KASSERTMSG(l->l_blcnt == 0,
		    "callout %p func %p leaked %d biglocks",
		    c, func, l->l_blcnt);
		mutex_spin_enter(cc->cc_lock);

		/*
		 * We can't touch 'c' here because it might be
		 * freed already.  If LWPs waiting for callout
		 * to complete, awaken them.
		 */
		cc->cc_active = NULL;
		if ((count = cc->cc_nwait) != 0) {
			cc->cc_nwait = 0;
			/* sleepq_wake() drops the lock. */
			sleepq_wake(&cc->cc_sleepq, cc, count, cc->cc_lock);
			mutex_spin_enter(cc->cc_lock);
		}
	}
	cc->cc_lwp = NULL;
	mutex_spin_exit(cc->cc_lock);
}
#endif /* !CRASH */

#ifdef DDB
/*
 * Print every callout in one bucket.  `bucket' is the in-kernel address,
 * `kbucket' the local copy's address (entries are read with
 * db_read_bytes() since the target may be a crash image).
 */
static void
db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *kbucket,
    struct callout_circq *bucket)
{
	callout_impl_t *c, ci;
	db_expr_t offset;
	const char *name;
	static char question[] = "?";
	int b;

	if (CIRCQ_LAST(bucket, kbucket))
		return;

	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
		db_read_bytes((db_addr_t)c, sizeof(ci), (char *)&ci);
		c = &ci;
		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
		    &offset);
		name = name ? name : question;
		b = (bucket - cc->cc_wheel);
		if (b < 0)
			b = -WHEELSIZE;	/* todo list, not a wheel bucket */
		db_printf("%9d %2d/%-4d %16lx  %s\n",
		    c->c_time - cc->cc_ticks, b / WHEELSIZE, b, (u_long)c->c_arg,
		    name);
		if (CIRCQ_LAST(&c->c_list, kbucket))
			break;
	}
}

/*
 * DDB "show callout": dump the todo list and every wheel bucket for
 * every CPU.
 */
void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

#ifndef CRASH
	db_printf("hardclock_ticks now: %d\n", getticks());
#endif
	db_printf("    ticks  wheel               arg  func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
		db_read_bytes((db_addr_t)ci +
		    offsetof(struct cpu_info, ci_data.cpu_callout),
		    sizeof(cc), (char *)&cc);
		db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
		db_show_callout_bucket(&ccb, &cc->cc_todo, &ccb.cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
			db_read_bytes((db_addr_t)ci +
			    offsetof(struct cpu_info, ci_data.cpu_callout),
			    sizeof(cc), (char *)&cc);
			db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
			db_show_callout_bucket(&ccb, &cc->cc_wheel[b],
			    &ccb.cc_wheel[b]);
		}
	}
}
#endif /* DDB */
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 | /* $NetBSD: ipsec.h,v 1.91 2020/08/28 06:20:44 ozaki-r Exp $ */ /* $FreeBSD: ipsec.h,v 1.2.4.2 2004/02/14 22:23:23 bms Exp $ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETIPSEC_IPSEC_H_ #define _NETIPSEC_IPSEC_H_ #if defined(_KERNEL_OPT) #include "opt_inet.h" #include "opt_ipsec.h" #endif #include <net/pfkeyv2.h> #ifdef _KERNEL #include <sys/socketvar.h> #include <sys/localcount.h> #include <netinet/in_pcb_hdr.h> #include <netipsec/keydb.h> /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. * When the value of the ul_proto is ICMPv6, the port field in "src" * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. 
 */
struct secpolicyindex {
	u_int8_t dir;			/* direction of packet flow, see below */
	union sockaddr_union src;	/* IP src address for SP */
	union sockaddr_union dst;	/* IP dst address for SP */
	u_int8_t prefs;			/* prefix length in bits for src */
	u_int8_t prefd;			/* prefix length in bits for dst */
	u_int16_t ul_proto;		/* upper layer Protocol */
};

/* Security Policy Data Base */
struct secpolicy {
	struct pslist_entry pslist_entry;

	struct localcount localcount;	/* reference count */
	struct secpolicyindex spidx;	/* selector */
	u_int32_t id;			/* It's unique number on the system. */
	u_int state;			/* 0: dead, others: alive */
#define IPSEC_SPSTATE_DEAD	0
#define IPSEC_SPSTATE_ALIVE	1
	u_int origin;			/* who generated this SP */
#define IPSEC_SPORIGIN_USER	0
#define IPSEC_SPORIGIN_KERNEL	1

	u_int policy;		/* DISCARD, NONE or IPSEC, see keyv2.h */
	struct ipsecrequest *req;
				/* pointer to the ipsec request tree, */
				/* if policy == IPSEC else this value == NULL.*/

	/*
	 * lifetime handler.
	 * the policy can be used without limitation if both lifetime and
	 * validtime are zero.
	 * "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime.
	 * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime.
	 */
	time_t created;		/* time created the policy */
	time_t lastused;	/* updated whenever kernel sends a packet */
	time_t lifetime;	/* duration of the lifetime of this policy */
	time_t validtime;	/* duration this policy is valid without use */
};

/* Request for IPsec */
struct ipsecrequest {
	struct ipsecrequest *next;
				/* pointer to next structure */
				/* If NULL, it means the end of chain. */
	struct secasindex saidx;/* hint for search proper SA */
				/* if __ss_len == 0 then no address specified.*/
	u_int level;		/* IPsec level defined below. */

	struct secpolicy *sp;	/* back pointer to SP */
};

/* security policy in PCB */
struct inpcbpolicy {
	struct secpolicy *sp_in;	/* inbound policy */
	struct secpolicy *sp_out;	/* outbound policy */
	int priv;		/* privileged socket ? */

	/* cached policy */
	struct {
		struct secpolicy *cachesp;
		struct secpolicyindex cacheidx;
		int cachehint;		/* processing requirement hint: */
#define IPSEC_PCBHINT_UNKNOWN	0	/* Unknown */
#define IPSEC_PCBHINT_YES	1	/* IPsec processing is required */
#define IPSEC_PCBHINT_NO	2	/* IPsec processing not required */
		u_int cachegen;		/* spdgen when cache filled */
	} sp_cache[3];			/* XXX 3 == IPSEC_DIR_MAX */
	int sp_cacheflags;
#define IPSEC_PCBSP_CONNECTED	1
	struct inpcb_hdr *sp_inph;	/* back pointer */
};

extern u_int ipsec_spdgen;

/*
 * Fast-path check: true when the PCB's cached hint says no IPsec
 * processing is needed AND the cache is still current (cachegen matches
 * the global SPD generation counter).  PCB must be locked.
 */
static __inline bool
ipsec_pcb_skip_ipsec(struct inpcbpolicy *pcbsp, int dir)
{

	KASSERT(inph_locked(pcbsp->sp_inph));

	return pcbsp->sp_cache[(dir)].cachehint == IPSEC_PCBHINT_NO &&
	    pcbsp->sp_cache[(dir)].cachegen == ipsec_spdgen;
}

/* SP acquiring list table. */
struct secspacq {
	LIST_ENTRY(secspacq) chain;

	struct secpolicyindex spidx;

	time_t created;		/* for lifetime */
	int count;		/* for lifetime */
	/* XXX: here is mbuf place holder to be sent ? */
};
#endif /* _KERNEL */

/* buffer size for formatted output of ipsec address (addr + '%' + scope_id?) */
#define IPSEC_ADDRSTRLEN	(INET6_ADDRSTRLEN + 11)

/* buffer size for ipsec_logsastr() */
#define IPSEC_LOGSASTRLEN	192

/* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */
#define IPSEC_PORT_ANY		0
#define IPSEC_ULPROTO_ANY	255
#define IPSEC_PROTO_ANY		255

/* mode of security protocol */
/* NOTE: DON'T use IPSEC_MODE_ANY at SPD.  It's only use in SAD */
#define IPSEC_MODE_ANY		0	/* i.e. wildcard. */
#define IPSEC_MODE_TRANSPORT	1
#define IPSEC_MODE_TUNNEL	2
#define IPSEC_MODE_TCPMD5	3	/* TCP MD5 mode */

/*
 * Direction of security policy.
 * NOTE: Since INVALID is used just as flag.
 * The other are used for loop counter too.
*/ #define IPSEC_DIR_ANY 0 #define IPSEC_DIR_INBOUND 1 #define IPSEC_DIR_OUTBOUND 2 #define IPSEC_DIR_MAX 3 #define IPSEC_DIR_INVALID 4 #define IPSEC_DIR_IS_VALID(dir) ((dir) >= 0 && (dir) <= IPSEC_DIR_MAX) #define IPSEC_DIR_IS_INOROUT(dir) ((dir) == IPSEC_DIR_INBOUND || \ (dir) == IPSEC_DIR_OUTBOUND) /* Policy level */ /* * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB, * DISCARD, IPSEC and NONE are allowed for setkey() in SPD. * DISCARD and NONE are allowed for system default. */ #define IPSEC_POLICY_DISCARD 0 /* discarding packet */ #define IPSEC_POLICY_NONE 1 /* through IPsec engine */ #define IPSEC_POLICY_IPSEC 2 /* do IPsec */ #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ #define IPSEC_LEVEL_REQUIRE 2 /* require SA. */ #define IPSEC_LEVEL_UNIQUE 3 /* unique SA. */ #define IPSEC_MANUAL_REQID_MAX 0x3fff /* * if security policy level == unique, this id * indicate to a relative SA for use, else is * zero. * 1 - 0x3fff are reserved for manual keying. * 0 are reserved for above reason. Others is * for kernel use. * Note that this id doesn't identify SA * by only itself. */ #define IPSEC_REPLAYWSIZE 32 #ifdef _KERNEL extern int ipsec_debug; #ifdef IPSEC_DEBUG extern int ipsec_replay; extern int ipsec_integrity; #endif extern struct secpolicy ip4_def_policy; extern int ip4_esp_trans_deflev; extern int ip4_esp_net_deflev; extern int ip4_ah_trans_deflev; extern int ip4_ah_net_deflev; extern int ip4_ah_cleartos; extern int ip4_ah_offsetmask; extern int ip4_ipsec_dfbit; extern int ip4_ipsec_ecn; extern int crypto_support; #include <sys/syslog.h> #define DPRINTF(fmt, args...) \ do { \ if (ipsec_debug) \ log(LOG_DEBUG, "%s: " fmt, __func__, ##args); \ } while (/*CONSTCOND*/0) #define IPSECLOG(level, fmt, args...) 
\ do { \ if (ipsec_debug) \ log(level, "%s: " fmt, __func__, ##args); \ } while (/*CONSTCOND*/0) #define ipsec_indone(m) \ ((m->m_flags & M_AUTHIPHDR) || (m->m_flags & M_DECRYPTED)) #define ipsec_outdone(m) \ (m_tag_find((m), PACKET_TAG_IPSEC_OUT_DONE) != NULL) static __inline bool ipsec_skip_pfil(struct mbuf *m) { bool rv; if (ipsec_indone(m) && ((m->m_pkthdr.pkthdr_flags & PKTHDR_FLAG_IPSEC_SKIP_PFIL) != 0)) { m->m_pkthdr.pkthdr_flags &= ~PKTHDR_FLAG_IPSEC_SKIP_PFIL; rv = true; } else { rv = false; } return rv; } void ipsec_pcbconn(struct inpcbpolicy *); void ipsec_pcbdisconn(struct inpcbpolicy *); void ipsec_invalpcbcacheall(void); struct inpcb; int ipsec4_output(struct mbuf *, struct inpcb *, int, u_long *, bool *, bool *, bool *); int ipsec_ip_input_checkpolicy(struct mbuf *, bool); void ipsec_mtu(struct mbuf *, int *); #ifdef INET6 void ipsec6_udp_cksum(struct mbuf *); #endif struct inpcb; int ipsec_init_pcbpolicy(struct socket *so, struct inpcbpolicy **); int ipsec_copy_policy(const struct inpcbpolicy *, struct inpcbpolicy *); u_int ipsec_get_reqlevel(const struct ipsecrequest *); int ipsec_set_policy(void *, const void *, size_t, kauth_cred_t); int ipsec_get_policy(void *, const void *, size_t, struct mbuf **); int ipsec_delete_pcbpolicy(void *); int ipsec_in_reject(struct mbuf *, void *); struct secasvar *ipsec_lookup_sa(const struct ipsecrequest *, const struct mbuf *); struct secas; struct tcpcb; int ipsec_chkreplay(u_int32_t, const struct secasvar *); int ipsec_updatereplay(u_int32_t, const struct secasvar *); size_t ipsec_hdrsiz(struct mbuf *, u_int, void *); size_t ipsec4_hdrsiz_tcp(struct tcpcb *); union sockaddr_union; const char *ipsec_address(const union sockaddr_union* sa, char *, size_t); const char *ipsec_logsastr(const struct secasvar *, char *, size_t); /* NetBSD protosw ctlin entrypoint */ void *esp4_ctlinput(int, const struct sockaddr *, void *); void *ah4_ctlinput(int, const struct sockaddr *, void *); void ipsec_output_init(void); struct 
m_tag; void ipsec4_common_input(struct mbuf *m, int, int); int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); int ipsec4_process_packet(struct mbuf *, const struct ipsecrequest *, u_long *); int ipsec_process_done(struct mbuf *, const struct ipsecrequest *, struct secasvar *, int); struct mbuf *m_clone(struct mbuf *); struct mbuf *m_makespace(struct mbuf *, int, int, int *); void *m_pad(struct mbuf *, int); int m_striphdr(struct mbuf *, int, int); extern int ipsec_used __read_mostly; extern int ipsec_enabled __read_mostly; #endif /* _KERNEL */ #ifndef _KERNEL char *ipsec_set_policy(const char *, int); int ipsec_get_policylen(char *); char *ipsec_dump_policy(char *, const char *); const char *ipsec_strerror(void); #endif /* !_KERNEL */ #ifdef _KERNEL /* External declarations of per-file init functions */ void ah_attach(void); void esp_attach(void); void ipcomp_attach(void); void ipe4_attach(void); void tcpsignature_attach(void); void ipsec_attach(void); void sysctl_net_inet_ipsec_setup(struct sysctllog **); #ifdef INET6 void sysctl_net_inet6_ipsec6_setup(struct sysctllog **); #endif #endif /* _KERNEL */ #endif /* !_NETIPSEC_IPSEC_H_ */ |
/*	$NetBSD: bus_space.c,v 1.47 2022/07/17 08:33:48 riastradh Exp $	*/
/*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace * Simulation Facility, NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: bus_space.c,v 1.47 2022/07/17 08:33:48 riastradh Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/extent.h> #include <sys/kmem.h> #include <uvm/uvm_extern.h> #include <dev/isa/isareg.h> #include <sys/bus.h> #include <machine/pio.h> #include <machine/isa_machdep.h> #ifdef XEN #include <xen/hypervisor.h> #endif /* * Macros for sanity-checking the aligned-ness of pointers passed to * bus space ops. These are not strictly necessary on the x86, but * could lead to performance improvements, and help catch problems * with drivers that would creep up on other architectures. */ #ifdef BUS_SPACE_DEBUG #define BUS_SPACE_ALIGNED_ADDRESS(p, t) \ ((((u_long)(p)) & (sizeof(t)-1)) == 0) #define BUS_SPACE_ADDRESS_SANITY(p, t, d) \ ({ \ if (BUS_SPACE_ALIGNED_ADDRESS((p), t) == 0) { \ printf("%s 0x%lx not aligned to %zu bytes %s:%d\n", \ d, (u_long)(p), sizeof(t), __FILE__, __LINE__); \ } \ (void) 0; \ }) #else #define BUS_SPACE_ADDRESS_SANITY(p,t,d) (void) 0 #endif /* BUS_SPACE_DEBUG */ /* * Extent maps to manage I/O and memory space. Allocate * storage for 8 regions in each, initially. Later, ioport_malloc_safe * will indicate that it's safe to use malloc() to dynamically allocate * region descriptors. * * N.B. At least two regions are _always_ allocated from the iomem * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM). * * The extent maps are not static! Machine-dependent ISA and EISA * routines need access to them for bus address space allocation. 
*/ static long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)]; static long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(64) / sizeof(long)]; struct extent *ioport_ex; struct extent *iomem_ex; static int ioport_malloc_safe; static struct bus_space_tag x86_io = { .bst_type = X86_BUS_SPACE_IO }; static struct bus_space_tag x86_mem = { .bst_type = X86_BUS_SPACE_MEM }; bus_space_tag_t x86_bus_space_io = &x86_io; bus_space_tag_t x86_bus_space_mem = &x86_mem; int x86_mem_add_mapping(bus_addr_t, bus_size_t, int, bus_space_handle_t *); static inline bool x86_bus_space_is_io(bus_space_tag_t t) { return t->bst_type == X86_BUS_SPACE_IO; } static inline bool x86_bus_space_is_mem(bus_space_tag_t t) { return t->bst_type == X86_BUS_SPACE_MEM; } void x86_bus_space_init(void) { /* * Initialize the I/O port and I/O mem extent maps. * Note: we don't have to check the return value since * creation of a fixed extent map will never fail (since * descriptor storage has already been allocated). * * N.B. The iomem extent manages _all_ physical addresses * on the machine. When the amount of RAM is found, the two * extents of RAM are allocated from the map (0 -> ISA hole * and end of ISA hole -> end of RAM). */ ioport_ex = extent_create("ioport", 0x0, 0xffff, (void *)ioport_ex_storage, sizeof(ioport_ex_storage), EX_NOCOALESCE|EX_NOWAIT); iomem_ex = extent_create("iomem", 0x0, MAXIOMEM, (void *)iomem_ex_storage, sizeof(iomem_ex_storage), EX_NOCOALESCE|EX_NOWAIT); #ifdef XENPV /* We are privileged guest os - should have IO privileges. 
*/ if (xendomain_is_privileged()) { struct physdev_set_iopl set_iopl; memset(&set_iopl, 0, sizeof(set_iopl)); set_iopl.iopl = 1; if (HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl) != 0) panic("Unable to obtain IOPL, " "despite being SIF_PRIVILEGED"); } #endif /* XENPV */ } void x86_bus_space_mallocok(void) { ioport_malloc_safe = 1; } int bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { bus_space_reservation_t bsr; bus_space_tag_t it; int error; if ((t->bst_exists & BUS_SPACE_OVERRIDE_MAP) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_MAP) == 0) continue; return (*it->bst_ov->ov_space_map)(it->bst_ctx, t, bpa, size, flags, bshp); } error = bus_space_reserve(t, bpa, size, flags, &bsr); if (error != 0) return error; error = bus_space_reservation_map(t, &bsr, flags, bshp); if (error != 0) bus_space_release(t, &bsr); return error; } int bus_space_reservation_map(bus_space_tag_t t, bus_space_reservation_t *bsr, int flags, bus_space_handle_t *bshp) { bus_addr_t bpa; bus_size_t size; bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_RESERVATION_MAP) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_RESERVATION_MAP) == 0) continue; return (*it->bst_ov->ov_space_reservation_map)(it->bst_ctx, t, bsr, flags, bshp); } bpa = bus_space_reservation_addr(bsr); size = bus_space_reservation_size(bsr); /* * For I/O space, that's all she wrote. */ if (x86_bus_space_is_io(t)) { *bshp = bpa; return 0; } #ifndef XENPV if (bpa >= IOM_BEGIN && (bpa + size) != 0 && (bpa + size) <= IOM_END) { *bshp = (bus_space_handle_t)ISA_HOLE_VADDR(bpa); return 0; } #endif /* !XENPV */ /* * For memory space, map the bus physical address to * a kernel virtual address. 
*/ return x86_mem_add_mapping(bpa, size, flags, bshp); } int _x86_memio_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { /* * For I/O space, just fill in the handle. */ if (x86_bus_space_is_io(t)) { if (flags & BUS_SPACE_MAP_LINEAR) return (EOPNOTSUPP); *bshp = bpa; return (0); } /* * For memory space, map the bus physical address to * a kernel virtual address. */ return x86_mem_add_mapping(bpa, size, flags, bshp); } int bus_space_reserve(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags, bus_space_reservation_t *bsrp) { struct extent *ex; int error; bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_RESERVE) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_RESERVE) == 0) continue; return (*it->bst_ov->ov_space_reserve)(it->bst_ctx, t, bpa, size, flags, bsrp); } /* * Pick the appropriate extent map. */ if (x86_bus_space_is_io(t)) { if (flags & BUS_SPACE_MAP_LINEAR) return (EOPNOTSUPP); ex = ioport_ex; } else if (x86_bus_space_is_mem(t)) ex = iomem_ex; else panic("x86_memio_alloc: bad bus space tag"); /* * Before we go any further, let's make sure that this * region is available. */ error = extent_alloc_region(ex, bpa, size, EX_NOWAIT | (ioport_malloc_safe ? 
EX_MALLOCOK : 0)); if (error != 0) return error; bus_space_reservation_init(bsrp, bpa, size); return 0; } int bus_space_reserve_subregion(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, const bus_size_t size, const bus_size_t alignment, const bus_size_t boundary, const int flags, bus_space_reservation_t *bsrp) { bus_space_reservation_t bsr; struct extent *ex; u_long bpa; int error; bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_RESERVE_SUBREGION) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_RESERVE_SUBREGION) == 0) continue; return (*it->bst_ov->ov_space_reserve_subregion)(it->bst_ctx, t, rstart, rend, size, alignment, boundary, flags, bsrp); } /* * Pick the appropriate extent map. */ if (x86_bus_space_is_io(t)) { if (flags & BUS_SPACE_MAP_LINEAR) return (EOPNOTSUPP); ex = ioport_ex; } else if (x86_bus_space_is_mem(t)) ex = iomem_ex; else panic("x86_memio_alloc: bad bus space tag"); /* * Sanity check the allocation against the extent's boundaries. */ rstart = MAX(rstart, ex->ex_start); rend = MIN(rend, ex->ex_end); if (rstart >= rend) panic("x86_memio_alloc: bad region start/end"); /* * Do the requested allocation. */ error = extent_alloc_subregion(ex, rstart, rend, size, alignment, boundary, EX_FAST | EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0), &bpa); if (error) return (error); bus_space_reservation_init(&bsr, bpa, size); *bsrp = bsr; return 0; } void bus_space_release(bus_space_tag_t t, bus_space_reservation_t *bsr) { struct extent *ex; bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_RELEASE) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_RELEASE) == 0) continue; (*it->bst_ov->ov_space_release)(it->bst_ctx, t, bsr); return; } /* * Pick the appropriate extent map. 
*/ if (x86_bus_space_is_io(t)) { ex = ioport_ex; } else if (x86_bus_space_is_mem(t)) ex = iomem_ex; else panic("x86_memio_alloc: bad bus space tag"); if (extent_free(ex, bus_space_reservation_addr(bsr), bus_space_reservation_size(bsr), EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) { printf("%s: pa 0x%jx, size 0x%jx\n", __func__, (uintmax_t)bus_space_reservation_addr(bsr), (uintmax_t)bus_space_reservation_size(bsr)); printf("%s: can't free region\n", __func__); } } int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t alignment, bus_size_t boundary, int flags, bus_addr_t *bpap, bus_space_handle_t *bshp) { bus_space_reservation_t bsr; bus_space_tag_t it; int error; if ((t->bst_exists & BUS_SPACE_OVERRIDE_ALLOC) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_ALLOC) == 0) continue; return (*it->bst_ov->ov_space_alloc)(it->bst_ctx, t, rstart, rend, size, alignment, boundary, flags, bpap, bshp); } /* * Do the requested allocation. 
*/ error = bus_space_reserve_subregion(t, rstart, rend, size, alignment, boundary, flags, &bsr); if (error != 0) return error; error = bus_space_reservation_map(t, &bsr, flags, bshp); if (error != 0) bus_space_release(t, &bsr); *bpap = bus_space_reservation_addr(&bsr); return error; } int x86_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { paddr_t pa, endpa; vaddr_t va, sva; u_int pmapflags; pa = x86_trunc_page(bpa); endpa = x86_round_page(bpa + size); pmapflags = PMAP_NOCACHE; if ((flags & BUS_SPACE_MAP_CACHEABLE) != 0) pmapflags = 0; else if (flags & BUS_SPACE_MAP_PREFETCHABLE) pmapflags = PMAP_WRITE_COMBINE; #ifdef DIAGNOSTIC if (endpa != 0 && endpa <= pa) panic("x86_mem_add_mapping: overflow"); #endif #ifdef XENPV if (bpa >= IOM_BEGIN && (bpa + size) != 0 && (bpa + size) <= IOM_END) { sva = (vaddr_t)ISA_HOLE_VADDR(pa); } else #endif /* XENPV */ { sva = uvm_km_alloc(kernel_map, endpa - pa, 0, UVM_KMF_VAONLY | UVM_KMF_NOWAIT); if (sva == 0) return (ENOMEM); } *bshp = (bus_space_handle_t)(sva + (bpa & PGOFSET)); for (va = sva; pa != endpa; pa += PAGE_SIZE, va += PAGE_SIZE) { pmap_kenter_ma(va, pa, VM_PROT_READ | VM_PROT_WRITE, pmapflags); } pmap_update(pmap_kernel()); return 0; } bool bus_space_is_equal(bus_space_tag_t t1, bus_space_tag_t t2) { if (t1 == NULL || t2 == NULL) return false; return t1->bst_type == t2->bst_type; } /* * void _x86_memio_unmap(bus_space_tag bst, bus_space_handle bsh, * bus_size_t size, bus_addr_t *adrp) * * This function unmaps memory- or io-space mapped by the function * _x86_memio_map(). This function works nearly as same as * x86_memio_unmap(), but this function does not ask kernel * built-in extents and returns physical address of the bus space, * for the convenience of the extra extent manager. */ void _x86_memio_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, bus_addr_t *adrp) { u_long va, endva; bus_addr_t bpa; /* * Find the correct extent and bus physical address. 
*/ if (x86_bus_space_is_io(t)) { bpa = bsh; } else if (x86_bus_space_is_mem(t)) { if (bsh >= atdevbase && (bsh + size) != 0 && (bsh + size) <= (atdevbase + IOM_SIZE)) { bpa = (bus_addr_t)ISA_PHYSADDR(bsh); } else { va = x86_trunc_page(bsh); endva = x86_round_page(bsh + size); #ifdef DIAGNOSTIC if (endva <= va) { panic("_x86_memio_unmap: overflow"); } #endif if (pmap_extract_ma(pmap_kernel(), va, &bpa) == FALSE) { panic("_x86_memio_unmap:" " wrong virtual address"); } bpa += (bsh & PGOFSET); pmap_kremove(va, endva - va); pmap_update(pmap_kernel()); /* * Free the kernel virtual mapping. */ uvm_km_free(kernel_map, va, endva - va, UVM_KMF_VAONLY); } } else { panic("_x86_memio_unmap: bad bus space tag"); } if (adrp != NULL) { *adrp = bpa; } } static void bus_space_reservation_unmap1(bus_space_tag_t t, const bus_space_handle_t bsh, const bus_size_t size, bus_addr_t *bpap) { u_long va, endva; bus_addr_t bpa; /* * Find the correct extent and bus physical address. */ if (x86_bus_space_is_io(t)) { bpa = bsh; } else if (x86_bus_space_is_mem(t)) { if (bsh >= atdevbase && (bsh + size) != 0 && (bsh + size) <= (atdevbase + IOM_SIZE)) { bpa = (bus_addr_t)ISA_PHYSADDR(bsh); goto ok; } va = x86_trunc_page(bsh); endva = x86_round_page(bsh + size); #ifdef DIAGNOSTIC if (endva <= va) panic("x86_memio_unmap: overflow"); #endif (void) pmap_extract_ma(pmap_kernel(), va, &bpa); bpa += (bsh & PGOFSET); pmap_kremove(va, endva - va); pmap_update(pmap_kernel()); /* * Free the kernel virtual mapping. 
*/ uvm_km_free(kernel_map, va, endva - va, UVM_KMF_VAONLY); } else panic("x86_memio_unmap: bad bus space tag"); ok: if (bpap != NULL) *bpap = bpa; } void bus_space_reservation_unmap(bus_space_tag_t t, const bus_space_handle_t bsh, const bus_size_t size) { bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_RESERVATION_UNMAP) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_RESERVATION_UNMAP) == 0) continue; (*it->bst_ov->ov_space_reservation_unmap)(it->bst_ctx, t, bsh, size); return; } bus_space_reservation_unmap1(t, bsh, size, NULL); } void bus_space_unmap(bus_space_tag_t t, const bus_space_handle_t bsh, const bus_size_t size) { bus_addr_t addr; bus_space_reservation_t bsr; bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_UNMAP) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_UNMAP) == 0) continue; (*it->bst_ov->ov_space_unmap)(it->bst_ctx, t, bsh, size); return; } bus_space_reservation_unmap1(t, bsh, size, &addr); bus_space_reservation_init(&bsr, addr, size); bus_space_release(t, &bsr); } void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size) { bus_space_tag_t it; if ((t->bst_exists & BUS_SPACE_OVERRIDE_FREE) == 0) ; /* skip override */ else for (it = t; it != NULL; it = it->bst_super) { if ((it->bst_present & BUS_SPACE_OVERRIDE_FREE) == 0) continue; (*it->bst_ov->ov_space_free)(it->bst_ctx, t, bsh, size); return; } /* bus_space_unmap() does all that we need to do. */ bus_space_unmap(t, bsh, size); } int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } paddr_t bus_space_mmap(bus_space_tag_t t, bus_addr_t addr, off_t off, int prot, int flags) { paddr_t pflags = 0; /* Can't mmap I/O space. 
*/ if (x86_bus_space_is_io(t)) return (-1); /* * "addr" is the base address of the device we're mapping. * "off" is the offset into that device. * * Note we are called for each "page" in the device that * the upper layers want to map. */ if (flags & BUS_SPACE_MAP_PREFETCHABLE) pflags |= X86_MMAP_FLAG_PREFETCH; return x86_btop(addr + off) | (pflags << X86_MMAP_FLAG_SHIFT); } void bus_space_set_multi_1(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint8_t v, size_t c) { vaddr_t addr = h + o; if (x86_bus_space_is_io(t)) while (c--) outb(addr, v); else while (c--) *(volatile uint8_t *)(addr) = v; } void bus_space_set_multi_2(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint16_t v, size_t c) { vaddr_t addr = h + o; BUS_SPACE_ADDRESS_SANITY(addr, uint16_t, "bus addr"); if (x86_bus_space_is_io(t)) while (c--) outw(addr, v); else while (c--) *(volatile uint16_t *)(addr) = v; } void bus_space_set_multi_4(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint32_t v, size_t c) { vaddr_t addr = h + o; BUS_SPACE_ADDRESS_SANITY(addr, uint32_t, "bus addr"); if (x86_bus_space_is_io(t)) while (c--) outl(addr, v); else while (c--) *(volatile uint32_t *)(addr) = v; } void bus_space_set_region_1(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint8_t v, size_t c) { vaddr_t addr = h + o; if (x86_bus_space_is_io(t)) for (; c != 0; c--, addr++) outb(addr, v); else for (; c != 0; c--, addr++) *(volatile uint8_t *)(addr) = v; } void bus_space_set_region_2(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint16_t v, size_t c) { vaddr_t addr = h + o; BUS_SPACE_ADDRESS_SANITY(addr, uint16_t, "bus addr"); if (x86_bus_space_is_io(t)) for (; c != 0; c--, addr += 2) outw(addr, v); else for (; c != 0; c--, addr += 2) *(volatile uint16_t *)(addr) = v; } void bus_space_set_region_4(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o, uint32_t v, size_t c) { vaddr_t addr = h + o; BUS_SPACE_ADDRESS_SANITY(addr, uint32_t, "bus addr"); if (x86_bus_space_is_io(t)) 
for (; c != 0; c--, addr += 4) outl(addr, v); else for (; c != 0; c--, addr += 4) *(volatile uint32_t *)(addr) = v; } void bus_space_copy_region_1(bus_space_tag_t t, bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { vaddr_t addr1 = h1 + o1; vaddr_t addr2 = h2 + o2; if (x86_bus_space_is_io(t)) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (c - 1), addr2 += (c - 1); c != 0; c--, addr1--, addr2--) outb(addr2, inb(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1++, addr2++) *(volatile uint8_t *)(addr2) = *(volatile uint8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (c - 1), addr2 += (c - 1); c != 0; c--, addr1--, addr2--) *(volatile uint8_t *)(addr2) = *(volatile uint8_t *)(addr1); } } } void bus_space_copy_region_2(bus_space_tag_t t, bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { vaddr_t addr1 = h1 + o1; vaddr_t addr2 = h2 + o2; BUS_SPACE_ADDRESS_SANITY(addr1, uint16_t, "bus addr 1"); BUS_SPACE_ADDRESS_SANITY(addr2, uint16_t, "bus addr 2"); if (x86_bus_space_is_io(t)) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (c - 1), addr2 += 2 * (c - 1); c != 0; c--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 2, addr2 += 2) *(volatile uint16_t *)(addr2) = *(volatile uint16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (c - 1), addr2 += 2 * (c - 1); c != 0; c--, addr1 -= 2, addr2 -= 2) *(volatile uint16_t *)(addr2) = *(volatile uint16_t *)(addr1); } } } void bus_space_copy_region_4(bus_space_tag_t t, 
bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { vaddr_t addr1 = h1 + o1; vaddr_t addr2 = h2 + o2; BUS_SPACE_ADDRESS_SANITY(addr1, uint32_t, "bus addr 1"); BUS_SPACE_ADDRESS_SANITY(addr2, uint32_t, "bus addr 2"); if (x86_bus_space_is_io(t)) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (c - 1), addr2 += 4 * (c - 1); c != 0; c--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 4, addr2 += 4) *(volatile uint32_t *)(addr2) = *(volatile uint32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (c - 1), addr2 += 4 * (c - 1); c != 0; c--, addr1 -= 4, addr2 -= 4) *(volatile uint32_t *)(addr2) = *(volatile uint32_t *)(addr1); } } } void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, bus_size_t len, int flags) { /* I/O instructions always happen in program order. */ if (x86_bus_space_is_io(tag)) return; /* * For default mappings, which are mapped with UC-type memory * regions, all loads and stores are issued in program order. * * For BUS_SPACE_MAP_PREFETCHABLE mappings, which are mapped * with WC-type memory regions, loads and stores may be issued * out of order, potentially requiring any of the three x86 * fences -- LFENCE, SFENCE, MFENCE. * * For BUS_SPACE_MAP_CACHEABLE mappings, which are mapped with * WB-type memory regions (like normal memory), store/load may * be reordered to load/store, potentially requiring MFENCE. * * We can't easily tell here how the region was mapped (without * consulting the page tables), so just issue the fence * unconditionally. Chances are either it's necessary or the * cost is small in comparison to device register I/O. 
 *
 * Reference:
 *
 *	AMD64 Architecture Programmer's Manual, Volume 2:
 *	System Programming, 24593--Rev. 3.38--November 2021,
 *	Sec. 7.4.2 Memory Barrier Interaction with Memory
 *	Types, Table 7-3, p. 196.
 *	https://web.archive.org/web/20220625040004/https://www.amd.com/system/files/TechDocs/24593.pdf#page=256
 */
	/* Map the requested barrier direction(s) onto the x86 fences. */
	switch (flags) {
	case 0:
		break;
	case BUS_SPACE_BARRIER_READ:
		x86_lfence();
		break;
	case BUS_SPACE_BARRIER_WRITE:
		x86_sfence();
		break;
	case BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE:
		x86_mfence();
		break;
	default:
		panic("unknown bus space barrier: 0x%x", (unsigned)flags);
	}
}

/*
 * Return the kernel virtual address behind a memory-space handle, or
 * NULL for an I/O-space tag (port handles have no virtual mapping).
 */
void *
bus_space_vaddr(bus_space_tag_t tag, bus_space_handle_t bsh)
{

	return x86_bus_space_is_mem(tag) ? (void *)bsh : NULL;
}

/*
 * Translate a single BUS_SPACE_OVERRIDE_* bit into the corresponding
 * member of the overrides table.  Returns NULL for an unknown bit so
 * bus_space_tag_create() can reject incomplete override sets.
 */
static const void *
bit_to_function_pointer(const struct bus_space_overrides *ov, uint64_t bit)
{
	switch (bit) {
	case BUS_SPACE_OVERRIDE_MAP:
		return ov->ov_space_map;
	case BUS_SPACE_OVERRIDE_UNMAP:
		return ov->ov_space_unmap;
	case BUS_SPACE_OVERRIDE_ALLOC:
		return ov->ov_space_alloc;
	case BUS_SPACE_OVERRIDE_FREE:
		return ov->ov_space_free;
	case BUS_SPACE_OVERRIDE_RESERVE:
		return ov->ov_space_reserve;
	case BUS_SPACE_OVERRIDE_RELEASE:
		return ov->ov_space_release;
	case BUS_SPACE_OVERRIDE_RESERVATION_MAP:
		return ov->ov_space_reservation_map;
	case BUS_SPACE_OVERRIDE_RESERVATION_UNMAP:
		return ov->ov_space_reservation_unmap;
	case BUS_SPACE_OVERRIDE_RESERVE_SUBREGION:
		return ov->ov_space_reserve_subregion;
	default:
		return NULL;
	}
}

/* Free a tag previously created by bus_space_tag_create(). */
void
bus_space_tag_destroy(bus_space_tag_t bst)
{
	kmem_free(bst, sizeof(struct bus_space_tag));
}

/*
 * Create a derived bus space tag on top of `obst', overriding the
 * operations named by the bits in `present' with the matching entries
 * of `ov'.  `extpresent' must be 0 (no extended overrides are defined
 * yet).  Returns 0 and the new tag in *bstp, or EINVAL if the override
 * set is empty or incomplete.
 */
int
bus_space_tag_create(bus_space_tag_t obst, const uint64_t present,
    const uint64_t extpresent, const struct bus_space_overrides *ov,
    void *ctx, bus_space_tag_t *bstp)
{
	uint64_t bit, bits, nbits;
	bus_space_tag_t bst;
	const void *fp;

	if (ov == NULL || present == 0 || extpresent != 0)
		return EINVAL;

	bst = kmem_alloc(sizeof(struct bus_space_tag), KM_SLEEP);

	bst->bst_super = obst;
	bst->bst_type = obst->bst_type;

	/* Walk the set bits of `present' one at a time. */
	for (bits = present; bits != 0; bits
= nbits) { nbits = bits & (bits - 1); bit = nbits ^ bits; if ((fp = bit_to_function_pointer(ov, bit)) == NULL) { printf("%s: missing bit %" PRIx64 "\n", __func__, bit); goto einval; } } bst->bst_ov = ov; bst->bst_exists = obst->bst_exists | present; bst->bst_present = present; bst->bst_ctx = ctx; *bstp = bst; return 0; einval: kmem_free(bst, sizeof(struct bus_space_tag)); return EINVAL; } |
/*	$NetBSD: slurm.c,v 1.4 2019/01/22 06:47:20 skrll Exp $	*/

/*
 * Copyright (c) 2012 Jonathan A. Kollasch
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: slurm.c,v 1.4 2019/01/22 06:47:20 skrll Exp $"); #include <sys/param.h> #include <sys/proc.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/conf.h> #include <dev/usb/usb.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdivar.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/ic/si470x_reg.h> #include <sys/radioio.h> #include <dev/radio_if.h> #ifdef SLURM_DEBUG int slurmdebug = 0; #define DPRINTFN(n, x) do { if (slurmdebug > (n)) printf x; } while (0) #else #define DPRINTFN(n, x) #endif #define DPRINTF(x) DPRINTFN(0, x) #define SI470X_VOLFACT (255 / __SHIFTOUT_MASK(SI470X_VOLUME)) struct slurm_softc { device_t sc_dev; struct usbd_device * sc_udev; struct usbd_interface * sc_uif; uint32_t sc_band; uint32_t sc_space; }; static const struct usb_devno slurm_devs[] = { { USB_VENDOR_ADS, USB_PRODUCT_ADS_RDX155 }, }; static int slurm_match(device_t, cfdata_t, void *); static void 
slurm_attach(device_t, device_t, void *); static int slurm_detach(device_t, int); static int slurm_get_info(void *, struct radio_info *); static int slurm_set_info(void *, struct radio_info *); static int slurm_search(void *, int); static usbd_status slurm_setreg(struct slurm_softc *, int, uint16_t); static usbd_status slurm_getreg(struct slurm_softc *, int, uint16_t *); static uint32_t slurm_si470x_get_freq(struct slurm_softc *, uint16_t); static void slurm_si470x_get_bandspace(struct slurm_softc *, uint16_t); static int slurm_si470x_get_info(uint16_t); static int slurm_si470x_get_mute(uint16_t); static int slurm_si470x_get_stereo(uint16_t); static int slurm_si470x_get_volume(uint16_t); static int slurm_si470x_search(struct slurm_softc *, int); static void slurm_si470x_set_freq(struct slurm_softc *, uint32_t); static void slurm_si470x_set_powercfg(struct slurm_softc *, int, int); static void slurm_si470x_set_volume(struct slurm_softc *, int); static const struct radio_hw_if slurm_radio = { .get_info = slurm_get_info, .set_info = slurm_set_info, .search = slurm_search, }; CFATTACH_DECL_NEW(slurm, sizeof(struct slurm_softc), slurm_match, slurm_attach, slurm_detach, NULL); static int slurm_match(device_t parent, cfdata_t match, void *aux) { const struct usbif_attach_arg * const uiaa = aux; if (uiaa->uiaa_ifaceno != 2) return UMATCH_NONE; if (usb_lookup(slurm_devs, uiaa->uiaa_vendor, uiaa->uiaa_product) != NULL) { return UMATCH_VENDOR_PRODUCT; } return UMATCH_NONE; } static void slurm_attach(device_t parent, device_t self, void *aux) { struct slurm_softc * const sc = device_private(self); const struct usbif_attach_arg * const uiaa = aux; sc->sc_dev = self; sc->sc_udev = uiaa->uiaa_device; sc->sc_uif = uiaa->uiaa_iface; aprint_normal("\n"); aprint_naive("\n"); usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); #ifdef SLURM_DEBUG { uint16_t val; for (int i = 0; i < 16; i++) { slurm_getreg(sc, i, &val); device_printf(self, "%02x -> %04x\n", i, val); } 
} #endif radio_attach_mi(&slurm_radio, sc, self); } static int slurm_detach(device_t self, int flags) { struct slurm_softc * const sc = device_private(self); int rv = 0; if ((rv = config_detach_children(self, flags)) != 0) return rv; usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev); return rv; } static int slurm_get_info(void *v, struct radio_info *ri) { struct slurm_softc * const sc = v; uint16_t powercfg, sysconfig2, readchannel, statusrssi; slurm_getreg(sc, SI470X_POWERCFG, &powercfg); slurm_getreg(sc, SI470X_SYSCONFIG2, &sysconfig2); slurm_getreg(sc, SI470X_STATUSRSSI, &statusrssi); slurm_getreg(sc, SI470X_READCHANNEL, &readchannel); ri->mute = slurm_si470x_get_mute(powercfg); ri->volume = slurm_si470x_get_volume(sysconfig2); ri->stereo = slurm_si470x_get_stereo(powercfg); ri->rfreq = 0; ri->lock = 0; slurm_si470x_get_bandspace(sc, sysconfig2); ri->freq = slurm_si470x_get_freq(sc, readchannel); ri->caps = RADIO_CAPS_DETECT_STEREO | RADIO_CAPS_DETECT_SIGNAL | RADIO_CAPS_SET_MONO | RADIO_CAPS_HW_SEARCH | RADIO_CAPS_HW_AFC | RADIO_CAPS_LOCK_SENSITIVITY; ri->info = slurm_si470x_get_info(statusrssi); return 0; } static int slurm_set_info(void *v, struct radio_info *ri) { struct slurm_softc * const sc = v; slurm_si470x_set_freq(sc, ri->freq); slurm_si470x_set_powercfg(sc, ri->mute, ri->stereo); slurm_si470x_set_volume(sc, ri->volume); return 0; } static int slurm_search(void *v, int f) { struct slurm_softc * const sc = v; return slurm_si470x_search(sc, f); } static usbd_status slurm_getreg(struct slurm_softc *sc, int reg, uint16_t *val) { usbd_status status; uint8_t s[3]; ++reg; s[0] = reg; s[1] = s[2] = 0; status = usbd_get_report(sc->sc_uif, UHID_FEATURE_REPORT, reg, &s, sizeof(s)); *val = (s[1] << 8) | s[2]; return status; } static usbd_status slurm_setreg(struct slurm_softc *sc, int reg, uint16_t val) { usbd_status status; uint8_t s[3]; ++reg; s[0] = reg; s[1] = (val >> 8) & 0xff; s[2] = (val >> 0) & 0xff; status = usbd_set_report(sc->sc_uif, 
UHID_FEATURE_REPORT, reg, &s, sizeof(s)); return status; } static int slurm_si470x_await_stc(struct slurm_softc *sc) { int i; uint16_t statusrssi; for (i = 50; i > 0; i--) { usbd_delay_ms(sc->sc_udev, 2); slurm_getreg(sc, SI470X_STATUSRSSI, &statusrssi); if ((statusrssi & (SI470X_STC|SI470X_SF_BL)) != 0) break; } if (i == 0) return -1; else return 0; } static void slurm_si470x_get_bandspace(struct slurm_softc *sc, uint16_t sysconfig2) { switch (__SHIFTOUT(sysconfig2, SI470X_SPACE)) { default: case 0: sc->sc_space = 200; break; case 1: sc->sc_space = 100; break; case 2: sc->sc_space = 50; break; } switch (__SHIFTOUT(sysconfig2, SI470X_BAND)) { default: case 0: sc->sc_band = 87500; break; case 1: case 2: sc->sc_band = 76000; break; } } static uint32_t slurm_si470x_get_freq(struct slurm_softc *sc, uint16_t readchannel) { readchannel = __SHIFTOUT(readchannel, SI470X_READCHAN); return sc->sc_band + readchannel * sc->sc_space; } static int slurm_si470x_get_info(uint16_t statusrssi) { return (__SHIFTOUT(statusrssi, SI470X_ST) ? RADIO_INFO_STEREO : 0) | (__SHIFTOUT(statusrssi, SI470X_AFCRL) ? 0 : RADIO_INFO_SIGNAL); } static int slurm_si470x_get_mute(uint16_t powercfg) { return __SHIFTOUT(powercfg, SI470X_DMUTE) ? 0 : 1; } static int slurm_si470x_get_stereo(uint16_t powercfg) { return __SHIFTOUT(powercfg, SI470X_MONO) ? 0 : 1; } static int slurm_si470x_get_volume(uint16_t sysconfig2) { return __SHIFTOUT(sysconfig2, SI470X_VOLUME) * SI470X_VOLFACT; } static int slurm_si470x_search(struct slurm_softc *sc, int up) { uint16_t powercfg; slurm_getreg(sc, SI470X_POWERCFG, &powercfg); powercfg &= ~(SI470X_SKMODE|SI470X_SEEKUP|SI470X_SEEK); powercfg |= up ? 
SI470X_SEEKUP : 0; slurm_setreg(sc, SI470X_POWERCFG, SI470X_SEEK|powercfg); slurm_si470x_await_stc(sc); slurm_setreg(sc, SI470X_POWERCFG, powercfg); return 0; } static void slurm_si470x_set_freq(struct slurm_softc *sc, uint32_t freq) { uint16_t channel; channel = (freq - sc->sc_band) / sc->sc_space; slurm_setreg(sc, SI470X_CHANNEL, SI470X_TUNE|channel); slurm_si470x_await_stc(sc); slurm_setreg(sc, SI470X_CHANNEL, channel); #ifdef SLURM_DEBUG device_printf(sc->sc_dev, "%s 0a -> %04x after %d\n", __func__, val, i); #endif } static void slurm_si470x_set_powercfg(struct slurm_softc *sc, int mute, int stereo) { uint16_t powercfg; slurm_getreg(sc, SI470X_POWERCFG, &powercfg); powercfg &= ~(SI470X_DMUTE|SI470X_MONO); powercfg |= SI470X_DSMUTE; powercfg |= mute ? 0 : SI470X_DMUTE; powercfg |= stereo ? 0 : SI470X_MONO; slurm_setreg(sc, SI470X_POWERCFG, powercfg); } static void slurm_si470x_set_volume(struct slurm_softc *sc, int volume) { uint16_t sysconfig2; slurm_getreg(sc, SI470X_SYSCONFIG2, &sysconfig2); sysconfig2 &= ~SI470X_VOLUME; sysconfig2 |= __SHIFTIN(volume / SI470X_VOLFACT, SI470X_VOLUME); slurm_setreg(sc, SI470X_SYSCONFIG2, sysconfig2); } |
| 12 9 12 3 12 1 11 11 12 9 9 9 3 3 3 12 9 3 3 12 23 23 23 23 18 5 23 23 2 2 2 2 2 2 2 2 2 1 21 11 10 10 21 21 3 21 11 10 10 19 19 18 21 21 21 21 1 1 1 1 12 11 10 12 12 11 12 12 11 12 12 9 3 12 3 12 12 12 29 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 
974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 
1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 
1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 
2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 | /* $NetBSD: tcp_subr.c,v 1.290 2022/06/27 01:29:51 knakahara Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation * Facility, NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.290 2022/06/27 01:29:51 knakahara Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_inet_csum.h" #include "opt_mbuftrace.h" #endif #include <sys/param.h> #include <sys/atomic.h> #include <sys/proc.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/once.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/protosw.h> #include <sys/errno.h> #include <sys/kernel.h> #include <sys/pool.h> #include <sys/md5.h> #include <sys/cprng.h> #include <net/route.h> #include <net/if.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/in6_pcb.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_var.h> #include <netinet6/ip6protosw.h> #include <netinet/icmp6.h> #include <netinet6/nd6.h> #endif #include <netinet/tcp.h> #include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_vtw.h> #include <netinet/tcp_private.h> #include <netinet/tcp_congctl.h> #ifdef IPSEC #include <netipsec/ipsec.h> #ifdef INET6 #include <netipsec/ipsec6.h> #endif #include <netipsec/key.h> #endif struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ u_int32_t tcp_now; /* 
slow ticks, for RFC 1323 timestamps */

/* Per-CPU TCP statistics, allocated in do_tcpinit(). */
percpu_t *tcpstat_percpu;

/* patchable/settable parameters for tcp */
int 	tcp_mssdflt = TCP_MSS;		/* default maximum segment size */
int	tcp_minmss = TCP_MINMSS;	/* lower bound on negotiated MSS */
int 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;	/* default RTT estimate */
int	tcp_do_rfc1323 = 1;	/* window scaling / timestamps (obsolete) */
int	tcp_do_rfc1948 = 0;	/* ISS by cryptographic hash */
int	tcp_do_sack = 1;	/* selective acknowledgement */
int	tcp_do_win_scale = 1;	/* RFC1323 window scaling */
int	tcp_do_timestamps = 1;	/* RFC1323 timestamps */
int	tcp_ack_on_push = 0;	/* set to enable immediate ACK-on-PUSH */
int	tcp_do_ecn = 0;		/* Explicit Congestion Notification */
#ifndef TCP_INIT_WIN
#define	TCP_INIT_WIN	4	/* initial slow start window */
#endif
#ifndef TCP_INIT_WIN_LOCAL
#define	TCP_INIT_WIN_LOCAL 4	/* initial slow start window for local nets */
#endif
/*
 * Up to 5 we scale linearly, to reach 3 * 1460; then (iw) * 1460.
 * This is to simulate current behavior for iw == 4
 */
int	tcp_init_win_max[] = {
	 1 * 1460,
	 1 * 1460,
	 2 * 1460,
	 2 * 1460,
	 3 * 1460,
	 5 * 1460,
	 6 * 1460,
	 7 * 1460,
	 8 * 1460,
	 9 * 1460,
	10 * 1460
};
int	tcp_init_win = TCP_INIT_WIN;		/* index into table above */
int	tcp_init_win_local = TCP_INIT_WIN_LOCAL;
int	tcp_mss_ifmtu = 0;	/* base MSS on interface MTU, not route */
int	tcp_rst_ppslim = 100;	/* 100pps */
int	tcp_ackdrop_ppslim = 100;	/* 100pps */
int	tcp_do_loopback_cksum = 0;	/* checksum loopback traffic too */
int	tcp_do_abc = 1;		/* RFC3465 Appropriate byte counting. */
int	tcp_abc_aggressive = 1;	/* 1: L=2*SMSS 0: L=1*SMSS */
int	tcp_sack_tp_maxholes = 32;	/* max SACK holes per connection */
int	tcp_sack_globalmaxholes = 1024;	/* max SACK holes system-wide */
int	tcp_sack_globalholes = 0;	/* current system-wide hole count */
int	tcp_ecn_maxretries = 1;	/* retries before giving up on ECN setup */

/* Truncated TIME_WAIT (MSL) parameters. */
int	tcp_msl_enable = 1;		/* enable TIME_WAIT truncation */
int	tcp_msl_loop = PR_SLOWHZ;	/* MSL for loopback */
int	tcp_msl_local = 5 * PR_SLOWHZ;	/* MSL for 'local' */
int	tcp_msl_remote = TCPTV_MSL;	/* MSL otherwise */
int	tcp_msl_remote_threshold = TCPTV_SRTTDFLT;	/* RTT threshold */
int	tcp_rttlocal = 0;	/* Use RTT to decide who's 'local' */

/* Vestigial TIME_WAIT (tcp_vtw.c) knobs. */
int	tcp4_vtw_enable = 0;	/* 1 to enable */
int	tcp6_vtw_enable = 0;	/* 1 to enable */
int	tcp_vtw_was_enabled = 0;
int	tcp_vtw_entries = 1 << 4;	/* 16 vestigial TIME_WAIT entries */

/* tcb hash */
#ifndef TCBHASHSIZE
#define	TCBHASHSIZE	128
#endif
int	tcbhashsize = TCBHASHSIZE;

/* syn hash parameters */
#define	TCP_SYN_HASH_SIZE	293
#define	TCP_SYN_BUCKET_SIZE	35
int	tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
int	tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
int	tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];

int	tcp_freeq(struct tcpcb *);
static int	tcp_iss_secret_init(void);

static void	tcp_mtudisc_callback(struct in_addr);

#ifdef INET6
static void	tcp6_mtudisc(struct in6pcb *, int);
#endif

/* Pool of tcpcb's, initialized in do_tcpinit(). */
static struct pool tcpcb_pool;

static int tcp_drainwanted;

#ifdef TCP_CSUM_COUNTERS
#include <sys/device.h>

/* IPv4 checksum offload event counters. */
struct evcnt tcp_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum bad");
struct evcnt tcp_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum ok");
struct evcnt tcp_hwcsum_data =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum data");
struct evcnt tcp_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "swcsum");

EVCNT_ATTACH_STATIC(tcp_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp_swcsum);

#if defined(INET6)
/* IPv6 checksum offload event counters. */
struct evcnt tcp6_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum bad");
struct evcnt tcp6_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum ok");
struct evcnt tcp6_hwcsum_data =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum data");
struct evcnt tcp6_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "swcsum");

EVCNT_ATTACH_STATIC(tcp6_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp6_swcsum);
#endif /* defined(INET6) */
#endif /* TCP_CSUM_COUNTERS */

#ifdef TCP_OUTPUT_COUNTERS
#include <sys/device.h>

/* tcp_output() path event counters. */
struct evcnt tcp_output_bigheader =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output big header");
struct evcnt tcp_output_predict_hit =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict hit");
struct evcnt tcp_output_predict_miss =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict miss");
struct evcnt tcp_output_copysmall =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy small");
struct evcnt tcp_output_copybig =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy big");
struct evcnt tcp_output_refbig =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output reference big");

EVCNT_ATTACH_STATIC(tcp_output_bigheader);
EVCNT_ATTACH_STATIC(tcp_output_predict_hit);
EVCNT_ATTACH_STATIC(tcp_output_predict_miss);
EVCNT_ATTACH_STATIC(tcp_output_copysmall);
EVCNT_ATTACH_STATIC(tcp_output_copybig);
EVCNT_ATTACH_STATIC(tcp_output_refbig);
#endif /* TCP_OUTPUT_COUNTERS */

#ifdef TCP_REASS_COUNTERS
#include <sys/device.h>

/* Reassembly queue event counters; all are children of tcp_reass_. */
struct evcnt tcp_reass_ =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
	"tcp_reass", "calls");
struct evcnt tcp_reass_empty =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert into empty queue");
/* Histogram of queue-walk lengths; slot 0 collects everything > 7. */
struct evcnt tcp_reass_iteration[8] = {
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", ">7 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "1 iteration"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "2 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "3 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "4 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "5 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "6 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "7 iterations"),
};
struct evcnt tcp_reass_prependfirst =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "prepend to first");
struct evcnt tcp_reass_prepend =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "prepend");
struct evcnt tcp_reass_insert =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert");
struct evcnt tcp_reass_inserttail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert at tail");
struct evcnt tcp_reass_append =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "append");
struct evcnt tcp_reass_appendtail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "append to tail fragment");
struct evcnt tcp_reass_overlaptail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "overlap at end");
struct evcnt tcp_reass_overlapfront =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "overlap at start");
struct evcnt tcp_reass_segdup =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "duplicate segment");
struct evcnt tcp_reass_fragdup =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "duplicate fragment");

EVCNT_ATTACH_STATIC(tcp_reass_);
EVCNT_ATTACH_STATIC(tcp_reass_empty);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 0);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 1);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 2);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 3);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 4);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 5);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 6);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 7);
EVCNT_ATTACH_STATIC(tcp_reass_prependfirst);
EVCNT_ATTACH_STATIC(tcp_reass_prepend);
EVCNT_ATTACH_STATIC(tcp_reass_insert);
EVCNT_ATTACH_STATIC(tcp_reass_inserttail);
EVCNT_ATTACH_STATIC(tcp_reass_append);
EVCNT_ATTACH_STATIC(tcp_reass_appendtail);
EVCNT_ATTACH_STATIC(tcp_reass_overlaptail);
EVCNT_ATTACH_STATIC(tcp_reass_overlapfront);
EVCNT_ATTACH_STATIC(tcp_reass_segdup);
EVCNT_ATTACH_STATIC(tcp_reass_fragdup);
#endif /* TCP_REASS_COUNTERS */

#ifdef MBUFTRACE
/* mbuf owners for mbuf(9) usage accounting of TCP traffic. */
struct mowner tcp_mowner = MOWNER_INIT("tcp", "");
struct mowner tcp_rx_mowner = MOWNER_INIT("tcp", "rx");
struct mowner tcp_tx_mowner = MOWNER_INIT("tcp", "tx");
struct mowner tcp_sock_mowner = MOWNER_INIT("tcp", "sock");
struct mowner tcp_sock_rx_mowner = MOWNER_INIT("tcp", "sock rx");
struct mowner tcp_sock_tx_mowner = MOWNER_INIT("tcp", "sock tx");
#endif

/*
 * One-time TCP subsystem initialization: PCB table, tcpcb pool,
 * timers, SYN cache, congestion control, templates and statistics.
 * Invoked exactly once via RUN_ONCE() from tcp_init_common().
 */
static int
do_tcpinit(void)
{

	in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);
	pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
	    NULL, IPL_SOFTNET);

	tcp_usrreq_init();

	/* Initialize timer state. */
	tcp_timer_init();

	/* Initialize the compressed state engine. */
	syn_cache_init();

	/* Initialize the congestion control algorithms. */
	tcp_congctl_init();

	/* Initialize the TCPCB template. */
	tcp_tcpcb_template();

	/* Initialize reassembly queue */
	tcpipqent_init();

	/* SACK */
	tcp_sack_init();

	MOWNER_ATTACH(&tcp_tx_mowner);
	MOWNER_ATTACH(&tcp_rx_mowner);
	MOWNER_ATTACH(&tcp_reass_mowner);
	MOWNER_ATTACH(&tcp_sock_mowner);
	MOWNER_ATTACH(&tcp_sock_tx_mowner);
	MOWNER_ATTACH(&tcp_sock_rx_mowner);
	MOWNER_ATTACH(&tcp_mowner);

	tcpstat_percpu = percpu_alloc(sizeof(uint64_t) * TCP_NSTATS);

	vtw_earlyinit();

	tcp_slowtimo_init();

	return 0;
}

/*
 * Common initialization shared by tcp_init() and any other entry point
 * (e.g. IPv6 attach).  basehlen is the network-layer header size; the
 * full protocol header (base + TCP) must fit in an mbuf header, and
 * max_protohdr is raised (atomically, lock-free) to cover it.
 */
void
tcp_init_common(unsigned basehlen)
{
	static ONCE_DECL(dotcpinit);
	unsigned hlen = basehlen + sizeof(struct tcphdr);
	unsigned oldhlen;

	if (max_linkhdr + hlen > MHLEN)
		panic("tcp_init");
	/* CAS loop: only ever raise max_protohdr, never lower it. */
	while ((oldhlen = max_protohdr) < hlen)
		atomic_cas_uint(&max_protohdr, oldhlen, hlen);

	RUN_ONCE(&dotcpinit, do_tcpinit);
}

/*
 * Tcp initialization
 */
void
tcp_init(void)
{

	icmp_mtudisc_callback_register(tcp_mtudisc_callback);

	tcp_init_common(sizeof(struct ip));
}

/*
 * Create template to be used to send tcp packets on a connection.
 * Call after host entry created, allocates an mbuf and fills
 * in a skeletal tcp/ip header, minimizing the amount of work
 * necessary when the connection is used.
 */
struct mbuf *
tcp_template(struct tcpcb *tp)
{
	struct inpcb *inp = tp->t_inpcb;
#ifdef INET6
	struct in6pcb *in6p = tp->t_in6pcb;
#endif
	struct tcphdr *n;
	struct mbuf *m;
	int hlen;

	switch (tp->t_family) {
	case AF_INET:
		hlen = sizeof(struct ip);
		if (inp)
			break;
#ifdef INET6
		if (in6p) {
			/* mapped addr case */
			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr) &&
			    IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
				break;
		}
#endif
		return NULL;	/*EINVAL*/
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		if (in6p) {
			/* more sanity check? */
			break;
		}
		return NULL;	/*EINVAL*/
#endif
	default:
		return NULL;	/*EAFNOSUPPORT*/
	}

	KASSERT(hlen + sizeof(struct tcphdr) <= MCLBYTES);

	/* Reuse the cached template mbuf if it is the right size. */
	m = tp->t_template;
	if (m && m->m_len == hlen + sizeof(struct tcphdr)) {
		;
	} else {
		if (m)
			m_freem(m);
		m = tp->t_template = NULL;
		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m && hlen + sizeof(struct tcphdr) > MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m = NULL;
			}
		}
		if (m == NULL)
			return NULL;
		MCLAIM(m, &tcp_mowner);
		m->m_pkthdr.len = m->m_len = hlen + sizeof(struct tcphdr);
	}

	memset(mtod(m, void *), 0, m->m_len);

	n = (struct tcphdr *)(mtod(m, char *) + hlen);

	switch (tp->t_family) {
	case AF_INET:
	    {
		struct ipovly *ipov;
		mtod(m, struct ip *)->ip_v = 4;
		mtod(m, struct ip *)->ip_hl = hlen >> 2;
		ipov = mtod(m, struct ipovly *);
		ipov->ih_pr = IPPROTO_TCP;
		ipov->ih_len = htons(sizeof(struct tcphdr));
		if (inp) {
			ipov->ih_src = inp->inp_laddr;
			ipov->ih_dst = inp->inp_faddr;
		}
#ifdef INET6
		else if (in6p) {
			/* mapped addr case: low 32 bits hold the v4 addr */
			bcopy(&in6p->in6p_laddr.s6_addr32[3], &ipov->ih_src,
			    sizeof(ipov->ih_src));
			bcopy(&in6p->in6p_faddr.s6_addr32[3], &ipov->ih_dst,
			    sizeof(ipov->ih_dst));
		}
#endif

		/*
		 * Compute the pseudo-header portion of the checksum
		 * now.  We incrementally add in the TCP option and
		 * payload lengths later, and then compute the TCP
		 * checksum right before the packet is sent off onto
		 * the wire.
		 */
		n->th_sum = in_cksum_phdr(ipov->ih_src.s_addr,
		    ipov->ih_dst.s_addr,
		    htons(sizeof(struct tcphdr) + IPPROTO_TCP));
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		struct ip6_hdr *ip6;
		mtod(m, struct ip *)->ip_v = 6;
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_nxt = IPPROTO_TCP;
		ip6->ip6_plen = htons(sizeof(struct tcphdr));
		ip6->ip6_src = in6p->in6p_laddr;
		ip6->ip6_dst = in6p->in6p_faddr;
		ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK;
		if (ip6_auto_flowlabel) {
			ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
			ip6->ip6_flow |=
			    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
		}
		ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
		ip6->ip6_vfc |= IPV6_VERSION;

		/*
		 * Compute the pseudo-header portion of the checksum
		 * now.  We incrementally add in the TCP option and
		 * payload lengths later, and then compute the TCP
		 * checksum right before the packet is sent off onto
		 * the wire.
		 */
		n->th_sum = in6_cksum_phdr(&in6p->in6p_laddr,
		    &in6p->in6p_faddr, htonl(sizeof(struct tcphdr)),
		    htonl(IPPROTO_TCP));
		break;
	    }
#endif
	}

	if (inp) {
		n->th_sport = inp->inp_lport;
		n->th_dport = inp->inp_fport;
	}
#ifdef INET6
	else if (in6p) {
		n->th_sport = in6p->in6p_lport;
		n->th_dport = in6p->in6p_fport;
	}
#endif

	n->th_seq = 0;
	n->th_ack = 0;
	n->th_x2 = 0;
	n->th_off = 5;
	n->th_flags = 0;
	n->th_win = 0;
	n->th_urp = 0;
	return m;
}

/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 */
int
tcp_respond(struct tcpcb *tp, struct mbuf *mtemplate, struct mbuf *m,
    struct tcphdr *th0, tcp_seq ack, tcp_seq seq, int flags)
{
	struct route *ro;
	int error, tlen, win = 0;
	int hlen;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	int family;	/* family on packet, not inpcb/in6pcb! */
	struct tcphdr *th;

	if (tp != NULL && (flags & TH_RST) == 0) {
		KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

		if (tp->t_inpcb)
			win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
#ifdef INET6
		if (tp->t_in6pcb)
			win = sbspace(&tp->t_in6pcb->in6p_socket->so_rcv);
#endif
	}

	th = NULL;	/* Quell uninitialized warning */
	ip = NULL;
#ifdef INET6
	ip6 = NULL;
#endif
	if (m == NULL) {
		/* No incoming segment: build a keepalive from the template. */
		if (!mtemplate)
			return EINVAL;

		/* get family information from template */
		switch (mtod(mtemplate, struct ip *)->ip_v) {
		case 4:
			family = AF_INET;
			hlen = sizeof(struct ip);
			break;
#ifdef INET6
		case 6:
			family = AF_INET6;
			hlen = sizeof(struct ip6_hdr);
			break;
#endif
		default:
			return EAFNOSUPPORT;
		}

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m) {
			MCLAIM(m, &tcp_tx_mowner);
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m = NULL;
			}
		}
		if (m == NULL)
			return ENOBUFS;
		tlen = 0;
		m->m_data += max_linkhdr;
		bcopy(mtod(mtemplate, void *), mtod(m, void *),
		    mtemplate->m_len);
		switch (family) {
		case AF_INET:
			ip = mtod(m, struct ip *);
			th = (struct tcphdr *)(ip + 1);
			break;
#ifdef INET6
		case AF_INET6:
			ip6 = mtod(m, struct ip6_hdr *);
			th = (struct tcphdr *)(ip6 + 1);
			break;
#endif
		}
		flags = TH_ACK;
	} else {
		/*
		 * Responding to a received segment: reuse (or copy) its
		 * headers, then swap src/dst so the reply goes back.
		 */
		if ((m->m_flags & M_PKTHDR) == 0) {
			m_freem(m);
			return EINVAL;
		}
		KASSERT(th0 != NULL);

		/* get family information from m */
		switch (mtod(m, struct ip *)->ip_v) {
		case 4:
			family = AF_INET;
			hlen = sizeof(struct ip);
			ip = mtod(m, struct ip *);
			break;
#ifdef INET6
		case 6:
			family = AF_INET6;
			hlen = sizeof(struct ip6_hdr);
			ip6 = mtod(m, struct ip6_hdr *);
			break;
#endif
		default:
			m_freem(m);
			return EAFNOSUPPORT;
		}
		/* clear h/w csum flags inherited from rx packet */
		m->m_pkthdr.csum_flags = 0;

		if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2))
			tlen = sizeof(*th0);
		else
			tlen = th0->th_off << 2;

		if (m->m_len > hlen + tlen && (m->m_flags & M_EXT) == 0 &&
		    mtod(m, char *) + hlen == (char *)th0) {
			/* Headers are contiguous: trim in place. */
			m->m_len = hlen + tlen;
			m_freem(m->m_next);
			m->m_next = NULL;
		} else {
			/* Otherwise copy the headers into a fresh mbuf. */
			struct mbuf *n;

			KASSERT(max_linkhdr + hlen + tlen <= MCLBYTES);

			MGETHDR(n, M_DONTWAIT, MT_HEADER);
			if (n && max_linkhdr + hlen + tlen > MHLEN) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_freem(n);
					n = NULL;
				}
			}
			if (!n) {
				m_freem(m);
				return ENOBUFS;
			}

			MCLAIM(n, &tcp_tx_mowner);
			n->m_data += max_linkhdr;
			n->m_len = hlen + tlen;
			m_copyback(n, 0, hlen, mtod(m, void *));
			m_copyback(n, hlen, tlen, (void *)th0);

			m_freem(m);
			m = n;
			n = NULL;
		}

#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		switch (family) {
		case AF_INET:
			ip = mtod(m, struct ip *);
			th = (struct tcphdr *)(ip + 1);
			ip->ip_p = IPPROTO_TCP;
			xchg(ip->ip_dst, ip->ip_src, struct in_addr);
			ip->ip_p = IPPROTO_TCP;
			break;
#ifdef INET6
		case AF_INET6:
			ip6 = mtod(m, struct ip6_hdr *);
			th = (struct tcphdr *)(ip6 + 1);
			ip6->ip6_nxt = IPPROTO_TCP;
			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
			ip6->ip6_nxt = IPPROTO_TCP;
			break;
#endif
		}
		xchg(th->th_dport, th->th_sport, u_int16_t);
#undef xchg
		tlen = 0;	/*be friendly with the following code*/
	}

	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_x2 = 0;
	if ((flags & TH_SYN) == 0) {
		if (tp)
			win >>= tp->rcv_scale;
		if (win > TCP_MAXWIN)
			win = TCP_MAXWIN;
		th->th_win = htons((u_int16_t)win);
		th->th_off = sizeof (struct tcphdr) >> 2;
		tlen += sizeof(*th);
	} else
		tlen += th->th_off << 2;
	m->m_len = hlen + tlen;
	m->m_pkthdr.len = hlen + tlen;
	m_reset_rcvif(m);
	th->th_flags = flags;
	th->th_urp = 0;

	switch (family) {
	case AF_INET:
	    {
		struct ipovly *ipov = (struct ipovly *)ip;
		memset(ipov->ih_x1, 0, sizeof ipov->ih_x1);
		ipov->ih_len = htons((u_int16_t)tlen);

		/* th_sum must be zeroed before in_cksum() is run. */
		th->th_sum = 0;
		th->th_sum = in_cksum(m, hlen + tlen);
		ip->ip_len = htons(hlen + tlen);
		ip->ip_ttl = ip_defttl;
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		th->th_sum = 0;
		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
		    tlen);
		ip6->ip6_plen = htons(tlen);
		if (tp && tp->t_in6pcb)
			ip6->ip6_hlim = in6_selecthlim_rt(tp->t_in6pcb);
		else
			ip6->ip6_hlim = ip6_defhlim;
		ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
		if (ip6_auto_flowlabel) {
			ip6->ip6_flow |=
			    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
		}
		break;
	    }
#endif
	}

	/* Pick the cached route from the pcb, if we have one. */
	if (tp != NULL && tp->t_inpcb != NULL) {
		ro = &tp->t_inpcb->inp_route;
		KASSERT(family == AF_INET);
		KASSERT(in_hosteq(ip->ip_dst, tp->t_inpcb->inp_faddr));
	}
#ifdef INET6
	else if (tp != NULL && tp->t_in6pcb != NULL) {
		ro = (struct route *)&tp->t_in6pcb->in6p_route;

#ifdef DIAGNOSTIC
		if (family == AF_INET) {
			if (!IN6_IS_ADDR_V4MAPPED(&tp->t_in6pcb->in6p_faddr))
				panic("tcp_respond: not mapped addr");
			if (memcmp(&ip->ip_dst,
			    &tp->t_in6pcb->in6p_faddr.s6_addr32[3],
			    sizeof(ip->ip_dst)) != 0) {
				panic("tcp_respond: ip_dst != in6p_faddr");
			}
		} else if (family == AF_INET6) {
			if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
			    &tp->t_in6pcb->in6p_faddr))
				panic("tcp_respond: ip6_dst != in6p_faddr");
		} else
			panic("tcp_respond: address family mismatch");
#endif
	}
#endif
	else
		ro = NULL;

	switch (family) {
	case AF_INET:
		error = ip_output(m, NULL, ro,
		    (tp && tp->t_mtudisc ? IP_MTUDISC : 0), NULL,
		    tp ? tp->t_inpcb : NULL);
		break;
#ifdef INET6
	case AF_INET6:
		error = ip6_output(m, NULL, ro, 0, NULL,
		    tp ? tp->t_in6pcb : NULL, NULL);
		break;
#endif
	default:
		error = EAFNOSUPPORT;
		break;
	}

	return error;
}

/*
 * Template TCPCB.  Rather than zeroing a new TCPCB and initializing
 * a bunch of members individually, we maintain this template for the
 * static and mostly-static components of the TCPCB, and copy it into
 * the new TCPCB instead.
 */
static struct tcpcb tcpcb_template = {
	.t_srtt = TCPTV_SRTTBASE,
	.t_rttmin = TCPTV_MIN,

	.snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT,
	.snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT,
	.snd_numholes = 0,
	.snd_cubic_wmax = 0,
	.snd_cubic_wmax_last = 0,
	.snd_cubic_ctime = 0,

	.t_partialacks = -1,
	.t_bytes_acked = 0,
	.t_sndrexmitpack = 0,
	.t_rcvoopack = 0,
	.t_sndzerowin = 0,
};

/*
 * Updates the TCPCB template whenever a parameter that would affect
 * the template is changed.
 */
void
tcp_tcpcb_template(void)
{
	struct tcpcb *tp = &tcpcb_template;
	int flags;

	tp->t_peermss = tcp_mssdflt;
	tp->t_ourmss = tcp_mssdflt;
	tp->t_segsz = tcp_mssdflt;

	flags = 0;
	if (tcp_do_rfc1323 && tcp_do_win_scale)
		flags |= TF_REQ_SCALE;
	if (tcp_do_rfc1323 && tcp_do_timestamps)
		flags |= TF_REQ_TSTMP;
	tp->t_flags = flags;

	/*
	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
	 * reasonable initial retransmit time.
	 */
	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
	    TCPTV_MIN, TCPTV_REXMTMAX);

	/* Keep Alive */
	tp->t_keepinit = MIN(tcp_keepinit, TCP_TIMER_MAXTICKS);
	tp->t_keepidle = MIN(tcp_keepidle, TCP_TIMER_MAXTICKS);
	tp->t_keepintvl = MIN(tcp_keepintvl, TCP_TIMER_MAXTICKS);
	tp->t_keepcnt = MAX(1, MIN(tcp_keepcnt, TCP_TIMER_MAXTICKS));
	tp->t_maxidle = tp->t_keepcnt *
	    MIN(tp->t_keepintvl, TCP_TIMER_MAXTICKS/tp->t_keepcnt);

	/* MSL */
	tp->t_msl = TCPTV_MSL;
}

/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block.
 */
/* family selects inpcb, or in6pcb */
struct tcpcb *
tcp_newtcpcb(int family, void *aux)
{
	struct tcpcb *tp;
	int i;

	/* XXX Consider using a pool_cache for speed. */
	tp = pool_get(&tcpcb_pool, PR_NOWAIT);	/* splsoftnet via tcp_usrreq */
	if (tp == NULL)
		return NULL;
	memcpy(tp, &tcpcb_template, sizeof(*tp));
	TAILQ_INIT(&tp->segq);
	TAILQ_INIT(&tp->timeq);
	tp->t_family = family;		/* may be overridden later on */
	TAILQ_INIT(&tp->snd_holes);
	LIST_INIT(&tp->t_sc);		/* XXX can template this */

	/* Don't sweat this loop; hopefully the compiler will unroll it. */
	for (i = 0; i < TCPT_NTIMERS; i++) {
		callout_init(&tp->t_timer[i], CALLOUT_MPSAFE);
		TCP_TIMER_INIT(tp, i);
	}
	callout_init(&tp->t_delack_ch, CALLOUT_MPSAFE);

	switch (family) {
	case AF_INET:
	    {
		struct inpcb *inp = (struct inpcb *)aux;

		inp->inp_ip.ip_ttl = ip_defttl;
		inp->inp_ppcb = (void *)tp;

		tp->t_inpcb = inp;
		tp->t_mtudisc = ip_mtudisc;
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		struct in6pcb *in6p = (struct in6pcb *)aux;

		in6p->in6p_ip6.ip6_hlim = in6_selecthlim_rt(in6p);
		in6p->in6p_ppcb = (void *)tp;

		tp->t_in6pcb = in6p;
		/* for IPv6, always try to run path MTU discovery */
		tp->t_mtudisc = 1;
		break;
	    }
#endif /* INET6 */
	default:
		/* Unknown family: tear down everything initialized above. */
		for (i = 0; i < TCPT_NTIMERS; i++)
			callout_destroy(&tp->t_timer[i]);
		callout_destroy(&tp->t_delack_ch);
		pool_put(&tcpcb_pool, tp);	/* splsoftnet via tcp_usrreq */
		return NULL;
	}

	/*
	 * Initialize our timebase.  When we send timestamps, we take
	 * the delta from tcp_now -- this means each connection always
	 * gets a timebase of 1, which makes it, among other things,
	 * more difficult to determine how long a system has been up,
	 * and thus how many TCP sequence increments have occurred.
	 *
	 * We start with 1, because 0 doesn't work with linux, which
	 * considers timestamp 0 in a SYN packet as a bug and disables
	 * timestamps.
	 */
	tp->ts_timebase = tcp_now - 1;

	tcp_congctl_select(tp, tcp_congctl_global_name);

	return tp;
}

/*
 * Drop a TCP connection, reporting
 * the specified error.  If connection is synchronized,
 * then send a RST to peer.
 */
struct tcpcb *
tcp_drop(struct tcpcb *tp, int errno)
{
	struct socket *so = NULL;

	KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

	if (tp->t_inpcb)
		so = tp->t_inpcb->inp_socket;
#ifdef INET6
	if (tp->t_in6pcb)
		so = tp->t_in6pcb->in6p_socket;
#endif
	if (!so)
		return NULL;

	if (TCPS_HAVERCVDSYN(tp->t_state)) {
		/* Synchronized: moving to CLOSED makes tcp_output emit RST. */
		tp->t_state = TCPS_CLOSED;
		(void) tcp_output(tp);
		TCP_STATINC(TCP_STAT_DROPS);
	} else
		TCP_STATINC(TCP_STAT_CONNDROPS);
	if (errno == ETIMEDOUT && tp->t_softerror)
		errno = tp->t_softerror;
	so->so_error = errno;
	return (tcp_close(tp));
}

/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 */
struct tcpcb *
tcp_close(struct tcpcb *tp)
{
	struct inpcb *inp;
#ifdef INET6
	struct in6pcb *in6p;
#endif
	struct socket *so;
#ifdef RTV_RTT
	struct rtentry *rt = NULL;
#endif
	struct route *ro;
	int j;

	inp = tp->t_inpcb;
#ifdef INET6
	in6p = tp->t_in6pcb;
#endif
	so = NULL;
	ro = NULL;

	if (inp) {
		so = inp->inp_socket;
		ro = &inp->inp_route;
	}
#ifdef INET6
	else if (in6p) {
		so = in6p->in6p_socket;
		ro = (struct route *)&in6p->in6p_route;
	}
#endif

#ifdef RTV_RTT
	/*
	 * If we sent enough data to get some meaningful characteristics,
	 * save them in the routing entry.  'Enough' is arbitrarily
	 * defined as the sendpipesize (default 4K) * 16.  This would
	 * give us 16 rtt samples assuming we only get one sample per
	 * window (the usual case on a long haul net).  16 samples is
	 * enough for the srtt filter to converge to within 5% of the correct
	 * value; fewer samples and we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
	    ro && (rt = rtcache_validate(ro)) != NULL &&
	    !in_nullhost(satocsin(rt_getkey(rt))->sin_addr)) {
		u_long i = 0;

		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			i = tp->t_srtt *
			    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
		}
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			i = tp->t_rttvar *
			    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshold
		 * got below half the pipesize.  I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_segsz / 2) / tp->t_segsz;
			if (i < 2)
				i = 2;
			i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
		}
	}
	rtcache_unref(rt, ro);
#endif /* RTV_RTT */
	/* free the reassembly queue, if any */
	TCP_REASS_LOCK(tp);
	(void) tcp_freeq(tp);
	TCP_REASS_UNLOCK(tp);

	/* free the SACK holes list. */
	tcp_free_sackholes(tp);
	tcp_congctl_release(tp);
	syn_cache_cleanup(tp);

	if (tp->t_template) {
		m_free(tp->t_template);
		tp->t_template = NULL;
	}

	/*
	 * Detaching the pcb will unlock the socket/tcpcb, and stopping
	 * the timers can also drop the lock.  We need to prevent access
	 * to the tcpcb as it's half torn down.  Flag the pcb as dead
	 * (prevents access by timers) and only then detach it.
	 */
	tp->t_flags |= TF_DEAD;
	if (inp) {
		inp->inp_ppcb = 0;
		soisdisconnected(so);
		in_pcbdetach(inp);
	}
#ifdef INET6
	else if (in6p) {
		in6p->in6p_ppcb = 0;
		soisdisconnected(so);
		in6_pcbdetach(in6p);
	}
#endif
	/*
	 * pcb is no longer visible elsewhere, so we can safely release
	 * the lock in callout_halt() if needed.
	 */
	TCP_STATINC(TCP_STAT_CLOSED);
	for (j = 0; j < TCPT_NTIMERS; j++) {
		callout_halt(&tp->t_timer[j], softnet_lock);
		callout_destroy(&tp->t_timer[j]);
	}
	callout_halt(&tp->t_delack_ch, softnet_lock);
	callout_destroy(&tp->t_delack_ch);

	pool_put(&tcpcb_pool, tp);

	return NULL;
}

/*
 * Drain the reassembly queue of tp, freeing every queued segment.
 * Returns non-zero if anything was freed.  Caller holds the reass lock.
 */
int
tcp_freeq(struct tcpcb *tp)
{
	struct ipqent *qe;
	int rv = 0;

	TCP_REASS_LOCK_CHECK(tp);

	while ((qe = TAILQ_FIRST(&tp->segq)) != NULL) {
		TAILQ_REMOVE(&tp->segq, qe, ipqe_q);
		TAILQ_REMOVE(&tp->timeq, qe, ipqe_timeq);
		m_freem(qe->ipqe_m);
		tcpipqent_free(qe);
		rv = 1;
	}
	tp->t_segqlen = 0;
	KASSERT(TAILQ_EMPTY(&tp->timeq));
	return (rv);
}

/* Fast timeout hook: run a deferred drain request, if one is pending. */
void
tcp_fasttimo(void)
{
	if (tcp_drainwanted) {
		tcp_drain();
		tcp_drainwanted = 0;
	}
}

/* Request a drain; the actual work happens later in tcp_fasttimo(). */
void
tcp_drainstub(void)
{
	tcp_drainwanted = 1;
}

/*
 * Protocol drain routine.  Called when memory is in short supply.
 * Called from pr_fasttimo thus a callout context.
 */
void
tcp_drain(void)
{
	struct inpcb_hdr *inph;
	struct tcpcb *tp;

	mutex_enter(softnet_lock);
	KERNEL_LOCK(1, NULL);

	/*
	 * Free the sequence queue of all TCP connections.
	 */
	TAILQ_FOREACH(inph, &tcbtable.inpt_queue, inph_queue) {
		switch (inph->inph_af) {
		case AF_INET:
			tp = intotcpcb((struct inpcb *)inph);
			break;
#ifdef INET6
		case AF_INET6:
			tp = in6totcpcb((struct in6pcb *)inph);
			break;
#endif
		default:
			tp = NULL;
			break;
		}
		if (tp != NULL) {
			/*
			 * If the tcpcb is already busy,
			 * just bail out now.
			 */
			if (tcp_reass_lock_try(tp) == 0)
				continue;
			if (tcp_freeq(tp))
				TCP_STATINC(TCP_STAT_CONNSDRAINED);
			TCP_REASS_UNLOCK(tp);
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}

/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
void
tcp_notify(struct inpcb *inp, int error)
{
	struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
	struct socket *so = inp->inp_socket;

	/*
	 * Ignore some errors if we are hooked up.
	 * If connection hasn't completed, has retransmitted several times,
	 * and receives a second error, give up now.  This is better
	 * than waiting a long time to establish a connection that
	 * can never complete.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	     error == EHOSTDOWN)) {
		return;
	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
	    tp->t_rxtshift > 3 && tp->t_softerror)
		so->so_error = error;
	else
		tp->t_softerror = error;
	cv_broadcast(&so->so_cv);
	sorwakeup(so);
	sowwakeup(so);
}

#ifdef INET6
void
tcp6_notify(struct in6pcb *in6p, int error)
{
	struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb;
	struct socket *so = in6p->in6p_socket;

	/*
	 * Ignore some errors if we are hooked up.
	 * If connection hasn't completed, has retransmitted several times,
	 * and receives a second error, give up now.  This is better
	 * than waiting a long time to establish a connection that
	 * can never complete.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	     error == EHOSTDOWN)) {
		return;
	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
	    tp->t_rxtshift > 3 && tp->t_softerror)
		so->so_error = error;
	else
		tp->t_softerror = error;
	cv_broadcast(&so->so_cv);
	sorwakeup(so);
	sowwakeup(so);
}
#endif

#ifdef INET6
/*
 * ICMPv6 control-input handler for TCP: dispatch PRC_* commands,
 * handle PRC_MSGSIZE (path MTU) specially, and notify matching pcbs.
 */
void *
tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
{
	struct tcphdr th;
	void (*notify)(struct in6pcb *, int) = tcp6_notify;
	int nmatch;
	struct ip6_hdr *ip6;
	const struct sockaddr_in6 *sa6_src = NULL;
	const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
	struct mbuf *m;
	int off;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	else if (cmd == PRC_QUENCH) {
		/*
		 * Don't honor ICMP Source Quench messages meant for
		 * TCP connections.
		 */
		return NULL;
	} else if (PRC_IS_REDIRECT(cmd))
		notify = in6_rtchange, d = NULL;
	else if (cmd == PRC_MSGSIZE)
		; /* special code is present, see below */
	else if (cmd == PRC_HOSTDEAD)
		d = NULL;
	else if (inet6ctlerrmap[cmd] == 0)
		return NULL;

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
		sa6_src = ip6cp->ip6c_src;
	} else {
		m = NULL;
		ip6 = NULL;
		sa6_src = &sa6_any;
		off = 0;
	}

	if (ip6) {
		/* check if we can safely examine src and dst ports */
		if (m->m_pkthdr.len < off + sizeof(th)) {
			if (cmd == PRC_MSGSIZE)
				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
			return NULL;
		}

		memset(&th, 0, sizeof(th));
		m_copydata(m, off, sizeof(th), (void *)&th);

		if (cmd == PRC_MSGSIZE) {
			int valid = 0;

			/*
			 * Check to see if we have a valid TCP connection
			 * corresponding to the address in the ICMPv6 message
			 * payload.
			 */
			if (in6_pcblookup_connect(&tcbtable, &sa6->sin6_addr,
			    th.th_dport,
			    (const struct in6_addr *)&sa6_src->sin6_addr,
			    th.th_sport, 0, 0))
				valid++;

			/*
			 * Depending on the value of "valid" and routing table
			 * size (mtudisc_{hi,lo}wat), we will:
			 * - recalculate the new MTU and create the
			 *   corresponding routing entry, or
			 * - ignore the MTU change notification.
			 */
			icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);

			/*
			 * no need to call in6_pcbnotify, it should have been
			 * called via callback if necessary
			 */
			return NULL;
		}

		nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport,
		    (const struct sockaddr *)sa6_src, th.th_sport,
		    cmd, NULL, notify);
		if (nmatch == 0 && syn_cache_count &&
		    (inet6ctlerrmap[cmd] == EHOSTUNREACH ||
		     inet6ctlerrmap[cmd] == ENETUNREACH ||
		     inet6ctlerrmap[cmd] == EHOSTDOWN))
			syn_cache_unreach((const struct sockaddr *)sa6_src,
			    sa, &th);
	} else {
		(void) in6_pcbnotify(&tcbtable, sa, 0,
		    (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
	}

	return NULL;
}
#endif

/* assumes that ip header and tcp header are contiguous on mbuf */
void *
tcp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
{
	struct ip *ip = v;
	struct tcphdr *th;
	struct icmp *icp;
	extern const int inetctlerrmap[];
	void (*notify)(struct inpcb *, int) = tcp_notify;
	int errno;
	int nmatch;
	struct tcpcb *tp;
	u_int mtu;
	tcp_seq seq;
	struct inpcb *inp;
#ifdef INET6
	struct in6pcb *in6p;
	struct in6_addr src6, dst6;
#endif

	if (sa->sa_family != AF_INET ||
	    sa->sa_len != sizeof(struct sockaddr_in))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	errno = inetctlerrmap[cmd];
	if (cmd == PRC_QUENCH)
		/*
		 * Don't honor ICMP Source Quench messages meant for
		 * TCP connections.
		 */
		return NULL;
	else if (PRC_IS_REDIRECT(cmd))
		notify = in_rtchange, ip = 0;
	else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
		/*
		 * Check to see if we have a valid TCP connection
		 * corresponding to the address in the ICMP message
		 * payload.
		 *
		 * Boundary check is made in icmp_input(), with ICMP_ADVLENMIN.
		 */
		th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
#ifdef INET6
		in6_in_2_v4mapin6(&ip->ip_src, &src6);
		in6_in_2_v4mapin6(&ip->ip_dst, &dst6);
#endif
		if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst,
		    th->th_dport, ip->ip_src, th->th_sport, 0)) != NULL)
#ifdef INET6
			in6p = NULL;
#else
			;
#endif
#ifdef INET6
		else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6,
		    th->th_dport, &src6, th->th_sport, 0, 0)) != NULL)
			;
#endif
		else
			return NULL;

		/*
		 * Now that we've validated that we are actually communicating
		 * with the host indicated in the ICMP message, locate the
		 * ICMP header, recalculate the new MTU, and create the
		 * corresponding routing entry.
		 */
		icp = (struct icmp *)((char *)ip -
		    offsetof(struct icmp, icmp_ip));
		if (inp) {
			if ((tp = intotcpcb(inp)) == NULL)
				return NULL;
		}
#ifdef INET6
		else if (in6p) {
			if ((tp = in6totcpcb(in6p)) == NULL)
				return NULL;
		}
#endif
		else
			return NULL;
		/* Ignore the ICMP unless it refers to in-flight data. */
		seq = ntohl(th->th_seq);
		if (SEQ_LT(seq, tp->snd_una) || SEQ_GT(seq, tp->snd_max))
			return NULL;
		/*
		 * If the ICMP message advertises a Next-Hop MTU
		 * equal or larger than the maximum packet size we have
		 * ever sent, drop the message.
		 */
		mtu = (u_int)ntohs(icp->icmp_nextmtu);
		if (mtu >= tp->t_pmtud_mtu_sent)
			return NULL;
		if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
			/*
			 * Calculate new MTU, and create corresponding
			 * route (traditional PMTUD).
			 */
			tp->t_flags &= ~TF_PMTUD_PEND;
			icmp_mtudisc(icp, ip->ip_dst);
		} else {
			/*
			 * Record the information got in the ICMP
			 * message; act on it later.
			 * If we had already recorded an ICMP message,
			 * replace the old one only if the new message
			 * refers to an older TCP segment
			 */
			if (tp->t_flags & TF_PMTUD_PEND) {
				if (SEQ_LT(tp->t_pmtud_th_seq, seq))
					return NULL;
			} else
				tp->t_flags |= TF_PMTUD_PEND;
			tp->t_pmtud_th_seq = seq;
			tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
			tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
			tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
		}
		return NULL;
	} else if (cmd == PRC_HOSTDEAD)
		ip = 0;
	else if (errno == 0)
		return NULL;
	if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) {
		th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
		nmatch = in_pcbnotify(&tcbtable, satocsin(sa)->sin_addr,
		    th->th_dport, ip->ip_src, th->th_sport, errno, notify);
		if (nmatch == 0 && syn_cache_count &&
		    (inetctlerrmap[cmd] == EHOSTUNREACH ||
		     inetctlerrmap[cmd] == ENETUNREACH ||
		     inetctlerrmap[cmd] == EHOSTDOWN)) {
			struct sockaddr_in sin;
			memset(&sin, 0, sizeof(sin));
			sin.sin_len = sizeof(sin);
			sin.sin_family = AF_INET;
			sin.sin_port = th->th_sport;
			sin.sin_addr = ip->ip_src;
			syn_cache_unreach((struct sockaddr *)&sin, sa, th);
		}

		/* XXX mapped address case */
	} else
		in_pcbnotifyall(&tcbtable, satocsin(sa)->sin_addr, errno,
		    notify);
	return NULL;
}

/*
 * When a source quench is received, we are being notified of congestion.
 * Close the congestion window down to the Loss Window (one segment).
 * We will gradually open it again as we proceed.
 */
void
tcp_quench(struct inpcb *inp)
{
	struct tcpcb *tp = intotcpcb(inp);

	if (tp) {
		tp->snd_cwnd = tp->t_segsz;
		tp->t_bytes_acked = 0;
	}
}

#ifdef INET6
void
tcp6_quench(struct in6pcb *in6p)
{
	struct tcpcb *tp = in6totcpcb(in6p);

	if (tp) {
		tp->snd_cwnd = tp->t_segsz;
		tp->t_bytes_acked = 0;
	}
}
#endif

/*
 * Path MTU Discovery handlers.
 */
void
tcp_mtudisc_callback(struct in_addr faddr)
{
#ifdef INET6
	struct in6_addr in6;
#endif

	in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc);
#ifdef INET6
	/* Also notify v4-mapped IPv6 connections to the same host. */
	in6_in_2_v4mapin6(&faddr, &in6);
	tcp6_mtudisc_callback(&in6);
#endif
}

/*
 * On receipt of path MTU corrections, flush old route and replace it
 * with the new one.  Retransmit all unacknowledged packets, to ensure
 * that all packets will be received.
 */
void
tcp_mtudisc(struct inpcb *inp, int errno)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;

	if (tp == NULL)
		return;

	rt = in_pcbrtentry(inp);
	if (rt != NULL) {
		/*
		 * If this was not a host route, remove and realloc.
		 */
		if ((rt->rt_flags & RTF_HOST) == 0) {
			in_pcbrtentry_unref(rt, inp);
			in_rtchange(inp, errno);
			if ((rt = in_pcbrtentry(inp)) == NULL)
				return;
		}

		/*
		 * Slow start out of the error condition.  We
		 * use the MTU because we know it's smaller
		 * than the previously transmitted segment.
		 *
		 * Note: This is more conservative than the
		 * suggestion in draft-floyd-incr-init-win-03.
		 */
		if (rt->rt_rmx.rmx_mtu != 0)
			tp->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win,
			    rt->rt_rmx.rmx_mtu);
		in_pcbrtentry_unref(rt, inp);
	}

	/*
	 * Resend unacknowledged packets.
	 */
	tp->snd_nxt = tp->sack_newdata = tp->snd_una;
	tcp_output(tp);
}

#ifdef INET6
/*
 * Path MTU Discovery handlers.
 */
void
tcp6_mtudisc_callback(struct in6_addr *faddr)
{
	struct sockaddr_in6 sin6;

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_addr = *faddr;
	(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0,
	    (const struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL,
	    tcp6_mtudisc);
}

void
tcp6_mtudisc(struct in6pcb *in6p, int errno)
{
	struct tcpcb *tp = in6totcpcb(in6p);
	struct rtentry *rt;

	if (tp == NULL)
		return;

	rt = in6_pcbrtentry(in6p);
	if (rt != NULL) {
		/*
		 * If this was not a host route, remove and realloc.
		 */
		if ((rt->rt_flags & RTF_HOST) == 0) {
			in6_pcbrtentry_unref(rt, in6p);
			in6_rtchange(in6p, errno);
			rt = in6_pcbrtentry(in6p);
			if (rt == NULL)
				return;
		}

		/*
		 * Slow start out of the error condition.  We
		 * use the MTU because we know it's smaller
		 * than the previously transmitted segment.
		 *
		 * Note: This is more conservative than the
		 * suggestion in draft-floyd-incr-init-win-03.
		 */
		if (rt->rt_rmx.rmx_mtu != 0) {
			tp->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win,
			    rt->rt_rmx.rmx_mtu);
		}
		in6_pcbrtentry_unref(rt, in6p);
	}

	/*
	 * Resend unacknowledged packets.
	 */
	tp->snd_nxt = tp->sack_newdata = tp->snd_una;
	tcp_output(tp);
}
#endif /* INET6 */

/*
 * Compute the MSS to advertise to the peer.  Called only during
 * the 3-way handshake.  If we are the server (peer initiated
 * connection), we are called with a pointer to the interface
 * on which the SYN packet arrived.  If we are the client (we
 * initiated connection), we are called with a pointer to the
 * interface out which this connection should go.
 *
 * NOTE: Do not subtract IP option/extension header size nor IPsec
 * header size from MSS advertisement.  MSS option must hold the maximum
 * segment size we can accept, so it must always be:
 *	max(if mtu) - ip header - tcp header
 */
u_long
tcp_mss_to_advertise(const struct ifnet *ifp, int af)
{
	extern u_long in_maxmtu;
	u_long mss = 0;
	u_long hdrsiz;

	/*
	 * In order to avoid defeating path MTU discovery on the peer,
	 * we advertise the max MTU of all attached networks as our MSS,
	 * per RFC 1191, section 3.1.
	 *
	 * We provide the option to advertise just the MTU of
	 * the interface on which we hope this connection will
	 * be receiving.  If we are responding to a SYN, we
	 * will have a pretty good idea about this, but when
	 * initiating a connection there is a bit more doubt.
	 *
	 * We also need to ensure that loopback has a large enough
	 * MSS, as the loopback MTU is never included in in_maxmtu.
	 */

	/* Start from the MTU of the given interface, if any. */
	if (ifp != NULL)
		switch (af) {
#ifdef INET6
		case AF_INET6:	/* FALLTHROUGH */
#endif
		case AF_INET:
			mss = ifp->if_mtu;
			break;
		}

	/*
	 * Unless configured to advertise only the interface MTU,
	 * widen to the max MTU over all attached networks.
	 */
	if (tcp_mss_ifmtu == 0)
		switch (af) {
#ifdef INET6
		case AF_INET6:	/* FALLTHROUGH */
#endif
		case AF_INET:
			mss = uimax(in_maxmtu, mss);
			break;
		}

	/* Subtract the fixed IP + TCP header size for this family. */
	switch (af) {
	case AF_INET:
		hdrsiz = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hdrsiz = sizeof(struct ip6_hdr);
		break;
#endif
	default:
		hdrsiz = 0;
		break;
	}
	hdrsiz += sizeof(struct tcphdr);
	if (mss > hdrsiz)
		mss -= hdrsiz;

	/* Never advertise less than the configured default MSS. */
	mss = uimax(tcp_mssdflt, mss);
	return (mss);
}

/*
 * Set connection variables based on the peer's advertised MSS.
 * We are passed the TCPCB for the actual connection.  If we
 * are the server, we are called by the compressed state engine
 * when the 3-way handshake is complete.  If we are the client,
 * we are called when we receive the SYN,ACK from the server.
 *
 * NOTE: Our advertised MSS value must be initialized in the TCPCB
 * before this routine is called!
 */
void
tcp_mss_from_peer(struct tcpcb *tp, int offer)
{
	struct socket *so;
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
	struct rtentry *rt;
#endif
	u_long bufsize;
	int mss;

	/* A connection is v4 or v6, never both. */
	KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

	so = NULL;
	rt = NULL;
	if (tp->t_inpcb) {
		so = tp->t_inpcb->inp_socket;
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
		rt = in_pcbrtentry(tp->t_inpcb);
#endif
	}

#ifdef INET6
	if (tp->t_in6pcb) {
		so = tp->t_in6pcb->in6p_socket;
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
		rt = in6_pcbrtentry(tp->t_in6pcb);
#endif
	}
#endif

	/*
	 * As per RFC1122, use the default MSS value, unless they
	 * sent us an offer.  Do not accept offers less than 256 bytes.
	 */
	mss = tcp_mssdflt;
	if (offer)
		mss = offer;
	mss = uimax(mss, 256);		/* sanity */
	tp->t_peermss = mss;
	/* Reduce usable payload by our TCP option and IP option overhead. */
	mss -= tcp_optlen(tp);
	if (tp->t_inpcb)
		mss -= ip_optlen(tp->t_inpcb);
#ifdef INET6
	if (tp->t_in6pcb)
		mss -= ip6_optlen(tp->t_in6pcb);
#endif
	/*
	 * XXX XXX What if mss goes negative or zero? This can happen if a
	 * socket has large IPv6 options.  We crash below.
	 */

	/*
	 * If there's a pipesize, change the socket buffer to that size.
	 * Make the socket buffer an integral number of MSS units.  If
	 * the MSS is larger than the socket buffer, artificially decrease
	 * the MSS.
	 */
#ifdef RTV_SPIPE
	if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0)
		bufsize = rt->rt_rmx.rmx_sendpipe;
	else
#endif
	{
		KASSERT(so != NULL);
		bufsize = so->so_snd.sb_hiwat;
	}
	/*
	 * NOTE(review): bufsize is u_long while mss is int, so this
	 * comparison promotes mss to unsigned; harmless while mss is
	 * positive, but ties into the negative-mss XXX above — confirm.
	 */
	if (bufsize < mss)
		mss = bufsize;
	else {
		bufsize = roundup(bufsize, mss);
		if (bufsize > sb_max)
			bufsize = sb_max;
		(void) sbreserve(&so->so_snd, bufsize, so);
	}
	tp->t_segsz = mss;

#ifdef RTV_SSTHRESH
	if (rt != NULL && rt->rt_rmx.rmx_ssthresh) {
		/*
		 * There's some sort of gateway or interface buffer
		 * limit on the path.  Use this to set the slow
		 * start threshold, but set the threshold to no less
		 * than 2 * MSS.
		 */
		tp->snd_ssthresh = uimax(2 * mss, rt->rt_rmx.rmx_ssthresh);
	}
#endif
	/* Release the route reference taken at entry, if any. */
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
	if (tp->t_inpcb)
		in_pcbrtentry_unref(rt, tp->t_inpcb);
#ifdef INET6
	if (tp->t_in6pcb)
		in6_pcbrtentry_unref(rt, tp->t_in6pcb);
#endif
#endif
}

/*
 * Processing necessary when a TCP connection is established:
 * pick the MSL (TIME_WAIT duration) based on how "local" the peer
 * is, enter ESTABLISHED, arm the keepalive timer, and size the
 * receive buffer from the route's recvpipe metric if present.
 */
void
tcp_established(struct tcpcb *tp)
{
	struct socket *so;
#ifdef RTV_RPIPE
	struct rtentry *rt;
#endif
	u_long bufsize;

	/* A connection is v4 or v6, never both. */
	KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

	so = NULL;
	rt = NULL;

	/* This is a while() to reduce the dreadful stairstepping below */
	while (tp->t_inpcb) {
		so = tp->t_inpcb->inp_socket;
#if defined(RTV_RPIPE)
		rt = in_pcbrtentry(tp->t_inpcb);
#endif
		if (__predict_true(tcp_msl_enable)) {
			if (tp->t_inpcb->inp_laddr.s_addr == INADDR_LOOPBACK) {
				/* Loopback peer: shortest MSL. */
				tp->t_msl = tcp_msl_loop ? tcp_msl_loop :
				    (TCPTV_MSL >> 2);
				break;
			}

			if (__predict_false(tcp_rttlocal)) {
				/* This may be adjusted by tcp_input */
				tp->t_msl = tcp_msl_local ? tcp_msl_local :
				    (TCPTV_MSL >> 1);
				break;
			}
			if (in_localaddr(tp->t_inpcb->inp_faddr)) {
				/* Peer on a directly attached network. */
				tp->t_msl = tcp_msl_local ? tcp_msl_local :
				    (TCPTV_MSL >> 1);
				break;
			}
		}
		/* Default: full MSL for remote peers. */
		tp->t_msl = tcp_msl_remote ?
		    tcp_msl_remote : TCPTV_MSL;
		break;
	}

	/* Clamp to a reasonable range.  */
	tp->t_msl = MIN(tp->t_msl, TCP_MAXMSL);

#ifdef INET6
	/* The !tp->t_inpcb lets the compiler know it can't be v4 *and* v6 */
	while (!tp->t_inpcb && tp->t_in6pcb) {
		so = tp->t_in6pcb->in6p_socket;
#if defined(RTV_RPIPE)
		rt = in6_pcbrtentry(tp->t_in6pcb);
#endif
		if (__predict_true(tcp_msl_enable)) {
			extern const struct in6_addr in6addr_loopback;

			if (IN6_ARE_ADDR_EQUAL(&tp->t_in6pcb->in6p_laddr,
			    &in6addr_loopback)) {
				/* Loopback peer: shortest MSL. */
				tp->t_msl = tcp_msl_loop ? tcp_msl_loop :
				    (TCPTV_MSL >> 2);
				break;
			}

			if (__predict_false(tcp_rttlocal)) {
				/* This may be adjusted by tcp_input */
				tp->t_msl = tcp_msl_local ? tcp_msl_local :
				    (TCPTV_MSL >> 1);
				break;
			}
			if (in6_localaddr(&tp->t_in6pcb->in6p_faddr)) {
				/* Peer on a directly attached network. */
				tp->t_msl = tcp_msl_local ? tcp_msl_local :
				    (TCPTV_MSL >> 1);
				break;
			}
		}
		/* Default: full MSL for remote peers. */
		tp->t_msl = tcp_msl_remote ? tcp_msl_remote : TCPTV_MSL;
		break;
	}

	/* Clamp to a reasonable range.  */
	tp->t_msl = MIN(tp->t_msl, TCP_MAXMSL);
#endif

	tp->t_state = TCPS_ESTABLISHED;
	TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);

	/*
	 * Size the receive buffer: prefer the route's recvpipe metric,
	 * falling back to the socket's current high-water mark, rounded
	 * up to a whole number of our MSS units (bounded by sb_max).
	 */
#ifdef RTV_RPIPE
	if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
		bufsize = rt->rt_rmx.rmx_recvpipe;
	else
#endif
	{
		KASSERT(so != NULL);
		bufsize = so->so_rcv.sb_hiwat;
	}
	if (bufsize > tp->t_ourmss) {
		bufsize = roundup(bufsize, tp->t_ourmss);
		if (bufsize > sb_max)
			bufsize = sb_max;
		(void) sbreserve(&so->so_rcv, bufsize, so);
	}
	/* Release the route reference taken above, if any. */
#ifdef RTV_RPIPE
	if (tp->t_inpcb)
		in_pcbrtentry_unref(rt, tp->t_inpcb);
#ifdef INET6
	if (tp->t_in6pcb)
		in6_pcbrtentry_unref(rt, tp->t_in6pcb);
#endif
#endif
}

/*
 * Check if there's an initial rtt or rttvar.  Convert from the
 * route-table units to scaled multiples of the slow timeout timer.
 * Called only during the 3-way handshake.
 */
void
tcp_rmx_rtt(struct tcpcb *tp)
{
#ifdef RTV_RTT
	struct rtentry *rt = NULL;
	int rtt;

	/* A connection is v4 or v6, never both. */
	KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

	if (tp->t_inpcb)
		rt = in_pcbrtentry(tp->t_inpcb);
#ifdef INET6
	if (tp->t_in6pcb)
		rt = in6_pcbrtentry(tp->t_in6pcb);
#endif
	if (rt == NULL)
		return;

	/* Only seed from the route if we have no RTT sample yet. */
	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
		/*
		 * XXX The lock bit for MTU indicates that the value
		 * is also a minimum value; this is subject to time.
		 */
		if (rt->rt_rmx.rmx_locks & RTV_RTT)
			TCPT_RANGESET(tp->t_rttmin,
			    rtt / (RTM_RTTUNIT / PR_SLOWHZ),
			    TCPTV_MIN, TCPTV_REXMTMAX);
		/* Convert route-table RTT units into our scaled srtt. */
		tp->t_srtt = rtt /
		    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
		if (rt->rt_rmx.rmx_rttvar) {
			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
			    ((RTM_RTTUNIT / PR_SLOWHZ) >>
				(TCP_RTTVAR_SHIFT + 2));
		} else {
			/* Default variation is +- 1 rtt */
			tp->t_rttvar =
			    tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
		}
		/* Derive the initial retransmit timeout from srtt/rttvar. */
		TCPT_RANGESET(tp->t_rxtcur,
		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
		    tp->t_rttmin, TCPTV_REXMTMAX);
	}

	/* Release the route reference taken above. */
	if (tp->t_inpcb)
		in_pcbrtentry_unref(rt, tp->t_inpcb);
#ifdef INET6
	if (tp->t_in6pcb)
		in6_pcbrtentry_unref(rt, tp->t_in6pcb);
#endif
#endif
}

tcp_seq	 tcp_iss_seq = 0;	/* tcp initial seq # */

/*
 * Get a new sequence value given a tcp control block
 */
tcp_seq
tcp_new_iss(struct tcpcb *tp)
{

	/* Dispatch on address family; exactly one PCB pointer is set. */
	if (tp->t_inpcb != NULL) {
		return tcp_new_iss1(&tp->t_inpcb->inp_laddr,
		    &tp->t_inpcb->inp_faddr, tp->t_inpcb->inp_lport,
		    tp->t_inpcb->inp_fport, sizeof(tp->t_inpcb->inp_laddr));
	}
#ifdef INET6
	if (tp->t_in6pcb != NULL) {
		return tcp_new_iss1(&tp->t_in6pcb->in6p_laddr,
		    &tp->t_in6pcb->in6p_faddr, tp->t_in6pcb->in6p_lport,
		    tp->t_in6pcb->in6p_fport, sizeof(tp->t_in6pcb->in6p_laddr));
	}
#endif

	panic("tcp_new_iss: unreachable");
}

static u_int8_t tcp_iss_secret[16];	/* 128 bits; should be plenty */

/*
 * Initialize RFC 1948 ISS Secret
 */
static int
tcp_iss_secret_init(void)
{
	/* Fill the secret from the kernel strong CPRNG; runs once. */
	cprng_strong(kern_cprng,
	    tcp_iss_secret, sizeof(tcp_iss_secret), 0);

	return 0;
}

/*
 * This routine actually generates a new TCP initial sequence number.
 */
tcp_seq
tcp_new_iss1(void *laddr, void *faddr, u_int16_t lport, u_int16_t fport,
    size_t addrsz)
{
	tcp_seq tcp_iss;

	if (tcp_do_rfc1948) {
		MD5_CTX ctx;
		u_int8_t hash[16];	/* XXX MD5 knowledge */
		static ONCE_DECL(tcp_iss_secret_control);

		/*
		 * If we haven't been here before, initialize our cryptographic
		 * hash secret.
		 */
		RUN_ONCE(&tcp_iss_secret_control, tcp_iss_secret_init);

		/*
		 * Compute the base value of the ISS.  It is a hash
		 * of (saddr, sport, daddr, dport, secret).
		 */
		MD5Init(&ctx);

		MD5Update(&ctx, (u_char *) laddr, addrsz);
		MD5Update(&ctx, (u_char *) &lport, sizeof(lport));

		MD5Update(&ctx, (u_char *) faddr, addrsz);
		MD5Update(&ctx, (u_char *) &fport, sizeof(fport));

		MD5Update(&ctx, tcp_iss_secret, sizeof(tcp_iss_secret));

		MD5Final(hash, &ctx);

		/* Take the low 32 bits of the digest as the base ISS. */
		memcpy(&tcp_iss, hash, sizeof(tcp_iss));

#ifdef TCPISS_DEBUG
		printf("ISS hash 0x%08x, ", tcp_iss);
#endif
	} else {
		/*
		 * Randomize.
		 */
		tcp_iss = cprng_fast32() & TCP_ISS_RANDOM_MASK;

#ifdef TCPISS_DEBUG
		printf("ISS random 0x%08x, ", tcp_iss);
#endif
	}

	/*
	 * Add the offset in to the computed value.
	 */
	tcp_iss += tcp_iss_seq;

#ifdef TCPISS_DEBUG
	printf("ISS %08x\n", tcp_iss);
#endif

	return tcp_iss;
}

#if defined(IPSEC)
/* compute ESP/AH header size for TCP, including outer IP header. */
size_t
ipsec4_hdrsiz_tcp(struct tcpcb *tp)
{
	struct inpcb *inp;
	size_t hdrsiz;

	/* XXX mapped addr case (tp->t_in6pcb) */
	if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
		return 0;
	switch (tp->t_family) {
	case AF_INET:
		/* XXX: should use correct direction. */
		hdrsiz = ipsec_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, inp);
		break;
	default:
		hdrsiz = 0;
		break;
	}

	return hdrsiz;
}

#ifdef INET6
/* IPv6 counterpart: ESP/AH header size for TCP, including outer header. */
size_t
ipsec6_hdrsiz_tcp(struct tcpcb *tp)
{
	struct in6pcb *in6p;
	size_t hdrsiz;

	if (!tp || !tp->t_template || !(in6p = tp->t_in6pcb))
		return 0;
	switch (tp->t_family) {
	case AF_INET6:
		/* XXX: should use correct direction.
 */
		hdrsiz = ipsec_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, in6p);
		break;
	case AF_INET:
		/* mapped address case - tricky */
	default:
		hdrsiz = 0;
		break;
	}

	return hdrsiz;
}
#endif
#endif /*IPSEC*/

/*
 * Determine the length of the TCP options for this connection.
 *
 * XXX:  What do we do for SACK, when we add that?  Just reserve
 *       all of the space?  Otherwise we can't exactly be incrementing
 *       cwnd by an amount that varies depending on the amount we last
 *       had to SACK!
 */
u_int
tcp_optlen(struct tcpcb *tp)
{
	u_int optlen;

	optlen = 0;

	/* Timestamps count only when both requested and received. */
	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
	    (TF_REQ_TSTMP | TF_RCVD_TSTMP))
		optlen += TCPOLEN_TSTAMP_APPA;

#ifdef TCP_SIGNATURE
	if (tp->t_flags & TF_SIGNATURE)
		optlen += TCPOLEN_SIGLEN;
#endif

	return optlen;
}

/*
 * Total header size for this connection: IP (v4 or v6) + TCP +
 * negotiated TCP options.
 */
u_int
tcp_hdrsz(struct tcpcb *tp)
{
	u_int hlen;

	switch (tp->t_family) {
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
	default:
		hlen = 0;
		break;
	}
	hlen += sizeof(struct tcphdr);

	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
	    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
		hlen += TCPOLEN_TSTAMP_APPA;
#ifdef TCP_SIGNATURE
	if (tp->t_flags & TF_SIGNATURE)
		hlen += TCPOLEN_SIGLEN;
#endif
	return hlen;
}

/* Bump one TCP statistics counter, with bounds check. */
void
tcp_statinc(u_int stat)
{

	KASSERT(stat < TCP_NSTATS);
	TCP_STATINC(stat);
}

/* Add val to one TCP statistics counter, with bounds check. */
void
tcp_statadd(u_int stat, uint64_t val)
{

	KASSERT(stat < TCP_NSTATS);
	TCP_STATADD(stat, val);
}
| 3 1 53 42 1 52 7 48 46 42 39 6 69 11 2 1 1 68 31 31 9 9 9 31 53 43 53 54 54 42 1 40 39 53 1 43 1 53 43 54 26 9 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | /* $NetBSD: scope6.c,v 1.23 2020/06/16 17:12:18 maxv Exp $ */ /* $KAME$ */ /* * Copyright (C) 2000 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: scope6.c,v 1.23 2020/06/16 17:12:18 maxv Exp $"); #include <sys/param.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/systm.h> #include <sys/queue.h> #include <sys/syslog.h> #include <net/if.h> #include <netinet/in.h> #include <netinet6/in6_var.h> #include <netinet6/scope6_var.h> #ifdef ENABLE_DEFAULT_SCOPE int ip6_use_defzone = 1; #else int ip6_use_defzone = 0; #endif static struct scope6_id sid_default; #define SID(ifp) \ ((ifp)->if_afdata[AF_INET6] == NULL ? 
	NULL : \
	((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id)

/* Reset the system-default scope zone ID table to all zeroes. */
void
scope6_init(void)
{

	memset(&sid_default, 0, sizeof(sid_default));
}

/*
 * Allocate and initialize the per-interface scope zone ID table.
 * Interface-local and link-local zones default to the interface index.
 */
struct scope6_id *
scope6_ifattach(struct ifnet *ifp)
{
	struct scope6_id *sid;

	sid = malloc(sizeof(*sid), M_IFADDR, M_WAITOK | M_ZERO);

	/*
	 * XXX: IPV6_ADDR_SCOPE_xxx macros are not standard.
	 * Should we rather hardcode here?
	 */
	sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index;
	sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index;
#ifdef MULTI_SCOPE
	/* by default, we don't care about scope boundary for these scopes. */
	sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1;
	sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1;
#endif

	return sid;
}

/* Free a per-interface scope zone ID table. */
void
scope6_ifdetach(struct scope6_id *sid)
{

	free(sid, M_IFADDR);
}

/*
 * Get a scope of the address. Interface-local, link-local, site-local
 * or global.
 */
int
in6_addrscope(const struct in6_addr *addr)
{
	int scope;

	/* fe80::/10 and fec0::/10: unicast link/site-local prefixes. */
	if (addr->s6_addr[0] == 0xfe) {
		scope = addr->s6_addr[1] & 0xc0;

		switch (scope) {
		case 0x80:
			return IPV6_ADDR_SCOPE_LINKLOCAL;
		case 0xc0:
			return IPV6_ADDR_SCOPE_SITELOCAL;
		default:
			return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
		}
	}

	/* ff00::/8: multicast; scope is the low nibble of the 2nd byte. */
	if (addr->s6_addr[0] == 0xff) {
		scope = addr->s6_addr[1] & 0x0f;

		/*
		 * due to other scope such as reserved,
		 * return scope doesn't work.
		 */
		switch (scope) {
		case IPV6_ADDR_SCOPE_INTFACELOCAL:
			return IPV6_ADDR_SCOPE_INTFACELOCAL;
		case IPV6_ADDR_SCOPE_LINKLOCAL:
			return IPV6_ADDR_SCOPE_LINKLOCAL;
		case IPV6_ADDR_SCOPE_SITELOCAL:
			return IPV6_ADDR_SCOPE_SITELOCAL;
		default:
			return IPV6_ADDR_SCOPE_GLOBAL;
		}
	}

	/* First 15 bytes zero: either ::1 (loopback) or :: (unspecified). */
	if (memcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) {
		if (addr->s6_addr[15] == 1) /* loopback */
			return IPV6_ADDR_SCOPE_LINKLOCAL;
		if (addr->s6_addr[15] == 0) {
			/*
			 * Regard the unspecified addresses as global,
			 * since it has no ambiguity.
			 * XXX: not sure if it's correct...
			 */
			return IPV6_ADDR_SCOPE_GLOBAL;
		}
	}

	return IPV6_ADDR_SCOPE_GLOBAL;
}

/*
 * Look up the system-default zone ID for the scope of the given
 * address; the loopback address gets zone 0 (unambiguous).
 */
uint32_t
scope6_addr2default(const struct in6_addr *addr)
{
	uint32_t id;

	/*
	 * special case: The loopback address should be considered as
	 * link-local, but there's no ambiguity in the syntax.
	 */
	if (IN6_IS_ADDR_LOOPBACK(addr))
		return 0;

	/*
	 * XXX: 32-bit read is atomic on all our platforms, is it OK
	 * not to lock here?
	 */
	id = sid_default.s6id_list[in6_addrscope(addr)];

	return id;
}

/*
 * Validate the specified scope zone ID in the sin6_scope_id field.  If the ID
 * is unspecified (=0), needs to be specified, and the default zone ID can be
 * used, the default value will be used.
 * This routine then generates the kernel-internal form: if the address scope
 * of is interface-local or link-local, embed the interface index in the
 * address.
 */
int
sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
	struct ifnet *ifp;
	uint32_t zoneid;

	if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
		zoneid = scope6_addr2default(&sin6->sin6_addr);

	if (zoneid != 0 &&
	    (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr))) {
		int s;
		/*
		 * At this moment, we only check interface-local and
		 * link-local scope IDs, and use interface indices as the
		 * zone IDs assuming a one-to-one mapping between interfaces
		 * and links.
		 */
		s = pserialize_read_enter();
		ifp = if_byindex(zoneid);
		if (ifp == NULL) {
			/* Zone ID names a nonexistent interface. */
			pserialize_read_exit(s);
			return ENXIO;
		}
		pserialize_read_exit(s);

		/* XXX assignment to 16bit from 32bit variable */
		sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);

		sin6->sin6_scope_id = 0;
	}

	return 0;
}

/*
 * Copy src into dst and convert from the kernel-internal embedded form
 * back to standard sockaddr_in6; NULL on copy or scope-recovery failure.
 */
struct sockaddr *
sockaddr_in6_externalize(struct sockaddr *dst, socklen_t socklen,
    const struct sockaddr *src)
{
	struct sockaddr_in6 *sin6;

	sin6 = satosin6(sockaddr_copy(dst, socklen, src));
	if (sin6 == NULL || sa6_recoverscope(sin6) != 0)
		return NULL;

	return dst;
}

/*
 * generate standard sockaddr_in6 from embedded form.
 */
int
sa6_recoverscope(struct sockaddr_in6 *sin6)
{
	uint32_t zoneid;
	char ip6buf[INET6_ADDRSTRLEN];

	if (sin6->sin6_scope_id != 0) {
		/* Embedded form should never carry an explicit scope ID. */
		log(LOG_NOTICE,
		    "%s: assumption failure (non 0 ID): %s%%%d\n", __func__,
		    IN6_PRINT(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
		/* XXX: proceed anyway... */
	}
	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
		/*
		 * KAME assumption: link id == interface id
		 */
		zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]);
		if (zoneid) {
			int s = pserialize_read_enter();
			if (!if_byindex(zoneid)) {
				/* Embedded zone names a dead interface. */
				pserialize_read_exit(s);
				return ENXIO;
			}
			pserialize_read_exit(s);
			/* Move the zone out of the address into scope_id. */
			sin6->sin6_addr.s6_addr16[1] = 0;
			sin6->sin6_scope_id = zoneid;
		}
	}

	return 0;
}

/*
 * Embed zoneid into in6 (kernel-internal form) if its scope allows
 * embedding; otherwise leave the address untouched.  Always returns 0.
 */
int
in6_setzoneid(struct in6_addr *in6, uint32_t zoneid)
{
	if (IN6_IS_SCOPE_EMBEDDABLE(in6))
		in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */

	return 0;
}

/*
 * Determine the appropriate scope zone ID for in6 and ifp.  If ret_id is
 * non NULL, it is set to the zone ID.  If the zone ID needs to be embedded
 * in the in6_addr structure, in6 will be modified.
 */
int
in6_setscope(struct in6_addr *in6, const struct ifnet *ifp, uint32_t *ret_id)
{
	int scope;
	uint32_t zoneid = 0;
	const struct scope6_id *sid = SID(ifp);

	if (sid == NULL) {
		/* Interface has no IPv6 af-data attached. */
		log(LOG_NOTICE, "%s: no scope id for %s\n", __func__,
		    if_name(ifp));
		return EINVAL;
	}

	/*
	 * special case: the loopback address can only belong to a loopback
	 * interface.
	 */
	if (IN6_IS_ADDR_LOOPBACK(in6)) {
		if (!(ifp->if_flags & IFF_LOOPBACK)) {
			char ip6buf[INET6_ADDRSTRLEN];
			log(LOG_NOTICE,
			    "%s: can't set scope for not loopback "
			    "interface %s and loopback address %s\n",
			    __func__, if_name(ifp), IN6_PRINT(ip6buf, in6));
			return EINVAL;
		} else {
			if (ret_id != NULL)
				*ret_id = 0; /* there's no ambiguity */
			return 0;
		}
	}

	scope = in6_addrscope(in6);

	/* Map the address's scope class to this interface's zone ID. */
	switch (scope) {
	case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
		break;

	case IPV6_ADDR_SCOPE_LINKLOCAL:
		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
		break;

	case IPV6_ADDR_SCOPE_SITELOCAL:
		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
		break;

	case IPV6_ADDR_SCOPE_ORGLOCAL:
		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
		break;

	default:
		zoneid = 0;	/* XXX: treat as global. */
		break;
	}

	if (ret_id != NULL)
		*ret_id = zoneid;

	/* Embed the zone into the address where the scope allows it. */
	return in6_setzoneid(in6, zoneid);
}

/* Human-readable name for the scope class of addr. */
const char *
in6_getscopename(const struct in6_addr *addr)
{
	switch (in6_addrscope(addr)) {
	case IPV6_ADDR_SCOPE_INTFACELOCAL:
		return "interface";
#if IPV6_ADDR_SCOPE_INTFACELOCAL != IPV6_ADDR_SCOPE_NODELOCAL
	case IPV6_ADDR_SCOPE_NODELOCAL:
		return "node";
#endif
	case IPV6_ADDR_SCOPE_LINKLOCAL:
		return "link";
	case IPV6_ADDR_SCOPE_SITELOCAL:
		return "site";
	case IPV6_ADDR_SCOPE_ORGLOCAL:
		return "organization";
	case IPV6_ADDR_SCOPE_GLOBAL:
		return "global";
	default:
		return "unknown";
	}
}

/*
 * Just clear the embedded scope identifier.  Return 0 if the original address
 * is intact; return non 0 if the address is modified.
 */
int
in6_clearscope(struct in6_addr *in6)
{
	int modified = 0;

	if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
		if (in6->s6_addr16[1] != 0)
			modified = 1;
		in6->s6_addr16[1] = 0;
	}

	return modified;
}
| 167 159 161 24 167 168 168 106 168 168 1 160 10 10 168 157 157 13 26 11 1 16 16 16 149 141 8 149 146 103 148 21 21 21 16 16 15 15 15 20 20 20 11 2 10 10 10 10 3 10 10 10 10 10 10 20 20 19 19 19 19 3 16 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 | /* $NetBSD: ffs_balloc.c,v 1.65 2020/09/05 16:30:13 riastradh Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.65 2020/09/05 16:30:13 riastradh Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" #include "opt_uvmhist.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/buf.h> #include <sys/file.h> #include <sys/mount.h> #include <sys/vnode.h> #include <sys/kauth.h> #include <sys/fstrans.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_bswap.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> #ifdef UVMHIST #include <uvm/uvm.h> #endif #include <uvm/uvm_extern.h> #include <uvm/uvm_stat.h> static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int, struct buf **); static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int, struct buf **); static daddr_t ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb) { return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs)); } /* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. 
*/ int ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags, struct buf **bpp) { int error; if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC) error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp); else error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp); if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0) brelse(*bpp, 0); return error; } static int ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags, struct buf **bpp) { daddr_t lbn, lastlbn; struct buf *bp, *nbp; struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; struct ufsmount *ump = ip->i_ump; struct indir indirs[UFS_NIADDR + 2]; daddr_t newb, pref, nb; int32_t *bap; /* XXX ondisk32 */ int deallocated, osize, nsize, num, i, error; int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; int32_t *allocib; int unwindidx = -1; const int needswap = UFS_FSNEEDSWAP(fs); UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist); lbn = ffs_lblkno(fs, off); size = ffs_blkoff(fs, off) + size; if (size > fs->fs_bsize) panic("ffs_balloc: blk too big"); if (bpp != NULL) { *bpp = NULL; } UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp, lbn, size, 0); if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. 
*/ lastlbn = ffs_lblkno(fs, ip->i_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = ffs_blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb), ffs_blkpref_ufs1(ip, lastlbn, nb, flags, &ip->i_ffs1_db[0]), osize, (int)fs->fs_bsize, flags, cred, bpp, &newb); if (error) return (error); ip->i_size = ffs_lblktosize(fs, nb + 1); ip->i_ffs1_size = ip->i_size; uvm_vnp_setsize(vp, ip->i_ffs1_size); ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp && *bpp) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap); if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) { /* * The block is an already-allocated direct block * and the file already extends past this block, * thus this must be a whole block. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, B_MODIFY, bpp); if (error) { return (error); } } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size)); nsize = ffs_fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already * at least as big as we want. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, osize, B_MODIFY, bpp); if (error) { return (error); } } return 0; } else { /* * The existing block is smaller than we want, * grow it. */ mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, lbn, ffs_getdb(fs, ip, lbn), ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags, &ip->i_ffs1_db[0]), osize, nsize, flags, cred, bpp, &newb); if (error) return (error); } } else { /* * the block was not previously allocated, * allocate a new block or fragment. 
*/ if (ip->i_size < ffs_lblktosize(fs, lbn + 1)) nsize = ffs_fragroundup(fs, size); else nsize = fs->fs_bsize; mutex_enter(&ump->um_lock); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags, &ip->i_ffs1_db[0]), nsize, flags, cred, &newb); if (error) return (error); if (bpp != NULL) { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb), nsize, (flags & B_CLRBUF) != 0, bpp); if (error) return error; } } ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return (error); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap); allocib = NULL; allocblk = allociblk; if (nb == 0) { mutex_enter(&ump->um_lock); pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | B_METAONLY, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &bp); if (error) goto fail; /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_ffs1_ib[indirs[0].in_off]; *allocib = ufs_rw32(nb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp); if (error) { goto fail; } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ nb = ufs_rw32(bap[indirs[i].in_off], needswap); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } if (fscow_run(bp, true) != 0) { brelse(bp, 0); goto fail; } mutex_enter(&ump->um_lock); /* Try to keep snapshot indirect blocks contiguous. 
*/ if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0) pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off, flags | B_METAONLY, &bap[0]); if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | B_METAONLY, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &nbp); if (error) { brelse(bp, 0); goto fail; } /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } if (flags & B_METAONLY) { KASSERT(bpp != NULL); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { if (fscow_run(bp, true) != 0) { brelse(bp, 0); goto fail; } mutex_enter(&ump->um_lock); pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp); if (error) { brelse(bp, 0); goto fail; } } bap[indirs[num].in_off] = ufs_rw32(nb, needswap); if (allocib == NULL && unwindidx < 0) { unwindidx = i - 1; } /* * If required, write synchronously, otherwise use * delayed write. 
*/ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, B_MODIFY, &nbp); if (error) { goto fail; } } else { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &nbp); if (error) goto fail; } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ if (unwindidx >= 0) { /* * First write out any buffers we've created to resolve their * softdeps. This must be done in reverse order of creation * so that we resolve the dependencies in one pass. * Write the cylinder group buffers for these buffers too. */ for (i = num; i >= unwindidx; i--) { if (i == 0) { break; } if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK, fs->fs_bsize, false, &bp) != 0) continue; if (bp->b_oflags & BO_DELWRI) { nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs, FFS_DBTOFSB(fs, bp->b_blkno)))); bwrite(bp); if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK, fs->fs_cgsize, false, &bp) != 0) continue; if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); } } else { brelse(bp, BC_INVAL); } } /* * Undo the partial allocation. 
*/ if (unwindidx == 0) { *allocib = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, 0, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ bap[indirs[unwindidx].in_off] = 0; bwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK, fs->fs_bsize, false, &bp) == 0) brelse(bp, BC_INVAL); } } for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); deallocated += fs->fs_bsize; } if (deallocated) { #if defined(QUOTA) || defined(QUOTA2) /* * Restore user's disk quota because allocation failed. */ (void)chkdq(ip, -btodb(deallocated), cred, FORCE); #endif ip->i_ffs1_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); } static int ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags, struct buf **bpp) { daddr_t lbn, lastlbn; struct buf *bp, *nbp; struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; struct ufsmount *ump = ip->i_ump; struct indir indirs[UFS_NIADDR + 2]; daddr_t newb, pref, nb; int64_t *bap; int deallocated, osize, nsize, num, i, error; daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; int64_t *allocib; int unwindidx = -1; const int needswap = UFS_FSNEEDSWAP(fs); UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist); lbn = ffs_lblkno(fs, off); size = ffs_blkoff(fs, off) + size; if (size > fs->fs_bsize) panic("ffs_balloc: blk too big"); if (bpp != NULL) { *bpp = NULL; } UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp, lbn, size, 0); if (lbn < 0) return (EFBIG); /* * Check for allocating external data. 
*/ if (flags & IO_EXT) { struct ufs2_dinode *dp = ip->i_din.ffs2_din; if (lbn >= UFS_NXADDR) return (EFBIG); /* * If the next write will extend the data into a new block, * and the data is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = ffs_lblkno(fs, dp->di_extsize); if (lastlbn < lbn) { nb = lastlbn; osize = ffs_sblksize(fs, dp->di_extsize, nb); if (osize < fs->fs_bsize && osize > 0) { mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, -1 - nb, ffs_extb(fs, dp, nb), ffs_blkpref_ufs2(ip, lastlbn, (int)nb, flags, &dp->di_extb[0]), osize, (int)fs->fs_bsize, flags, cred, &bp, &newb); if (error) return (error); dp->di_extsize = ffs_lblktosize(fs, nb + 1); dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * All blocks are direct blocks */ nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) { error = bread(vp, -1 - lbn, fs->fs_bsize, 0, &bp); if (error) { return (error); } mutex_enter(bp->b_objlock); bp->b_blkno = FFS_FSBTODB(fs, nb); mutex_exit(bp->b_objlock); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize)); nsize = ffs_fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, -1 - lbn, osize, 0, &bp); if (error) { return (error); } mutex_enter(bp->b_objlock); bp->b_blkno = FFS_FSBTODB(fs, nb); mutex_exit(bp->b_objlock); } else { mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, -1 - lbn, ffs_extb(fs, dp, lbn), ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags, &dp->di_extb[0]), osize, nsize, flags, cred, &bp, &newb); if (error) return (error); } } else { if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1)) nsize = ffs_fragroundup(fs, size); else nsize = fs->fs_bsize; mutex_enter(&ump->um_lock); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags, &dp->di_extb[0]), nsize, flags, cred, &newb); if (error) return (error); error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb), nsize, (flags & B_CLRBUF) != 0, &bp); if (error) return error; } dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. 
*/ lastlbn = ffs_lblkno(fs, ip->i_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = ffs_blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn), ffs_blkpref_ufs2(ip, lastlbn, nb, flags, &ip->i_ffs2_db[0]), osize, (int)fs->fs_bsize, flags, cred, bpp, &newb); if (error) return (error); ip->i_size = ffs_lblktosize(fs, nb + 1); ip->i_ffs2_size = ip->i_size; uvm_vnp_setsize(vp, ip->i_size); ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap); if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) { /* * The block is an already-allocated direct block * and the file already extends past this block, * thus this must be a whole block. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, B_MODIFY, bpp); if (error) { return (error); } } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size)); nsize = ffs_fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already * at least as big as we want. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, osize, B_MODIFY, bpp); if (error) { return (error); } } return 0; } else { /* * The existing block is smaller than we want, * grow it. */ mutex_enter(&ump->um_lock); error = ffs_realloccg(ip, lbn, ffs_getdb(fs, ip, lbn), ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags, &ip->i_ffs2_db[0]), osize, nsize, flags, cred, bpp, &newb); if (error) return (error); } } else { /* * the block was not previously allocated, * allocate a new block or fragment. 
*/ if (ip->i_size < ffs_lblktosize(fs, lbn + 1)) nsize = ffs_fragroundup(fs, size); else nsize = fs->fs_bsize; mutex_enter(&ump->um_lock); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags, &ip->i_ffs2_db[0]), nsize, flags, cred, &newb); if (error) return (error); if (bpp != NULL) { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb), nsize, (flags & B_CLRBUF) != 0, bpp); if (error) return error; } } ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return (error); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap); allocib = NULL; allocblk = allociblk; if (nb == 0) { mutex_enter(&ump->um_lock); pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | B_METAONLY, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &bp); if (error) goto fail; /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_ffs2_ib[indirs[0].in_off]; *allocib = ufs_rw64(nb, needswap); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp); if (error) { goto fail; } bap = (int64_t *)bp->b_data; nb = ufs_rw64(bap[indirs[i].in_off], needswap); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } if (fscow_run(bp, true) != 0) { brelse(bp, 0); goto fail; } mutex_enter(&ump->um_lock); /* Try to keep snapshot indirect blocks contiguous. 
*/ if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0) pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off, flags | B_METAONLY, &bap[0]); if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | B_METAONLY, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &nbp); if (error) { brelse(bp, 0); goto fail; } /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } if (flags & B_METAONLY) { KASSERT(bpp != NULL); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { if (fscow_run(bp, true) != 0) { brelse(bp, 0); goto fail; } mutex_enter(&ump->um_lock); pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp); if (error) { brelse(bp, 0); goto fail; } } bap[indirs[num].in_off] = ufs_rw64(nb, needswap); if (allocib == NULL && unwindidx < 0) { unwindidx = i - 1; } /* * If required, write synchronously, otherwise use * delayed write. 
*/ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, B_MODIFY, &nbp); if (error) { goto fail; } } else { error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb), fs->fs_bsize, true, &nbp); if (error) goto fail; } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ if (unwindidx >= 0) { /* * First write out any buffers we've created to resolve their * softdeps. This must be done in reverse order of creation * so that we resolve the dependencies in one pass. * Write the cylinder group buffers for these buffers too. */ for (i = num; i >= unwindidx; i--) { if (i == 0) { break; } if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK, fs->fs_bsize, false, &bp) != 0) continue; if (bp->b_oflags & BO_DELWRI) { nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs, FFS_DBTOFSB(fs, bp->b_blkno)))); bwrite(bp); if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK, fs->fs_cgsize, false, &bp) != 0) continue; if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); } } else { brelse(bp, BC_INVAL); } } /* * Now that any dependencies that we created have been * resolved, we can undo the partial allocation. 
*/ if (unwindidx == 0) { *allocib = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, 0, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int64_t *)bp->b_data; bap[indirs[unwindidx].in_off] = 0; bwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK, fs->fs_bsize, false, &bp) == 0) brelse(bp, BC_INVAL); } } for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); deallocated += fs->fs_bsize; } if (deallocated) { #if defined(QUOTA) || defined(QUOTA2) /* * Restore user's disk quota because allocation failed. */ (void)chkdq(ip, -btodb(deallocated), cred, FORCE); #endif ip->i_ffs2_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); } |
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | /* $NetBSD: tty_bsdpty.c,v 1.20 2014/04/04 18:11:58 christos Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tty_bsdpty.c,v 1.20 2014/04/04 18:11:58 christos Exp $");

#include "opt_ptm.h"

#ifndef NO_DEV_PTM
#ifdef COMPAT_BSDPTY
/* bsd tty implementation for pty multiplexor driver /dev/ptm{,x} */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/lwp.h>
#include <sys/tty.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/signalvar.h>
#include <sys/filedesc.h>
#include <sys/conf.h>
#include <sys/poll.h>
#include <sys/pty.h>
#include <sys/kauth.h>

/*
 * pts == /dev/tty[pqrs]?
 * ptc == /dev/pty[pqrs]?
 */

/*
 * All this hard-coding is really evil.
 */
#define TTY_GID		4			/* group owner for slave nodes */
#define TTY_PERM	(S_IRUSR|S_IWUSR|S_IWGRP)
#define TTY_TEMPLATE	"/dev/XtyXX"		/* bf[5]='p'/'t', bf[8..9] unit */
#define TTY_NAMESIZE	sizeof(TTY_TEMPLATE)
#define TTY_LETTERS	"pqrstuvwxyzPQRST"
#define TTY_OLD_SUFFIX  "0123456789abcdef"	/* units 0..255 */
#define TTY_NEW_SUFFIX  "ghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

static int pty_makename(struct mount *, struct lwp *, char *, size_t, dev_t,
    char);
static int pty_allocvp(struct mount *, struct lwp *, struct vnode **, dev_t,
    char);
static void pty_getvattr(struct mount *, struct lwp *, struct vattr *);
static int pty__getmp(struct lwp *, struct mount **);

/* BSD-style pty backend handed to the /dev/ptm multiplexor. */
struct ptm_pty ptm_bsdpty = {
	pty_allocvp,
	pty_makename,
	pty_getvattr,
	pty__getmp,
};

/*
 * pty_makename: build the device node name for unit "dev", side "c"
 * ('p' = master, 't' = slave), into bf (at least TTY_NAMESIZE bytes).
 * Units below 256 use the historic hex suffix; higher units use the
 * extended suffix alphabet.  Returns 0 or EINVAL if bf is too small.
 */
static int
/*ARGSUSED*/
pty_makename(struct mount *mp, struct lwp *l, char *bf, size_t bufsiz,
    dev_t dev, char c)
{
	size_t nt;
	/*
	 * NOTE(review): local "minor" shadows the minor() macro; the
	 * initializer still expands the macro (function-like), but the
	 * name is an accident waiting to happen.
	 */
	dev_t minor = minor(dev);
	const char *suffix;

	if (bufsiz < TTY_NAMESIZE)
		return EINVAL;

	(void)memcpy(bf, TTY_TEMPLATE, TTY_NAMESIZE);

	if (minor < 256) {
		suffix = TTY_OLD_SUFFIX;
		nt = sizeof(TTY_OLD_SUFFIX) - 1;
	} else {
		minor -= 256;
		suffix = TTY_NEW_SUFFIX;
		nt = sizeof(TTY_NEW_SUFFIX) - 1;
	}

	bf[5] = c;			/* 'p' or 't' */
	bf[8] = TTY_LETTERS[minor / nt];
	bf[9] = suffix[minor % nt];
	return 0;
}

/*
 * pty_allocvp: look up the /dev node for unit "dev", side "ms", and
 * return its (locked) vnode in *vp.  The mount argument is unused.
 */
static int
/*ARGSUSED*/
pty_allocvp(struct mount *mp, struct lwp *l, struct vnode **vp, dev_t dev,
    char ms)
{
	int error;
	struct pathbuf *pb;
	struct nameidata nd;
	char name[TTY_NAMESIZE];

	error = pty_makename(NULL, l, name, sizeof(name), dev, ms);
	if (error)
		return error;

	pb = pathbuf_create(name);
	if (pb == NULL) {
		return ENOMEM;
	}

	NDINIT(&nd, LOOKUP, NOFOLLOW|LOCKLEAF, pb);
	if ((error = namei(&nd)) != 0) {
		pathbuf_destroy(pb);
		return error;
	}
	*vp = nd.ni_vp;
	pathbuf_destroy(pb);
	return 0;
}

/*
 * pty_getvattr: fill in the attributes (owner = caller's real uid,
 * hard-coded tty group and mode) applied to a slave device node.
 */
static void
/*ARGSUSED*/
pty_getvattr(struct mount *mp, struct lwp *l, struct vattr *vattr)
{
	vattr_null(vattr);
	/* get real uid */
	vattr->va_uid = kauth_cred_getuid(l->l_cred);
	vattr->va_gid = TTY_GID;
	vattr->va_mode = TTY_PERM;
}

/* pty__getmp: the BSD pty backend has no private mount; report NULL. */
static int
pty__getmp(struct lwp *l __unused, struct mount **mpp)
{
	*mpp = 0;
	return 0;
}

#endif /* COMPAT_BSDPTY */
#endif /* NO_DEV_PTM */
| 4 4 2 3 2 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 3 3 2 2 2 2 2 2 2 1 2 1 2 1 1 2 2 2 2 2 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 
1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 
1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 | /* $NetBSD: nvlist.c,v 1.8 2019/07/23 00:49:16 rmind Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2013 The FreeBSD Foundation * Copyright (c) 2013-2015 Mariusz Zaborski <oshogbo@FreeBSD.org> * All rights reserved. * * This software was developed by Pawel Jakub Dawidek under sponsorship from * the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
#ifdef __FreeBSD__
__FBSDID("$FreeBSD: head/sys/contrib/libnv/nvlist.c 335347 2018-06-18 22:57:32Z oshogbo $");
#else
__RCSID("$NetBSD: nvlist.c,v 1.8 2019/07/23 00:49:16 rmind Exp $");
#endif

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/queue.h>

#if defined(_KERNEL) || defined(_STANDALONE)
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#ifdef __FreeBSD__
#include <machine/stdarg.h>
#endif
#else
#include <sys/socket.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "msgio.h"
#endif

#ifdef HAVE_PJDLOG
#include <pjdlog.h>
#endif

#ifdef __FreeBSD__
#include <sys/nv.h>
#else
#include "nv.h"
#endif
#include "nv_impl.h"
#include "nvlist_impl.h"
#include "nvpair_impl.h"

/*
 * Without pjdlog, map the PJDLOG_* assertion/abort macros onto whatever the
 * environment provides: KASSERT/MPASS + panic() in kernel/standalone builds,
 * assert() + abort() in userland, and no-ops under lint.
 */
#ifndef HAVE_PJDLOG
#if defined(_KERNEL) || defined(_STANDALONE)
#ifdef __FreeBSD__
#define PJDLOG_ASSERT(...) MPASS(__VA_ARGS__)
#else
#define PJDLOG_ASSERT(...) KASSERT(__VA_ARGS__)
#endif
#define PJDLOG_RASSERT(expr, ...) KASSERT(expr, (__VA_ARGS__))
#define PJDLOG_ABORT(...) panic(__VA_ARGS__)
#else
#ifndef __lint__
#include <assert.h>
#define PJDLOG_ASSERT(...) assert(__VA_ARGS__)
#define PJDLOG_RASSERT(expr, ...) assert(expr)
#define PJDLOG_ABORT(...) do { \
	fprintf(stderr, "%s:%u: ", __FILE__, __LINE__); \
	fprintf(stderr, __VA_ARGS__); \
	fprintf(stderr, "\n"); \
	abort(); \
} while (/*CONSTCOND*/0)
#else
#define PJDLOG_ASSERT(...)
#define PJDLOG_RASSERT(expr, ...)
#define PJDLOG_ABORT(...)
#endif
#endif
#endif

/*
 * Private flags travel inside the packed header; public flags are the only
 * ones callers may pass to nvlist_create()/nvlist_set_flags().
 */
#define NV_FLAG_PRIVATE_MASK	(NV_FLAG_BIG_ENDIAN | NV_FLAG_IN_ARRAY)
#define NV_FLAG_PUBLIC_MASK	(NV_FLAG_IGNORE_CASE | NV_FLAG_NO_UNIQUE)
#define NV_FLAG_ALL_MASK	(NV_FLAG_PRIVATE_MASK | NV_FLAG_PUBLIC_MASK)

/* In-memory magic used to detect use of a destroyed/corrupt nvlist. */
#define NVLIST_MAGIC	0x6e766c	/* "nvl" */
struct nvlist {
	int		 nvl_magic;	/* NVLIST_MAGIC while the list is live */
	int		 nvl_error;	/* sticky error; checked by most entry points */
	int		 nvl_flags;
	nvpair_t	*nvl_parent;	/* nvpair embedding this nvlist, if any */
	nvpair_t	*nvl_array_next; /* next element when part of an nvlist array */
	struct nvl_head	 nvl_head;	/* TAILQ of contained nvpairs */
};

#define NVLIST_ASSERT(nvl) do { \
	PJDLOG_ASSERT((nvl) != NULL); \
	PJDLOG_ASSERT((nvl)->nvl_magic == NVLIST_MAGIC); \
} while (/*CONSTCOND*/0)

#ifdef _KERNEL
MALLOC_DEFINE(M_NVLIST, "nvlist", "kernel nvlist");
#endif

#define NVPAIR_ASSERT(nvp) nvpair_assert(nvp)

/* On-wire header magic/version for the packed (serialized) representation. */
#define NVLIST_HEADER_MAGIC	0x6c
#define NVLIST_HEADER_VERSION	0x00
struct nvlist_header {
	uint8_t		nvlh_magic;
	uint8_t		nvlh_version;
	uint8_t		nvlh_flags;
	uint64_t	nvlh_descriptors; /* number of descriptors in the payload */
	uint64_t	nvlh_size;	  /* payload size, header excluded */
} __packed;

/*
 * Allocate and initialize an empty nvlist.  Only public flags are accepted.
 * Returns NULL on allocation failure.
 */
nvlist_t *
nvlist_create(int flags)
{
	nvlist_t *nvl;

	PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0);

	nvl = nv_malloc(sizeof(*nvl));
	if (nvl == NULL)
		return (NULL);
	nvl->nvl_error = 0;
	nvl->nvl_flags = flags;
	nvl->nvl_parent = NULL;
	nvl->nvl_array_next = NULL;
	TAILQ_INIT(&nvl->nvl_head);
	nvl->nvl_magic = NVLIST_MAGIC;

	return (nvl);
}

/*
 * Free an nvlist and all nvpairs it contains.  NULL is a no-op.
 * errno is preserved across the teardown.
 */
void
nvlist_destroy(nvlist_t *nvl)
{
	nvpair_t *nvp;

	if (nvl == NULL)
		return;

	ERRNO_SAVE();

	NVLIST_ASSERT(nvl);

	while ((nvp = nvlist_first_nvpair(nvl)) != NULL) {
		nvlist_remove_nvpair(nvl, nvp);
		nvpair_free(nvp);
	}
	if (nvl->nvl_array_next != NULL)
		nvpair_free_structure(nvl->nvl_array_next);
	nvl->nvl_array_next = NULL;
	nvl->nvl_parent = NULL;
	/* Clear the magic so stale pointers trip NVLIST_ASSERT. */
	nvl->nvl_magic = 0;
	nv_free(nvl);

	ERRNO_RESTORE();
}

/*
 * Record a sticky error on the nvlist; only the first error is kept.
 */
void
nvlist_set_error(nvlist_t *nvl, int error)
{

	PJDLOG_ASSERT(error != 0);

	/*
	 * Check for error != 0 so that we don't do the wrong thing if somebody
	 * tries to abuse this API when asserts are disabled.
	 */
	if (nvl != NULL && error != 0 && nvl->nvl_error == 0)
		nvl->nvl_error = error;
}

/*
 * Return the sticky error; a NULL nvlist reports ENOMEM (it stands in for a
 * failed nvlist_create()).
 */
int
nvlist_error(const nvlist_t *nvl)
{

	if (nvl == NULL)
		return (ENOMEM);

	NVLIST_ASSERT(nvl);

	return (nvl->nvl_error);
}

/* Return the nvpair that embeds this nvlist (NULL for a top-level list). */
nvpair_t *
nvlist_get_nvpair_parent(const nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);

	return (nvl->nvl_parent);
}

/*
 * Return the nvlist containing this one; also hands the parent nvpair back
 * through cookiep so iteration can resume there.
 */
const nvlist_t *
nvlist_get_parent(const nvlist_t *nvl, void **cookiep)
{
	nvpair_t *nvp;

	NVLIST_ASSERT(nvl);

	nvp = nvl->nvl_parent;
	if (cookiep != NULL)
		*cookiep = nvp;
	if (nvp == NULL)
		return (NULL);

	return (nvpair_nvlist(nvp));
}

void
nvlist_set_parent(nvlist_t *nvl, nvpair_t *parent)
{

	NVLIST_ASSERT(nvl);

	nvl->nvl_parent = parent;
}

/*
 * Link/unlink this nvlist into an nvlist-array chain.  Passing NULL clears
 * the in-array flag and frees the previous chain nvpair.
 */
void
nvlist_set_array_next(nvlist_t *nvl, nvpair_t *ele)
{

	NVLIST_ASSERT(nvl);

	if (ele != NULL) {
		nvl->nvl_flags |= NV_FLAG_IN_ARRAY;
	} else {
		nvl->nvl_flags &= ~NV_FLAG_IN_ARRAY;
		nv_free(nvl->nvl_array_next);
	}

	nvl->nvl_array_next = ele;
}

nvpair_t *
nvlist_get_array_next_nvpair(nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);

	return (nvl->nvl_array_next);
}

bool
nvlist_in_array(const nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);

	return ((nvl->nvl_flags & NV_FLAG_IN_ARRAY) != 0);
}

/* Next sibling nvlist in the array chain, or NULL at the end. */
const nvlist_t *
nvlist_get_array_next(const nvlist_t *nvl)
{
	nvpair_t *nvp;

	NVLIST_ASSERT(nvl);

	nvp = nvl->nvl_array_next;
	if (nvp == NULL)
		return (NULL);

	return (nvpair_get_nvlist(nvp));
}

/*
 * Step "up or sideways": prefer the next array sibling (cookie reset to
 * NULL), otherwise fall back to the parent.  This is the single traversal
 * primitive shared by dump/size/pack/descriptor walkers below.
 */
const nvlist_t *
nvlist_get_pararr(const nvlist_t *nvl, void **cookiep)
{
	const nvlist_t *ret;

	ret = nvlist_get_array_next(nvl);
	if (ret != NULL) {
		if (cookiep != NULL)
			*cookiep = NULL;
		return (ret);
	}

	return (nvlist_get_parent(nvl, cookiep));
}

bool
nvlist_empty(const nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	return (nvlist_first_nvpair(nvl) == NULL);
}

/* Public flags only; private flags are never exposed to callers. */
int
nvlist_flags(const nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	return (nvl->nvl_flags & NV_FLAG_PUBLIC_MASK);
}

void
nvlist_set_flags(nvlist_t *nvl, int flags)
{

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	/* NOTE(review): flags are stored unmasked here, unlike nvlist_create()
	 * which asserts only public bits — confirm callers never pass private
	 * bits. */
	nvl->nvl_flags = flags;
}

/* Abort with a diagnostic naming the missing element; never returns. */
__dead void
nvlist_report_missing(int type, const char *name)
{

	PJDLOG_ABORT("Element '%s' of type %s doesn't exist.",
	    name, nvpair_type_string(type));
}

/*
 * Linear search of the top level of the list for (type, name).
 * NV_TYPE_NONE matches any type.  Honors NV_FLAG_IGNORE_CASE.
 * Sets errno to ENOENT when nothing matches.
 */
static nvpair_t *
nvlist_find(const nvlist_t *nvl, int type, const char *name)
{
	nvpair_t *nvp;

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);
	PJDLOG_ASSERT(type == NV_TYPE_NONE ||
	    (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));

	for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
	    nvp = nvlist_next_nvpair(nvl, nvp)) {
		if (type != NV_TYPE_NONE && nvpair_type(nvp) != type)
			continue;
		if ((nvl->nvl_flags & NV_FLAG_IGNORE_CASE) != 0) {
			if (strcasecmp(nvpair_name(nvp), name) != 0)
				continue;
		} else {
			if (strcmp(nvpair_name(nvp), name) != 0)
				continue;
		}
		break;
	}

	if (nvp == NULL)
		ERRNO_SET(ENOENT);

	return (nvp);
}

bool
nvlist_exists_type(const nvlist_t *nvl, const char *name, int type)
{

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);
	PJDLOG_ASSERT(type == NV_TYPE_NONE ||
	    (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));

	return (nvlist_find(nvl, type, name) != NULL);
}

/*
 * Remove and free the named pair; aborts via nvlist_report_missing() if it
 * does not exist.
 */
void
nvlist_free_type(nvlist_t *nvl, const char *name, int type)
{
	nvpair_t *nvp;

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);
	PJDLOG_ASSERT(type == NV_TYPE_NONE ||
	    (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));

	nvp = nvlist_find(nvl, type, name);
	if (nvp != NULL)
		nvlist_free_nvpair(nvl, nvp);
	else
		nvlist_report_missing(type, name);
}

/*
 * Deep-copy an nvlist (public flags and all pairs).  Returns NULL and leaves
 * errno set if the source carries an error or any clone step fails.
 */
nvlist_t *
nvlist_clone(const nvlist_t *nvl)
{
	nvlist_t *newnvl;
	nvpair_t *nvp, *newnvp;

	NVLIST_ASSERT(nvl);

	if (nvl->nvl_error != 0) {
		ERRNO_SET(nvl->nvl_error);
		return (NULL);
	}

	/* If nvlist_create() fails, nvlist_move_nvpair() below frees each
	 * clone and the final nvp == NULL check still returns NULL safely. */
	newnvl = nvlist_create(nvl->nvl_flags & NV_FLAG_PUBLIC_MASK);
	for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
	    nvp = nvlist_next_nvpair(nvl, nvp)) {
		newnvp = nvpair_clone(nvp);
		if (newnvp == NULL)
			break;
		(void)nvlist_move_nvpair(newnvl, newnvp);
	}
	if (nvp != NULL) {
		nvlist_destroy(newnvl);
		return (NULL);
	}

	return (newnvl);
}

#if !defined(_KERNEL) && !defined(_STANDALONE)
/* Print (and report) a carried error; returns true if one was present. */
static bool
nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level)
{

	if (nvlist_error(nvl) != 0) {
		dprintf(fd, "%*serror: %d\n", level * 4, "",
		    nvlist_error(nvl));
		return (true);
	}

	return (false);
}

/*
 * Dump content of nvlist.
 */
void
nvlist_dump(const nvlist_t *nvl, int fd)
{
	const nvlist_t *tmpnvl;
	nvpair_t *nvp, *tmpnvp;
	void *cookie;
	int level;

	level = 0;
	if (nvlist_dump_error_check(nvl, fd, level))
		return;

	/*
	 * Iterative depth-first walk: descending into a nested nvlist (or the
	 * first non-empty array element) is done with "continue" after
	 * swapping nvl/nvp; ascending happens in the trailing while loop via
	 * nvlist_get_pararr().
	 */
	nvp = nvlist_first_nvpair(nvl);
	while (nvp != NULL) {
		dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp),
		    nvpair_type_string(nvpair_type(nvp)));
		switch (nvpair_type(nvp)) {
		case NV_TYPE_NULL:
			dprintf(fd, " null\n");
			break;
		case NV_TYPE_BOOL:
			dprintf(fd, " %s\n", nvpair_get_bool(nvp) ?
			    "TRUE" : "FALSE");
			break;
		case NV_TYPE_NUMBER:
			dprintf(fd, " %ju (%jd) (0x%jx)\n",
			    (uintmax_t)nvpair_get_number(nvp),
			    (intmax_t)nvpair_get_number(nvp),
			    (uintmax_t)nvpair_get_number(nvp));
			break;
		case NV_TYPE_STRING:
			dprintf(fd, " [%s]\n", nvpair_get_string(nvp));
			break;
		case NV_TYPE_NVLIST:
			dprintf(fd, "\n");
			tmpnvl = nvpair_get_nvlist(nvp);
			if (nvlist_dump_error_check(tmpnvl, fd, level + 1))
				break;
			tmpnvp = nvlist_first_nvpair(tmpnvl);
			if (tmpnvp != NULL) {
				/* Descend into the nested nvlist. */
				nvl = tmpnvl;
				nvp = tmpnvp;
				level++;
				continue;
			}
			break;
		case NV_TYPE_DESCRIPTOR:
			dprintf(fd, " %d\n", nvpair_get_descriptor(nvp));
			break;
		case NV_TYPE_BINARY: {
			const unsigned char *binary;
			unsigned int ii;
			size_t size;

			binary = nvpair_get_binary(nvp, &size);
			dprintf(fd, " %zu ", size);
			for (ii = 0; ii < size; ii++)
				dprintf(fd, "%02hhx", binary[ii]);
			dprintf(fd, "\n");
			break;
		}
		case NV_TYPE_BOOL_ARRAY: {
			const bool *value;
			unsigned int ii;
			size_t nitems;

			value = nvpair_get_bool_array(nvp, &nitems);
			dprintf(fd, " [ ");
			for (ii = 0; ii < nitems; ii++) {
				dprintf(fd, "%s", value[ii] ?
				    "TRUE" : "FALSE");
				if (ii != nitems - 1)
					dprintf(fd, ", ");
			}
			dprintf(fd, " ]\n");
			break;
		}
		case NV_TYPE_STRING_ARRAY: {
			const char * const *value;
			unsigned int ii;
			size_t nitems;

			value = nvpair_get_string_array(nvp, &nitems);
			dprintf(fd, " [ ");
			for (ii = 0; ii < nitems; ii++) {
				if (value[ii] == NULL)
					dprintf(fd, "NULL");
				else
					dprintf(fd, "\"%s\"", value[ii]);
				if (ii != nitems - 1)
					dprintf(fd, ", ");
			}
			dprintf(fd, " ]\n");
			break;
		}
		case NV_TYPE_NUMBER_ARRAY: {
			const uint64_t *value;
			unsigned int ii;
			size_t nitems;

			value = nvpair_get_number_array(nvp, &nitems);
			dprintf(fd, " [ ");
			for (ii = 0; ii < nitems; ii++) {
				dprintf(fd, "%ju (%jd) (0x%jx)",
				    value[ii], value[ii], value[ii]);
				if (ii != nitems - 1)
					dprintf(fd, ", ");
			}
			dprintf(fd, " ]\n");
			break;
		}
		case NV_TYPE_DESCRIPTOR_ARRAY: {
			const int *value;
			unsigned int ii;
			size_t nitems;

			value = nvpair_get_descriptor_array(nvp, &nitems);
			dprintf(fd, " [ ");
			for (ii = 0; ii < nitems; ii++) {
				dprintf(fd, "%d", value[ii]);
				if (ii != nitems - 1)
					dprintf(fd, ", ");
			}
			dprintf(fd, " ]\n");
			break;
		}
		case NV_TYPE_NVLIST_ARRAY: {
			const nvlist_t * const *value;
			unsigned int ii;
			size_t nitems;

			value = nvpair_get_nvlist_array(nvp, &nitems);
			dprintf(fd, " %zu\n", nitems);
			tmpnvl = NULL;
			tmpnvp = NULL;
			for (ii = 0; ii < nitems; ii++) {
				if (nvlist_dump_error_check(value[ii], fd,
				    level + 1)) {
					break;
				}
				if (tmpnvl == NULL) {
					tmpnvp = nvlist_first_nvpair(
					    value[ii]);
					if (tmpnvp != NULL) {
						tmpnvl = value[ii];
					} else {
						dprintf(fd, "%*s,\n",
						    (level + 1) * 4, "");
					}
				}
			}
			if (tmpnvp != NULL) {
				/* Descend into the first non-empty element. */
				nvl = tmpnvl;
				nvp = tmpnvp;
				level++;
				continue;
			}
			break;
		}
		default:
			PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp));
		}

		/* Exhausted this level: climb until a sibling is found. */
		while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
			do {
				cookie = NULL;
				if (nvlist_in_array(nvl))
					dprintf(fd, "%*s,\n", level * 4, "");
				nvl = nvlist_get_pararr(nvl, &cookie);
				if (nvl == NULL)
					return;
				if (nvlist_in_array(nvl) && cookie == NULL) {
					nvp = nvlist_first_nvpair(nvl);
				} else {
					nvp = cookie;
					level--;
				}
			} while (nvp == NULL);
			if (nvlist_in_array(nvl) && cookie == NULL)
				break;
		}
	}
}

void
nvlist_fdump(const nvlist_t *nvl, FILE *fp)
{

	fflush(fp);
	nvlist_dump(nvl, fileno(fp));
}
#endif

/*
 * The function obtains size of the nvlist after nvlist_pack().
 */
size_t
nvlist_size(const nvlist_t *nvl)
{
	const nvlist_t *tmpnvl;
	const nvlist_t * const *nvlarray;
	const nvpair_t *nvp, *tmpnvp;
	void *cookie;
	size_t size, nitems;
	unsigned int ii;

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	/* Same iterative traversal shape as nvlist_dump(), accumulating the
	 * packed sizes instead of printing. */
	size = sizeof(struct nvlist_header);
	nvp = nvlist_first_nvpair(nvl);
	while (nvp != NULL) {
		size += nvpair_header_size();
		size += strlen(nvpair_name(nvp)) + 1;
		if (nvpair_type(nvp) == NV_TYPE_NVLIST) {
			/* Nested header plus the NVLIST_UP terminator pair. */
			size += sizeof(struct nvlist_header);
			size += nvpair_header_size() + 1;
			tmpnvl = nvpair_get_nvlist(nvp);
			PJDLOG_ASSERT(tmpnvl->nvl_error == 0);
			tmpnvp = nvlist_first_nvpair(tmpnvl);
			if (tmpnvp != NULL) {
				nvl = tmpnvl;
				nvp = tmpnvp;
				continue;
			}
		} else if (nvpair_type(nvp) == NV_TYPE_NVLIST_ARRAY) {
			nvlarray = nvpair_get_nvlist_array(nvp, &nitems);
			PJDLOG_ASSERT(nitems > 0);

			size += (nvpair_header_size() + 1) * nitems;
			size += sizeof(struct nvlist_header) * nitems;

			tmpnvl = NULL;
			tmpnvp = NULL;
			for (ii = 0; ii < nitems; ii++) {
				PJDLOG_ASSERT(nvlarray[ii]->nvl_error == 0);
				tmpnvp = nvlist_first_nvpair(nvlarray[ii]);
				if (tmpnvp != NULL) {
					tmpnvl = nvlarray[ii];
					break;
				}
			}
			if (tmpnvp != NULL) {
				nvp = tmpnvp;
				nvl = tmpnvl;
				continue;
			}
		} else {
			size += nvpair_size(nvp);
		}

		while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
			do {
				cookie = NULL;
				nvl = nvlist_get_pararr(nvl, &cookie);
				if (nvl == NULL)
					goto out;
				if (nvlist_in_array(nvl) && cookie == NULL) {
					nvp = nvlist_first_nvpair(nvl);
				} else {
					nvp = cookie;
				}
			} while (nvp == NULL);
			if (nvlist_in_array(nvl) && cookie == NULL)
				break;
		}
	}

out:
	return (size);
}

#if !defined(_KERNEL) && !defined(_STANDALONE)
/*
 * Collect every descriptor in the nvlist (recursing into nested nvlists and
 * arrays) into descs[]; returns the advanced output pointer.  The caller
 * must have sized the buffer with nvlist_ndescriptors().
 */
static int *
nvlist_xdescriptors(const nvlist_t *nvl, int *descs)
{
	void *cookie;
	nvpair_t *nvp;
	int type;

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	cookie = NULL;
	do {
		while (nvlist_next(nvl, &type, &cookie) != NULL) {
			nvp = cookie;
			switch (type) {
			case NV_TYPE_DESCRIPTOR:
				*descs = nvpair_get_descriptor(nvp);
				descs++;
				break;
			case NV_TYPE_DESCRIPTOR_ARRAY: {
				const int *value;
				size_t nitems;
				unsigned int ii;

				value = nvpair_get_descriptor_array(nvp,
				    &nitems);
				for (ii = 0; ii < nitems; ii++) {
					*descs = value[ii];
					descs++;
				}
				break;
			}
			case NV_TYPE_NVLIST:
				/* Descend; cookie reset restarts iteration
				 * at the child's first pair. */
				nvl = nvpair_get_nvlist(nvp);
				cookie = NULL;
				break;
			case NV_TYPE_NVLIST_ARRAY: {
				const nvlist_t * const *value;
				size_t nitems;

				value = nvpair_get_nvlist_array(nvp, &nitems);
				PJDLOG_ASSERT(value != NULL);
				PJDLOG_ASSERT(nitems > 0);

				nvl = value[0];
				cookie = NULL;
				break;
			}
			}
		}
	} while ((nvl = nvlist_get_pararr(nvl, &cookie)) != NULL);

	return (descs);
}
#endif

#if !defined(_KERNEL) && !defined(_STANDALONE)
/*
 * Return a newly-allocated, -1-terminated array of all descriptors in the
 * nvlist; the count is stored through nitemsp when non-NULL.
 */
int *
nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp)
{
	size_t nitems;
	int *fds;

	nitems = nvlist_ndescriptors(nvl);
	fds = nv_malloc(sizeof(fds[0]) * (nitems + 1));
	if (fds == NULL)
		return (NULL);
	if (nitems > 0)
		nvlist_xdescriptors(nvl, fds);
	fds[nitems] = -1;
	if (nitemsp != NULL)
		*nitemsp = nitems;
	return (fds);
}
#endif

/*
 * Count all descriptors reachable from the nvlist, recursing into nested
 * nvlists and arrays.  Always 0 in kernel/standalone builds, where
 * descriptors do not exist.
 */
size_t
nvlist_ndescriptors(const nvlist_t *nvl)
{
#if !defined(_KERNEL) && !defined(_STANDALONE)
	void *cookie;
	nvpair_t *nvp;
	size_t ndescs;
	int type;

	NVLIST_ASSERT(nvl);
	PJDLOG_ASSERT(nvl->nvl_error == 0);

	ndescs = 0;
	cookie = NULL;
	do {
		while (nvlist_next(nvl, &type, &cookie) != NULL) {
			nvp = cookie;
			switch (type) {
			case NV_TYPE_DESCRIPTOR:
				ndescs++;
				break;
			case NV_TYPE_NVLIST:
				nvl = nvpair_get_nvlist(nvp);
				cookie = NULL;
				break;
			case NV_TYPE_NVLIST_ARRAY: {
				const nvlist_t * const *value;
				size_t nitems;

				value = nvpair_get_nvlist_array(nvp, &nitems);
				PJDLOG_ASSERT(value != NULL);
				PJDLOG_ASSERT(nitems > 0);

				nvl = value[0];
				cookie = NULL;
				break;
			}
			case NV_TYPE_DESCRIPTOR_ARRAY: {
				size_t nitems;

				(void)nvpair_get_descriptor_array(nvp,
				    &nitems);
				ndescs += nitems;
				break;
			}
			}
		}
	} while ((nvl = nvlist_get_pararr(nvl, &cookie)) != NULL);

	return (ndescs);
#else
	return (0);
#endif
}

/*
 * Emit a struct nvlist_header at ptr.  nvlh_size is derived from *leftp,
 * so the caller must have set *leftp to the total remaining packed size.
 */
static unsigned char *
nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp)
{
	struct nvlist_header nvlhdr;

	NVLIST_ASSERT(nvl);

	nvlhdr.nvlh_magic = NVLIST_HEADER_MAGIC;
	nvlhdr.nvlh_version = NVLIST_HEADER_VERSION;
	nvlhdr.nvlh_flags = nvl->nvl_flags;
#if BYTE_ORDER == BIG_ENDIAN
	nvlhdr.nvlh_flags |= NV_FLAG_BIG_ENDIAN;
#endif
	nvlhdr.nvlh_descriptors = nvlist_ndescriptors(nvl);
	nvlhdr.nvlh_size = *leftp - sizeof(nvlhdr);
	PJDLOG_ASSERT(*leftp >= sizeof(nvlhdr));
	memcpy(ptr, &nvlhdr, sizeof(nvlhdr));
	ptr += sizeof(nvlhdr);
	*leftp -= sizeof(nvlhdr);
	return (ptr);
}

/*
 * Serialize the nvlist into a freshly allocated buffer.  fdidxp carries the
 * running descriptor index for nvlist_send(); pass NULL when descriptors are
 * not being transferred.  Returns NULL with errno set on failure.
 */
static void *
nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
{
	unsigned char *buf, *ptr;
	size_t left, size;
	const nvlist_t *tmpnvl;
	nvpair_t *nvp, *tmpnvp;
	void *cookie;

	NVLIST_ASSERT(nvl);

	if (nvl->nvl_error != 0) {
		ERRNO_SET(nvl->nvl_error);
		return (NULL);
	}

	size = nvlist_size(nvl);
	buf = nv_malloc(size);
	if (buf == NULL)
		return (NULL);

	ptr = buf;
	left = size;

	ptr = nvlist_pack_header(nvl, ptr, &left);

	/* Same iterative descent/ascent shape as nvlist_dump()/nvlist_size();
	 * NVLIST_UP and NVLIST_ARRAY_NEXT markers are emitted on the way up. */
	nvp = nvlist_first_nvpair(nvl);
	while (nvp != NULL) {
		NVPAIR_ASSERT(nvp);

		nvpair_init_datasize(nvp);

		ptr = nvpair_pack_header(nvp, ptr, &left);
		if (ptr == NULL)
			goto fail;
		switch (nvpair_type(nvp)) {
		case NV_TYPE_NULL:
			ptr = nvpair_pack_null(nvp, ptr, &left);
			break;
		case NV_TYPE_BOOL:
			ptr = nvpair_pack_bool(nvp, ptr, &left);
			break;
		case NV_TYPE_NUMBER:
			ptr = nvpair_pack_number(nvp, ptr, &left);
			break;
		case NV_TYPE_STRING:
			ptr = nvpair_pack_string(nvp, ptr, &left);
			break;
		case NV_TYPE_NVLIST:
			tmpnvl = nvpair_get_nvlist(nvp);
			ptr = nvlist_pack_header(tmpnvl, ptr, &left);
			if (ptr == NULL)
				goto fail;
			tmpnvp = nvlist_first_nvpair(tmpnvl);
			if (tmpnvp != NULL) {
				nvl = tmpnvl;
				nvp = tmpnvp;
				continue;
			}
			ptr = nvpair_pack_nvlist_up(ptr, &left);
			break;
#if !defined(_KERNEL) && !defined(_STANDALONE)
		case NV_TYPE_DESCRIPTOR:
			ptr = nvpair_pack_descriptor(nvp, ptr, fdidxp, &left);
			break;
		case NV_TYPE_DESCRIPTOR_ARRAY:
			ptr = nvpair_pack_descriptor_array(nvp, ptr, fdidxp,
			    &left);
			break;
#endif
		case NV_TYPE_BINARY:
			ptr = nvpair_pack_binary(nvp, ptr, &left);
			break;
		case NV_TYPE_BOOL_ARRAY:
			ptr = nvpair_pack_bool_array(nvp, ptr, &left);
			break;
		case NV_TYPE_NUMBER_ARRAY:
			ptr = nvpair_pack_number_array(nvp, ptr, &left);
			break;
		case NV_TYPE_STRING_ARRAY:
			ptr = nvpair_pack_string_array(nvp, ptr, &left);
			break;
		case NV_TYPE_NVLIST_ARRAY: {
			const nvlist_t * const * value;
			size_t nitems;
			unsigned int ii;

			tmpnvl = NULL;
			value = nvpair_get_nvlist_array(nvp, &nitems);
			for (ii = 0; ii < nitems; ii++) {
				ptr = nvlist_pack_header(value[ii], ptr,
				    &left);
				if (ptr == NULL)
					goto out;
				tmpnvp = nvlist_first_nvpair(value[ii]);
				if (tmpnvp != NULL) {
					tmpnvl = value[ii];
					break;
				}
				/* Empty element: emit its terminator now. */
				ptr = nvpair_pack_nvlist_array_next(ptr,
				    &left);
				if (ptr == NULL)
					goto out;
			}
			if (tmpnvl != NULL) {
				nvl = tmpnvl;
				nvp = tmpnvp;
				continue;
			}
			break;
		}
		default:
			PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp));
		}
		if (ptr == NULL)
			goto fail;
		while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
			do {
				cookie = NULL;
				if (nvlist_in_array(nvl)) {
					ptr = nvpair_pack_nvlist_array_next(
					    ptr, &left);
					if (ptr == NULL)
						goto fail;
				}
				nvl = nvlist_get_pararr(nvl, &cookie);
				if (nvl == NULL)
					goto out;
				if (nvlist_in_array(nvl) && cookie == NULL) {
					nvp = nvlist_first_nvpair(nvl);
					ptr = nvlist_pack_header(nvl, ptr,
					    &left);
					if (ptr == NULL)
						goto fail;
				} else if (nvpair_type((nvpair_t *)cookie) !=
				    NV_TYPE_NVLIST_ARRAY) {
					ptr = nvpair_pack_nvlist_up(ptr,
					    &left);
					if (ptr == NULL)
						goto fail;
					nvp = cookie;
				} else {
					nvp = cookie;
				}
			} while (nvp == NULL);
			if (nvlist_in_array(nvl) && cookie == NULL)
				break;
		}
	}

out:
	if (sizep != NULL)
		*sizep = size;
	return (buf);
fail:
	nv_free(buf);
	return (NULL);
}

/*
 * Public pack entry point.  Descriptors cannot be represented in a plain
 * buffer, so their presence is rejected with EOPNOTSUPP (use nvlist_send()).
 */
void *
nvlist_pack(const nvlist_t *nvl, size_t *sizep)
{

	NVLIST_ASSERT(nvl);

	if (nvl->nvl_error != 0) {
		ERRNO_SET(nvl->nvl_error);
		return (NULL);
	}

	if (nvlist_ndescriptors(nvl) > 0) {
		ERRNO_SET(EOPNOTSUPP);
		return (NULL);
	}

	return (nvlist_xpack(nvl, NULL, sizep));
}
/*
 * Validate a received header and byte-swap its multi-byte fields into host
 * order when the sender's endianness differs.
 * NOTE(review): nvlh_version is not checked here — confirm whether
 * NVLIST_HEADER_VERSION enforcement happens elsewhere.
 */
static bool
nvlist_check_header(struct nvlist_header *nvlhdrp)
{

	if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) {
		ERRNO_SET(EINVAL);
		return (false);
	}
	if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) {
		ERRNO_SET(EINVAL);
		return (false);
	}
#if BYTE_ORDER == BIG_ENDIAN
	if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) == 0) {
		nvlhdrp->nvlh_size = le64toh(nvlhdrp->nvlh_size);
		nvlhdrp->nvlh_descriptors = le64toh(nvlhdrp->nvlh_descriptors);
	}
#else
	if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0) {
		nvlhdrp->nvlh_size = be64toh(nvlhdrp->nvlh_size);
		nvlhdrp->nvlh_descriptors = be64toh(nvlhdrp->nvlh_descriptors);
	}
#endif
	return (true);
}

/*
 * Consume and validate one nvlist header from the input stream, installing
 * the sender's public flags on nvl (preserving the local IN_ARRAY bit).
 * Advances *leftp/return pointer past the header; NULL + EINVAL on any
 * inconsistency.
 */
const unsigned char *
nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds,
    bool *isbep, size_t *leftp)
{
	struct nvlist_header nvlhdr;
	int inarrayf;

	if (*leftp < sizeof(nvlhdr))
		goto fail;

	memcpy(&nvlhdr, ptr, sizeof(nvlhdr));

	if (!nvlist_check_header(&nvlhdr))
		goto fail;

	if (nvlhdr.nvlh_size != *leftp - sizeof(nvlhdr))
		goto fail;

	/*
	 * nvlh_descriptors might be smaller than nfds in embedded nvlists.
	 */
	if (nvlhdr.nvlh_descriptors > nfds)
		goto fail;

	if ((nvlhdr.nvlh_flags & ~NV_FLAG_ALL_MASK) != 0)
		goto fail;

	inarrayf = (nvl->nvl_flags & NV_FLAG_IN_ARRAY);
	nvl->nvl_flags = (nvlhdr.nvlh_flags & NV_FLAG_PUBLIC_MASK) | inarrayf;

	ptr += sizeof(nvlhdr);
	if (isbep != NULL)
		*isbep = (((int)nvlhdr.nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0);
	*leftp -= sizeof(nvlhdr);

	return (ptr);
fail:
	ERRNO_SET(EINVAL);
	return (NULL);
}

/*
 * Deserialize a packed buffer into a new nvlist.  fds/nfds supply received
 * descriptors for NV_TYPE_DESCRIPTOR* pairs.  The caller's flags must match
 * the packed public flags exactly (EILSEQ otherwise).  On any error the
 * partially-built tree is destroyed and NULL is returned.
 */
static nvlist_t *
nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds,
    int flags)
{
	const unsigned char *ptr;
	nvlist_t *nvl, *retnvl, *tmpnvl, *array;
	nvpair_t *nvp;
	size_t left;
	bool isbe;

	PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0);

	left = size;
	ptr = buf;

	tmpnvl = array = NULL;
	nvl = retnvl = nvlist_create(0);
	if (nvl == NULL)
		goto fail;

	ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left);
	if (ptr == NULL)
		goto fail;
	if (nvl->nvl_flags != flags) {
		ERRNO_SET(EILSEQ);
		goto fail;
	}

	/*
	 * Stream-driven rebuild: NVLIST/NVLIST_ARRAY pairs set tmpnvl so the
	 * cursor descends after the pair is inserted; NVLIST_UP and
	 * NVLIST_ARRAY_NEXT markers move the cursor back up / to the next
	 * array element.
	 */
	while (left > 0) {
		ptr = nvpair_unpack(isbe, ptr, &left, &nvp);
		if (ptr == NULL)
			goto fail;
		switch (nvpair_type(nvp)) {
		case NV_TYPE_NULL:
			ptr = nvpair_unpack_null(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_BOOL:
			ptr = nvpair_unpack_bool(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_NUMBER:
			ptr = nvpair_unpack_number(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_STRING:
			ptr = nvpair_unpack_string(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_NVLIST:
			ptr = nvpair_unpack_nvlist(isbe, nvp, ptr, &left,
			    nfds, &tmpnvl);
			if (tmpnvl == NULL || ptr == NULL)
				goto fail;
			nvlist_set_parent(tmpnvl, nvp);
			break;
#if !defined(_KERNEL) && !defined(_STANDALONE) && !defined(__NetBSD__)
		case NV_TYPE_DESCRIPTOR:
			ptr = nvpair_unpack_descriptor(isbe, nvp, ptr, &left,
			    fds, nfds);
			break;
		case NV_TYPE_DESCRIPTOR_ARRAY:
			ptr = nvpair_unpack_descriptor_array(isbe, nvp, ptr,
			    &left, fds, nfds);
			break;
#endif
		case NV_TYPE_BINARY:
			ptr = nvpair_unpack_binary(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_NVLIST_UP:
			if (nvl->nvl_parent == NULL)
				goto fail;
			nvl = nvpair_nvlist(nvl->nvl_parent);
			nvpair_free_structure(nvp);
			continue;
		case NV_TYPE_NVLIST_ARRAY_NEXT:
			if (nvl->nvl_array_next == NULL) {
				/* Last element: climb out of the array. */
				if (nvl->nvl_parent == NULL)
					goto fail;
				nvl = nvpair_nvlist(nvl->nvl_parent);
			} else {
				nvl = __DECONST(nvlist_t *,
				    nvlist_get_array_next(nvl));
				ptr = nvlist_unpack_header(nvl, ptr, nfds,
				    &isbe, &left);
				if (ptr == NULL)
					goto fail;
			}
			nvpair_free_structure(nvp);
			continue;
		case NV_TYPE_BOOL_ARRAY:
			ptr = nvpair_unpack_bool_array(isbe, nvp, ptr, &left);
			break;
		case NV_TYPE_NUMBER_ARRAY:
			ptr = nvpair_unpack_number_array(isbe, nvp, ptr,
			    &left);
			break;
		case NV_TYPE_STRING_ARRAY:
			ptr = nvpair_unpack_string_array(isbe, nvp, ptr,
			    &left);
			break;
		case NV_TYPE_NVLIST_ARRAY:
			ptr = nvpair_unpack_nvlist_array(isbe, nvp, ptr,
			    &left, &array);
			if (ptr == NULL)
				goto fail;
			PJDLOG_ASSERT(array != NULL);
			tmpnvl = array;
			do {
				nvlist_set_parent(array, nvp);
				array = __DECONST(nvlist_t *,
				    nvlist_get_array_next(array));
			} while (array != NULL);
			ptr = nvlist_unpack_header(tmpnvl, ptr, nfds, &isbe,
			    &left);
			break;
		default:
			PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp));
		}
		if (ptr == NULL)
			goto fail;
		if (!nvlist_move_nvpair(nvl, nvp))
			goto fail;
		if (tmpnvl != NULL) {
			/* Descend into the nvlist we just attached. */
			nvl = tmpnvl;
			tmpnvl = NULL;
		}
	}

	return (retnvl);
fail:
	nvlist_destroy(retnvl);
	return (NULL);
}

nvlist_t *
nvlist_unpack(const void *buf, size_t size, int flags)
{

	return (nvlist_xunpack(buf, size, NULL, 0, flags));
}

#if !defined(_KERNEL) && !defined(_STANDALONE) && defined(WITH_MSGIO)
/*
 * Pack the nvlist and transmit it over a socket, sending any contained
 * descriptors via fd passing.  Returns 0 on success, -1 with errno set.
 */
int
nvlist_send(int sock, const nvlist_t *nvl)
{
	size_t datasize, nfds;
	int *fds;
	void *data;
	int64_t fdidx;
	int ret;

	if (nvlist_error(nvl) != 0) {
		ERRNO_SET(nvlist_error(nvl));
		return (-1);
	}

	fds = nvlist_descriptors(nvl, &nfds);
	if (fds == NULL)
		return (-1);

	ret = -1;
	fdidx = 0;

	data = nvlist_xpack(nvl, &fdidx, &datasize);
	if (data == NULL)
		goto out;

	if (buf_send(sock, data, datasize) == -1)
		goto out;

	if (nfds > 0) {
		if (fd_send(sock, fds, nfds) == -1)
			goto out;
	}

	ret = 0;
out:
	ERRNO_SAVE();
	nv_free(fds);
	nv_free(data);
	ERRNO_RESTORE();
	return (ret);
}

/*
 * Receive a packed nvlist (and any passed descriptors) from a socket and
 * unpack it.  Returns NULL with errno set on failure; on unpack failure all
 * received descriptors are closed.
 */
nvlist_t *
nvlist_recv(int sock, int flags)
{
	struct nvlist_header nvlhdr;
	nvlist_t *nvl, *ret;
	unsigned char *buf;
	size_t nfds, size, i;
	int *fds;

	if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1)
		return (NULL);

	if (!nvlist_check_header(&nvlhdr))
		return (NULL);

	nfds = (size_t)nvlhdr.nvlh_descriptors;
	/* NOTE(review): nvlh_size is peer-controlled; on ILP32 the cast to
	 * size_t and the addition below could wrap to a small allocation —
	 * confirm buf_recv/nv_malloc bounds make this unreachable. */
	size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size;

	buf = nv_malloc(size);
	if (buf == NULL)
		return (NULL);

	memcpy(buf, &nvlhdr, sizeof(nvlhdr));

	ret = NULL;
	fds = NULL;

	if (buf_recv(sock, buf + sizeof(nvlhdr), size - sizeof(nvlhdr)) == -1)
		goto out;

	if (nfds > 0) {
		fds = nv_malloc(nfds * sizeof(fds[0]));
		if (fds == NULL)
			goto out;
		if (fd_recv(sock, fds, nfds) == -1)
			goto out;
	}

	nvl = nvlist_xunpack(buf, size, fds, nfds, flags);
	if (nvl == NULL) {
		ERRNO_SAVE();
		for (i = 0; i < nfds; i++)
			close(fds[i]);
		ERRNO_RESTORE();
		goto out;
	}

	ret = nvl;
out:
	ERRNO_SAVE();
	nv_free(buf);
	nv_free(fds);
	ERRNO_RESTORE();

	return (ret);
}

/*
 * Send nvl (consuming it) and wait for the peer's reply nvlist.
 */
nvlist_t *
nvlist_xfer(int sock, nvlist_t *nvl, int flags)
{

	if (nvlist_send(sock, nvl) < 0) {
		nvlist_destroy(nvl);
		return (NULL);
	}
	nvlist_destroy(nvl);
	return (nvlist_recv(sock, flags));
}
#endif

nvpair_t *
nvlist_first_nvpair(const nvlist_t *nvl)
{

	NVLIST_ASSERT(nvl);

	return (TAILQ_FIRST(&nvl->nvl_head));
}

nvpair_t *
nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp)
{
	nvpair_t *retnvp;

	NVLIST_ASSERT(nvl);
	NVPAIR_ASSERT(nvp);
	PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);

	retnvp = nvpair_next(nvp);
	PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl);

	return (retnvp);
}

nvpair_t *
nvlist_prev_nvpair(const nvlist_t *nvl, const nvpair_t *nvp)
{
	nvpair_t *retnvp;

	NVLIST_ASSERT(nvl);
	NVPAIR_ASSERT(nvp);
	PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);

	retnvp = nvpair_prev(nvp);
	/* NOTE(review): unlike nvlist_next_nvpair(), this assert lacks a
	 * retnvp == NULL guard — confirm nvpair_nvlist(NULL) is tolerated or
	 * that callers never step before the first pair. */
	PJDLOG_ASSERT(nvpair_nvlist(retnvp) == nvl);

	return (retnvp);
}

/*
 * Cookie-based iterator over the top level of the list: returns the next
 * pair's name (type through typep, the pair itself through cookiep), or
 * NULL at the end.  A NULL or NULL-valued cookie starts from the beginning.
 */
const char *
nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep)
{
	nvpair_t *nvp;

	NVLIST_ASSERT(nvl);

	if (cookiep == NULL || *cookiep == NULL)
		nvp = nvlist_first_nvpair(nvl);
	else
		nvp = nvlist_next_nvpair(nvl, *cookiep);
	if (nvp == NULL)
		return (NULL);
	if (typep != NULL)
		*typep = nvpair_type(nvp);
	if (cookiep != NULL)
		*cookiep = nvp;
	return (nvpair_name(nvp));
}

bool
nvlist_exists(const nvlist_t *nvl, const char *name)
{

	return (nvlist_find(nvl, NV_TYPE_NONE, name) != NULL);
}

/* Generate the typed nvlist_exists_<type>() family. */
#define NVLIST_EXISTS(type, TYPE) \
bool \
nvlist_exists_##type(const nvlist_t *nvl, const char *name) \
{ \
	return (nvlist_find(nvl, NV_TYPE_##TYPE, name) != NULL); \
}

NVLIST_EXISTS(null, NULL)
NVLIST_EXISTS(bool, BOOL)
NVLIST_EXISTS(number, NUMBER)
NVLIST_EXISTS(string, STRING)
NVLIST_EXISTS(nvlist, NVLIST)
NVLIST_EXISTS(binary, BINARY)
NVLIST_EXISTS(bool_array, BOOL_ARRAY)
NVLIST_EXISTS(number_array, NUMBER_ARRAY)
NVLIST_EXISTS(string_array, STRING_ARRAY)
NVLIST_EXISTS(nvlist_array, NVLIST_ARRAY)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_EXISTS(descriptor, DESCRIPTOR)
NVLIST_EXISTS(descriptor_array, DESCRIPTOR_ARRAY)
#endif

#undef NVLIST_EXISTS

/*
 * Insert a clone of nvp into nvl.  Duplicate names are rejected with EEXIST
 * unless NV_FLAG_NO_UNIQUE is set.  Failures are recorded as the list's
 * sticky error.
 */
void
nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp)
{
	nvpair_t *newnvp;

	NVPAIR_ASSERT(nvp);

	if (nvlist_error(nvl) != 0) {
		ERRNO_SET(nvlist_error(nvl));
		return;
	}
	if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) {
		if (nvlist_exists(nvl, nvpair_name(nvp))) {
			nvl->nvl_error = EEXIST;
			ERRNO_SET(nvlist_error(nvl));
			return;
		}
	}

	newnvp = nvpair_clone(nvp);
	if (newnvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvpair_insert(&nvl->nvl_head, newnvp, nvl);
}

#if !defined(_STANDALONE)
/* printf-style convenience wrapper around nvlist_add_stringv(). */
void
nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...)
{
	va_list valueap;

	va_start(valueap, valuefmt);
	nvlist_add_stringv(nvl, name, valuefmt, valueap);
	va_end(valueap);
}

void
nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt,
    va_list valueap)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_create_stringv(name, valuefmt, valueap);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}
#endif

void
nvlist_add_null(nvlist_t *nvl, const char *name)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_create_null(name);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

/* Add a copy of the given byte buffer under name. */
void
nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value,
    size_t size)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_create_binary(name, value, size);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

/* Generate the scalar nvlist_add_<type>() family (copy semantics). */
#define NVLIST_ADD(vtype, type) \
void \
nvlist_add_##type(nvlist_t *nvl, const char *name, vtype value) \
{ \
	nvpair_t *nvp; \
	if (nvlist_error(nvl) != 0) { \
		ERRNO_SET(nvlist_error(nvl)); \
		return; \
	} \
	nvp = nvpair_create_##type(name, value); \
	if (nvp == NULL) { \
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \
		ERRNO_SET(nvl->nvl_error); \
	} else { \
		(void)nvlist_move_nvpair(nvl, nvp); \
	} \
}

NVLIST_ADD(bool, bool)
NVLIST_ADD(uint64_t, number)
NVLIST_ADD(const char *, string)
NVLIST_ADD(const nvlist_t *, nvlist)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_ADD(int, descriptor);
#endif

#undef NVLIST_ADD

/* Generate the nvlist_add_<type>_array() family (copy semantics). */
#define NVLIST_ADD_ARRAY(vtype, type) \
void \
nvlist_add_##type##_array(nvlist_t *nvl, const char *name, vtype value, \
    size_t nitems) \
{ \
	nvpair_t *nvp; \
	if (nvlist_error(nvl) != 0) { \
		ERRNO_SET(nvlist_error(nvl)); \
		return; \
	} \
	nvp = nvpair_create_##type##_array(name, value, nitems); \
	if (nvp == NULL) { \
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \
		ERRNO_SET(nvl->nvl_error); \
	} else { \
		(void)nvlist_move_nvpair(nvl, nvp); \
	} \
}

NVLIST_ADD_ARRAY(const bool *, bool)
NVLIST_ADD_ARRAY(const uint64_t *, number)
NVLIST_ADD_ARRAY(const char * const *, string)
NVLIST_ADD_ARRAY(const nvlist_t * const *, nvlist)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_ADD_ARRAY(const int *, descriptor)
#endif

#undef NVLIST_ADD_ARRAY

/*
 * Generate nvlist_append_<type>_array(): append one element to an existing
 * array pair, creating a one-element array when the pair does not exist yet.
 */
#define NVLIST_APPEND_ARRAY(vtype, type, TYPE) \
void \
nvlist_append_##type##_array(nvlist_t *nvl, const char *name, vtype value)\
{ \
	nvpair_t *nvp; \
	if (nvlist_error(nvl) != 0) { \
		ERRNO_SET(nvlist_error(nvl)); \
		return; \
	} \
	nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \
	if (nvp == NULL) { \
		nvlist_add_##type##_array(nvl, name, &value, 1); \
		return; \
	} \
	if (nvpair_append_##type##_array(nvp, value) == -1) { \
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \
		ERRNO_SET(nvl->nvl_error); \
	} \
}

NVLIST_APPEND_ARRAY(const bool, bool, BOOL)
NVLIST_APPEND_ARRAY(const uint64_t, number, NUMBER)
NVLIST_APPEND_ARRAY(const char *, string, STRING)
NVLIST_APPEND_ARRAY(const nvlist_t *, nvlist, NVLIST)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_APPEND_ARRAY(const int, descriptor, DESCRIPTOR)
#endif

#undef NVLIST_APPEND_ARRAY

/*
 * Insert nvp into nvl, taking ownership.  On any failure (sticky error,
 * duplicate name) the pair is freed and false is returned.
 */
bool
nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{

	NVPAIR_ASSERT(nvp);
	PJDLOG_ASSERT(nvpair_nvlist(nvp) == NULL);

	if (nvlist_error(nvl) != 0) {
		nvpair_free(nvp);
		ERRNO_SET(nvlist_error(nvl));
		return (false);
	}
	if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) {
		if (nvlist_exists(nvl, nvpair_name(nvp))) {
			nvpair_free(nvp);
			nvl->nvl_error = EEXIST;
			ERRNO_SET(nvl->nvl_error);
			return (false);
		}
	}

	nvpair_insert(&nvl->nvl_head, nvp, nvl);
	return (true);
}

/*
 * nvlist_move_*(): add taking ownership of value; on failure the value is
 * released (freed/closed/destroyed) so the caller never has to.
 */
void
nvlist_move_string(nvlist_t *nvl, const char *name, char *value)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		nv_free(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_string(name, value);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

void
nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		/* NOTE(review): destroying only when value HAS a parent looks
		 * inverted (an unowned value would leak here) — confirm
		 * against upstream libnv before changing. */
		if (value != NULL && nvlist_get_nvpair_parent(value) != NULL)
			nvlist_destroy(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_nvlist(name, value);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

#if !defined(_KERNEL) && !defined(_STANDALONE)
void
nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		close(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_descriptor(name, value);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}
#endif

void
nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		nv_free(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_binary(name, value, size);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

void
nvlist_move_bool_array(nvlist_t *nvl, const char *name, bool *value,
    size_t nitems)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		nv_free(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_bool_array(name, value, nitems);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

void
nvlist_move_string_array(nvlist_t *nvl, const char *name, char **value,
    size_t nitems)
{
	nvpair_t *nvp;
	size_t i;

	if (nvlist_error(nvl) != 0) {
		if (value != NULL) {
			for (i = 0; i < nitems; i++)
				nv_free(value[i]);
			nv_free(value);
		}
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_string_array(name, value, nitems);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

void
nvlist_move_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **value,
    size_t nitems)
{
	nvpair_t *nvp;
	size_t i;

	if (nvlist_error(nvl) != 0) {
		if (value != NULL) {
			for (i = 0; i < nitems; i++) {
				/* Only destroy elements not owned by some
				 * other list/array. */
				if (nvlist_get_pararr(value[i], NULL) == NULL)
					nvlist_destroy(value[i]);
			}
		}
		nv_free(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_nvlist_array(name, value, nitems);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

void
nvlist_move_number_array(nvlist_t *nvl, const char *name, uint64_t *value,
    size_t nitems)
{
	nvpair_t *nvp;

	if (nvlist_error(nvl) != 0) {
		nv_free(value);
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_number_array(name, value, nitems);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}

#if !defined(_KERNEL) && !defined(_STANDALONE)
void
nvlist_move_descriptor_array(nvlist_t *nvl, const char *name, int *value,
    size_t nitems)
{
	nvpair_t *nvp;
	size_t i;

	if (nvlist_error(nvl) != 0) {
		/* NOTE(review): "!= 0" compares a pointer against 0 (style:
		 * should be NULL) — behavior is identical. */
		if (value != 0) {
			for (i = 0; i < nitems; i++)
				close(value[i]);
			nv_free(value);
		}
		ERRNO_SET(nvlist_error(nvl));
		return;
	}

	nvp = nvpair_move_descriptor_array(name, value, nitems);
	if (nvp == NULL) {
		nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
		ERRNO_SET(nvl->nvl_error);
	} else {
		(void)nvlist_move_nvpair(nvl, nvp);
	}
}
#endif

const nvpair_t *
nvlist_get_nvpair(const nvlist_t *nvl, const char *name)
{

	return (nvlist_find(nvl, NV_TYPE_NONE, name));
}

/* Generate nvlist_get_<type>(): borrow the value; abort if missing. */
#define NVLIST_GET(ftype, type, TYPE) \
ftype \
nvlist_get_##type(const nvlist_t *nvl, const char *name) \
{ \
	const nvpair_t *nvp; \
	nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \
	if (nvp == NULL) \
		nvlist_report_missing(NV_TYPE_##TYPE, name); \
	return (nvpair_get_##type(nvp)); \
}

NVLIST_GET(bool, bool, BOOL)
NVLIST_GET(uint64_t, number, NUMBER)
NVLIST_GET(const char *, string, STRING)
NVLIST_GET(const nvlist_t *, nvlist, NVLIST)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_GET(int, descriptor, DESCRIPTOR)
#endif

#undef NVLIST_GET

const void *
nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep)
{
	nvpair_t *nvp;

	nvp = nvlist_find(nvl, NV_TYPE_BINARY, name);
	if (nvp == NULL)
		nvlist_report_missing(NV_TYPE_BINARY, name);

	return (nvpair_get_binary(nvp, sizep));
}

/* Generate nvlist_get_<type>_array(): borrow the array; abort if missing. */
#define NVLIST_GET_ARRAY(ftype, type, TYPE) \
ftype \
nvlist_get_##type##_array(const nvlist_t *nvl, const char *name, \
    size_t *nitems) \
{ \
	const nvpair_t *nvp; \
	nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \
	if (nvp == NULL) \
		nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \
	return (nvpair_get_##type##_array(nvp, nitems)); \
}

NVLIST_GET_ARRAY(const bool *, bool, BOOL)
NVLIST_GET_ARRAY(const uint64_t *, number, NUMBER)
NVLIST_GET_ARRAY(const char * const *, string, STRING)
NVLIST_GET_ARRAY(const nvlist_t * const *, nvlist, NVLIST)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_GET_ARRAY(const int *, descriptor, DESCRIPTOR)
#endif

#undef NVLIST_GET_ARRAY

/*
 * Generate nvlist_take_<type>(): remove the pair from the list and transfer
 * ownership of the value to the caller; abort if missing.
 */
#define NVLIST_TAKE(ftype, type, TYPE) \
ftype \
nvlist_take_##type(nvlist_t *nvl, const char *name) \
{ \
	nvpair_t *nvp; \
	ftype value; \
	nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \
	if (nvp == NULL) \
		nvlist_report_missing(NV_TYPE_##TYPE, name); \
	value = (ftype)(intptr_t)nvpair_get_##type(nvp); \
	nvlist_remove_nvpair(nvl, nvp); \
	nvpair_free_structure(nvp); \
	return (value); \
}

NVLIST_TAKE(bool, bool, BOOL)
NVLIST_TAKE(uint64_t, number, NUMBER)
NVLIST_TAKE(char *, string, STRING)
NVLIST_TAKE(nvlist_t *, nvlist, NVLIST)
#if !defined(_KERNEL) && !defined(_STANDALONE)
NVLIST_TAKE(int, descriptor, DESCRIPTOR)
#endif

#undef NVLIST_TAKE

void *
nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep) { nvpair_t *nvp; void *value; nvp = nvlist_find(nvl, NV_TYPE_BINARY, name); if (nvp == NULL) nvlist_report_missing(NV_TYPE_BINARY, name); value = (void *)(intptr_t)nvpair_get_binary(nvp, sizep); nvlist_remove_nvpair(nvl, nvp); nvpair_free_structure(nvp); return (value); } #define NVLIST_TAKE_ARRAY(ftype, type, TYPE) \ ftype \ nvlist_take_##type##_array(nvlist_t *nvl, const char *name, \ size_t *nitems) \ { \ nvpair_t *nvp; \ ftype value; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \ value = (ftype)(intptr_t)nvpair_get_##type##_array(nvp, nitems);\ nvlist_remove_nvpair(nvl, nvp); \ nvpair_free_structure(nvp); \ return (value); \ } NVLIST_TAKE_ARRAY(bool *, bool, BOOL) NVLIST_TAKE_ARRAY(uint64_t *, number, NUMBER) NVLIST_TAKE_ARRAY(char **, string, STRING) NVLIST_TAKE_ARRAY(nvlist_t **, nvlist, NVLIST) #if !defined(_KERNEL) && !defined(_STANDALONE) NVLIST_TAKE_ARRAY(int *, descriptor, DESCRIPTOR) #endif void nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); nvpair_remove(&nvl->nvl_head, nvp, nvl); } void nvlist_free(nvlist_t *nvl, const char *name) { nvlist_free_type(nvl, name, NV_TYPE_NONE); } #define NVLIST_FREE(type, TYPE) \ void \ nvlist_free_##type(nvlist_t *nvl, const char *name) \ { \ \ nvlist_free_type(nvl, name, NV_TYPE_##TYPE); \ } NVLIST_FREE(null, NULL) NVLIST_FREE(bool, BOOL) NVLIST_FREE(number, NUMBER) NVLIST_FREE(string, STRING) NVLIST_FREE(nvlist, NVLIST) NVLIST_FREE(binary, BINARY) NVLIST_FREE(bool_array, BOOL_ARRAY) NVLIST_FREE(number_array, NUMBER_ARRAY) NVLIST_FREE(string_array, STRING_ARRAY) NVLIST_FREE(nvlist_array, NVLIST_ARRAY) #if !defined(_KERNEL) && !defined(_STANDALONE) NVLIST_FREE(descriptor, DESCRIPTOR) NVLIST_FREE(descriptor_array, DESCRIPTOR_ARRAY) #endif #undef NVLIST_FREE void 
/*
 * Detach nvp from nvl and release it.
 * The pair must currently belong to nvl (asserted below); after this
 * call the nvp pointer is invalid.
 * (The "void" return type appears at the end of the preceding chunk.)
 */
nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{

    NVLIST_ASSERT(nvl);
    NVPAIR_ASSERT(nvp);
    /* Ownership check: refuse pairs that belong to another list. */
    PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);

    /* Unlink first, then free the pair itself. */
    nvlist_remove_nvpair(nvl, nvp);
    nvpair_free(nvp);
}
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 | /* $NetBSD: rf_map.c,v 1.51 2021/07/23 00:54:45 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * * Author: Mark Holland * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. 
* * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /************************************************************************** * * map.c -- main code for mapping RAID addresses to physical disk addresses * **************************************************************************/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: rf_map.c,v 1.51 2021/07/23 00:54:45 oster Exp $"); #include <dev/raidframe/raidframevar.h> #include "rf_threadstuff.h" #include "rf_raid.h" #include "rf_general.h" #include "rf_map.h" #include "rf_shutdown.h" static void rf_FreePDAList(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_list); static void rf_FreeASMList(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asm_list); /*************************************************************************** * * MapAccess -- main 1st order mapping routine. Maps an access in the * RAID address space to the corresponding set of physical disk * addresses. The result is returned as a list of AccessStripeMap * structures, one per stripe accessed. Each ASM structure contains a * pointer to a list of PhysDiskAddr structures, which describe the * physical locations touched by the user access. Note that this * routine returns only static mapping information, i.e. the list of * physical addresses returned does not necessarily identify the set * of physical locations that will actually be read or written. The * routine also maps the parity. 
The physical disk location returned * always indicates the entire parity unit, even when only a subset of * it is being accessed. This is because an access that is not stripe * unit aligned but that spans a stripe unit boundary may require * access two distinct portions of the parity unit, and we can't yet * tell which portion(s) we'll actually need. We leave it up to the * algorithm selection code to decide what subset of the parity unit * to access. Note that addresses in the RAID address space must * always be maintained as longs, instead of ints. * * This routine returns NULL if numBlocks is 0 * * raidAddress - starting address in RAID address space * numBlocks - number of blocks in RAID address space to access * buffer - buffer to supply/receive data * remap - 1 => remap address to spare space ***************************************************************************/ RF_AccessStripeMapHeader_t * rf_MapAccess(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, void *buffer, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_AccessStripeMapHeader_t *asm_hdr = NULL; RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; int faultsTolerated = layoutPtr->map->faultsTolerated; /* we'll change raidAddress along the way */ RF_RaidAddr_t startAddress = raidAddress; RF_RaidAddr_t endAddress = raidAddress + numBlocks; RF_RaidDisk_t *disks = raidPtr->Disks; RF_PhysDiskAddr_t *pda_p; #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) RF_PhysDiskAddr_t *pda_q; #endif RF_StripeCount_t numStripes = 0; RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; RF_StripeCount_t totStripes; RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; RF_AccessStripeMap_t *asmList, *t_asm; RF_PhysDiskAddr_t *pdaList, *t_pda; /* allocate all the ASMs and PDAs up front */ lastRaidAddr = raidAddress + numBlocks - 1; stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); lastSID 
= rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); totStripes = lastSID - stripeID + 1; SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); asmList = rf_AllocASMList(raidPtr, totStripes); /* may also need pda(s) per stripe for parity */ pdaList = rf_AllocPDAList(raidPtr, lastSUID - SUID + 1 + faultsTolerated * totStripes); if (raidAddress + numBlocks > raidPtr->totalSectors) { RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", (int) raidAddress); return (NULL); } #if RF_DEBUG_MAP if (rf_mapDebug) rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); #endif for (; raidAddress < endAddress;) { /* make the next stripe structure */ RF_ASSERT(asmList); t_asm = asmList; asmList = asmList->next; memset(t_asm, 0, sizeof(*t_asm)); if (!asm_p) asm_list = asm_p = t_asm; else { asm_p->next = t_asm; asm_p = asm_p->next; } numStripes++; /* map SUs from current location to the end of the stripe */ asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, raidAddress) */ stripeID++; stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); asm_p->raidAddress = raidAddress; asm_p->endRaidAddress = stripeEndAddress; /* map each stripe unit in the stripe */ pda_p = NULL; /* Raid addr of start of portion of access that is within this stripe */ startAddrWithinStripe = raidAddress; for (; raidAddress < stripeEndAddress;) { RF_ASSERT(pdaList); t_pda = pdaList; pdaList = pdaList->next; memset(t_pda, 0, sizeof(*t_pda)); if (!pda_p) asm_p->physInfo = pda_p = t_pda; else { pda_p->next = t_pda; pda_p = pda_p->next; } pda_p->type = RF_PDA_TYPE_DATA; (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->col), &(pda_p->startSector), remap); /* mark any failures we find. 
failedPDA is * don't-care if there is more than one * failure */ /* the RAID address corresponding to this physical diskaddress */ pda_p->raidAddress = raidAddress; nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; RF_ASSERT(pda_p->numSector != 0); rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); pda_p->bufPtr = (char *)buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); asm_p->totalSectorsAccessed += pda_p->numSector; asm_p->numStripeUnitsAccessed++; raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); } /* Map the parity. At this stage, the startSector and * numSector fields for the parity unit are always set * to indicate the entire parity unit. We may modify * this after mapping the data portion. */ switch (faultsTolerated) { case 0: break; case 1: /* single fault tolerant */ RF_ASSERT(pdaList); t_pda = pdaList; pdaList = pdaList->next; memset(t_pda, 0, sizeof(*t_pda)); pda_p = asm_p->parityInfo = t_pda; pda_p->type = RF_PDA_TYPE_PARITY; (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), &(pda_p->col), &(pda_p->startSector), remap); pda_p->numSector = layoutPtr->sectorsPerStripeUnit; /* raidAddr may be needed to find unit to redirect to */ pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); rf_ASMParityAdjust(raidPtr, asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); break; #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) case 2: /* two fault tolerant */ RF_ASSERT(pdaList && pdaList->next); t_pda = pdaList; pdaList = pdaList->next; memset(t_pda, 0, sizeof(*t_pda)); pda_p = asm_p->parityInfo = t_pda; pda_p->type = RF_PDA_TYPE_PARITY; t_pda = pdaList; pdaList = pdaList->next; memset(t_pda, 0, sizeof(*t_pda)); pda_q = asm_p->qInfo = t_pda; 
pda_q->type = RF_PDA_TYPE_Q; (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), &(pda_p->col), &(pda_p->startSector), remap); (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), &(pda_q->col), &(pda_q->startSector), remap); pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; /* raidAddr may be needed to find unit to redirect to */ pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); /* failure mode stuff */ rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); rf_ASMParityAdjust(raidPtr, asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); rf_ASMParityAdjust(raidPtr, asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); break; #endif } } RF_ASSERT(asmList == NULL && pdaList == NULL); /* make the header structure */ asm_hdr = rf_AllocAccessStripeMapHeader(raidPtr); RF_ASSERT(numStripes == totStripes); asm_hdr->numStripes = numStripes; asm_hdr->stripeMap = asm_list; #if RF_DEBUG_MAP if (rf_mapDebug) rf_PrintAccessStripeMap(asm_hdr); #endif return (asm_hdr); } /*************************************************************************** * This routine walks through an ASM list and marks the PDAs that have * failed. It's called only when a disk failure causes an in-flight * DAG to fail. The parity may consist of two components, but we want * to use only one failedPDA pointer. Thus we set failedPDA to point * to the first parity component, and rely on the rest of the code to * do the right thing with this. 
 ***************************************************************************/
void
rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
{
    RF_RaidDisk_t *disks = raidPtr->Disks;
    RF_AccessStripeMap_t *asmap;
    RF_PhysDiskAddr_t *pda;

    /* Recompute failure accounting for every stripe in the access. */
    for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) {
        /* Discard any stale counts from the original mapping. */
        asmap->numDataFailed = 0;
        asmap->numParityFailed = 0;
        asmap->numQFailed = 0;
        asmap->numFailedPDAs = 0;
        memset(asmap->failedPDAs, 0,
            RF_MAX_FAILED_PDA * sizeof(*asmap->failedPDAs));
        /* Data units: record every PDA whose disk is dead. */
        for (pda = asmap->physInfo; pda; pda = pda->next) {
            if (RF_DEAD_DISK(disks[pda->col].status)) {
                asmap->numDataFailed++;
                /* NOTE(review): no bound check against
                 * RF_MAX_FAILED_PDA here — assumes the stripe
                 * cannot contain more failures than slots;
                 * confirm against the layout's fault model. */
                asmap->failedPDAs[asmap->numFailedPDAs] = pda;
                asmap->numFailedPDAs++;
            }
        }
        /* Parity unit: only the first component is examined. */
        pda = asmap->parityInfo;
        if (pda && RF_DEAD_DISK(disks[pda->col].status)) {
            asmap->numParityFailed++;
            asmap->failedPDAs[asmap->numFailedPDAs] = pda;
            asmap->numFailedPDAs++;
        }
        /* Q unit (dual-fault-tolerant layouts), same treatment. */
        pda = asmap->qInfo;
        if (pda && RF_DEAD_DISK(disks[pda->col].status)) {
            asmap->numQFailed++;
            asmap->failedPDAs[asmap->numFailedPDAs] = pda;
            asmap->numFailedPDAs++;
        }
    }
}

/***************************************************************************
 *
 * routines to allocate and free list elements.  All allocation
 * routines zero the structure before returning it.
 *
 * FreePhysDiskAddr is static.  It should never be called directly,
 * because FreeAccessStripeMap takes care of freeing the PhysDiskAddr
 * list.
 *
 ***************************************************************************/

/* Per-pool low/high watermarks for the raid's pool(9) caches. */
#define RF_MAX_FREE_ASMHDR 128
#define RF_MIN_FREE_ASMHDR  32

#define RF_MAX_FREE_ASM 192
#define RF_MIN_FREE_ASM  64

#define RF_MAX_FREE_PDA 192
#define RF_MIN_FREE_PDA  64

#define RF_MAX_FREE_ASMHLE 64
#define RF_MIN_FREE_ASMHLE 16

#define RF_MAX_FREE_FSS 128
#define RF_MIN_FREE_FSS  32

#define RF_MAX_FREE_VFPLE 128
#define RF_MIN_FREE_VFPLE  32

#define RF_MAX_FREE_VPLE 128
#define RF_MIN_FREE_VPLE  32

/* called at shutdown time.
So far, all that is necessary is to release all the free lists */
static void rf_ShutdownMapModule(void *);
/*
 * Shutdown hook registered by rf_ConfigureMapModule(): tear down every
 * pool created there.  Must destroy exactly the pools rf_pool_init set up.
 */
static void
rf_ShutdownMapModule(void *arg)
{
    RF_Raid_t *raidPtr;

    raidPtr = (RF_Raid_t *) arg;

    pool_destroy(&raidPtr->pools.asm_hdr);
    pool_destroy(&raidPtr->pools.asmap);
    pool_destroy(&raidPtr->pools.asmhle);
    pool_destroy(&raidPtr->pools.pda);
    pool_destroy(&raidPtr->pools.fss);
    pool_destroy(&raidPtr->pools.vfple);
    pool_destroy(&raidPtr->pools.vple);
}

/*
 * Create the per-raid pools used by the mapping module and register
 * the matching shutdown hook.  Always returns 0 (rf_pool_init has no
 * visible failure path here).
 */
int
rf_ConfigureMapModule(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
		      RF_Config_t *cfgPtr)
{

    rf_pool_init(raidPtr, raidPtr->poolNames.asm_hdr, &raidPtr->pools.asm_hdr,
        sizeof(RF_AccessStripeMapHeader_t),
        "asmhdr", RF_MIN_FREE_ASMHDR, RF_MAX_FREE_ASMHDR);
    rf_pool_init(raidPtr, raidPtr->poolNames.asmap, &raidPtr->pools.asmap,
        sizeof(RF_AccessStripeMap_t),
        "asmap", RF_MIN_FREE_ASM, RF_MAX_FREE_ASM);
    rf_pool_init(raidPtr, raidPtr->poolNames.asmhle, &raidPtr->pools.asmhle,
        sizeof(RF_ASMHeaderListElem_t),
        "asmhle", RF_MIN_FREE_ASMHLE, RF_MAX_FREE_ASMHLE);
    rf_pool_init(raidPtr, raidPtr->poolNames.pda, &raidPtr->pools.pda,
        sizeof(RF_PhysDiskAddr_t),
        "pda", RF_MIN_FREE_PDA, RF_MAX_FREE_PDA);
    rf_pool_init(raidPtr, raidPtr->poolNames.fss, &raidPtr->pools.fss,
        sizeof(RF_FailedStripe_t),
        "fss", RF_MIN_FREE_FSS, RF_MAX_FREE_FSS);
    rf_pool_init(raidPtr, raidPtr->poolNames.vfple, &raidPtr->pools.vfple,
        sizeof(RF_VoidFunctionPointerListElem_t),
        "vfple", RF_MIN_FREE_VFPLE, RF_MAX_FREE_VFPLE);
    rf_pool_init(raidPtr, raidPtr->poolNames.vple, &raidPtr->pools.vple,
        sizeof(RF_VoidPointerListElem_t),
        "vple", RF_MIN_FREE_VPLE, RF_MAX_FREE_VPLE);
    rf_ShutdownCreate(listp, rf_ShutdownMapModule, raidPtr);

    return (0);
}

/* Allocate a zeroed ASM header; may sleep (PR_WAITOK). */
RF_AccessStripeMapHeader_t *
rf_AllocAccessStripeMapHeader(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.asm_hdr, PR_WAITOK | PR_ZERO);
}

/* Return an ASM header to its pool.  Does not touch hdr->stripeMap. */
void
rf_FreeAccessStripeMapHeader(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *p)
{
    pool_put(&raidPtr->pools.asm_hdr, p);
}

RF_VoidFunctionPointerListElem_t *
/* Allocate a zeroed void-function-pointer list element; may sleep. */
rf_AllocVFPListElem(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.vfple, PR_WAITOK | PR_ZERO);
}

void
rf_FreeVFPListElem(RF_Raid_t *raidPtr, RF_VoidFunctionPointerListElem_t *p)
{
    pool_put(&raidPtr->pools.vfple, p);
}

/* Allocate a zeroed void-pointer list element; may sleep. */
RF_VoidPointerListElem_t *
rf_AllocVPListElem(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.vple, PR_WAITOK | PR_ZERO);
}

void
rf_FreeVPListElem(RF_Raid_t *raidPtr, RF_VoidPointerListElem_t *p)
{
    pool_put(&raidPtr->pools.vple, p);
}

/* Allocate a zeroed ASM-header list element; may sleep. */
RF_ASMHeaderListElem_t *
rf_AllocASMHeaderListElem(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.asmhle, PR_WAITOK | PR_ZERO);
}

void
rf_FreeASMHeaderListElem(RF_Raid_t *raidPtr, RF_ASMHeaderListElem_t *p)
{
    pool_put(&raidPtr->pools.asmhle, p);
}

/* Allocate a zeroed failed-stripe structure; may sleep. */
RF_FailedStripe_t *
rf_AllocFailedStripeStruct(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.fss, PR_WAITOK | PR_ZERO);
}

void
rf_FreeFailedStripeStruct(RF_Raid_t *raidPtr, RF_FailedStripe_t *p)
{
    pool_put(&raidPtr->pools.fss, p);
}

/* Allocate a single zeroed physical-disk-address structure; may sleep. */
RF_PhysDiskAddr_t *
rf_AllocPhysDiskAddr(RF_Raid_t *raidPtr)
{
    return pool_get(&raidPtr->pools.pda, PR_WAITOK | PR_ZERO);
}

/* allocates a list of PDAs, locking the free list only once when we
 * have to call calloc, we do it one component at a time to simplify
 * the process of freeing the list at program shutdown.  This should
 * not be much of a performance hit, because it should be very
 * infrequently executed. */
/*
 * Returns a singly linked list of 'count' PDAs (NULL if count == 0).
 * NOTE(review): unlike rf_AllocPhysDiskAddr(), the entries here are NOT
 * zeroed (no PR_ZERO); callers such as rf_MapAccess() memset each one
 * before use — keep that invariant when adding callers.
 */
RF_PhysDiskAddr_t *
rf_AllocPDAList(RF_Raid_t *raidPtr, int count)
{
    RF_PhysDiskAddr_t *p, *prev;
    int i;

    p = NULL;
    prev = NULL;
    /* Build the list back-to-front; 'p' ends up as the head. */
    for (i = 0; i < count; i++) {
        p = pool_get(&raidPtr->pools.pda, PR_WAITOK);
        p->next = prev;
        prev = p;
    }

    return (p);
}

/* Return a single PDA to the pool. */
void
rf_FreePhysDiskAddr(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *p)
{
    pool_put(&raidPtr->pools.pda, p);
}

/* Release an entire PDA list built by rf_AllocPDAList(). */
static void
rf_FreePDAList(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_list)
{
    RF_PhysDiskAddr_t *p, *tmp;

    p = pda_list;
    while (p) {
        tmp = p->next;
        pool_put(&raidPtr->pools.pda, p);
        p = tmp;
    }
}

/* this is essentially identical to AllocPDAList.
I should combine
 * the two.  when we have to call calloc, we do it one component at a
 * time to simplify the process of freeing the list at program
 * shutdown.  This should not be much of a performance hit, because it
 * should be very infrequently executed.
 */
/*
 * Returns a singly linked list of 'count' ASM structures (NULL if
 * count == 0).  Entries are NOT zeroed; callers memset them.
 */
RF_AccessStripeMap_t *
rf_AllocASMList(RF_Raid_t *raidPtr, int count)
{
    RF_AccessStripeMap_t *p, *prev;
    int i;

    p = NULL;
    prev = NULL;
    /* Build back-to-front; 'p' ends up as the head. */
    for (i = 0; i < count; i++) {
        p = pool_get(&raidPtr->pools.asmap, PR_WAITOK);
        p->next = prev;
        prev = p;
    }
    return (p);
}

/* Release an ASM list built by rf_AllocASMList(). */
static void
rf_FreeASMList(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asm_list)
{
    RF_AccessStripeMap_t *p, *tmp;

    p = asm_list;
    while (p) {
        tmp = p->next;
        pool_put(&raidPtr->pools.asmap, p);
        p = tmp;
    }
}

/*
 * Free an entire access-stripe map: for each stripe, splice its q,
 * parity and data PDA lists into one accumulated list, then free that
 * list, the ASM list, and finally the header.  The 'count' / 't'
 * walk is a debug cross-check that the splice did not drop or
 * duplicate any PDA.
 */
void
rf_FreeAccessStripeMap(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *hdr)
{
    RF_AccessStripeMap_t *p;
    RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL;
    int count = 0, t;

    for (p = hdr->stripeMap; p; p = p->next) {

        /* link the 3 pda lists into the accumulating pda list */

        if (!pdaList)
            pdaList = p->qInfo;
        else
            pdaEnd->next = p->qInfo;

        /* advance 'trailer' to the tail of the q list, counting */
        for (trailer = NULL, pdp = p->qInfo; pdp;) {
            trailer = pdp;
            pdp = pdp->next;
            count++;
        }
        if (trailer)
            pdaEnd = trailer;

        if (!pdaList)
            pdaList = p->parityInfo;
        else
            pdaEnd->next = p->parityInfo;

        for (trailer = NULL, pdp = p->parityInfo; pdp;) {
            trailer = pdp;
            pdp = pdp->next;
            count++;
        }
        if (trailer)
            pdaEnd = trailer;

        if (!pdaList)
            pdaList = p->physInfo;
        else
            pdaEnd->next = p->physInfo;

        for (trailer = NULL, pdp = p->physInfo; pdp;) {
            trailer = pdp;
            pdp = pdp->next;
            count++;
        }
        if (trailer)
            pdaEnd = trailer;
    }

    /* debug only */
    for (t = 0, pdp = pdaList; pdp; pdp = pdp->next)
        t++;
    RF_ASSERT(t == count);

    if (pdaList)
        rf_FreePDAList(raidPtr, pdaList);
    rf_FreeASMList(raidPtr, hdr->stripeMap);
    rf_FreeAccessStripeMapHeader(raidPtr, hdr);
}
/* We can't use the large write optimization if there are any failures
 * in the stripe.
  In the declustered layout, there is no way to
 * immediately determine what disks constitute a stripe, so we
 * actually have to hunt through the stripe looking for failures.  The
 * reason we map the parity instead of just using asm->parityInfo->col
 * is because the latter may have been already redirected to a spare
 * drive, which would mess up the computation of the stripe offset.
 *
 * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
 */
/*
 * Returns 1 if a dead (non-parity) disk in the stripe rules out the
 * large-write optimization, 0 otherwise.  When reconstruction has
 * already rebuilt the affected RU, returns 0 and sets
 * RF_ASM_REDIR_LARGE_WRITE so the access is redirected instead.
 */
int
rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
{
    RF_RowCol_t tcol, pcol, *diskids, i;
    RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    RF_StripeCount_t stripeOffset;
    int numFailures;
    RF_RaidAddr_t sosAddr;
    RF_SectorNum_t diskOffset, poffset;

    /* quick out in the fault-free case.  The count is sampled under
     * the mutex but used after release — a benign race at worst. */
    rf_lock_mutex2(raidPtr->mutex);
    numFailures = raidPtr->numFailures;
    rf_unlock_mutex2(raidPtr->mutex);
    if (numFailures == 0)
        return (0);

    sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids);
    (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress,
        &pcol, &poffset, 0);    /* get pcol */

    /* this need not be true if we've redirected the access to a
     * spare in another row RF_ASSERT(row == testrow); */
    stripeOffset = 0;
    for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) {
        /* skip the parity column; stripeOffset counts data units only */
        if (diskids[i] != pcol) {
            if (RF_DEAD_DISK(raidPtr->Disks[diskids[i]].status)) {
                /* not reconstructing => large write impossible */
                if (raidPtr->status != rf_rs_reconstructing)
                    return (1);
                RF_ASSERT(raidPtr->reconControl->fcol == diskids[i]);
                layoutPtr->map->MapSector(raidPtr,
                    sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit,
                    &tcol, &diskOffset, 0);
                RF_ASSERT(tcol == diskids[i]);
                /* RU not yet rebuilt => still can't large-write */
                if (!rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, diskOffset))
                    return (1);
                /* rebuilt: redirect the large write instead */
                asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
                return (0);
            }
            stripeOffset++;
        }
    }
    return (0);
}

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD >0)
/* return the number of failed data units in the stripe.
*/ int rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_RowCol_t tcol, i; RF_SectorNum_t diskOffset; RF_RaidAddr_t sosAddr; int numFailures; /* quick out in the fault-free case. */ rf_lock_mutex2(raidPtr->mutex); numFailures = raidPtr->numFailures; rf_unlock_mutex2(raidPtr->mutex); if (numFailures == 0) return (0); numFailures = 0; sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); for (i = 0; i < layoutPtr->numDataCol; i++) { (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, &tcol, &diskOffset, 0); if (RF_DEAD_DISK(raidPtr->Disks[tcol].status)) numFailures++; } return numFailures; } #endif /**************************************************************************** * * debug routines * ***************************************************************************/ #if RF_DEBUG_MAP void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h) { rf_PrintFullAccessStripeMap(asm_h, 0); } #endif /* prbuf - flag to print buffer pointers */ void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf) { int i; RF_AccessStripeMap_t *asmap = asm_h->stripeMap; RF_PhysDiskAddr_t *p; printf("%d stripes total\n", (int) asm_h->numStripes); for (; asmap; asmap = asmap->next) { /* printf("Num failures: %d\n",asmap->numDataFailed); */ /* printf("Num sectors: * %d\n",(int)asmap->totalSectorsAccessed); */ printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", (int) asmap->stripeID, (int) asmap->totalSectorsAccessed, (int) asmap->numDataFailed, (int) asmap->numParityFailed); if (asmap->parityInfo) { printf("Parity [c%d s%d-%d", asmap->parityInfo->col, (int) asmap->parityInfo->startSector, (int) (asmap->parityInfo->startSector + asmap->parityInfo->numSector - 1)); if (prbuf) printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); if (asmap->parityInfo->next) { printf(", c%d s%d-%d", asmap->parityInfo->next->col, 
(int) asmap->parityInfo->next->startSector, (int) (asmap->parityInfo->next->startSector + asmap->parityInfo->next->numSector - 1)); if (prbuf) printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); RF_ASSERT(asmap->parityInfo->next->next == NULL); } printf("]\n\t"); } for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { printf("SU c%d s%d-%d ", p->col, (int) p->startSector, (int) (p->startSector + p->numSector - 1)); if (prbuf) printf("b0x%lx ", (unsigned long) p->bufPtr); if (i && !(i & 1)) printf("\n\t"); } printf("\n"); p = asm_h->stripeMap->failedPDAs[0]; if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) printf("[multiple failures]\n"); else if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) printf("\t[Failed PDA: c%d s%d-%d]\n", p->col, (int) p->startSector, (int) (p->startSector + p->numSector - 1)); } } #if RF_MAP_DEBUG void rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, RF_SectorCount_t numBlocks) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { printf("%d (0x%x), ", (int) ra, (int) ra); } printf("\n"); printf("Offset into stripe unit: %d (0x%x)\n", (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); } #endif /* given a parity descriptor and the starting address within a stripe, * range restrict the parity descriptor to touch only the correct * stuff. 
 */
void
rf_ASMParityAdjust(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *toAdjust,
    RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress,
    RF_RaidLayout_t *layoutPtr, RF_AccessStripeMap_t *asm_p)
{
	RF_PhysDiskAddr_t *new_pda;

	/* when we're accessing only a portion of one stripe unit, we
	 * want the parity descriptor to identify only the chunk of
	 * parity associated with the data.  When the access spans
	 * exactly one stripe unit boundary and is less than a stripe
	 * unit in size, it uses two disjoint regions of the parity
	 * unit.  When an access spans more than one stripe unit
	 * boundary, it uses all of the parity unit.
	 *
	 * To better handle the case where stripe units are small, we
	 * may eventually want to change the 2nd case so that if the
	 * SU size is below some threshold, we just read/write the
	 * whole thing instead of breaking it up into two accesses. */
	if (asm_p->numStripeUnitsAccessed == 1) {
		/* Case 1: access contained within one SU -- shift the
		 * parity descriptor by the offset into the stripe unit
		 * and shrink it to the data's length. */
		int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
		toAdjust->startSector += x;
		toAdjust->raidAddress += x;
		toAdjust->numSector = asm_p->physInfo->numSector;
		RF_ASSERT(toAdjust->numSector != 0);
	} else
		if (asm_p->numStripeUnitsAccessed == 2 &&
		    asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) {
			/* Case 2: access crosses exactly one SU boundary but
			 * is shorter than a full SU -- split the parity
			 * descriptor into two disjoint PDAs. */
			int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);

			/* create a second pda and copy the parity map info
			 * into it */
			RF_ASSERT(toAdjust->next == NULL);
			/* the following will get freed in
			 * rf_FreeAccessStripeMap() via rf_FreePDAList() */
			new_pda = toAdjust->next = rf_AllocPhysDiskAddr(raidPtr);
			*new_pda = *toAdjust;	/* structure assignment */
			new_pda->next = NULL;

			/* adjust the start sector & number of blocks for the
			 * first parity pda: it covers from the access start
			 * up to the next stripe unit boundary. */
			toAdjust->startSector += x;
			toAdjust->raidAddress += x;
			toAdjust->numSector =
			    rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
				startAddrWithinStripe) - startAddrWithinStripe;
			RF_ASSERT(toAdjust->numSector != 0);

			/* adjust the second pda: it covers from the previous
			 * stripe unit boundary up to endAddress. */
			new_pda->numSector = endAddress -
			    rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
				endAddress);
			/* new_pda->raidAddress =
			 * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
			 * toAdjust->raidAddress); */
			RF_ASSERT(new_pda->numSector != 0);
		}
	/* Case 3 (spans more than one SU boundary): descriptor already
	 * covers the whole parity unit; nothing to adjust. */
}

/* Check if a disk has been spared or failed.  If spared, redirect the
 * I/O.  If it has been failed, record it in the asm pointer.  Fifth
 * arg is whether data or parity. */
void
rf_ASMCheckStatus(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_p,
    RF_AccessStripeMap_t *asm_p, RF_RaidDisk_t *disks, int parity)
{
	RF_DiskStatus_t dstatus;
	RF_RowCol_t fcol;

	dstatus = disks[pda_p->col].status;

	if (dstatus == rf_ds_spared) {
		/* if the disk has been spared, redirect access to the
		 * spare */
		fcol = pda_p->col;
		pda_p->col = disks[fcol].spareCol;
	} else
		if (dstatus == rf_ds_dist_spared) {
			/* ditto if disk has been spared to dist spare space:
			 * re-run the layout's mapping function with RF_REMAP
			 * to find the new column/sector. */
#if RF_DEBUG_MAP
			RF_RowCol_t oc = pda_p->col;	/* original column, for debug print */
			RF_SectorNum_t oo = pda_p->startSector;	/* original offset */
#endif
			if (pda_p->type == RF_PDA_TYPE_DATA)
				raidPtr->Layout.map->MapSector(raidPtr,
				    pda_p->raidAddress, &pda_p->col,
				    &pda_p->startSector, RF_REMAP);
			else
				raidPtr->Layout.map->MapParity(raidPtr,
				    pda_p->raidAddress, &pda_p->col,
				    &pda_p->startSector, RF_REMAP);
#if RF_DEBUG_MAP
			if (rf_mapDebug) {
				printf("Redirected c %d o %d -> c %d o %d\n",
				    oc, (int) oo, pda_p->col,
				    (int) pda_p->startSector);
			}
#endif
		} else
			if (RF_DEAD_DISK(dstatus)) {
				/* if the disk is inaccessible, mark the
				 * failure */
				if (parity)
					asm_p->numParityFailed++;
				else {
					asm_p->numDataFailed++;
				}
				/* NOTE(review): no bounds check against the
				 * failedPDAs array size here -- presumably the
				 * caller guarantees at most RF_MAX_FAILED_PDA
				 * failures per stripe; confirm at call sites. */
				asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
				asm_p->numFailedPDAs++;
#if 0
				switch (asm_p->numParityFailed + asm_p->numDataFailed) {
				case 1:
					asm_p->failedPDAs[0] = pda_p;
					break;
				case 2:
					asm_p->failedPDAs[1] = pda_p;
				default:
					break;
				}
#endif
			}
	/* the redirected access should never span a stripe unit boundary */
	RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,
		pda_p->raidAddress) ==
	    rf_RaidAddressToStripeUnitID(&raidPtr->Layout,
		pda_p->raidAddress + pda_p->numSector - 1));
	RF_ASSERT(pda_p->col != -1);
}
|
| 3 2 4 2 2 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 | /* $NetBSD: msdosfs_vfsops.c,v 1.138 2022/04/16 07:58:21 hannken Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.138 2022/04/16 07:58:21 hannken Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/namei.h> #include <sys/proc.h> #include <sys/kernel.h> #include <sys/vnode.h> #include <miscfs/genfs/genfs.h> #include <miscfs/specfs/specdev.h> /* XXX */ /* defines v_rdev */ #include <sys/mount.h> #include <sys/buf.h> #include <sys/file.h> #include <sys/device.h> #include <sys/disklabel.h> #include <sys/disk.h> #include <sys/ioctl.h> #include <sys/malloc.h> #include <sys/dirent.h> #include <sys/stat.h> #include <sys/conf.h> #include <sys/kauth.h> #include <sys/module.h> #include <fs/msdosfs/bpb.h> #include <fs/msdosfs/bootsect.h> #include <fs/msdosfs/direntry.h> #include <fs/msdosfs/denode.h> #include <fs/msdosfs/msdosfsmount.h> #include <fs/msdosfs/fat.h> MODULE(MODULE_CLASS_VFS, msdos, NULL); #ifdef MSDOSFS_DEBUG #define DPRINTF(fmt, ...) 
uprintf("%s(): " fmt "\n", __func__, ##__VA_ARGS__) #else #define DPRINTF(fmt, ...) #endif #define GEMDOSFS_BSIZE 512 #define MSDOSFS_NAMEMAX(pmp) \ (pmp)->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12 int msdosfs_mountfs(struct vnode *, struct mount *, struct lwp *, struct msdosfs_args *); static int update_mp(struct mount *, struct msdosfs_args *); MALLOC_JUSTDEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOS FS mount structure"); MALLOC_JUSTDEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOS FS FAT table"); MALLOC_JUSTDEFINE(M_MSDOSFSTMP, "MSDOSFS temp", "MSDOS FS temp. structures"); extern const struct vnodeopv_desc msdosfs_vnodeop_opv_desc; const struct vnodeopv_desc * const msdosfs_vnodeopv_descs[] = { &msdosfs_vnodeop_opv_desc, NULL, }; struct vfsops msdosfs_vfsops = { .vfs_name = MOUNT_MSDOS, .vfs_min_mount_data = sizeof (struct msdosfs_args), .vfs_mount = msdosfs_mount, .vfs_start = msdosfs_start, .vfs_unmount = msdosfs_unmount, .vfs_root = msdosfs_root, .vfs_quotactl = (void *)eopnotsupp, .vfs_statvfs = msdosfs_statvfs, .vfs_sync = msdosfs_sync, .vfs_vget = msdosfs_vget, .vfs_loadvnode = msdosfs_loadvnode, .vfs_fhtovp = msdosfs_fhtovp, .vfs_vptofh = msdosfs_vptofh, .vfs_init = msdosfs_init, .vfs_reinit = msdosfs_reinit, .vfs_done = msdosfs_done, .vfs_mountroot = msdosfs_mountroot, .vfs_snapshot = (void *)eopnotsupp, .vfs_extattrctl = vfs_stdextattrctl, .vfs_suspendctl = genfs_suspendctl, .vfs_renamelock_enter = genfs_renamelock_enter, .vfs_renamelock_exit = genfs_renamelock_exit, .vfs_fsync = (void *)eopnotsupp, .vfs_opv_descs = msdosfs_vnodeopv_descs }; SYSCTL_SETUP(msdosfs_sysctl_setup, "msdosfs sysctl") { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "msdosfs", SYSCTL_DESCR("MS-DOS file system"), NULL, 0, NULL, 0, CTL_VFS, 4, CTL_EOL); /* * XXX the "4" above could be dynamic, thereby eliminating one * more instance of the "number to vfs" mapping problem, but * "4" is the order as taken from sys/mount.h */ } static int msdos_modcmd(modcmd_t 
cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		/* register the filesystem with the VFS layer */
		error = vfs_attach(&msdosfs_vfsops);
		if (error != 0)
			break;
		/* NOTE(review): the conditional break above is redundant
		 * with the unconditional one below -- looks like leftover
		 * from code that once did more setup after vfs_attach. */
		break;
	case MODULE_CMD_FINI:
		error = vfs_detach(&msdosfs_vfsops);
		if (error != 0)
			break;
		break;
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Apply the user-supplied mount arguments to an existing msdosfsmount:
 * ownership, permission masks, GMT offset, and the mount-option flags,
 * then decide whether Win'95 long filenames are supported.
 */
static int
update_mp(struct mount *mp, struct msdosfs_args *argp)
{
	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
	int error;

	pmp->pm_gid = argp->gid;
	pmp->pm_uid = argp->uid;
	pmp->pm_mask = argp->mask & ALLPERMS;
	pmp->pm_dirmask = argp->dirmask & ALLPERMS;
	pmp->pm_gmtoff = argp->gmtoff;
	pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT;

	/*
	 * GEMDOS knows nothing about win95 long filenames
	 */
	if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS)
		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;

	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
	else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME |
	    MSDOSFSMNT_LONGNAME))) {
		struct vnode *rtvp;

		/*
		 * Try to divine whether to support Win'95 long filenames:
		 * FAT32 always does; otherwise scan the root directory
		 * for existing Win'95 entries.
		 */
		if (FAT32(pmp))
			pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
		else {
			error = msdosfs_root(mp, LK_EXCLUSIVE, &rtvp);
			if (error != 0)
				return error;
			pmp->pm_flags |= msdosfs_findwin95(VTODE(rtvp)) ?
MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rtvp); } } mp->mnt_stat.f_namemax = MSDOSFS_NAMEMAX(pmp); return 0; } int msdosfs_mountroot(void) { struct mount *mp; struct lwp *l = curlwp; /* XXX */ int error; struct msdosfs_args args; if (device_class(root_device) != DV_DISK) return (ENODEV); if ((error = vfs_rootmountalloc(MOUNT_MSDOS, "root_device", &mp))) { vrele(rootvp); return (error); } args.flags = MSDOSFSMNT_VERSIONED; args.uid = 0; args.gid = 0; args.mask = 0777; args.version = MSDOSFSMNT_VERSION; args.dirmask = 0777; if ((error = msdosfs_mountfs(rootvp, mp, l, &args)) != 0) { vfs_unbusy(mp); vfs_rele(mp); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0); vfs_unbusy(mp); vfs_rele(mp); vrele(rootvp); return (error); } mountlist_append(mp); (void)msdosfs_statvfs(mp, &mp->mnt_stat); vfs_unbusy(mp); return (0); } /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ int msdosfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) { struct lwp *l = curlwp; struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args *args = data; /* holds data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; int error, flags; mode_t accessmode; if (args == NULL) return EINVAL; if (*data_len < sizeof *args) return EINVAL; if (mp->mnt_flag & MNT_GETARGS) { pmp = VFSTOMSDOSFS(mp); if (pmp == NULL) return EIO; args->fspec = NULL; args->uid = pmp->pm_uid; args->gid = pmp->pm_gid; args->mask = pmp->pm_mask; args->flags = pmp->pm_flags; args->version = MSDOSFSMNT_VERSION; args->dirmask = pmp->pm_dirmask; args->gmtoff = pmp->pm_gmtoff; *data_len = sizeof *args; return 0; } /* * If not versioned (i.e. using old mount_msdos(8)), fill in * the additional structure items with suitable defaults. 
*/ if ((args->flags & MSDOSFSMNT_VERSIONED) == 0) { args->version = 1; args->dirmask = args->mask; } /* * Reset GMT offset for pre-v3 mount structure args. */ if (args->version < 3) args->gmtoff = 0; /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) { DPRINTF("vflush %d", error); return (error); } if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_iflag & IMNT_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. * * Permission to update a mount is checked higher, so * here we presume updating the mount is okay (for * example, as far as securelevel goes) which leaves us * with the normal check. */ devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(VREAD | VWRITE)); VOP_UNLOCK(devvp); DPRINTF("KAUTH_REQ_SYSTEM_MOUNT_DEVICE %d", error); if (error) return (error); pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args->fspec == NULL) { DPRINTF("missing fspec"); return EINVAL; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ error = namei_simple_user(args->fspec, NSM_FOLLOW_NOEMULROOT, &devvp); if (error != 0) { DPRINTF("namei %d", error); return (error); } if (devvp->v_type != VBLK) { DPRINTF("not block"); vrele(devvp); return (ENOTBLK); } if (bdevsw_lookup(devvp->v_rdev) == NULL) { DPRINTF("no block switch"); vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(accessmode)); VOP_UNLOCK(devvp); if (error) { DPRINTF("KAUTH_REQ_SYSTEM_MOUNT_DEVICE %d", error); vrele(devvp); return (error); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { int xflags; if (mp->mnt_flag & MNT_RDONLY) xflags = FREAD; else xflags = FREAD|FWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_OPEN(devvp, xflags, FSCRED); VOP_UNLOCK(devvp); if (error) { DPRINTF("VOP_OPEN %d", error); goto fail; } error = msdosfs_mountfs(devvp, mp, l, args); if (error) { DPRINTF("msdosfs_mountfs %d", error); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_CLOSE(devvp, xflags, NOCRED); VOP_UNLOCK(devvp); goto fail; } #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { vrele(devvp); if (devvp != pmp->pm_devvp) { DPRINTF("devvp %p pmp %p", devvp, pmp->pm_devvp); return (EINVAL); /* needs translation */ } } if ((error = update_mp(mp, args)) != 0) { msdosfs_unmount(mp, MNT_FORCE); DPRINTF("update_mp %d", error); return error; } #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return set_statvfs_info(path, UIO_USERSPACE, args->fspec, UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); fail: vrele(devvp); return (error); } int msdosfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l, struct msdosfs_args *argp) { 
struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; struct byte_bpb710 *b710; uint8_t SecPerClust; int ronly, error, BlkPerSec; uint64_t psize; unsigned secsize; u_long fatbytes, fatblocksecs; /* Flush out any old buffers remaining from a previous use. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(devvp, V_SAVE, l->l_cred, l, 0, 0); VOP_UNLOCK(devvp); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; bp = NULL; /* both used in error_exit */ pmp = NULL; error = getdisksize(devvp, &psize, &secsize); if (error) { if (argp->flags & MSDOSFSMNT_GEMDOSFS) goto error_exit; /* ok, so it failed. we most likely don't need the info */ secsize = DEV_BSIZE; psize = 0; error = 0; } if (secsize < DEV_BSIZE) { DPRINTF("Invalid block secsize (%d < DEV_BSIZE)", secsize); error = EINVAL; goto error_exit; } if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (secsize != GEMDOSFS_BSIZE) { DPRINTF("Invalid block secsize %d for GEMDOS", secsize); error = EINVAL; goto error_exit; } } /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. */ if (secsize < sizeof(*b50)) { DPRINTF("50 bootsec %u\n", secsize); error = EINVAL; goto error_exit; } if ((error = bread(devvp, 0, secsize, 0, &bp)) != 0) goto error_exit; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsBPB; #if 0 /* * Some FAT partition, for example Raspberry Pi Pico's * USB mass storage, does not have exptected BOOTSIGs. * According to FreeBSD's comment, some PC-9800/9821 * FAT floppy disks have similar problems. 
*/ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { DPRINTF("bootsig0 %d bootsig1 %d", bsp->bs50.bsBootSectSig0, bsp->bs50.bsBootSectSig1); error = EINVAL; goto error_exit; } } #endif pmp = malloc(sizeof(*pmp), M_MSDOSFSMNT, M_WAITOK|M_ZERO); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { if (secsize < sizeof(*b33)) { DPRINTF("33 bootsec %u\n", secsize); error = EINVAL; goto error_exit; } pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } /* * Sanity checks, from the FAT specification: * - sectors per cluster: >= 1, power of 2 * - logical sector size: >= 1, power of 2 * - cluster size: <= max FS block size * - number of sectors: >= 1 */ if ((SecPerClust == 0) || !powerof2(SecPerClust) || (pmp->pm_BytesPerSec == 0) || !powerof2(pmp->pm_BytesPerSec) || (SecPerClust * pmp->pm_BytesPerSec > MAXBSIZE) || (pmp->pm_HugeSectors == 0)) { DPRINTF("consistency checks"); error = EINVAL; goto error_exit; } if (!(argp->flags & MSDOSFSMNT_GEMDOSFS) && (pmp->pm_SecPerTrack > 63)) { DPRINTF("SecPerTrack %d", pmp->pm_SecPerTrack); error = EINVAL; goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (secsize < sizeof(*b710)) { 
DPRINTF("710 bootsec %u\n", secsize); error = EINVAL; goto error_exit; } unsigned short FSVers = getushort(b710->bpbFSVers); unsigned short ExtFlags = getushort(b710->bpbExtFlags); /* * Some say that bsBootSectSig[23] must be zero, but * Windows does not require this and some digital cameras * do not set these to zero. Therefore, do not insist. */ if (pmp->pm_Sectors || pmp->pm_FATsecs || FSVers) { DPRINTF("Sectors %d FATsecs %lu FSVers %d", pmp->pm_Sectors, pmp->pm_FATsecs, FSVers); error = EINVAL; goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); /* Mirroring is enabled if the FATMIRROR bit is not set. */ if ((ExtFlags & FATMIRROR) == 0) pmp->pm_flags |= MSDOSFS_FATMIRROR; else pmp->pm_curfat = ExtFlags & FATNUM; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* GEMDOS doesn't know FAT32. */ DPRINTF("FAT32 for GEMDOS"); error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: >= block size * - number of sectors: <= size of partition */ if ((pmp->pm_BytesPerSec < GEMDOSFS_BSIZE) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / GEMDOSFS_BSIZE) > psize)) { DPRINTF("consistency checks for GEMDOS"); error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdosfs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. 
*/ BlkPerSec = pmp->pm_BytesPerSec / GEMDOSFS_BSIZE; pmp->pm_BytesPerSec = GEMDOSFS_BSIZE; pmp->pm_HugeSectors *= BlkPerSec; pmp->pm_HiddenSects *= BlkPerSec; pmp->pm_ResSectors *= BlkPerSec; pmp->pm_Sectors *= BlkPerSec; pmp->pm_FATsecs *= BlkPerSec; SecPerClust *= BlkPerSec; } /* Check that fs has nonzero FAT size */ if (pmp->pm_FATsecs == 0) { DPRINTF("FATsecs is 0"); error = EINVAL; goto error_exit; } pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { if (secsize < sizeof(*b710)) { DPRINTF("710 bootsec %u\n", secsize); error = EINVAL; goto error_exit; } pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (pmp->pm_nmbrofclusters <= (0xff0 - 2)) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one FAT entry will not be split across * multiple blocks. 
*/ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } /* validate cluster count against FAT */ if ((pmp->pm_maxcluster & pmp->pm_fatmask) != pmp->pm_maxcluster) { DPRINTF("maxcluster %lu outside of mask %#lx\n", pmp->pm_maxcluster, pmp->pm_fatmask); error = EINVAL; goto error_exit; } /* validate FAT size */ fatbytes = (pmp->pm_maxcluster+1) * pmp->pm_fatmult / pmp->pm_fatdiv; fatblocksecs = howmany(fatbytes, pmp->pm_BytesPerSec); if (pmp->pm_FATsecs < fatblocksecs) { DPRINTF("FATsecs %lu < real %lu\n", pmp->pm_FATsecs, fatblocksecs); error = EINVAL; goto error_exit; } if (FAT12(pmp)) { /* * limit block size to what is needed to read a FAT block * to not exceed MAXBSIZE */ pmp->pm_fatblocksec = uimin(3, fatblocksecs); pmp->pm_fatblocksize = pmp->pm_fatblocksec * pmp->pm_BytesPerSec; } else { pmp->pm_fatblocksize = MAXBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; } pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. */ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { DPRINTF("bpcluster %lu cnshift %lu", pmp->pm_bpcluster, pmp->pm_cnshift); error = EINVAL; goto error_exit; } /* * Cluster size must be within limit of MAXBSIZE. * Many FAT filesystems will not have clusters larger than * 32KiB due to limits in Windows versions before Vista. */ if (pmp->pm_bpcluster > MAXBSIZE) { DPRINTF("bpcluster %lu > MAXBSIZE %d", pmp->pm_bpcluster, MAXBSIZE); error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp, BC_AGE); bp = NULL; /* * Check FSInfo. 
*/ if (pmp->pm_fsinfo) { struct fsinfo *fp; const int rdsz = roundup(sizeof(*fp), pmp->pm_BytesPerSec); /* * XXX If the fsinfo block is stored on media with * 2KB or larger sectors, is the fsinfo structure * padded at the end or in the middle? */ if ((error = bread(devvp, de_bn2kb(pmp, pmp->pm_fsinfo), rdsz, 0, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!memcmp(fp->fsisig1, "RRaA", 4) && !memcmp(fp->fsisig2, "rrAa", 4) && !memcmp(fp->fsisig3, "\0\0\125\252", 4) && !memcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp, 0); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? * XXX */ if (pmp->pm_fsinfo) { if ((pmp->pm_nxtfree == 0xffffffffUL) || (pmp->pm_nxtfree > pmp->pm_maxcluster)) pmp->pm_fsinfo = 0; } /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = msdosfs_fillinusemap(pmp)) != 0) { DPRINTF("fillinusemap %d", error); goto error_exit; } /* * If they want FAT updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the FAT being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. 
*/ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = pmp; mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_MSDOS); mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; mp->mnt_stat.f_namemax = MSDOSFS_NAMEMAX(pmp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_iflag |= IMNT_SHRLOOKUP; mp->mnt_dev_bshift = pmp->pm_bnshift; mp->mnt_fs_bshift = pmp->pm_cnshift; /* * If we ever do quotas for DOS filesystems this would be a place * to fill in the info in the msdosfsmount structure. You dolt, * quotas on dos filesystems make no sense because files have no * owners on dos filesystems. of course there is some empty space * in the directory entry where we could put uid's and gid's. */ spec_node_setmountedfs(devvp, mp); return (0); error_exit: if (bp) brelse(bp, BC_AGE); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; } return (error); } int msdosfs_start(struct mount *mp, int flags) { return (0); } /* * Unmount the filesystem described by mp. 
*/ int msdosfs_unmount(struct mount *mp, int mntflags) { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; if ((error = vflush(mp, NULLVP, flags)) != 0) return (error); pmp = VFSTOMSDOSFS(mp); if (pmp->pm_devvp->v_type != VBAD) spec_node_setmountedfs(pmp->pm_devvp, NULL); #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08x, usecount %d, writecount %d, holdcnt %d\n", vp->v_vflag | vp->v_iflag | vp->v_uflag, vrefcnt(vp), vp->v_writecount, vp->v_holdcnt); printf("mount %p, op %p\n", vp->v_mount, vp->v_op); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %d, type %d\n", vp->v_cleanblkhd.lh_first, vp->v_dirtyblkhd.lh_first, vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_CLOSE(pmp->pm_devvp, pmp->pm_flags & MSDOSFSMNT_RONLY ? 
FREAD : FREAD|FWRITE, NOCRED); vput(pmp->pm_devvp); msdosfs_fh_destroy(pmp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; return (0); } int msdosfs_root(struct mount *mp, int lktype, struct vnode **vpp) { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif if ((error = msdosfs_deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, vpp)) != 0) return error; error = vn_lock(*vpp, lktype); if (error) { vrele(*vpp); *vpp = NULL; return error; } return 0; } int msdosfs_statvfs(struct mount *mp, struct statvfs *sbp) { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_frsize = sbp->f_bsize; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_bresvd = 0; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ sbp->f_favail = 0; /* what to put in here? */ sbp->f_fresvd = 0; copy_statvfs_info(sbp, mp); return (0); } struct msdosfs_sync_ctx { int waitfor; }; static bool msdosfs_sync_selector(void *cl, struct vnode *vp) { struct msdosfs_sync_ctx *c = cl; struct denode *dep; KASSERT(mutex_owned(vp->v_interlock)); dep = VTODE(vp); if (c->waitfor == MNT_LAZY || vp->v_type == VNON || dep == NULL || (((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) && (LIST_EMPTY(&vp->v_dirtyblkhd) && (vp->v_iflag & VI_ONWORKLST) == 0))) return false; return true; } int msdosfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { struct vnode *vp; struct vnode_iterator *marker; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; struct msdosfs_sync_ctx ctx; /* * If we ever switch to not updating all of the FATs all the time, * this would be the place to update them from the first one. 
*/ if (pmp->pm_fmod != 0) { if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update FATs here */ } } /* * Write back each (modified) denode. */ vfs_vnode_iterator_init(mp, &marker); ctx.waitfor = waitfor; while ((vp = vfs_vnode_iterator_next(marker, msdosfs_sync_selector, &ctx))) { error = vn_lock(vp, LK_EXCLUSIVE); if (error) { vrele(vp); continue; } if ((error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) allerror = error; vput(vp); } vfs_vnode_iterator_destroy(marker); /* * Force stale file system control information to be flushed. */ vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) allerror = error; VOP_UNLOCK(pmp->pm_devvp); return (allerror); } int msdosfs_fhtovp(struct mount *mp, struct fid *fhp, int lktype, struct vnode **vpp) { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid defh; uint32_t gen; int error; if (fhp->fid_len != sizeof(struct defid)) { DPRINTF("fid_len %d %zd", fhp->fid_len, sizeof(struct defid)); return EINVAL; } memcpy(&defh, fhp, sizeof(defh)); error = msdosfs_fh_lookup(pmp, defh.defid_dirclust, defh.defid_dirofs, &gen); if (error == 0 && gen != defh.defid_gen) error = ESTALE; if (error) { *vpp = NULLVP; return error; } error = msdosfs_deget(pmp, defh.defid_dirclust, defh.defid_dirofs, vpp); if (error) { DPRINTF("deget %d", error); *vpp = NULLVP; return error; } error = vn_lock(*vpp, lktype); if (error) { vrele(*vpp); *vpp = NULLVP; return error; } return 0; } int msdosfs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) { struct msdosfsmount *pmp = VFSTOMSDOSFS(vp->v_mount); struct denode *dep; struct defid defh; int error; if (*fh_size < sizeof(struct defid)) { *fh_size = sizeof(struct defid); return E2BIG; } *fh_size = sizeof(struct defid); dep = VTODE(vp); memset(&defh, 0, sizeof(defh)); defh.defid_len = sizeof(struct defid); defh.defid_dirclust = dep->de_dirclust; 
defh.defid_dirofs = dep->de_diroffset; error = msdosfs_fh_enter(pmp, dep->de_dirclust, dep->de_diroffset, &defh.defid_gen); if (error == 0) memcpy(fhp, &defh, sizeof(defh)); return error; } int msdosfs_vget(struct mount *mp, ino_t ino, int lktype, struct vnode **vpp) { return (EOPNOTSUPP); } |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 | /* $NetBSD: ata.c,v 1.169 2022/05/31 08:43:15 andvar Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.169 2022/05/31 08:43:15 andvar Exp $"); #include "opt_ata.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/fcntl.h> #include <sys/proc.h> #include <sys/kthread.h> #include <sys/errno.h> #include <sys/ataio.h> #include <sys/kmem.h> #include <sys/intr.h> #include <sys/bus.h> #include <sys/once.h> #include <sys/bitops.h> #include <sys/cpu.h> #define ATABUS_PRIVATE #include <dev/ata/ataconf.h> #include <dev/ata/atareg.h> #include <dev/ata/atavar.h> #include <dev/ic/wdcvar.h> /* for PIOBM */ #include "ioconf.h" #include "locators.h" #include "atapibus.h" #include "ataraid.h" #include "sata_pmp.h" #if NATARAID > 0 #include <dev/ata/ata_raidvar.h> #endif #if NSATA_PMP > 0 #include <dev/ata/satapmpvar.h> #endif #include <dev/ata/satapmpreg.h> #define DEBUG_FUNCS 0x08 #define DEBUG_PROBE 0x10 #define DEBUG_DETACH 0x20 #define DEBUG_XFERS 0x40 #ifdef ATADEBUG #ifndef ATADEBUG_MASK #define ATADEBUG_MASK 0 #endif int atadebug_mask = ATADEBUG_MASK; #define ATADEBUG_PRINT(args, level) \ if (atadebug_mask & (level)) \ printf args #else #define ATADEBUG_PRINT(args, level) #endif #if defined(ATA_DOWNGRADE_MODE) && NATA_DMA static int ata_downgrade_mode(struct ata_drive_datas *, int); #endif static ONCE_DECL(ata_init_ctrl); static struct pool ata_xfer_pool; /* * A queue of atabus instances, used to ensure the same bus probe order * for a given 
hardware configuration at each boot. Kthread probing * devices on a atabus. Only one probing at once. */ static TAILQ_HEAD(, atabus_initq) atabus_initq_head; static kmutex_t atabus_qlock; static kcondvar_t atabus_qcv; static lwp_t * atabus_cfg_lwp; /***************************************************************************** * ATA bus layer. * * ATA controllers attach an atabus instance, which handles probing the bus * for drives, etc. *****************************************************************************/ dev_type_open(atabusopen); dev_type_close(atabusclose); dev_type_ioctl(atabusioctl); const struct cdevsw atabus_cdevsw = { .d_open = atabusopen, .d_close = atabusclose, .d_read = noread, .d_write = nowrite, .d_ioctl = atabusioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER }; static void atabus_childdetached(device_t, device_t); static int atabus_rescan(device_t, const char *, const int *); static bool atabus_resume(device_t, const pmf_qual_t *); static bool atabus_suspend(device_t, const pmf_qual_t *); static void atabusconfig_thread(void *); static void ata_channel_idle(struct ata_channel *); static void ata_activate_xfer_locked(struct ata_channel *, struct ata_xfer *); static void ata_channel_freeze_locked(struct ata_channel *); static void ata_thread_wake_locked(struct ata_channel *); /* * atabus_init: * * Initialize ATA subsystem structures. */ static int atabus_init(void) { pool_init(&ata_xfer_pool, sizeof(struct ata_xfer), 0, 0, 0, "ataspl", NULL, IPL_BIO); TAILQ_INIT(&atabus_initq_head); mutex_init(&atabus_qlock, MUTEX_DEFAULT, IPL_NONE); cv_init(&atabus_qcv, "atainitq"); return 0; } /* * atabusprint: * * Autoconfiguration print routine used by ATA controllers when * attaching an atabus instance. 
*/ int atabusprint(void *aux, const char *pnp) { struct ata_channel *chan = aux; if (pnp) aprint_normal("atabus at %s", pnp); aprint_normal(" channel %d", chan->ch_channel); return (UNCONF); } /* * ataprint: * * Autoconfiguration print routine. */ int ataprint(void *aux, const char *pnp) { struct ata_device *adev = aux; if (pnp) aprint_normal("wd at %s", pnp); aprint_normal(" drive %d", adev->adev_drv_data->drive); return (UNCONF); } /* * ata_channel_attach: * * Common parts of attaching an atabus to an ATA controller channel. */ void ata_channel_attach(struct ata_channel *chp) { if (chp->ch_flags & ATACH_DISABLED) return; ata_channel_init(chp); KASSERT(chp->ch_queue != NULL); chp->atabus = config_found(chp->ch_atac->atac_dev, chp, atabusprint, CFARGS(.iattr = "ata")); } /* * ata_channel_detach: * * Common parts of detaching an atabus to an ATA controller channel. */ void ata_channel_detach(struct ata_channel *chp) { if (chp->ch_flags & ATACH_DISABLED) return; ata_channel_destroy(chp); chp->ch_flags |= ATACH_DETACHED; } static void atabusconfig(struct atabus_softc *atabus_sc) { struct ata_channel *chp = atabus_sc->sc_chan; struct atac_softc *atac = chp->ch_atac; struct atabus_initq *atabus_initq = NULL; int i, error; /* we are in the atabus's thread context */ /* * Probe for the drives attached to controller, unless a PMP * is already known */ /* XXX for SATA devices we will power up all drives at once */ if (chp->ch_satapmp_nports == 0) (*atac->atac_probe)(chp); if (chp->ch_ndrives >= 2) { ATADEBUG_PRINT(("atabusattach: ch_drive_type 0x%x 0x%x\n", chp->ch_drive[0].drive_type, chp->ch_drive[1].drive_type), DEBUG_PROBE); } /* Make sure the devices probe in atabus order to avoid jitter. 
*/ mutex_enter(&atabus_qlock); for (;;) { atabus_initq = TAILQ_FIRST(&atabus_initq_head); if (atabus_initq->atabus_sc == atabus_sc) break; cv_wait(&atabus_qcv, &atabus_qlock); } mutex_exit(&atabus_qlock); ata_channel_lock(chp); KASSERT(ata_is_thread_run(chp)); /* If no drives, abort here */ if (chp->ch_drive == NULL) goto out; KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); for (i = 0; i < chp->ch_ndrives; i++) if (chp->ch_drive[i].drive_type != ATA_DRIVET_NONE) break; if (i == chp->ch_ndrives) goto out; /* Shortcut in case we've been shutdown */ if (chp->ch_flags & ATACH_SHUTDOWN) goto out; ata_channel_unlock(chp); if ((error = kthread_create(PRI_NONE, 0, NULL, atabusconfig_thread, atabus_sc, &atabus_cfg_lwp, "%scnf", device_xname(atac->atac_dev))) != 0) aprint_error_dev(atac->atac_dev, "unable to create config thread: error %d\n", error); return; out: ata_channel_unlock(chp); mutex_enter(&atabus_qlock); TAILQ_REMOVE(&atabus_initq_head, atabus_initq, atabus_initq); cv_broadcast(&atabus_qcv); mutex_exit(&atabus_qlock); kmem_free(atabus_initq, sizeof(*atabus_initq)); ata_delref(chp); config_pending_decr(atabus_sc->sc_dev); } /* * atabus_configthread: finish attach of atabus's childrens, in a separate * kernel thread. 
*/ static void atabusconfig_thread(void *arg) { struct atabus_softc *atabus_sc = arg; struct ata_channel *chp = atabus_sc->sc_chan; struct atac_softc *atac = chp->ch_atac; struct atabus_initq *atabus_initq = NULL; int i, s; /* XXX seems wrong */ mutex_enter(&atabus_qlock); atabus_initq = TAILQ_FIRST(&atabus_initq_head); KASSERT(atabus_initq->atabus_sc == atabus_sc); mutex_exit(&atabus_qlock); /* * First look for a port multiplier */ if (chp->ch_ndrives == PMP_MAX_DRIVES && chp->ch_drive[PMP_PORT_CTL].drive_type == ATA_DRIVET_PM) { #if NSATA_PMP > 0 satapmp_attach(chp); #else aprint_error_dev(atabus_sc->sc_dev, "SATA port multiplier not supported\n"); /* no problems going on, all drives are ATA_DRIVET_NONE */ #endif } /* * Attach an ATAPI bus, if needed. */ KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); for (i = 0; i < chp->ch_ndrives && chp->atapibus == NULL; i++) { if (chp->ch_drive[i].drive_type == ATA_DRIVET_ATAPI) { #if NATAPIBUS > 0 (*atac->atac_atapibus_attach)(atabus_sc); #else /* * Fake the autoconfig "not configured" message */ aprint_normal("atapibus at %s not configured\n", device_xname(atac->atac_dev)); chp->atapibus = NULL; s = splbio(); for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drive_type == ATA_DRIVET_ATAPI) chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; } splx(s); #endif break; } } for (i = 0; i < chp->ch_ndrives; i++) { struct ata_device adev; if (chp->ch_drive[i].drive_type != ATA_DRIVET_ATA && chp->ch_drive[i].drive_type != ATA_DRIVET_OLD) { continue; } if (chp->ch_drive[i].drv_softc != NULL) continue; memset(&adev, 0, sizeof(struct ata_device)); adev.adev_bustype = atac->atac_bustype_ata; adev.adev_channel = chp->ch_channel; adev.adev_drv_data = &chp->ch_drive[i]; chp->ch_drive[i].drv_softc = config_found(atabus_sc->sc_dev, &adev, ataprint, CFARGS(.iattr = "ata_hl")); if (chp->ch_drive[i].drv_softc != NULL) { ata_probe_caps(&chp->ch_drive[i]); } else { s = splbio(); chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; 
splx(s); } } /* now that we know the drives, the controller can set its modes */ if (atac->atac_set_modes) { (*atac->atac_set_modes)(chp); ata_print_modes(chp); } #if NATARAID > 0 if (atac->atac_cap & ATAC_CAP_RAID) { for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drive_type == ATA_DRIVET_ATA) { ata_raid_check_component( chp->ch_drive[i].drv_softc); } } } #endif /* NATARAID > 0 */ /* * reset drive_flags for unattached devices, reset state for attached * ones */ s = splbio(); for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drive_type == ATA_DRIVET_PM) continue; if (chp->ch_drive[i].drv_softc == NULL) { chp->ch_drive[i].drive_flags = 0; chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; } else chp->ch_drive[i].state = 0; } splx(s); mutex_enter(&atabus_qlock); TAILQ_REMOVE(&atabus_initq_head, atabus_initq, atabus_initq); cv_broadcast(&atabus_qcv); mutex_exit(&atabus_qlock); kmem_free(atabus_initq, sizeof(*atabus_initq)); ata_delref(chp); config_pending_decr(atabus_sc->sc_dev); kthread_exit(0); } /* * atabus_thread: * * Worker thread for the ATA bus. */ static void atabus_thread(void *arg) { struct atabus_softc *sc = arg; struct ata_channel *chp = sc->sc_chan; struct ata_queue *chq = chp->ch_queue; struct ata_xfer *xfer; int i, rv; ata_channel_lock(chp); KASSERT(ata_is_thread_run(chp)); /* * Probe the drives. Reset type to indicate to controllers * that can re-probe that all drives must be probed.. * * Note: ch_ndrives may be changed during the probe. 
*/ KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); for (i = 0; i < chp->ch_ndrives; i++) { chp->ch_drive[i].drive_flags = 0; chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; } ata_channel_unlock(chp); atabusconfig(sc); ata_channel_lock(chp); for (;;) { if ((chp->ch_flags & (ATACH_TH_RESET | ATACH_TH_DRIVE_RESET | ATACH_TH_RECOVERY | ATACH_SHUTDOWN)) == 0 && (chq->queue_active == 0 || chq->queue_freeze == 0)) { cv_wait(&chp->ch_thr_idle, &chp->ch_lock); } if (chp->ch_flags & ATACH_SHUTDOWN) { break; } if (chp->ch_flags & ATACH_TH_RESCAN) { chp->ch_flags &= ~ATACH_TH_RESCAN; ata_channel_unlock(chp); atabusconfig(sc); ata_channel_lock(chp); } if (chp->ch_flags & ATACH_TH_RESET) { /* this will unfreeze the channel */ ata_thread_run(chp, AT_WAIT, ATACH_TH_RESET, ATACH_NODRIVE); } else if (chp->ch_flags & ATACH_TH_DRIVE_RESET) { /* this will unfreeze the channel */ for (i = 0; i < chp->ch_ndrives; i++) { struct ata_drive_datas *drvp; drvp = &chp->ch_drive[i]; if (drvp->drive_flags & ATA_DRIVE_TH_RESET) { ata_thread_run(chp, AT_WAIT, ATACH_TH_DRIVE_RESET, i); } } chp->ch_flags &= ~ATACH_TH_DRIVE_RESET; } else if (chp->ch_flags & ATACH_TH_RECOVERY) { /* * This will unfreeze the channel; drops locks during * run, so must wrap in splbio()/splx() to avoid * spurious interrupts. XXX MPSAFE */ int s = splbio(); ata_thread_run(chp, AT_WAIT, ATACH_TH_RECOVERY, chp->recovery_tfd); splx(s); } else if (chq->queue_active > 0 && chq->queue_freeze == 1) { /* * Caller has bumped queue_freeze, decrease it. This * flow shalt never be executed for NCQ commands. 
*/ KASSERT((chp->ch_flags & ATACH_NCQ) == 0); KASSERT(chq->queue_active == 1); ata_channel_thaw_locked(chp); xfer = ata_queue_get_active_xfer_locked(chp); KASSERT(xfer != NULL); KASSERT((xfer->c_flags & C_POLL) == 0); switch ((rv = ata_xfer_start(xfer))) { case ATASTART_STARTED: case ATASTART_POLL: case ATASTART_ABORT: break; case ATASTART_TH: default: panic("%s: ata_xfer_start() unexpected rv %d", __func__, rv); /* NOTREACHED */ } } else if (chq->queue_freeze > 1) panic("%s: queue_freeze", __func__); /* Try to run down the queue once channel is unfrozen */ if (chq->queue_freeze == 0) { ata_channel_unlock(chp); atastart(chp); ata_channel_lock(chp); } } chp->ch_thread = NULL; cv_signal(&chp->ch_thr_idle); ata_channel_unlock(chp); kthread_exit(0); } bool ata_is_thread_run(struct ata_channel *chp) { KASSERT(mutex_owned(&chp->ch_lock)); return (chp->ch_thread == curlwp && !cpu_intr_p()); } static void ata_thread_wake_locked(struct ata_channel *chp) { KASSERT(mutex_owned(&chp->ch_lock)); ata_channel_freeze_locked(chp); cv_signal(&chp->ch_thr_idle); } /* * atabus_match: * * Autoconfiguration match routine. */ static int atabus_match(device_t parent, cfdata_t cf, void *aux) { struct ata_channel *chp = aux; if (chp == NULL) return (0); if (cf->cf_loc[ATACF_CHANNEL] != chp->ch_channel && cf->cf_loc[ATACF_CHANNEL] != ATACF_CHANNEL_DEFAULT) return (0); return (1); } /* * atabus_attach: * * Autoconfiguration attach routine. 
*/ static void atabus_attach(device_t parent, device_t self, void *aux) { struct atabus_softc *sc = device_private(self); struct ata_channel *chp = aux; struct atabus_initq *initq; int error; sc->sc_chan = chp; aprint_normal("\n"); aprint_naive("\n"); sc->sc_dev = self; if (ata_addref(chp)) return; RUN_ONCE(&ata_init_ctrl, atabus_init); initq = kmem_zalloc(sizeof(*initq), KM_SLEEP); initq->atabus_sc = sc; mutex_enter(&atabus_qlock); TAILQ_INSERT_TAIL(&atabus_initq_head, initq, atabus_initq); mutex_exit(&atabus_qlock); config_pending_incr(sc->sc_dev); /* XXX MPSAFE - no KTHREAD_MPSAFE, so protected by KERNEL_LOCK() */ if ((error = kthread_create(PRI_NONE, 0, NULL, atabus_thread, sc, &chp->ch_thread, "%s", device_xname(self))) != 0) aprint_error_dev(self, "unable to create kernel thread: error %d\n", error); if (!pmf_device_register(self, atabus_suspend, atabus_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); } /* * atabus_detach: * * Autoconfiguration detach routine. */ static int atabus_detach(device_t self, int flags) { struct atabus_softc *sc = device_private(self); struct ata_channel *chp = sc->sc_chan; device_t dev = NULL; int i, error = 0; /* * Detach atapibus and its children. */ if ((dev = chp->atapibus) != NULL) { ATADEBUG_PRINT(("atabus_detach: %s: detaching %s\n", device_xname(self), device_xname(dev)), DEBUG_DETACH); error = config_detach(dev, flags); if (error) goto out; KASSERT(chp->atapibus == NULL); } KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); /* * Detach our other children. 
*/ for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drive_type == ATA_DRIVET_ATAPI) continue; if (chp->ch_drive[i].drive_type == ATA_DRIVET_PM) chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; if ((dev = chp->ch_drive[i].drv_softc) != NULL) { ATADEBUG_PRINT(("%s.%d: %s: detaching %s\n", __func__, __LINE__, device_xname(self), device_xname(dev)), DEBUG_DETACH); error = config_detach(dev, flags); if (error) goto out; KASSERT(chp->ch_drive[i].drv_softc == NULL); KASSERT(chp->ch_drive[i].drive_type == 0); } } /* Shutdown the channel. */ ata_channel_lock(chp); chp->ch_flags |= ATACH_SHUTDOWN; while (chp->ch_thread != NULL) { cv_signal(&chp->ch_thr_idle); cv_wait(&chp->ch_thr_idle, &chp->ch_lock); } ata_channel_unlock(chp); atabus_free_drives(chp); out: #ifdef ATADEBUG if (dev != NULL && error != 0) ATADEBUG_PRINT(("%s: %s: error %d detaching %s\n", __func__, device_xname(self), error, device_xname(dev)), DEBUG_DETACH); #endif /* ATADEBUG */ return (error); } void atabus_childdetached(device_t self, device_t child) { bool found = false; struct atabus_softc *sc = device_private(self); struct ata_channel *chp = sc->sc_chan; int i; KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); /* * atapibus detached. */ if (child == chp->atapibus) { chp->atapibus = NULL; found = true; for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drive_type != ATA_DRIVET_ATAPI) continue; KASSERT(chp->ch_drive[i].drv_softc != NULL); chp->ch_drive[i].drv_softc = NULL; chp->ch_drive[i].drive_flags = 0; chp->ch_drive[i].drive_type = ATA_DRIVET_NONE; } } /* * Detach our other children. 
*/
	for (i = 0; i < chp->ch_ndrives; i++) {
		if (chp->ch_drive[i].drive_type == ATA_DRIVET_ATAPI)
			continue;
		if (child == chp->ch_drive[i].drv_softc) {
			chp->ch_drive[i].drv_softc = NULL;
			chp->ch_drive[i].drive_flags = 0;
			if (chp->ch_drive[i].drive_type == ATA_DRIVET_PM)
				chp->ch_satapmp_nports = 0;
			chp->ch_drive[i].drive_type = ATA_DRIVET_NONE;
			found = true;
		}
	}

	if (!found)
		panic("%s: unknown child %p", device_xname(self),
		    (const void *)child);
}

CFATTACH_DECL3_NEW(atabus, sizeof(struct atabus_softc),
    atabus_match, atabus_attach, atabus_detach, NULL, atabus_rescan,
    atabus_childdetached, DVF_DETACH_SHUTDOWN);

/*****************************************************************************
 * Common ATA bus operations.
 *****************************************************************************/

/*
 * allocate/free the channel's ch_drive[] array
 *
 * Caller holds the channel lock; it is dropped around the KM_SLEEP
 * allocation, so a racing allocator is re-checked for afterwards.
 */
int
atabus_alloc_drives(struct ata_channel *chp, int ndrives)
{
	int i;
	if (chp->ch_ndrives != ndrives)
		atabus_free_drives(chp);
	if (chp->ch_drive == NULL) {
		void *drv;

		ata_channel_unlock(chp);
		drv = kmem_zalloc(sizeof(*chp->ch_drive) * ndrives, KM_SLEEP);
		ata_channel_lock(chp);

		if (chp->ch_drive != NULL) {
			/* lost the race */
			kmem_free(drv, sizeof(*chp->ch_drive) * ndrives);
			return 0;
		}
		chp->ch_drive = drv;
	}
	for (i = 0; i < ndrives; i++) {
		chp->ch_drive[i].chnl_softc = chp;
		chp->ch_drive[i].drive = i;
	}
	chp->ch_ndrives = ndrives;
	return 0;
}

/*
 * Free ch_drive[]; in DIAGNOSTIC kernels, panic if any slot is still
 * typed or still has an attached child.
 */
void
atabus_free_drives(struct ata_channel *chp)
{
#ifdef DIAGNOSTIC
	int i;
	int dopanic = 0;
	KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL);
	for (i = 0; i < chp->ch_ndrives; i++) {
		if (chp->ch_drive[i].drive_type != ATA_DRIVET_NONE) {
			printf("%s: ch_drive[%d] type %d != ATA_DRIVET_NONE\n",
			    device_xname(chp->atabus), i,
			    chp->ch_drive[i].drive_type);
			dopanic = 1;
		}
		if (chp->ch_drive[i].drv_softc != NULL) {
			printf("%s: ch_drive[%d] attached to %s\n",
			    device_xname(chp->atabus), i,
			    device_xname(chp->ch_drive[i].drv_softc));
			dopanic = 1;
		}
	}
	if (dopanic)
		panic("atabus_free_drives");
#endif
	if (chp->ch_drive == NULL)
		return;
	kmem_free(chp->ch_drive,
	    sizeof(struct ata_drive_datas) * chp->ch_ndrives);
	chp->ch_ndrives = 0;
	chp->ch_drive = NULL;
}

/*
 * Get the disk's parameters via IDENTIFY (DEVICE); fills in *prms and
 * returns CMD_OK/CMD_ERR/CMD_AGAIN.  The bounce buffer and xfer are
 * released on all paths via the out: label.
 */
int
ata_get_params(struct ata_drive_datas *drvp, uint8_t flags,
    struct ataparams *prms)
{
	struct ata_xfer *xfer;
	struct ata_channel *chp = drvp->chnl_softc;
	struct atac_softc *atac = chp->ch_atac;
	char *tb;
	int i, rv;
	uint16_t *p;

	ATADEBUG_PRINT(("%s\n", __func__), DEBUG_FUNCS);

	xfer = ata_get_xfer(chp, false);
	if (xfer == NULL) {
		ATADEBUG_PRINT(("%s: no xfer\n", __func__),
		    DEBUG_FUNCS|DEBUG_PROBE);
		return CMD_AGAIN;
	}

	tb = kmem_zalloc(ATA_BSIZE, KM_SLEEP);
	memset(prms, 0, sizeof(struct ataparams));

	if (drvp->drive_type == ATA_DRIVET_ATA) {
		xfer->c_ata_c.r_command = WDCC_IDENTIFY;
		xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
		xfer->c_ata_c.r_st_pmask = WDCS_DRQ;
		xfer->c_ata_c.timeout = 3000; /* 3s */
	} else if (drvp->drive_type == ATA_DRIVET_ATAPI) {
		xfer->c_ata_c.r_command = ATAPI_IDENTIFY_DEVICE;
		xfer->c_ata_c.r_st_bmask = 0;
		xfer->c_ata_c.r_st_pmask = WDCS_DRQ;
		xfer->c_ata_c.timeout = 10000; /* 10s */
	} else {
		ATADEBUG_PRINT(("ata_get_parms: no disks\n"),
		    DEBUG_FUNCS|DEBUG_PROBE);
		rv = CMD_ERR;
		goto out;
	}
	xfer->c_ata_c.flags = AT_READ | flags;
	xfer->c_ata_c.data = tb;
	xfer->c_ata_c.bcount = ATA_BSIZE;
	(*atac->atac_bustype_ata->ata_exec_command)(drvp, xfer);
	ata_wait_cmd(chp, xfer);
	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
		ATADEBUG_PRINT(("ata_get_parms: ata_c.flags=0x%x\n",
		    xfer->c_ata_c.flags), DEBUG_FUNCS|DEBUG_PROBE);
		rv = CMD_ERR;
		goto out;
	}
	/* if we didn't read any data something is wrong */
	if ((xfer->c_ata_c.flags & AT_XFDONE) == 0) {
		rv = CMD_ERR;
		goto out;
	}

	/* Read in parameter block. */
	memcpy(prms, tb, sizeof(struct ataparams));

	/*
	 * Shuffle string byte order.
	 * ATAPI NEC, Mitsumi and Pioneer drives and
	 * old ATA TDK CompactFlash cards
	 * have different byte order.
*/
#if BYTE_ORDER == BIG_ENDIAN
# define M(n) prms->atap_model[(n) ^ 1]
#else
# define M(n) prms->atap_model[n]
#endif
	/* Known-exception vendors keep their native byte order: skip swap. */
	if (
#if BYTE_ORDER == BIG_ENDIAN
	    !
#endif
	    ((drvp->drive_type == ATA_DRIVET_ATAPI) ?
	     ((M(0) == 'N' && M(1) == 'E') ||
	      (M(0) == 'F' && M(1) == 'X') ||
	      (M(0) == 'P' && M(1) == 'i')) :
	     ((M(0) == 'T' && M(1) == 'D' && M(2) == 'K')))) {
		rv = CMD_OK;
		goto out;
	}
#undef M
	for (i = 0; i < sizeof(prms->atap_model); i += 2) {
		p = (uint16_t *)(prms->atap_model + i);
		*p = bswap16(*p);
	}
	for (i = 0; i < sizeof(prms->atap_serial); i += 2) {
		p = (uint16_t *)(prms->atap_serial + i);
		*p = bswap16(*p);
	}
	for (i = 0; i < sizeof(prms->atap_revision); i += 2) {
		p = (uint16_t *)(prms->atap_revision + i);
		*p = bswap16(*p);
	}

	rv = CMD_OK;
out:
	kmem_free(tb, ATA_BSIZE);
	ata_free_xfer(chp, xfer);
	return rv;
}

/*
 * Issue SET FEATURES / SET TRANSFER MODE for the given mode byte.
 * Returns CMD_OK/CMD_ERR/CMD_AGAIN.
 */
int
ata_set_mode(struct ata_drive_datas *drvp, uint8_t mode, uint8_t flags)
{
	struct ata_xfer *xfer;
	int rv;
	struct ata_channel *chp = drvp->chnl_softc;
	struct atac_softc *atac = chp->ch_atac;

	ATADEBUG_PRINT(("ata_set_mode=0x%x\n", mode), DEBUG_FUNCS);

	xfer = ata_get_xfer(chp, false);
	if (xfer == NULL) {
		ATADEBUG_PRINT(("%s: no xfer\n", __func__),
		    DEBUG_FUNCS|DEBUG_PROBE);
		return CMD_AGAIN;
	}

	xfer->c_ata_c.r_command = SET_FEATURES;
	xfer->c_ata_c.r_st_bmask = 0;
	xfer->c_ata_c.r_st_pmask = 0;
	xfer->c_ata_c.r_features = WDSF_SET_MODE;
	xfer->c_ata_c.r_count = mode;
	xfer->c_ata_c.flags = flags;
	xfer->c_ata_c.timeout = 1000; /* 1s */
	(*atac->atac_bustype_ata->ata_exec_command)(drvp, xfer);
	ata_wait_cmd(chp, xfer);
	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
		rv = CMD_ERR;
		goto out;
	}
	rv = CMD_OK;
out:
	ata_free_xfer(chp, xfer);
	return rv;
}

#if NATA_DMA
void
ata_dmaerr(struct ata_drive_datas *drvp, int flags)
{
	ata_channel_lock_owned(drvp->chnl_softc);

	/*
	 * Downgrade decision: if we get NERRS_MAX in NXFER.
	 * We start with n_dmaerrs set to NERRS_MAX-1 so that the
	 * first error within the first NXFER ops will immediately trigger
	 * a downgrade.
	 * If we got an error and n_xfers is bigger than NXFER reset counters.
	 */
	drvp->n_dmaerrs++;
	if (drvp->n_dmaerrs >= NERRS_MAX && drvp->n_xfers <= NXFER) {
#ifdef ATA_DOWNGRADE_MODE
		ata_downgrade_mode(drvp, flags);
		drvp->n_dmaerrs = NERRS_MAX-1;
#else
		/* rate-limited complaint instead of downgrading the mode */
		static struct timeval last;
		static const struct timeval serrintvl = { 300, 0 };

		if (ratecheck(&last, &serrintvl)) {
			aprint_error_dev(drvp->drv_softc,
			    "excessive DMA errors - %d in last %d transfers\n",
			    drvp->n_dmaerrs, drvp->n_xfers);
		}
#endif
		drvp->n_xfers = 0;
		return;
	}

	if (drvp->n_xfers > NXFER) {
		drvp->n_dmaerrs = 1; /* just got an error */
		drvp->n_xfers = 1; /* restart counting from this error */
	}
}
#endif	/* NATA_DMA */

/*
 * freeze the queue and wait for the controller to be idle. Caller has to
 * unfreeze/restart the queue
 */
static void
ata_channel_idle(struct ata_channel *chp)
{
	ata_channel_lock(chp);
	ata_channel_freeze_locked(chp);
	while (chp->ch_queue->queue_active > 0) {
		chp->ch_queue->queue_flags |= QF_IDLE_WAIT;
		/* re-check roughly once per tick */
		cv_timedwait(&chp->ch_queue->queue_idle, &chp->ch_lock, 1);
	}
	ata_channel_unlock(chp);
}

/*
 * Add a command to the queue and start controller.
 *
 * MUST BE CALLED AT splbio()!
 */
void
ata_exec_xfer(struct ata_channel *chp, struct ata_xfer *xfer)
{
	ATADEBUG_PRINT(("ata_exec_xfer %p channel %d drive %d\n", xfer,
	    chp->ch_channel, xfer->c_drive), DEBUG_XFERS);

	/* complete xfer setup */
	xfer->c_chp = chp;

	ata_channel_lock(chp);

	/*
	 * Standard commands are added to the end of command list, but
	 * recovery commands must be run immediately.
*/
	if ((xfer->c_flags & C_SKIP_QUEUE) == 0)
		SIMPLEQ_INSERT_TAIL(&chp->ch_queue->queue_xfer, xfer,
		    c_xferchain);
	else
		SIMPLEQ_INSERT_HEAD(&chp->ch_queue->queue_xfer, xfer,
		    c_xferchain);

	/*
	 * if polling and can sleep, wait for the xfer to be at head of queue
	 */
	if ((xfer->c_flags & (C_POLL | C_WAIT)) == (C_POLL | C_WAIT)) {
		while (chp->ch_queue->queue_active > 0 ||
		    SIMPLEQ_FIRST(&chp->ch_queue->queue_xfer) != xfer) {
			xfer->c_flags |= C_WAITACT;
			cv_wait(&chp->ch_queue->c_active, &chp->ch_lock);
			xfer->c_flags &= ~C_WAITACT;
		}

		/*
		 * Free xfer now if it there was attempt to free it
		 * while we were waiting.
		 */
		if ((xfer->c_flags & (C_FREE|C_WAITTIMO)) == C_FREE) {
			ata_channel_unlock(chp);
			ata_free_xfer(chp, xfer);
			return;
		}
	}

	ata_channel_unlock(chp);

	ATADEBUG_PRINT(("atastart from ata_exec_xfer, flags 0x%x\n",
	    chp->ch_flags), DEBUG_XFERS);
	atastart(chp);
}

/*
 * Start I/O on a controller, for the given channel.
 * The first xfer may be not for our channel if the channel queues
 * are shared.
 *
 * MUST BE CALLED AT splbio()!
 *
 * XXX FIS-based switching with PMP
 * Currently atastart() never schedules concurrent NCQ transfers to more than
 * one drive, even when channel has several SATA drives attached via PMP.
 * To support concurrent transfers to different drives with PMP, it would be
 * necessary to implement FIS-based switching support in controller driver,
 * and then adjust error handling and recovery to stop assuming at most
 * one active drive.
 */
void
atastart(struct ata_channel *chp)
{
	struct atac_softc *atac = chp->ch_atac;
	struct ata_queue *chq = chp->ch_queue;
	struct ata_xfer *xfer, *axfer;
	bool skipq;

#ifdef ATA_DEBUG
	int spl1, spl2;

	spl1 = splbio();
	spl2 = splbio();
	if (spl2 != spl1) {
		printf("atastart: not at splbio()\n");
		panic("atastart");
	}
	splx(spl2);
	splx(spl1);
#endif /* ATA_DEBUG */

	ata_channel_lock(chp);

again:
	/* is there a xfer ? */
	if ((xfer = SIMPLEQ_FIRST(&chp->ch_queue->queue_xfer)) == NULL) {
		ATADEBUG_PRINT(("%s(chp=%p): channel %d queue_xfer is empty\n",
		    __func__, chp, chp->ch_channel), DEBUG_XFERS);
		goto out;
	}

	/*
	 * if someone is waiting for the command to be active, wake it up
	 * and let it process the command
	 */
	if (__predict_false(xfer->c_flags & C_WAITACT)) {
		ATADEBUG_PRINT(("atastart: xfer %p channel %d drive %d "
		    "wait active\n", xfer, chp->ch_channel, xfer->c_drive),
		    DEBUG_XFERS);
		cv_broadcast(&chp->ch_queue->c_active);
		goto out;
	}

	skipq = ISSET(xfer->c_flags, C_SKIP_QUEUE);

	/* is the queue frozen? */
	if (__predict_false(!skipq && chq->queue_freeze > 0)) {
		if (chq->queue_flags & QF_IDLE_WAIT) {
			chq->queue_flags &= ~QF_IDLE_WAIT;
			cv_signal(&chp->ch_queue->queue_idle);
		}
		ATADEBUG_PRINT(("%s(chp=%p): channel %d drive %d "
		    "queue frozen: %d\n", __func__, chp, chp->ch_channel,
		    xfer->c_drive, chq->queue_freeze), DEBUG_XFERS);
		goto out;
	}

	/* all xfers on same queue must belong to the same channel */
	KASSERT(xfer->c_chp == chp);

	/*
	 * Can only take the command if there are no current active
	 * commands, or if the command is NCQ and the active commands are also
	 * NCQ. If PM is in use and HBA driver doesn't support/use FIS-based
	 * switching, can only send commands to single drive.
	 * Need only check first xfer.
	 * XXX FIS-based switching - revisit
	 */
	if (!skipq && (axfer = TAILQ_FIRST(&chp->ch_queue->active_xfers))) {
		if (!ISSET(xfer->c_flags, C_NCQ) ||
		    !ISSET(axfer->c_flags, C_NCQ) ||
		    xfer->c_drive != axfer->c_drive)
			goto out;
	}

	struct ata_drive_datas * const drvp = &chp->ch_drive[xfer->c_drive];

	/*
	 * Are we on limit of active xfers ? If the queue has more
	 * than 1 openings, we keep one slot reserved for recovery or dump.
	 */
	KASSERT(chq->queue_active <= chq->queue_openings);
	const uint8_t chq_openings = (!skipq && chq->queue_openings > 1)
	    ? (chq->queue_openings - 1) : chq->queue_openings;
	const uint8_t drv_openings = ISSET(xfer->c_flags, C_NCQ)
	    ? drvp->drv_openings : ATA_MAX_OPENINGS;
	if (chq->queue_active >= MIN(chq_openings, drv_openings)) {
		if (skipq) {
			panic("%s: channel %d busy, xfer not possible",
			    __func__, chp->ch_channel);
		}
		ATADEBUG_PRINT(("%s(chp=%p): channel %d completely busy\n",
		    __func__, chp, chp->ch_channel), DEBUG_XFERS);
		goto out;
	}

	/* Slot allocation can fail if drv_openings < ch_openings */
	if (!ata_queue_alloc_slot(chp, &xfer->c_slot, drv_openings))
		goto out;

	if (__predict_false(atac->atac_claim_hw)) {
		if (!atac->atac_claim_hw(chp, 0)) {
			ata_queue_free_slot(chp, xfer->c_slot);
			goto out;
		}
	}

	/* Now committed to start the xfer */
	ATADEBUG_PRINT(("%s(chp=%p): xfer %p channel %d drive %d\n",
	    __func__, chp, xfer, chp->ch_channel, xfer->c_drive),
	    DEBUG_XFERS);

	if (drvp->drive_flags & ATA_DRIVE_RESET) {
		drvp->drive_flags &= ~ATA_DRIVE_RESET;
		drvp->state = 0;
	}

	if (ISSET(xfer->c_flags, C_NCQ))
		SET(chp->ch_flags, ATACH_NCQ);
	else
		CLR(chp->ch_flags, ATACH_NCQ);

	SIMPLEQ_REMOVE_HEAD(&chq->queue_xfer, c_xferchain);
	ata_activate_xfer_locked(chp, xfer);

	if (atac->atac_cap & ATAC_CAP_NOIRQ)
		KASSERT(xfer->c_flags & C_POLL);

	switch (ata_xfer_start(xfer)) {
	case ATASTART_TH:
	case ATASTART_ABORT:
		/* don't start any further commands in this case */
		goto out;
	default:
		/* nothing to do */
		break;
	}

	/* Queue more commands if possible, but not during recovery or dump */
	if (!skipq && chq->queue_active < chq->queue_openings)
		goto again;

out:
	ata_channel_unlock(chp);
}

/*
 * Dispatch one xfer via its c_start hook and handle the poll/abort
 * continuations.  Channel lock must be held; it is dropped around the
 * c_poll and c_abort callbacks.
 */
int
ata_xfer_start(struct ata_xfer *xfer)
{
	struct ata_channel *chp = xfer->c_chp;
	int rv, status;

	KASSERT(mutex_owned(&chp->ch_lock));

again:
	rv = xfer->ops->c_start(chp, xfer);
	switch (rv) {
	case ATASTART_STARTED:
		/* nothing to do */
		break;
	case ATASTART_TH:
		/* postpone xfer to thread */
		ata_thread_wake_locked(chp);
		break;
	case ATASTART_POLL:
		/* can happen even in thread context for some ATAPI devices */
		ata_channel_unlock(chp);
		KASSERT(xfer->ops != NULL && xfer->ops->c_poll != NULL);
		status = xfer->ops->c_poll(chp, xfer);
		ata_channel_lock(chp);
		if (status == ATAPOLL_AGAIN)
			goto again;
		break;
	case ATASTART_ABORT:
		ata_channel_unlock(chp);
		KASSERT(xfer->ops != NULL && xfer->ops->c_abort != NULL);
		xfer->ops->c_abort(chp, xfer);
		ata_channel_lock(chp);
		break;
	}

	return rv;
}

static void
ata_activate_xfer_locked(struct ata_channel *chp, struct ata_xfer *xfer)
{
	struct ata_queue * const chq = chp->ch_queue;

	KASSERT(mutex_owned(&chp->ch_lock));
	KASSERT((chq->active_xfers_used & __BIT(xfer->c_slot)) == 0);

	if ((xfer->c_flags & C_SKIP_QUEUE) == 0)
		TAILQ_INSERT_TAIL(&chq->active_xfers, xfer, c_activechain);
	else {
		/*
		 * Must go to head, so that ata_queue_get_active_xfer()
		 * returns the recovery command, and not some other
		 * random active transfer.
		 */
		TAILQ_INSERT_HEAD(&chq->active_xfers, xfer, c_activechain);
	}
	chq->active_xfers_used |= __BIT(xfer->c_slot);
	chq->queue_active++;
}

/*
 * Does its own locking, does not require splbio().
 * flags - whether to block waiting for free xfer
 */
struct ata_xfer *
ata_get_xfer(struct ata_channel *chp, bool waitok)
{
	return pool_get(&ata_xfer_pool,
	    PR_ZERO | (waitok ? PR_WAITOK : PR_NOWAIT));
}

/*
 * ata_deactivate_xfer() must be always called prior to ata_free_xfer()
 */
void
ata_free_xfer(struct ata_channel *chp, struct ata_xfer *xfer)
{
	struct ata_queue *chq = chp->ch_queue;

	ata_channel_lock(chp);

	if (__predict_false(xfer->c_flags & (C_WAITACT|C_WAITTIMO))) {
		/* Someone is waiting for this xfer, so we can't free now */
		xfer->c_flags |= C_FREE;
		cv_broadcast(&chq->c_active);
		ata_channel_unlock(chp);
		return;
	}

	/* XXX move PIOBM and free_gw to deactivate?
*/
#if NATA_PIOBM
	/* XXX wdc dependent code */
	if (__predict_false(xfer->c_flags & C_PIOBM)) {
		struct wdc_softc *wdc = CHAN_TO_WDC(chp);

		/* finish the busmastering PIO */
		(*wdc->piobm_done)(wdc->dma_arg, chp->ch_channel,
		    xfer->c_drive);
		chp->ch_flags &= ~(ATACH_DMA_WAIT | ATACH_PIOBM_WAIT
		    | ATACH_IRQ_WAIT);
	}
#endif

	if (__predict_false(chp->ch_atac->atac_free_hw))
		chp->ch_atac->atac_free_hw(chp);

	ata_channel_unlock(chp);

	/* privately-allocated xfers are not returned to the pool */
	if (__predict_true(!ISSET(xfer->c_flags, C_PRIVATE_ALLOC)))
		pool_put(&ata_xfer_pool, xfer);
}

/*
 * Remove the xfer from the active list, release its slot, and stop the
 * timeout callout when it was the last active transfer.
 */
void
ata_deactivate_xfer(struct ata_channel *chp, struct ata_xfer *xfer)
{
	struct ata_queue * const chq = chp->ch_queue;

	ata_channel_lock(chp);

	KASSERT(chq->queue_active > 0);
	KASSERT((chq->active_xfers_used & __BIT(xfer->c_slot)) != 0);

	/* Stop only when this is last active xfer */
	if (chq->queue_active == 1)
		callout_stop(&chp->c_timo_callout);

	/* flag the race if the timeout callout is currently running */
	if (callout_invoking(&chp->c_timo_callout))
		xfer->c_flags |= C_WAITTIMO;

	TAILQ_REMOVE(&chq->active_xfers, xfer, c_activechain);
	chq->active_xfers_used &= ~__BIT(xfer->c_slot);
	chq->queue_active--;

	ata_queue_free_slot(chp, xfer->c_slot);

	if (xfer->c_flags & C_WAIT)
		cv_broadcast(&chq->c_cmd_finish);

	ata_channel_unlock(chp);
}

/*
 * Called in c_intr hook. Must be called before any deactivations
 * are done - if there is drain pending, it calls c_kill_xfer hook which
 * deactivates the xfer.
 * Calls c_kill_xfer with channel lock free.
 * Returns true if caller should just exit without further processing.
 * Caller must not further access any part of xfer or any related controller
 * structures in that case, it should just return.
*/
bool
ata_waitdrain_xfer_check(struct ata_channel *chp, struct ata_xfer *xfer)
{
	int drive = xfer->c_drive;
	bool draining = false;

	ata_channel_lock(chp);

	if (chp->ch_drive[drive].drive_flags & ATA_DRIVE_WAITDRAIN) {
		/* the kill hook is invoked with the channel lock dropped */
		ata_channel_unlock(chp);

		xfer->ops->c_kill_xfer(chp, xfer, KILL_GONE);

		ata_channel_lock(chp);
		chp->ch_drive[drive].drive_flags &= ~ATA_DRIVE_WAITDRAIN;
		cv_signal(&chp->ch_queue->queue_drain);
		draining = true;
	}

	ata_channel_unlock(chp);

	return draining;
}

/*
 * Check for race of normal transfer handling vs. timeout.
 */
bool
ata_timo_xfer_check(struct ata_xfer *xfer)
{
	struct ata_channel *chp = xfer->c_chp;
	struct ata_drive_datas *drvp = &chp->ch_drive[xfer->c_drive];

	ata_channel_lock(chp);

	if (xfer->c_flags & C_WAITTIMO) {
		xfer->c_flags &= ~C_WAITTIMO;

		/* Handle race vs. ata_free_xfer() */
		if (xfer->c_flags & C_FREE) {
			xfer->c_flags &= ~C_FREE;
			ata_channel_unlock(chp);

			device_printf(drvp->drv_softc,
			    "xfer %"PRIxPTR" freed while invoking timeout\n",
			    (intptr_t)xfer & PAGE_MASK);

			ata_free_xfer(chp, xfer);
			return true;
		}

		/* Race vs. callout_stop() in ata_deactivate_xfer() */
		ata_channel_unlock(chp);

		device_printf(drvp->drv_softc,
		    "xfer %"PRIxPTR" deactivated while invoking timeout\n",
		    (intptr_t)xfer & PAGE_MASK);
		return true;
	}

	ata_channel_unlock(chp);

	/* No race, proceed with timeout handling */
	return false;
}

/*
 * Kill off all active xfers for an ata_channel.
 *
 * Must be called with channel lock held.
 */
void
ata_kill_active(struct ata_channel *chp, int reason, int flags)
{
	struct ata_queue * const chq = chp->ch_queue;
	struct ata_xfer *xfer, *xfernext;

	KASSERT(mutex_owned(&chp->ch_lock));

	TAILQ_FOREACH_SAFE(xfer, &chq->active_xfers, c_activechain, xfernext) {
		/* kill hooks are called with the channel lock dropped */
		ata_channel_unlock(chp);
		xfer->ops->c_kill_xfer(xfer->c_chp, xfer, reason);
		ata_channel_lock(chp);
	}
}

/*
 * Kill off all pending xfers for a drive.
*/ void ata_kill_pending(struct ata_drive_datas *drvp) { struct ata_channel * const chp = drvp->chnl_softc; struct ata_queue * const chq = chp->ch_queue; struct ata_xfer *xfer; ata_channel_lock(chp); /* Kill all pending transfers */ while ((xfer = SIMPLEQ_FIRST(&chq->queue_xfer))) { KASSERT(xfer->c_chp == chp); if (xfer->c_drive != drvp->drive) continue; SIMPLEQ_REMOVE_HEAD(&chp->ch_queue->queue_xfer, c_xferchain); /* * Keep the lock, so that we get deadlock (and 'locking against * myself' with LOCKDEBUG), instead of silent * data corruption, if the hook tries to call back into * middle layer for inactive xfer. */ xfer->ops->c_kill_xfer(chp, xfer, KILL_GONE_INACTIVE); } /* Wait until all active transfers on the drive finish */ while (chq->queue_active > 0) { bool drv_active = false; TAILQ_FOREACH(xfer, &chq->active_xfers, c_activechain) { KASSERT(xfer->c_chp == chp); if (xfer->c_drive == drvp->drive) { drv_active = true; break; } } if (!drv_active) { /* all finished */ break; } drvp->drive_flags |= ATA_DRIVE_WAITDRAIN; cv_wait(&chq->queue_drain, &chp->ch_lock); } ata_channel_unlock(chp); } static void ata_channel_freeze_locked(struct ata_channel *chp) { chp->ch_queue->queue_freeze++; ATADEBUG_PRINT(("%s(chp=%p) -> %d\n", __func__, chp, chp->ch_queue->queue_freeze), DEBUG_FUNCS | DEBUG_XFERS); } void ata_channel_freeze(struct ata_channel *chp) { ata_channel_lock(chp); ata_channel_freeze_locked(chp); ata_channel_unlock(chp); } void ata_channel_thaw_locked(struct ata_channel *chp) { KASSERT(mutex_owned(&chp->ch_lock)); KASSERT(chp->ch_queue->queue_freeze > 0); chp->ch_queue->queue_freeze--; ATADEBUG_PRINT(("%s(chp=%p) -> %d\n", __func__, chp, chp->ch_queue->queue_freeze), DEBUG_FUNCS | DEBUG_XFERS); } /* * ata_thread_run: * * Reset and ATA channel. Channel lock must be held. arg is type-specific. 
*/ void ata_thread_run(struct ata_channel *chp, int flags, int type, int arg) { struct atac_softc *atac = chp->ch_atac; bool threset = false; struct ata_drive_datas *drvp; ata_channel_lock_owned(chp); /* * If we can poll or wait it's OK, otherwise wake up the * kernel thread to do it for us. */ ATADEBUG_PRINT(("%s flags 0x%x ch_flags 0x%x\n", __func__, flags, chp->ch_flags), DEBUG_FUNCS | DEBUG_XFERS); if ((flags & (AT_POLL | AT_WAIT)) == 0) { switch (type) { case ATACH_TH_RESET: if (chp->ch_flags & ATACH_TH_RESET) { /* No need to schedule another reset */ return; } break; case ATACH_TH_DRIVE_RESET: { int drive = arg; KASSERT(drive <= chp->ch_ndrives); drvp = &chp->ch_drive[drive]; if (drvp->drive_flags & ATA_DRIVE_TH_RESET) { /* No need to schedule another reset */ return; } drvp->drive_flags |= ATA_DRIVE_TH_RESET; break; } case ATACH_TH_RECOVERY: { uint32_t tfd = (uint32_t)arg; KASSERT((chp->ch_flags & ATACH_RECOVERING) == 0); chp->recovery_tfd = tfd; break; } default: panic("%s: unknown type: %x", __func__, type); /* NOTREACHED */ } if (!(chp->ch_flags & type)) { /* * Block execution of other commands while * reset is scheduled to a thread. */ ata_channel_freeze_locked(chp); chp->ch_flags |= type; } cv_signal(&chp->ch_thr_idle); return; } /* Block execution of other commands during reset */ ata_channel_freeze_locked(chp); /* * If reset has been scheduled to a thread, then clear * the flag now so that the thread won't try to execute it if * we happen to sleep, and thaw one more time after the reset. 
*/ if (chp->ch_flags & type) { chp->ch_flags &= ~type; threset = true; } switch (type) { case ATACH_TH_RESET: (*atac->atac_bustype_ata->ata_reset_channel)(chp, flags); KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); for (int drive = 0; drive < chp->ch_ndrives; drive++) chp->ch_drive[drive].state = 0; break; case ATACH_TH_DRIVE_RESET: { int drive = arg; KASSERT(drive <= chp->ch_ndrives); drvp = &chp->ch_drive[drive]; (*atac->atac_bustype_ata->ata_reset_drive)(drvp, flags, NULL); drvp->state = 0; break; } case ATACH_TH_RECOVERY: { uint32_t tfd = (uint32_t)arg; KASSERT((chp->ch_flags & ATACH_RECOVERING) == 0); KASSERT(atac->atac_bustype_ata->ata_recovery != NULL); SET(chp->ch_flags, ATACH_RECOVERING); (*atac->atac_bustype_ata->ata_recovery)(chp, flags, tfd); CLR(chp->ch_flags, ATACH_RECOVERING); break; } default: panic("%s: unknown type: %x", __func__, type); /* NOTREACHED */ } /* * Thaw one extra time to clear the freeze done when the reset has * been scheduled to the thread. */ if (threset) ata_channel_thaw_locked(chp); /* Allow commands to run again */ ata_channel_thaw_locked(chp); /* Signal the thread in case there is an xfer to run */ cv_signal(&chp->ch_thr_idle); } int ata_addref(struct ata_channel *chp) { struct atac_softc *atac = chp->ch_atac; struct scsipi_adapter *adapt = &atac->atac_atapi_adapter._generic; int s, error = 0; s = splbio(); if (adapt->adapt_refcnt++ == 0 && adapt->adapt_enable != NULL) { error = (*adapt->adapt_enable)(atac->atac_dev, 1); if (error) adapt->adapt_refcnt--; } splx(s); return (error); } void ata_delref(struct ata_channel *chp) { struct atac_softc *atac = chp->ch_atac; struct scsipi_adapter *adapt = &atac->atac_atapi_adapter._generic; int s; s = splbio(); if (adapt->adapt_refcnt-- == 1 && adapt->adapt_enable != NULL) (void) (*adapt->adapt_enable)(atac->atac_dev, 0); splx(s); } void ata_print_modes(struct ata_channel *chp) { struct atac_softc *atac = chp->ch_atac; int drive; struct ata_drive_datas *drvp; 
	KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL);
	for (drive = 0; drive < chp->ch_ndrives; drive++) {
		drvp = &chp->ch_drive[drive];
		/* only report drives that exist and are attached */
		if (drvp->drive_type == ATA_DRIVET_NONE ||
		    drvp->drv_softc == NULL)
			continue;
		aprint_verbose("%s(%s:%d:%d): using PIO mode %d",
			device_xname(drvp->drv_softc),
			device_xname(atac->atac_dev),
			chp->ch_channel, drvp->drive, drvp->PIO_mode);
#if NATA_DMA
		if (drvp->drive_flags & ATA_DRIVE_DMA)
			aprint_verbose(", DMA mode %d", drvp->DMA_mode);
#if NATA_UDMA
		if (drvp->drive_flags & ATA_DRIVE_UDMA) {
			aprint_verbose(", Ultra-DMA mode %d", drvp->UDMA_mode);
			if (drvp->UDMA_mode == 2)
				aprint_verbose(" (Ultra/33)");
			else if (drvp->UDMA_mode == 4)
				aprint_verbose(" (Ultra/66)");
			else if (drvp->UDMA_mode == 5)
				aprint_verbose(" (Ultra/100)");
			else if (drvp->UDMA_mode == 6)
				aprint_verbose(" (Ultra/133)");
		}
#endif	/* NATA_UDMA */
#endif	/* NATA_DMA */
#if NATA_DMA || NATA_PIOBM
		if (0
#if NATA_DMA
		    || (drvp->drive_flags & (ATA_DRIVE_DMA | ATA_DRIVE_UDMA))
#endif
#if NATA_PIOBM
		    /* PIOBM capable controllers use DMA for PIO commands */
		    || (atac->atac_cap & ATAC_CAP_PIOBM)
#endif
		    )
			aprint_verbose(" (using DMA)");

		if (drvp->drive_flags & ATA_DRIVE_NCQ) {
			aprint_verbose(", NCQ (%d tags)%s",
			    ATA_REAL_OPENINGS(chp->ch_queue->queue_openings),
			    (drvp->drive_flags & ATA_DRIVE_NCQ_PRIO)
			    ? " w/PRIO" : "");
		} else if (drvp->drive_flags & ATA_DRIVE_WFUA)
			aprint_verbose(", WRITE DMA FUA EXT");

#endif	/* NATA_DMA || NATA_PIOBM */
		aprint_verbose("\n");
	}
}

#if defined(ATA_DOWNGRADE_MODE) && NATA_DMA
/*
 * downgrade the transfer mode of a drive after an error. return 1 if
 * downgrade was possible, 0 otherwise.
 *
 * MUST BE CALLED AT splbio()!
*/
static int
ata_downgrade_mode(struct ata_drive_datas *drvp, int flags)
{
	struct ata_channel *chp = drvp->chnl_softc;
	struct atac_softc *atac = chp->ch_atac;
	device_t drv_dev = drvp->drv_softc;
	int cf_flags = device_cfdata(drv_dev)->cf_flags;

	ata_channel_lock_owned(drvp->chnl_softc);

	/* if drive or controller don't know its mode, we can't do much */
	if ((drvp->drive_flags & ATA_DRIVE_MODE) == 0 ||
	    (atac->atac_set_modes == NULL))
		return 0;
	/* current drive mode was set by a config flag, let it this way */
	if ((cf_flags & ATA_CONFIG_PIO_SET) ||
	    (cf_flags & ATA_CONFIG_DMA_SET) ||
	    (cf_flags & ATA_CONFIG_UDMA_SET))
		return 0;

#if NATA_UDMA
	/*
	 * If we were using Ultra-DMA mode, downgrade to the next lower mode.
	 */
	if ((drvp->drive_flags & ATA_DRIVE_UDMA) && drvp->UDMA_mode >= 2) {
		drvp->UDMA_mode--;
		aprint_error_dev(drv_dev,
		    "transfer error, downgrading to Ultra-DMA mode %d\n",
		    drvp->UDMA_mode);
	}
#endif

	/*
	 * If we were using ultra-DMA, don't downgrade to multiword DMA.
	 */
	else if (drvp->drive_flags & (ATA_DRIVE_DMA | ATA_DRIVE_UDMA)) {
		drvp->drive_flags &= ~(ATA_DRIVE_DMA | ATA_DRIVE_UDMA);
		drvp->PIO_mode = drvp->PIO_cap;
		aprint_error_dev(drv_dev,
		    "transfer error, downgrading to PIO mode %d\n",
		    drvp->PIO_mode);
	} else /* already using PIO, can't downgrade */
		return 0;

	(*atac->atac_set_modes)(chp);
	ata_print_modes(chp);
	/* reset the channel, which will schedule all drives for setup */
	ata_thread_run(chp, flags, ATACH_TH_RESET, ATACH_NODRIVE);
	return 1;
}
#endif	/* ATA_DOWNGRADE_MODE && NATA_DMA */

/*
 * Probe drive's capabilities, for use by the controller later
 * Assumes drvp points to an existing drive.
 */
void
ata_probe_caps(struct ata_drive_datas *drvp)
{
	struct ataparams params, params2;
	struct ata_channel *chp = drvp->chnl_softc;
	struct atac_softc *atac = chp->ch_atac;
	device_t drv_dev = drvp->drv_softc;
	int i, printed = 0;
	const char *sep = "";
	int cf_flags;

	if (ata_get_params(drvp, AT_WAIT, &params) != CMD_OK) {
		/* IDENTIFY failed.
		 * Can't tell more about the device */
		return;
	}

	if ((atac->atac_cap & (ATAC_CAP_DATA16 | ATAC_CAP_DATA32)) ==
	    (ATAC_CAP_DATA16 | ATAC_CAP_DATA32)) {
		/*
		 * Controller claims 16 and 32 bit transfers.
		 * Re-do an IDENTIFY with 32-bit transfers,
		 * and compare results.
		 */
		ata_channel_lock(chp);
		drvp->drive_flags |= ATA_DRIVE_CAP32;
		ata_channel_unlock(chp);
		ata_get_params(drvp, AT_WAIT, &params2);
		if (memcmp(&params, &params2, sizeof(struct ataparams)) != 0) {
			/* Not good. fall back to 16bits */
			ata_channel_lock(chp);
			drvp->drive_flags &= ~ATA_DRIVE_CAP32;
			ata_channel_unlock(chp);
		} else {
			aprint_verbose_dev(drv_dev, "32-bit data port\n");
		}
	}
#if 0 /* Some ultra-DMA drives claims to only support ATA-3. sigh */
	if (params.atap_ata_major > 0x01 &&
	    params.atap_ata_major != 0xffff) {
		for (i = 14; i > 0; i--) {
			if (params.atap_ata_major & (1 << i)) {
				aprint_verbose_dev(drv_dev,
				    "ATA version %d\n", i);
				drvp->ata_vers = i;
				break;
			}
		}
	}
#endif

	/* An ATAPI device is at last PIO mode 3 */
	if (drvp->drive_type == ATA_DRIVET_ATAPI)
		drvp->PIO_mode = 3;

	/*
	 * It's not in the specs, but it seems that some drive
	 * returns 0xffff in atap_extensions when this field is invalid
	 */
	if (params.atap_extensions != 0xffff &&
	    (params.atap_extensions & WDC_EXT_MODES)) {
		/*
		 * XXX some drives report something wrong here (they claim to
		 * support PIO mode 8 !). As mode is coded on 3 bits in
		 * SET FEATURE, limit it to 7 (so limit i to 4).
		 * If higher mode than 7 is found, abort.
		 */
		for (i = 7; i >= 0; i--) {
			if ((params.atap_piomode_supp & (1 << i)) == 0)
				continue;
			if (i > 4)
				return;
			/*
			 * See if mode is accepted.
			 * If the controller can't set its PIO mode,
			 * assume the defaults are good, so don't try
			 * to set it
			 */
			if (atac->atac_set_modes)
				/*
				 * It's OK to poll here, it's fast enough
				 * to not bother waiting for interrupt
				 */
				if (ata_set_mode(drvp, 0x08 | (i + 3),
				    AT_WAIT) != CMD_OK)
					continue;
			if (!printed) {
				aprint_verbose_dev(drv_dev,
				    "drive supports PIO mode %d", i + 3);
				sep = ",";
				printed = 1;
			}
			/*
			 * If controller's driver can't set its PIO mode,
			 * get the higher one for the drive.
			 */
			if (atac->atac_set_modes == NULL ||
			    atac->atac_pio_cap >= i + 3) {
				drvp->PIO_mode = i + 3;
				drvp->PIO_cap = i + 3;
				break;
			}
		}
		if (!printed) {
			/*
			 * We didn't find a valid PIO mode.
			 * Assume the values returned for DMA are buggy too
			 */
			return;
		}
		ata_channel_lock(chp);
		drvp->drive_flags |= ATA_DRIVE_MODE;
		ata_channel_unlock(chp);
		printed = 0;
		for (i = 7; i >= 0; i--) {
			if ((params.atap_dmamode_supp & (1 << i)) == 0)
				continue;
#if NATA_DMA
			if ((atac->atac_cap & ATAC_CAP_DMA) &&
			    atac->atac_set_modes != NULL)
				if (ata_set_mode(drvp, 0x20 | i, AT_WAIT)
				    != CMD_OK)
					continue;
#endif
			if (!printed) {
				aprint_verbose("%s DMA mode %d", sep, i);
				sep = ",";
				printed = 1;
			}
#if NATA_DMA
			if (atac->atac_cap & ATAC_CAP_DMA) {
				if (atac->atac_set_modes != NULL &&
				    atac->atac_dma_cap < i)
					continue;
				drvp->DMA_mode = i;
				drvp->DMA_cap = i;
				ata_channel_lock(chp);
				drvp->drive_flags |= ATA_DRIVE_DMA;
				ata_channel_unlock(chp);
			}
#endif
			break;
		}
		if (params.atap_extensions & WDC_EXT_UDMA_MODES) {
			printed = 0;
			for (i = 7; i >= 0; i--) {
				if ((params.atap_udmamode_supp & (1 << i))
				    == 0)
					continue;
#if NATA_UDMA
				if (atac->atac_set_modes != NULL &&
				    (atac->atac_cap & ATAC_CAP_UDMA))
					if (ata_set_mode(drvp, 0x40 | i,
					    AT_WAIT) != CMD_OK)
						continue;
#endif
				if (!printed) {
					aprint_verbose("%s Ultra-DMA mode %d",
					    sep, i);
					if (i == 2)
						aprint_verbose(" (Ultra/33)");
					else if (i == 4)
						aprint_verbose(" (Ultra/66)");
					else if (i == 5)
						aprint_verbose(" (Ultra/100)");
					else if (i == 6)
						aprint_verbose(" (Ultra/133)");
					sep = ",";
					printed = 1;
				}
#if NATA_UDMA
				if (atac->atac_cap & ATAC_CAP_UDMA) {
					if (atac->atac_set_modes != NULL &&
					    atac->atac_udma_cap < i)
						continue;
					drvp->UDMA_mode = i;
					drvp->UDMA_cap = i;
					ata_channel_lock(chp);
					drvp->drive_flags |= ATA_DRIVE_UDMA;
					ata_channel_unlock(chp);
				}
#endif
				break;
			}
		}
	}

	ata_channel_lock(chp);
	drvp->drive_flags &= ~ATA_DRIVE_NOSTREAM;
	if (drvp->drive_type == ATA_DRIVET_ATAPI) {
		if (atac->atac_cap & ATAC_CAP_ATAPI_NOSTREAM)
			drvp->drive_flags |= ATA_DRIVE_NOSTREAM;
	} else {
		if (atac->atac_cap & ATAC_CAP_ATA_NOSTREAM)
			drvp->drive_flags |= ATA_DRIVE_NOSTREAM;
	}
	ata_channel_unlock(chp);

	/* Try to guess ATA version here, if it didn't get reported */
	if (drvp->ata_vers == 0) {
#if NATA_UDMA
		if (drvp->drive_flags & ATA_DRIVE_UDMA)
			drvp->ata_vers = 4; /* should be at last ATA-4 */
		else
#endif
		if (drvp->PIO_cap > 2)
			drvp->ata_vers = 2; /* should be at last ATA-2 */
	}
	cf_flags = device_cfdata(drv_dev)->cf_flags;
	if (cf_flags & ATA_CONFIG_PIO_SET) {
		ata_channel_lock(chp);
		drvp->PIO_mode =
		    (cf_flags & ATA_CONFIG_PIO_MODES) >> ATA_CONFIG_PIO_OFF;
		drvp->drive_flags |= ATA_DRIVE_MODE;
		ata_channel_unlock(chp);
	}
#if NATA_DMA
	if ((atac->atac_cap & ATAC_CAP_DMA) == 0) {
		/* don't care about DMA modes */
		goto out;
	}
	if (cf_flags & ATA_CONFIG_DMA_SET) {
		ata_channel_lock(chp);
		if ((cf_flags & ATA_CONFIG_DMA_MODES) ==
		    ATA_CONFIG_DMA_DISABLE) {
			drvp->drive_flags &= ~ATA_DRIVE_DMA;
		} else {
			drvp->DMA_mode = (cf_flags & ATA_CONFIG_DMA_MODES) >>
			    ATA_CONFIG_DMA_OFF;
			drvp->drive_flags |= ATA_DRIVE_DMA | ATA_DRIVE_MODE;
		}
		ata_channel_unlock(chp);
	}

	/*
	 * Probe WRITE DMA FUA EXT. Support is mandatory for devices
	 * supporting LBA48, but nevertheless confirm with the feature flag.
*/ if (drvp->drive_flags & ATA_DRIVE_DMA) { if ((params.atap_cmd2_en & ATA_CMD2_LBA48) != 0 && (params.atap_cmd_def & ATA_CMDE_WFE)) { drvp->drive_flags |= ATA_DRIVE_WFUA; aprint_verbose("%s WRITE DMA FUA", sep); sep = ","; } } /* Probe NCQ support - READ/WRITE FPDMA QUEUED command support */ ata_channel_lock(chp); drvp->drv_openings = 1; if (params.atap_sata_caps & SATA_NATIVE_CMDQ) { if (atac->atac_cap & ATAC_CAP_NCQ) drvp->drive_flags |= ATA_DRIVE_NCQ; drvp->drv_openings = (params.atap_queuedepth & WDC_QUEUE_DEPTH_MASK) + 1; aprint_verbose("%s NCQ (%d tags)", sep, drvp->drv_openings); sep = ","; if (params.atap_sata_caps & SATA_NCQ_PRIO) { drvp->drive_flags |= ATA_DRIVE_NCQ_PRIO; aprint_verbose(" w/PRIO"); } } ata_channel_unlock(chp); #if NATA_UDMA if ((atac->atac_cap & ATAC_CAP_UDMA) == 0) { /* don't care about UDMA modes */ goto out; } if (cf_flags & ATA_CONFIG_UDMA_SET) { ata_channel_lock(chp); if ((cf_flags & ATA_CONFIG_UDMA_MODES) == ATA_CONFIG_UDMA_DISABLE) { drvp->drive_flags &= ~ATA_DRIVE_UDMA; } else { drvp->UDMA_mode = (cf_flags & ATA_CONFIG_UDMA_MODES) >> ATA_CONFIG_UDMA_OFF; drvp->drive_flags |= ATA_DRIVE_UDMA | ATA_DRIVE_MODE; } ata_channel_unlock(chp); } #endif /* NATA_UDMA */ out: #endif /* NATA_DMA */ if (*sep != '\0') aprint_verbose("\n"); } /* management of the /dev/atabus* devices */ int atabusopen(dev_t dev, int flag, int fmt, struct lwp *l) { struct atabus_softc *sc; int error; sc = device_lookup_private(&atabus_cd, minor(dev)); if (sc == NULL) return (ENXIO); if (sc->sc_flags & ATABUSCF_OPEN) return (EBUSY); if ((error = ata_addref(sc->sc_chan)) != 0) return (error); sc->sc_flags |= ATABUSCF_OPEN; return (0); } int atabusclose(dev_t dev, int flag, int fmt, struct lwp *l) { struct atabus_softc *sc = device_lookup_private(&atabus_cd, minor(dev)); ata_delref(sc->sc_chan); sc->sc_flags &= ~ATABUSCF_OPEN; return (0); } int atabusioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l) { struct atabus_softc *sc = 
device_lookup_private(&atabus_cd, minor(dev)); struct ata_channel *chp = sc->sc_chan; int min_drive, max_drive, drive; int error; /* * Enforce write permission for ioctls that change the * state of the bus. Host adapter specific ioctls must * be checked by the adapter driver. */ switch (cmd) { case ATABUSIOSCAN: case ATABUSIODETACH: case ATABUSIORESET: if ((flag & FWRITE) == 0) return (EBADF); } switch (cmd) { case ATABUSIORESET: ata_channel_lock(chp); ata_thread_run(sc->sc_chan, AT_WAIT | AT_POLL, ATACH_TH_RESET, ATACH_NODRIVE); ata_channel_unlock(chp); return 0; case ATABUSIOSCAN: { #if 0 struct atabusioscan_args *a= (struct atabusioscan_args *)addr; #endif if ((chp->ch_drive[0].drive_type == ATA_DRIVET_OLD) || (chp->ch_drive[1].drive_type == ATA_DRIVET_OLD)) return (EOPNOTSUPP); return (EOPNOTSUPP); } case ATABUSIODETACH: { struct atabusiodetach_args *a= (struct atabusiodetach_args *)addr; if ((chp->ch_drive[0].drive_type == ATA_DRIVET_OLD) || (chp->ch_drive[1].drive_type == ATA_DRIVET_OLD)) return (EOPNOTSUPP); switch (a->at_dev) { case -1: min_drive = 0; max_drive = 1; break; case 0: case 1: min_drive = max_drive = a->at_dev; break; default: return (EINVAL); } for (drive = min_drive; drive <= max_drive; drive++) { if (chp->ch_drive[drive].drv_softc != NULL) { error = config_detach( chp->ch_drive[drive].drv_softc, 0); if (error) return (error); KASSERT(chp->ch_drive[drive].drv_softc == NULL); } } return 0; } default: return ENOTTY; } } static bool atabus_suspend(device_t dv, const pmf_qual_t *qual) { struct atabus_softc *sc = device_private(dv); struct ata_channel *chp = sc->sc_chan; ata_channel_idle(chp); return true; } static bool atabus_resume(device_t dv, const pmf_qual_t *qual) { struct atabus_softc *sc = device_private(dv); struct ata_channel *chp = sc->sc_chan; /* * XXX joerg: with wdc, the first channel unfreezes the controller. * Move this the reset and queue idling into wdc. 
*/ ata_channel_lock(chp); if (chp->ch_queue->queue_freeze == 0) { ata_channel_unlock(chp); goto out; } /* unfreeze the queue and reset drives */ ata_channel_thaw_locked(chp); /* reset channel only if there are drives attached */ if (chp->ch_ndrives > 0) ata_thread_run(chp, AT_WAIT, ATACH_TH_RESET, ATACH_NODRIVE); ata_channel_unlock(chp); out: return true; } static int atabus_rescan(device_t self, const char *ifattr, const int *locators) { struct atabus_softc *sc = device_private(self); struct ata_channel *chp = sc->sc_chan; struct atabus_initq *initq; int i; /* * we can rescan a port multiplier atabus, even if some devices are * still attached */ if (chp->ch_satapmp_nports == 0) { if (chp->atapibus != NULL) { return EBUSY; } KASSERT(chp->ch_ndrives == 0 || chp->ch_drive != NULL); for (i = 0; i < chp->ch_ndrives; i++) { if (chp->ch_drive[i].drv_softc != NULL) { return EBUSY; } } } initq = kmem_zalloc(sizeof(*initq), KM_SLEEP); initq->atabus_sc = sc; mutex_enter(&atabus_qlock); TAILQ_INSERT_TAIL(&atabus_initq_head, initq, atabus_initq); mutex_exit(&atabus_qlock); config_pending_incr(sc->sc_dev); ata_channel_lock(chp); chp->ch_flags |= ATACH_TH_RESCAN; cv_signal(&chp->ch_thr_idle); ata_channel_unlock(chp); return 0; } void ata_delay(struct ata_channel *chp, int ms, const char *msg, int flags) { KASSERT(mutex_owned(&chp->ch_lock)); if ((flags & (AT_WAIT | AT_POLL)) == AT_POLL) { /* * can't use kpause(), we may be in interrupt context * or taking a crash dump */ delay(ms * 1000); } else { int pause = mstohz(ms); kpause(msg, false, pause > 0 ? pause : 1, &chp->ch_lock); } } void atacmd_toncq(struct ata_xfer *xfer, uint8_t *cmd, uint16_t *count, uint16_t *features, uint8_t *device) { if ((xfer->c_flags & C_NCQ) == 0) { /* FUA handling for non-NCQ drives */ if (xfer->c_bio.flags & ATA_FUA && *cmd == WDCC_WRITEDMA_EXT) *cmd = WDCC_WRITEDMA_FUA_EXT; return; } *cmd = (xfer->c_bio.flags & ATA_READ) ? 
WDCC_READ_FPDMA_QUEUED : WDCC_WRITE_FPDMA_QUEUED; /* for FPDMA the block count is in features */ *features = *count; /* NCQ tag */ *count = (xfer->c_slot << 3); if (xfer->c_bio.flags & ATA_PRIO_HIGH) *count |= WDSC_PRIO_HIGH; /* other device flags */ if (xfer->c_bio.flags & ATA_FUA) *device |= WDSD_FUA; } void ata_wait_cmd(struct ata_channel *chp, struct ata_xfer *xfer) { struct ata_queue *chq = chp->ch_queue; struct ata_command *ata_c = &xfer->c_ata_c; ata_channel_lock(chp); while ((ata_c->flags & AT_DONE) == 0) cv_wait(&chq->c_cmd_finish, &chp->ch_lock); ata_channel_unlock(chp); KASSERT((ata_c->flags & AT_DONE) != 0); } |
| 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | /* $NetBSD: chacha_sse2_impl.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(1, "$NetBSD: chacha_sse2_impl.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $"); #include "chacha_sse2.h" #ifdef _KERNEL #include <x86/cpu.h> #include <x86/fpu.h> #else #include <sys/sysctl.h> #include <cpuid.h> #include <stddef.h> #define fpu_kern_enter() ((void)0) #define fpu_kern_leave() ((void)0) #endif static void chacha_core_sse2_impl(uint8_t out[restrict static 64], const uint8_t in[static 16], const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr) { fpu_kern_enter(); chacha_core_sse2(out, in, k, c, nr); fpu_kern_leave(); } static void hchacha_sse2_impl(uint8_t out[restrict static 32], const uint8_t in[static 16], const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr) { fpu_kern_enter(); hchacha_sse2(out, in, k, c, nr); fpu_kern_leave(); } static void chacha_stream_sse2_impl(uint8_t *restrict s, size_t nbytes, uint32_t blkno, const uint8_t nonce[static 12], const uint8_t key[static 32], unsigned nr) { fpu_kern_enter(); chacha_stream_sse2(s, nbytes, blkno, nonce, key, nr); fpu_kern_leave(); } static void chacha_stream_xor_sse2_impl(uint8_t *c, const uint8_t *p, size_t nbytes, uint32_t blkno, const uint8_t nonce[static 12], const uint8_t key[static 32], unsigned nr) { fpu_kern_enter(); chacha_stream_xor_sse2(c, p, nbytes, blkno, nonce, key, nr); fpu_kern_leave(); } static void xchacha_stream_sse2_impl(uint8_t *restrict s, size_t nbytes, uint32_t blkno, const uint8_t nonce[static 24], const uint8_t key[static 32], unsigned nr) { fpu_kern_enter(); xchacha_stream_sse2(s, nbytes, blkno, nonce, key, nr); fpu_kern_leave(); } static void xchacha_stream_xor_sse2_impl(uint8_t *c, const uint8_t *p, size_t nbytes, uint32_t blkno, const uint8_t nonce[static 24], const uint8_t key[static 32], unsigned nr) { fpu_kern_enter(); xchacha_stream_xor_sse2(c, p, nbytes, blkno, nonce, key, nr); fpu_kern_leave(); } static int chacha_probe_sse2(void) { /* Verify that the CPU supports SSE and SSE2. 
*/ #ifdef _KERNEL if (!i386_has_sse) return -1; if (!i386_has_sse2) return -1; #else unsigned eax, ebx, ecx, edx; if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return -1; if ((edx & bit_SSE) == 0) return -1; if ((edx & bit_SSE2) == 0) return -1; #endif return 0; } const struct chacha_impl chacha_sse2_impl = { .ci_name = "x86 SSE2 ChaCha", .ci_probe = chacha_probe_sse2, .ci_chacha_core = chacha_core_sse2_impl, .ci_hchacha = hchacha_sse2_impl, .ci_chacha_stream = chacha_stream_sse2_impl, .ci_chacha_stream_xor = chacha_stream_xor_sse2_impl, .ci_xchacha_stream = xchacha_stream_sse2_impl, .ci_xchacha_stream_xor = xchacha_stream_xor_sse2_impl, }; |
| 485 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | /* $NetBSD: secmodel.c,v 1.2 2014/11/04 16:01:58 maxv Exp $ */ /*- * Copyright (c) 2011 Elad Efrat <elad@NetBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/types.h> #include <sys/param.h> #include <sys/errno.h> #include <sys/atomic.h> #include <sys/kauth.h> #include <sys/kmem.h> #include <sys/queue.h> #include <sys/rwlock.h> #include <secmodel/secmodel.h> #include <prop/proplib.h> /* List of secmodels, parameters, and lock. */ static LIST_HEAD(, secmodel_descr) secmodels = LIST_HEAD_INITIALIZER(secmodels); static unsigned int secmodel_copy_cred_on_fork = false; static krwlock_t secmodels_lock; static int nsecmodels = 0; /* number of registered secmodels */ static int secmodel_plug(secmodel_t); static int secmodel_unplug(secmodel_t); int secmodel_nsecmodels(void) { return nsecmodels; } void secmodel_init(void) { rw_init(&secmodels_lock); secmodel_copy_cred_on_fork = false; } /* * Register a new secmodel. */ int secmodel_register(secmodel_t *secmodel, const char *id, const char *name, prop_dictionary_t behavior, secmodel_eval_t eval, secmodel_setinfo_t setinfo) { int err; secmodel_t sm; sm = kmem_alloc(sizeof(*sm), KM_SLEEP); sm->sm_id = id; sm->sm_name = name; sm->sm_behavior = behavior; sm->sm_eval = eval; sm->sm_setinfo = setinfo; err = secmodel_plug(sm); if (err == 0) { atomic_inc_uint(&nsecmodels); } else { kmem_free(sm, sizeof(*sm)); sm = NULL; } *secmodel = sm; return err; } /* * Deregister a secmodel. 
*/ int secmodel_deregister(secmodel_t sm) { int error; error = secmodel_unplug(sm); if (error == 0) { atomic_dec_uint(&nsecmodels); kmem_free(sm, sizeof(*sm)); } return error; } /* * Lookup a secmodel by its id. * * Requires "secmodels_lock" handling by the caller. */ static secmodel_t secmodel_lookup(const char *id) { secmodel_t tsm; KASSERT(rw_lock_held(&secmodels_lock)); LIST_FOREACH(tsm, &secmodels, sm_list) { if (strcasecmp(tsm->sm_id, id) == 0) { return tsm; } } return NULL; } /* * Adjust system-global secmodel behavior following the addition * or removal of a secmodel. * * Requires "secmodels_lock" to be held by the caller. */ static void secmodel_adjust_behavior(secmodel_t sm, bool added) { bool r, b; KASSERT(rw_write_held(&secmodels_lock)); #define ADJUST_COUNTER(which, added) \ do { \ if (added) { \ (which)++; \ } else { \ if ((which) > 0) \ (which)--; \ } \ } while (/*CONSTCOND*/0) /* Copy credentials on fork? */ r = prop_dictionary_get_bool(sm->sm_behavior, "copy-cred-on-fork", &b); if (r) { ADJUST_COUNTER(secmodel_copy_cred_on_fork, added); } #undef ADJUST_COUNTER } static int secmodel_plug(secmodel_t sm) { secmodel_t tsm; int error = 0; if (sm == NULL) return EFAULT; /* Check if the secmodel is already present. */ rw_enter(&secmodels_lock, RW_WRITER); tsm = secmodel_lookup(sm->sm_id); if (tsm != NULL) { error = EEXIST; goto out; } /* Add the secmodel. */ LIST_INSERT_HEAD(&secmodels, sm, sm_list); /* Adjust behavior. */ secmodel_adjust_behavior(sm, true); out: /* Unlock the secmodels list. */ rw_exit(&secmodels_lock); return error; } static int secmodel_unplug(secmodel_t sm) { secmodel_t tsm; int error = 0; if (sm == NULL) return EFAULT; /* Make sure the secmodel is present. */ rw_enter(&secmodels_lock, RW_WRITER); tsm = secmodel_lookup(sm->sm_id); if (tsm == NULL) { error = ENOENT; goto out; } /* Remove the secmodel. */ LIST_REMOVE(tsm, sm_list); /* Adjust behavior. */ secmodel_adjust_behavior(tsm, false); out: /* Unlock the secmodels list. 
*/ rw_exit(&secmodels_lock); return error; } /* XXX TODO */ int secmodel_setinfo(const char *id, void *v, int *err) { return EOPNOTSUPP; } int secmodel_eval(const char *id, const char *what, void *arg, void *ret) { secmodel_t sm; int error = 0; rw_enter(&secmodels_lock, RW_READER); sm = secmodel_lookup(id); if (sm == NULL) { error = EINVAL; goto out; } if (sm->sm_eval == NULL) { error = ENOENT; goto out; } if (ret == NULL) { error = EFAULT; goto out; } error = sm->sm_eval(what, arg, ret); /* pass error from a secmodel(9) callback as a negative value */ error = -error; out: rw_exit(&secmodels_lock); return error; } |
| 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | /* $NetBSD: wsfontdev.c,v 1.20 2022/05/12 23:17:42 uwe Exp $ */ /* * Copyright (c) 2001 * Matthias Drochner. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: wsfontdev.c,v 1.20 2022/05/12 23:17:42 uwe Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/conf.h> #include <sys/fcntl.h> #include <sys/ioctl.h> #include <sys/malloc.h> #include <sys/event.h> #include <dev/wsfont/wsfont.h> #include <dev/wscons/wsconsio.h> /* XXX */ #include "ioconf.h" #ifdef WSFONT_DEBUG #define DPRINTF printf #else #define DPRINTF while (0) printf #endif static int wsfont_isopen; void wsfontattach(int n) { wsfont_init(); } static int wsfontopen(dev_t dev, int flag, int mode, struct lwp *l) { if (wsfont_isopen) return (EBUSY); wsfont_isopen = 1; return (0); } static int wsfontclose(dev_t dev, int flag, int mode, struct lwp *l) { wsfont_isopen = 0; return (0); } static void fontmatchfunc(struct wsdisplay_font *f, void *cookie, int fontcookie) { struct wsdisplayio_fontinfo *fi = cookie; struct wsdisplayio_fontdesc fd; int offset; DPRINTF("%s %dx%d\n", f->name, f->fontwidth, f->fontheight); if (fi->fi_fonts != NULL && fi->fi_buffersize > 0) { memset(&fd, 0, sizeof(fd)); strncpy(fd.fd_name, f->name, sizeof(fd.fd_name) - 1); fd.fd_width = f->fontwidth; fd.fd_height = f->fontheight; offset = sizeof(struct wsdisplayio_fontdesc) * (fi->fi_numentries + 1); if (offset > fi->fi_buffersize) { fi->fi_fonts = NULL; } else copyout(&fd, &fi->fi_fonts[fi->fi_numentries], sizeof(struct wsdisplayio_fontdesc)); } fi->fi_numentries++; } static int wsdisplayio_listfonts(struct wsdisplayio_fontinfo *f) { void *addr = f->fi_fonts; DPRINTF("%s: %d %d\n", __func__, f->fi_buffersize, f->fi_numentries); f->fi_numentries = 0; wsfont_walk(fontmatchfunc, f); /* check if we ran out of buffer space */ if (f->fi_fonts == NULL && addr != NULL) return ENOMEM; return 0; } static int wsfontioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { char nbuf[64]; void *buf; int res; switch (cmd) { case WSDISPLAYIO_LDFONT: #define d ((struct wsdisplay_font *)data) if ((flag & FWRITE) == 0) return EPERM; 
if (d->name) { res = copyinstr(d->name, nbuf, sizeof(nbuf), 0); if (res) return (res); d->name = nbuf; } else d->name = "loaded"; /* ??? */ buf = malloc(d->fontheight * d->stride * d->numchars, M_DEVBUF, M_WAITOK); res = copyin(d->data, buf, d->fontheight * d->stride * d->numchars); if (res) { free(buf, M_DEVBUF); return (res); } d->data = buf; res = wsfont_add(d, 1); free(buf, M_DEVBUF); #undef d return (res); case WSDISPLAYIO_LISTFONTS: return wsdisplayio_listfonts(data); default: return (EINVAL); } } const struct cdevsw wsfont_cdevsw = { .d_open = wsfontopen, .d_close = wsfontclose, .d_read = noread, .d_write = nowrite, .d_ioctl = wsfontioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER }; |
| 119 36 88 422 420 420 318 320 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 | /* $NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software developed for The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $"); #ifdef _KERNEL_OPT #include "opt_modular.h" #include "opt_syscall_debug.h" #include "opt_ktrace.h" #include "opt_ptrace.h" #include "opt_dtrace.h" #endif /* XXX To get syscall prototypes. */ #define SYSVSHM #define SYSVSEM #define SYSVMSG #include <sys/param.h> #include <sys/module.h> #include <sys/sched.h> #include <sys/syscall.h> #include <sys/syscallargs.h> #include <sys/syscallvar.h> #include <sys/systm.h> #include <sys/xcall.h> #include <sys/ktrace.h> #include <sys/ptrace.h> int sys_nomodule(struct lwp *l, const void *v, register_t *retval) { #ifdef MODULAR const struct sysent *sy; const struct emul *em; const struct sc_autoload *auto_list; u_int code; /* * Restart the syscall if we interrupted a module unload that * failed. Acquiring kernconfig_lock delays us until any unload * has been completed or rolled back. */ kernconfig_lock(); sy = l->l_sysent; if (sy->sy_call != sys_nomodule) { kernconfig_unlock(); return ERESTART; } /* * Try to autoload a module to satisfy the request. If it * works, retry the request. 
*/ em = l->l_proc->p_emul; code = sy - em->e_sysent; if ((auto_list = em->e_sc_autoload) != NULL) for (; auto_list->al_code > 0; auto_list++) { if (auto_list->al_code != code) { continue; } if (module_autoload(auto_list->al_module, MODULE_CLASS_ANY) != 0 || sy->sy_call == sys_nomodule) { break; } kernconfig_unlock(); return ERESTART; } kernconfig_unlock(); #endif /* MODULAR */ return sys_nosys(l, v, retval); } int syscall_establish(const struct emul *em, const struct syscall_package *sp) { struct sysent *sy; int i; KASSERT(kernconfig_is_held()); if (em == NULL) { em = &emul_netbsd; } sy = em->e_sysent; /* * Ensure that all preconditions are valid, since this is * an all or nothing deal. Once a system call is entered, * it can become busy and we could be unable to remove it * on error. */ for (i = 0; sp[i].sp_call != NULL; i++) { if (sp[i].sp_code >= SYS_NSYSENT) return EINVAL; if (sy[sp[i].sp_code].sy_call != sys_nomodule && sy[sp[i].sp_code].sy_call != sys_nosys) { #ifdef DIAGNOSTIC printf("syscall %d is busy\n", sp[i].sp_code); #endif return EBUSY; } } /* Everything looks good, patch them in. */ for (i = 0; sp[i].sp_call != NULL; i++) { sy[sp[i].sp_code].sy_call = sp[i].sp_call; } return 0; } int syscall_disestablish(const struct emul *em, const struct syscall_package *sp) { struct sysent *sy; const uint32_t *sb; lwp_t *l; int i; KASSERT(kernconfig_is_held()); if (em == NULL) { em = &emul_netbsd; } sy = em->e_sysent; sb = em->e_nomodbits; /* * First, patch the system calls to sys_nomodule or sys_nosys * to gate further activity. */ for (i = 0; sp[i].sp_call != NULL; i++) { KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); sy[sp[i].sp_code].sy_call = sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ? sys_nomodule : sys_nosys; } /* * Run a cross call to cycle through all CPUs. 
This does two * things: lock activity provides a barrier and makes our update * of sy_call visible to all CPUs, and upon return we can be sure * that we see pertinent values of l_sysent posted by remote CPUs. */ xc_barrier(0); /* * Now it's safe to check l_sysent. Run through all LWPs and see * if anyone is still using the system call. */ for (i = 0; sp[i].sp_call != NULL; i++) { mutex_enter(&proc_lock); LIST_FOREACH(l, &alllwp, l_list) { if (l->l_sysent == &sy[sp[i].sp_code]) { break; } } mutex_exit(&proc_lock); if (l == NULL) { continue; } /* * We lose: one or more calls are still in use. Put back * the old entrypoints and act like nothing happened. * When we drop kernconfig_lock, any system calls held in * sys_nomodule() will be restarted. */ for (i = 0; sp[i].sp_call != NULL; i++) { sy[sp[i].sp_code].sy_call = sp[i].sp_call; } return EBUSY; } return 0; } /* * Return true if system call tracing is enabled for the specified process. */ bool trace_is_enabled(struct proc *p) { #ifdef SYSCALL_DEBUG return (true); #endif #ifdef KTRACE if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) return (true); #endif #ifdef PTRACE if (ISSET(p->p_slflag, PSL_SYSCALL)) return (true); #endif return (false); } /* * Start trace of particular system call. If process is being traced, * this routine is called by MD syscall dispatch code just before * a system call is actually executed. 
*/ int trace_enter(register_t code, const struct sysent *sy, const void *args) { int error = 0; #if defined(PTRACE) || defined(KDTRACE_HOOKS) struct proc *p = curlwp->l_proc; #endif #ifdef KDTRACE_HOOKS if (sy->sy_entry) { struct emul *e = p->p_emul; if (e->e_dtrace_syscall) (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0); } #endif #ifdef SYSCALL_DEBUG scdebug_call(code, args); #endif /* SYSCALL_DEBUG */ ktrsyscall(code, args, sy->sy_narg); #ifdef PTRACE if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == (PSL_SYSCALL|PSL_TRACED)) { proc_stoptrace(TRAP_SCE, code, args, NULL, 0); if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { /* tracer will emulate syscall for us */ error = EJUSTRETURN; } } #endif return error; } /* * End trace of particular system call. If process is being traced, * this routine is called by MD syscall dispatch code just after * a system call finishes. * MD caller guarantees the passed 'code' is within the supported * system call number range for emulation the process runs under. */ void trace_exit(register_t code, const struct sysent *sy, const void *args, register_t rval[], int error) { #if defined(PTRACE) || defined(KDTRACE_HOOKS) struct proc *p = curlwp->l_proc; #endif #ifdef KDTRACE_HOOKS if (sy->sy_return) { struct emul *e = p->p_emul; if (e->e_dtrace_syscall) (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args, rval, error); } #endif #ifdef SYSCALL_DEBUG scdebug_ret(code, error, rval); #endif /* SYSCALL_DEBUG */ ktrsysret(code, error, rval); #ifdef PTRACE if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == (PSL_SYSCALL|PSL_TRACED)) { proc_stoptrace(TRAP_SCX, code, args, rval, error); } CLR(p->p_slflag, PSL_SYSCALLEMU); #endif } |
| 29 29 28 1 1 1 6 6 6 2 2 5 5 2 2 3 1 3 3 2 2 2 2 3 2 2 2 2 1 1 1 1 1 1 3 20 2 3 1 3 2 1 1 3 1 17 3 2 2 1 1 1 1 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 | /* $NetBSD: kern_drvctl.c,v 1.51 2022/03/28 12:33:22 riastradh Exp $ */ /* * Copyright (c) 2004 * Matthias Drochner. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_drvctl.c,v 1.51 2022/03/28 12:33:22 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/event.h>
#include <sys/kmem.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/select.h>
#include <sys/poll.h>
#include <sys/drvctlio.h>
#include <sys/devmon.h>
#include <sys/stat.h>
#include <sys/kauth.h>
#include <sys/lwp.h>
#include <sys/module.h>

#include "ioconf.h"

/*
 * Driver control pseudo-device: provides the ioctl back end used by
 * drvctl(8) (device suspend/resume, listing, detach, bus rescan, and a
 * plist-based command interface) and a bounded FIFO of device-monitor
 * events delivered to openers via DRVGETEVENT.
 */

/* One queued device-monitor event (a proplib dictionary we own). */
struct drvctl_event {
	TAILQ_ENTRY(drvctl_event) dce_link;
	prop_dictionary_t dce_event;
};

TAILQ_HEAD(drvctl_queue, drvctl_event);

static struct drvctl_queue	drvctl_eventq;		/* FIFO */
static kcondvar_t		drvctl_cond;	/* signalled on event enqueue */
static kmutex_t			drvctl_lock;	/* guards queue + counters */
static int			drvctl_nopen = 0, drvctl_eventcnt = 0;
static struct selinfo		drvctl_rdsel;	/* poll/select waiters */

#define DRVCTL_EVENTQ_DEPTH	64	/* arbitrary queue limit */

dev_type_open(drvctlopen);

const struct cdevsw drvctl_cdevsw = {
	.d_open = drvctlopen,
	.d_close = nullclose,
	.d_read = nullread,
	.d_write = nullwrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER
};

static int	drvctl_read(struct file *, off_t *, struct uio *,
			    kauth_cred_t, int);
static int	drvctl_write(struct file *, off_t *, struct uio *,
			     kauth_cred_t, int);
static int	drvctl_ioctl(struct file *, u_long, void *);
static int	drvctl_poll(struct file *, int);
static int	drvctl_stat(struct file *, struct stat *);
static int	drvctl_close(struct file *);

/* All real I/O goes through these per-open fileops, not the cdevsw. */
static const struct fileops drvctl_fileops = {
	.fo_name = "drvctl",
	.fo_read = drvctl_read,
	.fo_write = drvctl_write,
	.fo_ioctl = drvctl_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = drvctl_poll,
	.fo_stat = drvctl_stat,
	.fo_close = drvctl_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};

#define MAXLOCATORS 100

/* Previous devmon hook, restored on module unload (see drvctl_modcmd). */
static int (*saved_insert_vec)(const char *, prop_dictionary_t) = NULL;

static int drvctl_command(struct lwp *, struct plistref *, u_long, int);
static int drvctl_getevent(struct lwp *, struct plistref *, u_long, int);

/*
 * Initialize the event queue, its lock/condvar, and the select info.
 */
void
drvctl_init(void)
{
	TAILQ_INIT(&drvctl_eventq);
	mutex_init(&drvctl_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&drvctl_cond, "devmon");
	selinit(&drvctl_rdsel);
}

/*
 * Tear down what drvctl_init() created.  Callers must ensure the queue
 * is already empty and there are no openers (checked in drvctl_modcmd).
 */
void
drvctl_fini(void)
{
	seldestroy(&drvctl_rdsel);
	cv_destroy(&drvctl_cond);
	mutex_destroy(&drvctl_lock);
}

/*
 * Enqueue a device-monitor event.  Always consumes the reference to
 * "ev": it is released immediately when nobody has the device open or
 * when the mandatory "event" member cannot be set; otherwise ownership
 * moves to the queue.  When the queue is at DRVCTL_EVENTQ_DEPTH the
 * oldest event is dropped to make room.  Always returns 0.
 */
int
devmon_insert(const char *event, prop_dictionary_t ev)
{
	struct drvctl_event *dce, *odce;

	mutex_enter(&drvctl_lock);

	if (drvctl_nopen == 0) {
		prop_object_release(ev);
		mutex_exit(&drvctl_lock);
		return 0;
	}

	/* Fill in mandatory member */
	if (!prop_dictionary_set_string_nocopy(ev, "event", event)) {
		prop_object_release(ev);
		mutex_exit(&drvctl_lock);
		return 0;
	}

	dce = kmem_alloc(sizeof(*dce), KM_SLEEP);
	dce->dce_event = ev;

	/* Queue full: discard the oldest event to bound memory use. */
	if (drvctl_eventcnt == DRVCTL_EVENTQ_DEPTH) {
		odce = TAILQ_FIRST(&drvctl_eventq);
		TAILQ_REMOVE(&drvctl_eventq, odce, dce_link);
		prop_object_release(odce->dce_event);
		kmem_free(odce, sizeof(*odce));
		--drvctl_eventcnt;
	}

	TAILQ_INSERT_TAIL(&drvctl_eventq, dce, dce_link);
	++drvctl_eventcnt;
	cv_broadcast(&drvctl_cond);
	selnotify(&drvctl_rdsel, 0, 0);
	mutex_exit(&drvctl_lock);

	return 0;
}

/*
 * Clone-open: allocate a file descriptor bound to drvctl_fileops and
 * count the opener (drvctl_nopen gates event queueing above).
 */
int
drvctlopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	struct file *fp;
	int fd;
	int ret;

	ret = fd_allocfile(&fp, &fd);
	if (ret)
		return ret;
	/* XXX setup context */
	mutex_enter(&drvctl_lock);
	ret = fd_clone(fp, fd, flags, &drvctl_fileops, /* context */NULL);
	++drvctl_nopen;
	mutex_exit(&drvctl_lock);

	return ret;
}

/*
 * DRVSUSPENDDEV/DRVRESUMEDEV: suspend or resume the named device via
 * pmf.  Returns ENXIO if the device does not exist, EBUSY if pmf
 * refuses, EPASSTHROUGH for any other command code.
 */
static int
pmdevbyname(u_long cmd, struct devpmargs *a)
{
	device_t d;

	KASSERT(KERNEL_LOCKED_P());

	if ((d = device_find_by_xname(a->devname)) == NULL)
		return ENXIO;

	switch (cmd) {
	case DRVSUSPENDDEV:
		return pmf_device_recursive_suspend(d, PMF_Q_DRVCTL) ? 0 : EBUSY;
	case DRVRESUMEDEV:
		if (a->flags & DEVPM_F_SUBTREE) {
			return pmf_device_subtree_resume(d, PMF_Q_DRVCTL)
			    ? 0 : EBUSY;
		} else {
			return pmf_device_recursive_resume(d, PMF_Q_DRVCTL)
			    ? 0 : EBUSY;
		}
	default:
		return EPASSTHROUGH;
	}
}

/*
 * DRVLISTDEV: copy out the names of all children of the named device
 * (the device tree roots when l_devname is empty).  On return
 * l->l_children holds the total child count, which may exceed the
 * number of names actually copied into the caller-supplied array.
 */
static int
listdevbyname(struct devlistargs *l)
{
	device_t d, child;
	deviter_t di;
	int cnt = 0, idx, error = 0;

	KASSERT(KERNEL_LOCKED_P());

	if (*l->l_devname == '\0')
		d = NULL;
	else if (memchr(l->l_devname, 0, sizeof(l->l_devname)) == NULL)
		return EINVAL;	/* name not NUL-terminated */
	else if ((d = device_find_by_xname(l->l_devname)) == NULL)
		return ENXIO;

	for (child = deviter_first(&di, 0); child != NULL;
	     child = deviter_next(&di)) {
		if (device_parent(child) != d)
			continue;
		idx = cnt++;
		/* Keep counting even once the output array is full. */
		if (l->l_childname == NULL || idx >= l->l_children)
			continue;
		error = copyoutstr(device_xname(child), l->l_childname[idx],
		    sizeof(l->l_childname[idx]), NULL);
		if (error != 0)
			break;
	}
	deviter_release(&di);

	l->l_children = cnt;
	return error;
}

/*
 * DRVDETACHDEV: detach the named device.  Unless XXXFULLRISK is
 * defined, refuses (ENOTSUP) when the parent has no ca_childdetached
 * callback, since the parent could then keep stale pointers.
 */
static int
detachdevbyname(const char *devname)
{
	device_t d;
	deviter_t di;
	int error;

	KASSERT(KERNEL_LOCKED_P());

	for (d = deviter_first(&di, DEVITER_F_RW); d != NULL;
	     d = deviter_next(&di)) {
		if (strcmp(device_xname(d), devname) == 0)
			break;
	}
	if (d == NULL) {
		error = ENXIO;
		goto out;
	}

#ifndef XXXFULLRISK
	/*
	 * If the parent cannot be notified, it might keep
	 * pointers to the detached device.
	 * There might be a private notification mechanism,
	 * but better play it safe here.
	 */
	if (device_parent(d) &&
	    !device_cfattach(device_parent(d))->ca_childdetached) {
		error = ENOTSUP;
		goto out;
	}
#endif
	error = config_detach(d, 0);
out:
	deviter_release(&di);
	return error;
}

/*
 * DRVRESCANBUS: invoke the bus driver's ca_rescan hook for one
 * interface attribute, or for every attribute when ifattr is NULL.
 * Unspecified locators default to -1 (wildcard).
 */
static int
rescanbus(const char *busname, const char *ifattr, int numlocators,
    const int *locators)
{
	int i, rc;
	device_t d;
	const struct cfiattrdata * const *ap;

	KASSERT(KERNEL_LOCKED_P());

	/* XXX there should be a way to get limits and defaults (per device)
	   from config generated data */
	int locs[MAXLOCATORS];
	for (i = 0; i < MAXLOCATORS; i++)
		locs[i] = -1;
	for (i = 0; i < numlocators;i++)
		locs[i] = locators[i];

	if ((d = device_find_by_xname(busname)) == NULL)
		return ENXIO;

	/*
	 * must support rescan, and must have something
	 * to attach to
	 */
	if (!device_cfattach(d)->ca_rescan ||
	    !device_cfdriver(d)->cd_attrs)
		return ENODEV;

	/* rescan all ifattrs if none is specified */
	if (!ifattr) {
		rc = 0;
		for (ap = device_cfdriver(d)->cd_attrs; *ap; ap++) {
			rc = (*device_cfattach(d)->ca_rescan)(d,
			    (*ap)->ci_name, locs);
			if (rc)
				break;
		}
	} else {
		/* check for valid attribute passed */
		for (ap = device_cfdriver(d)->cd_attrs; *ap; ap++)
			if (!strcmp((*ap)->ci_name, ifattr))
				break;
		if (!*ap)
			return EINVAL;
		rc = (*device_cfattach(d)->ca_rescan)(d, ifattr, locs);
	}

	/* Attach anything that the rescan deferred. */
	config_deferred(NULL);
	return rc;
}

/* read(2) is not supported on this device; use the ioctl interface. */
static int
drvctl_read(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	return ENODEV;
}

/* write(2) is not supported on this device; use the ioctl interface. */
static int
drvctl_write(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	return ENODEV;
}

/*
 * Main ioctl dispatcher.  Runs with the big kernel lock held because
 * the device-tree helpers above assert KERNEL_LOCKED_P().  Returns
 * EPASSTHROUGH for unknown commands.
 */
static int
drvctl_ioctl(struct file *fp, u_long cmd, void *data)
{
	int res;
	char *ifattr;
	int *locs;
	size_t locs_sz = 0; /* XXXgcc */

	KERNEL_LOCK(1, NULL);
	switch (cmd) {
	case DRVSUSPENDDEV:
	case DRVRESUMEDEV:
#define d ((struct devpmargs *)data)
		res = pmdevbyname(cmd, d);
#undef d
		break;
	case DRVLISTDEV:
		res = listdevbyname((struct devlistargs *)data);
		break;
	case DRVDETACHDEV:
#define d ((struct devdetachargs *)data)
		res = detachdevbyname(d->devname);
#undef d
		break;
	case DRVRESCANBUS:
#define d ((struct devrescanargs *)data)
		/* Defensively terminate the user-supplied strings. */
		d->busname[sizeof(d->busname) - 1] = '\0';

		/* XXX better copyin? */
		if (d->ifattr[0]) {
			d->ifattr[sizeof(d->ifattr) - 1] = '\0';
			ifattr = d->ifattr;
		} else
			ifattr = 0;

		if (d->numlocators) {
			if (d->numlocators > MAXLOCATORS) {
				res = EINVAL;
				goto out;
			}
			locs_sz = d->numlocators * sizeof(int);
			locs = kmem_alloc(locs_sz, KM_SLEEP);
			res = copyin(d->locators, locs, locs_sz);
			if (res) {
				kmem_free(locs, locs_sz);
				goto out;
			}
		} else
			locs = NULL;
		res = rescanbus(d->busname, ifattr, d->numlocators, locs);
		if (locs)
			kmem_free(locs, locs_sz);
#undef d
		break;
	case DRVCTLCOMMAND:
		res = drvctl_command(curlwp, (struct plistref *)data, cmd,
		    fp->f_flag);
		break;
	case DRVGETEVENT:
		res = drvctl_getevent(curlwp, (struct plistref *)data, cmd,
		    fp->f_flag);
		break;
	default:
		res = EPASSTHROUGH;
		break;
	}
out:
	KERNEL_UNLOCK_ONE(NULL);
	return res;
}

/* Minimal fstat(2): report only the opener's effective uid/gid. */
static int
drvctl_stat(struct file *fp, struct stat *st)
{
	(void)memset(st, 0, sizeof(*st));
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	return 0;
}

/*
 * poll(2): readable whenever the event queue is non-empty.
 * NOTE(review): the queue head is inspected without taking
 * drvctl_lock here — an intentionally racy snapshot; confirm against
 * upstream before "fixing".
 */
static int
drvctl_poll(struct file *fp, int events)
{
	int revents = 0;

	if (!TAILQ_EMPTY(&drvctl_eventq))
		revents |= events & (POLLIN | POLLRDNORM);
	else
		selrecord(curlwp, &drvctl_rdsel);

	return revents;
}

/*
 * Last close drains the event queue and releases every queued
 * dictionary, so events never outlive all consumers.
 */
static int
drvctl_close(struct file *fp)
{
	struct drvctl_event *dce;

	/* XXX free context */
	mutex_enter(&drvctl_lock);
	KASSERT(drvctl_nopen > 0);
	--drvctl_nopen;
	if (drvctl_nopen == 0) {
		/* flush queue */
		while ((dce = TAILQ_FIRST(&drvctl_eventq)) != NULL) {
			TAILQ_REMOVE(&drvctl_eventq, dce, dce_link);
			KASSERT(drvctl_eventcnt > 0);
			--drvctl_eventcnt;
			prop_object_release(dce->dce_event);
			kmem_free(dce, sizeof(*dce));
		}
	}
	mutex_exit(&drvctl_lock);

	return 0;
}

/* autoconf attach hook: nothing to do, the module glue initializes us. */
void
drvctlattach(int arg __unused)
{
}

/*****************************************************************************
 * Driver control command processing engine
 *****************************************************************************/

/*
 * "get-properties" command: look up the device named by the
 * "device-name" string in the command's "drvctl-arguments" dictionary
 * and place its properties dictionary into results_dict under
 * "drvctl-result-data".  Returns EINVAL on a malformed command and
 * ESRCH when no device of that name exists.
 */
static int
drvctl_command_get_properties(struct lwp *l,
			      prop_dictionary_t command_dict,
			      prop_dictionary_t results_dict)
{
	prop_dictionary_t args_dict;
	prop_string_t devname_string;
	device_t dev;
	deviter_t di;
	
	args_dict = prop_dictionary_get(command_dict, "drvctl-arguments");
	if (args_dict == NULL)
		return EINVAL;

	devname_string = prop_dictionary_get(args_dict, "device-name");
	if (devname_string == NULL)
		return EINVAL;
	
	for (dev = deviter_first(&di, 0); dev != NULL;
	     dev = deviter_next(&di)) {
		if (prop_string_equals_string(devname_string,
					      device_xname(dev))) {
			prop_dictionary_set(results_dict, "drvctl-result-data",
			    device_properties(dev));
			break;
		}
	}
	
	deviter_release(&di);

	if (dev == NULL)
		return ESRCH;

	return 0;
}

/* One entry in the plist-command dispatch table below. */
struct drvctl_command_desc {
	const char *dcd_name;		/* command name */
	int (*dcd_func)(struct lwp *,	/* handler function */
			prop_dictionary_t, prop_dictionary_t);
	int dcd_rw;			/* read or write required */
};

static const struct drvctl_command_desc drvctl_command_table[] = {
	{ .dcd_name = "get-properties",
	  .dcd_func = drvctl_command_get_properties,
	  .dcd_rw = FREAD,
	},
	{ .dcd_name = NULL }		/* table terminator */
};

/*
 * DRVCTLCOMMAND: copy in the command plist, look up its
 * "drvctl-command" string in drvctl_command_table, verify the caller
 * opened the device with the access the command requires (dcd_rw vs.
 * fflag), run the handler, and copy the results plist (including the
 * handler's status as "drvctl-error") back out to userland.
 */
static int
drvctl_command(struct lwp *l, struct plistref *pref, u_long ioctl_cmd,
	       int fflag)
{
	prop_dictionary_t command_dict, results_dict;
	prop_string_t command_string;
	const struct drvctl_command_desc *dcd;
	int error;

	error = prop_dictionary_copyin_ioctl(pref, ioctl_cmd, &command_dict);
	if (error)
		return error;

	results_dict = prop_dictionary_create();
	if (results_dict == NULL) {
		prop_object_release(command_dict);
		return ENOMEM;
	}
	
	command_string = prop_dictionary_get(command_dict, "drvctl-command");
	if (command_string == NULL) {
		error = EINVAL;
		goto out;
	}

	for (dcd = drvctl_command_table; dcd->dcd_name != NULL; dcd++) {
		if (prop_string_equals_string(command_string, dcd->dcd_name))
			break;
	}

	if (dcd->dcd_name == NULL) {
		error = EINVAL;	/* unknown command */
		goto out;
	}

	if ((fflag & dcd->dcd_rw) == 0) {
		error = EPERM;	/* opened without the required access */
		goto out;
	}

	error = (*dcd->dcd_func)(l, command_dict, results_dict);

	/* Report the handler's status inside the results plist, too. */
	prop_dictionary_set_int32(results_dict, "drvctl-error", error);

	error = prop_dictionary_copyout_ioctl(pref, ioctl_cmd, results_dict);
 out:
	prop_object_release(command_dict);
	prop_object_release(results_dict);

	return error;
}

/*
 * DRVGETEVENT: dequeue the oldest device-monitor event and copy it out
 * as a plist.  Requires the device to be open for both read and write.
 * Blocks (interruptibly) until an event arrives unless the descriptor
 * is non-blocking, in which case it returns EWOULDBLOCK.
 */
static int
drvctl_getevent(struct lwp *l, struct plistref *pref, u_long ioctl_cmd,
		int fflag)
{
	struct drvctl_event *dce;
	int ret;

	if ((fflag & (FREAD|FWRITE)) != (FREAD|FWRITE))
		return EPERM;

	mutex_enter(&drvctl_lock);
	while ((dce = TAILQ_FIRST(&drvctl_eventq)) == NULL) {
		if (fflag & O_NONBLOCK) {
			mutex_exit(&drvctl_lock);
			return EWOULDBLOCK;
		}

		ret = cv_wait_sig(&drvctl_cond, &drvctl_lock);
		if (ret) {
			mutex_exit(&drvctl_lock);
			return ret;	/* interrupted by a signal */
		}
	}
	TAILQ_REMOVE(&drvctl_eventq, dce, dce_link);
	KASSERT(drvctl_eventcnt > 0);
	--drvctl_eventcnt;
	mutex_exit(&drvctl_lock);

	ret = prop_dictionary_copyout_ioctl(pref, ioctl_cmd, dce->dce_event);

	/* The event was consumed regardless of the copyout's fate. */
	prop_object_release(dce->dce_event);
	kmem_free(dce, sizeof(*dce));

	return ret;
}

/*
 * Module glue
 */

MODULE(MODULE_CLASS_DRIVER, drvctl, NULL);

/*
 * Module control: on INIT, set up state, (when built as a module)
 * attach the character device, and splice devmon_insert into the
 * devmon hook, saving the previous vector.  On FINI, refuse (EBUSY)
 * while there are openers or queued events, then restore the saved
 * hook and tear everything down.
 */
int
drvctl_modcmd(modcmd_t cmd, void *arg)
{
	int error;
#ifdef _MODULE
	int bmajor, cmajor;
#endif

	error = 0;
	switch (cmd) {
	case MODULE_CMD_INIT:
		drvctl_init();

		mutex_enter(&drvctl_lock);
#ifdef _MODULE
		bmajor = cmajor = -1;
		error = devsw_attach("drvctl", NULL, &bmajor,
		    &drvctl_cdevsw, &cmajor);
#endif
		if (error == 0) {
			KASSERT(saved_insert_vec == NULL);
			saved_insert_vec = devmon_insert_vec;
			devmon_insert_vec = devmon_insert;
		}
		mutex_exit(&drvctl_lock);
		break;

	case MODULE_CMD_FINI:
		mutex_enter(&drvctl_lock);
		if (drvctl_nopen != 0 || drvctl_eventcnt != 0) {
			mutex_exit(&drvctl_lock);
			return EBUSY;
		}
		KASSERT(saved_insert_vec != NULL);
		devmon_insert_vec = saved_insert_vec;
		saved_insert_vec = NULL;
#ifdef _MODULE
		devsw_detach(NULL, &drvctl_cdevsw);
#endif
		mutex_exit(&drvctl_lock);
		drvctl_fini();
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}
| 2 420 421 3 3 3 3 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 1 95 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 2 4 3 2 2 1 1 4 3 1 1 1 1 43 3 3 3 1 1 64 63 62 29 30 28 17 17 28 2 2 3 3 3 3 3 3 3 3 3 3 2 3 3 12 11 51 51 51 50 51 51 51 51 50 51 51 9 9 9 9 9 9 9 1 1 1 1 1 1 1 1 1 1 13 13 5 5 5 5 5 5 84 68 77 29 64 61 64 37 39 1 18 18 13 13 9 9 9 9 9 9 2 2 2 2 4 3 5 4 3 4 4 4 4 4 1 3 1 2 3 3 3 2 2 1 12 13 13 13 13 13 13 24 24 24 24 24 13 13 3 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 
899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 
2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 
2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 
2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 
3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 
3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 
4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 | /* $NetBSD: if.c,v 1.520 2022/08/21 12:34:39 skrll Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by William Studenmund and Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.520 2022/08/21 12:34:39 skrll Exp $"); #if defined(_KERNEL_OPT) #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_atalk.h" #include "opt_wlan.h" #include "opt_net_mpsafe.h" #include "opt_mrouting.h" #endif #include <sys/param.h> #include <sys/mbuf.h> #include <sys/systm.h> #include <sys/callout.h> #include <sys/proc.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/domain.h> #include <sys/protosw.h> #include <sys/kernel.h> #include <sys/ioctl.h> #include <sys/sysctl.h> #include <sys/syslog.h> #include <sys/kauth.h> #include <sys/kmem.h> #include <sys/xcall.h> #include <sys/cpu.h> #include <sys/intr.h> #include <sys/module_hook.h> #include <sys/compat_stub.h> #include <sys/msan.h> #include <sys/hook.h> #include <net/if.h> #include <net/if_dl.h> #include <net/if_ether.h> #include <net/if_media.h> #include <net80211/ieee80211.h> #include <net80211/ieee80211_ioctl.h> #include <net/if_types.h> #include <net/route.h> #include <net/netisr.h> #include <sys/module.h> #ifdef NETATALK #include <netatalk/at_extern.h> #include <netatalk/at.h> #endif #include <net/pfil.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip_encap.h> #include <net/bpf.h> #ifdef INET6 #include <netinet6/in6_var.h> #include <netinet6/nd6.h> #endif #include "ether.h" #include "bridge.h" #if NBRIDGE > 0 #include <net/if_bridgevar.h> #endif #include 
"carp.h" #if NCARP > 0 #include <netinet/ip_carp.h> #endif #include <compat/sys/sockio.h> MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); /* * XXX reusing (ifp)->if_snd->ifq_lock rather than having another spin mutex * for each ifnet. It doesn't matter because: * - if IFEF_MPSAFE is enabled, if_snd isn't used and lock contentions on * ifq_lock don't happen * - if IFEF_MPSAFE is disabled, there is no lock contention on ifq_lock * because if_snd, if_link_state_change and if_link_state_change_process * are all called with KERNEL_LOCK */ #define IF_LINK_STATE_CHANGE_LOCK(ifp) \ mutex_enter((ifp)->if_snd.ifq_lock) #define IF_LINK_STATE_CHANGE_UNLOCK(ifp) \ mutex_exit((ifp)->if_snd.ifq_lock) /* * Global list of interfaces. */ /* DEPRECATED. Remove it once kvm(3) users disappeared */ struct ifnet_head ifnet_list; struct pslist_head ifnet_pslist; static ifnet_t ** ifindex2ifnet = NULL; static u_int if_index = 1; static size_t if_indexlim = 0; static uint64_t index_gen; /* Mutex to protect the above objects. */ kmutex_t ifnet_mtx __cacheline_aligned; static struct psref_class *ifnet_psref_class __read_mostly; static pserialize_t ifnet_psz; static struct workqueue *ifnet_link_state_wq __read_mostly; static struct workqueue *if_slowtimo_wq __read_mostly; static kmutex_t if_clone_mtx; struct ifnet *lo0ifp; int ifqmaxlen = IFQ_MAXLEN; struct psref_class *ifa_psref_class __read_mostly; static int if_delroute_matcher(struct rtentry *, void *); static bool if_is_unit(const char *); static struct if_clone *if_clone_lookup(const char *, int *); static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); static int if_cloners_count; /* Packet filtering hook for interfaces. 
*/ pfil_head_t * if_pfil __read_mostly; static kauth_listener_t if_listener; static int doifioctl(struct socket *, u_long, void *, struct lwp *); static void if_detach_queues(struct ifnet *, struct ifqueue *); static void sysctl_sndq_setup(struct sysctllog **, const char *, struct ifaltq *); static void if_slowtimo_intr(void *); static void if_slowtimo_work(struct work *, void *); static int sysctl_if_watchdog(SYSCTLFN_PROTO); static void sysctl_watchdog_setup(struct ifnet *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, void *); static int if_transmit(struct ifnet *, struct mbuf *); static int if_clone_create(const char *); static int if_clone_destroy(const char *); static void if_link_state_change_work(struct work *, void *); static void if_up_locked(struct ifnet *); static void _if_down(struct ifnet *); static void if_down_deactivated(struct ifnet *); struct if_percpuq { struct ifnet *ipq_ifp; void *ipq_si; struct percpu *ipq_ifqs; /* struct ifqueue */ }; static struct mbuf *if_percpuq_dequeue(struct if_percpuq *); static void if_percpuq_drops(void *, void *, struct cpu_info *); static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO); static void sysctl_percpuq_setup(struct sysctllog **, const char *, struct if_percpuq *); struct if_deferred_start { struct ifnet *ids_ifp; void (*ids_if_start)(struct ifnet *); void *ids_si; }; static void if_deferred_start_softint(void *); static void if_deferred_start_common(struct ifnet *); static void if_deferred_start_destroy(struct ifnet *); struct if_slowtimo_data { kmutex_t isd_lock; struct callout isd_ch; struct work isd_work; struct ifnet *isd_ifp; bool isd_queued; bool isd_dying; bool isd_trigger; }; #if defined(INET) || defined(INET6) static void sysctl_net_pktq_setup(struct sysctllog **, int); #endif /* * Hook for if_vlan - needed by if_agr */ struct if_vlan_vlan_input_hook_t if_vlan_vlan_input_hook; static void if_sysctl_setup(struct sysctllog **); static int if_listener_cb(kauth_cred_t 
cred, kauth_action_t action, void *cookie, void *arg0, void *arg1, void *arg2, void *arg3) { int result; enum kauth_network_req req; result = KAUTH_RESULT_DEFER; req = (enum kauth_network_req)(uintptr_t)arg1; if (action != KAUTH_NETWORK_INTERFACE) return result; if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) || (req == KAUTH_REQ_NETWORK_INTERFACE_SET)) result = KAUTH_RESULT_ALLOW; return result; } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ void ifinit(void) { #if (defined(INET) || defined(INET6)) encapinit(); #endif if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, if_listener_cb, NULL); /* interfaces are available, inform socket code */ ifioctl = doifioctl; } /* * XXX Initialization before configure(). * XXX hack to get pfil_add_hook working in autoconf. */ void ifinit1(void) { int error __diagused; #ifdef NET_MPSAFE printf("NET_MPSAFE enabled\n"); #endif mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&ifnet_list); mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE); ifnet_psz = pserialize_create(); ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET); ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET); error = workqueue_create(&ifnet_link_state_wq, "iflnkst", if_link_state_change_work, NULL, PRI_SOFTNET, IPL_NET, WQ_MPSAFE); KASSERT(error == 0); PSLIST_INIT(&ifnet_pslist); error = workqueue_create(&if_slowtimo_wq, "ifwdog", if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, WQ_MPSAFE); KASSERTMSG(error == 0, "error=%d", error); if_indexlim = 8; if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL); KASSERT(if_pfil != NULL); #if NETHER > 0 || defined(NETATALK) || defined(WLAN) etherinit(); #endif } /* XXX must be after domaininit() */ void ifinit_post(void) { if_sysctl_setup(NULL); } ifnet_t * if_alloc(u_char type) { return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP); } void if_free(ifnet_t *ifp) { kmem_free(ifp, sizeof(ifnet_t)); } void if_initname(struct ifnet *ifp, const 
char *name, int unit) { (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", name, unit); } /* * Null routines used while an interface is going away. These routines * just return an error. */ int if_nulloutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *so, const struct rtentry *rt) { return ENXIO; } void if_nullinput(struct ifnet *ifp, struct mbuf *m) { /* Nothing. */ } void if_nullstart(struct ifnet *ifp) { /* Nothing. */ } int if_nulltransmit(struct ifnet *ifp, struct mbuf *m) { m_freem(m); return ENXIO; } int if_nullioctl(struct ifnet *ifp, u_long cmd, void *data) { return ENXIO; } int if_nullinit(struct ifnet *ifp) { return ENXIO; } void if_nullstop(struct ifnet *ifp, int disable) { /* Nothing. */ } void if_nullslowtimo(struct ifnet *ifp) { /* Nothing. */ } void if_nulldrain(struct ifnet *ifp) { /* Nothing. */ } void if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory) { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = addrlen; if_alloc_sadl(ifp); ifa = ifp->if_dl; sdl = satosdl(ifa->ifa_addr); (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen); if (factory) { KASSERT(ifp->if_hwdl == NULL); ifp->if_hwdl = ifp->if_dl; ifaref(ifp->if_hwdl); } /* TBD routing socket */ } struct ifaddr * if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp) { unsigned socksize, ifasize; int addrlen, namelen; struct sockaddr_dl *mask, *sdl; struct ifaddr *ifa; namelen = strlen(ifp->if_xname); addrlen = ifp->if_addrlen; socksize = roundup(sockaddr_dl_measure(namelen, addrlen), sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO); sdl = (struct sockaddr_dl *)(ifa + 1); mask = (struct sockaddr_dl *)(socksize + (char *)sdl); sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type, ifp->if_xname, namelen, NULL, addrlen); mask->sdl_family = AF_LINK; mask->sdl_len = sockaddr_dl_measure(namelen, 0); memset(&mask->sdl_data[0], 0xff, namelen); 
ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; ifa->ifa_netmask = (struct sockaddr *)mask; ifa_psref_init(ifa); *sdlp = sdl; return ifa; } static void if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa) { const struct sockaddr_dl *sdl; ifp->if_dl = ifa; ifaref(ifa); sdl = satosdl(ifa->ifa_addr); ifp->if_sadl = sdl; } /* * Allocate the link level name for the specified interface. This * is an attachment helper. It must be called after ifp->if_addrlen * is initialized, which may not be the case when if_attach() is * called. */ void if_alloc_sadl(struct ifnet *ifp) { struct ifaddr *ifa; const struct sockaddr_dl *sdl; /* * If the interface already has a link name, release it * now. This is useful for interfaces that can change * link types, and thus switch link names often. */ if (ifp->if_sadl != NULL) if_free_sadl(ifp, 0); ifa = if_dl_create(ifp, &sdl); ifa_insert(ifp, ifa); if_sadl_setrefs(ifp, ifa); } static void if_deactivate_sadl(struct ifnet *ifp) { struct ifaddr *ifa; KASSERT(ifp->if_dl != NULL); ifa = ifp->if_dl; ifp->if_sadl = NULL; ifp->if_dl = NULL; ifafree(ifa); } static void if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa) { struct ifaddr *old; KASSERT(ifp->if_dl != NULL); old = ifp->if_dl; ifaref(ifa); /* XXX Update if_dl and if_sadl atomically */ ifp->if_dl = ifa; ifp->if_sadl = satosdl(ifa->ifa_addr); ifafree(old); } void if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0, const struct sockaddr_dl *sdl) { struct ifaddr *ifa; const int bound = curlwp_bind(); KASSERT(ifa_held(ifa0)); const int s = splsoftnet(); if_replace_sadl(ifp, ifa0); int ss = pserialize_read_enter(); IFADDR_READER_FOREACH(ifa, ifp) { struct psref psref; ifa_acquire(ifa, &psref); pserialize_read_exit(ss); rtinit(ifa, RTM_LLINFO_UPD, 0); ss = pserialize_read_enter(); ifa_release(ifa, &psref); } pserialize_read_exit(ss); splx(s); curlwp_bindx(bound); } /* * Free the link level name for the specified interface. This is * a detach helper. 
This is called from if_detach(). */ void if_free_sadl(struct ifnet *ifp, int factory) { struct ifaddr *ifa; if (factory && ifp->if_hwdl != NULL) { ifa = ifp->if_hwdl; ifp->if_hwdl = NULL; ifafree(ifa); } ifa = ifp->if_dl; if (ifa == NULL) { KASSERT(ifp->if_sadl == NULL); return; } KASSERT(ifp->if_sadl != NULL); const int s = splsoftnet(); KASSERT(ifa->ifa_addr->sa_family == AF_LINK); ifa_remove(ifp, ifa); if_deactivate_sadl(ifp); splx(s); } static void if_getindex(ifnet_t *ifp) { bool hitlimit = false; char xnamebuf[HOOKNAMSIZ]; ifp->if_index_gen = index_gen++; snprintf(xnamebuf, sizeof(xnamebuf), "%s-lshk", ifp->if_xname); ifp->if_linkstate_hooks = simplehook_create(IPL_NET, xnamebuf); ifp->if_index = if_index; if (ifindex2ifnet == NULL) { if_index++; goto skip; } while (if_byindex(ifp->if_index)) { /* * If we hit USHRT_MAX, we skip back to 0 since * there are a number of places where the value * of if_index or if_index itself is compared * to or stored in an unsigned short. By * jumping back, we won't botch those assignments * or comparisons. */ if (++if_index == 0) { if_index = 1; } else if (if_index == USHRT_MAX) { /* * However, if we have to jump back to * zero *twice* without finding an empty * slot in ifindex2ifnet[], then there * there are too many (>65535) interfaces. */ if (hitlimit) { panic("too many interfaces"); } hitlimit = true; if_index = 1; } ifp->if_index = if_index; } skip: /* * ifindex2ifnet is indexed by if_index. Since if_index will * grow dynamically, it should grow too. 
*/ if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { size_t m, n, oldlim; void *q; oldlim = if_indexlim; while (ifp->if_index >= if_indexlim) if_indexlim <<= 1; /* grow ifindex2ifnet */ m = oldlim * sizeof(struct ifnet *); n = if_indexlim * sizeof(struct ifnet *); q = malloc(n, M_IFADDR, M_WAITOK | M_ZERO); if (ifindex2ifnet != NULL) { memcpy(q, ifindex2ifnet, m); free(ifindex2ifnet, M_IFADDR); } ifindex2ifnet = (struct ifnet **)q; } ifindex2ifnet[ifp->if_index] = ifp; } /* * Initialize an interface and assign an index for it. * * It must be called prior to a device specific attach routine * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl, * and be followed by if_register: * * if_initialize(ifp); * ether_ifattach(ifp, enaddr); * if_register(ifp); */ void if_initialize(ifnet_t *ifp) { KASSERT(if_indexlim > 0); TAILQ_INIT(&ifp->if_addrlist); /* * Link level name is allocated later by a separate call to * if_alloc_sadl(). */ if (ifp->if_snd.ifq_maxlen == 0) ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */ ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_link_queue = -1; /* all bits set, see link_state_change() */ ifp->if_link_scheduled = false; ifp->if_capenable = 0; ifp->if_csum_flags_tx = 0; ifp->if_csum_flags_rx = 0; #ifdef ALTQ ifp->if_snd.altq_type = 0; ifp->if_snd.altq_disc = NULL; ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE; ifp->if_snd.altq_tbr = NULL; ifp->if_snd.altq_ifp = ifp; #endif IFQ_LOCK_INIT(&ifp->if_snd); ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp); pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp); IF_AFDATA_LOCK_INIT(ifp); PSLIST_ENTRY_INIT(ifp, if_pslist_entry); PSLIST_INIT(&ifp->if_addr_pslist); psref_target_init(&ifp->if_psref, ifnet_psref_class); ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); LIST_INIT(&ifp->if_multiaddrs); if_stats_init(ifp); IFNET_GLOBAL_LOCK(); if_getindex(ifp); IFNET_GLOBAL_UNLOCK(); } /* * Register an interface to 
the list of "active" interfaces. */ void if_register(ifnet_t *ifp) { /* * If the driver has not supplied its own if_ioctl or if_stop, * then supply the default. */ if (ifp->if_ioctl == NULL) ifp->if_ioctl = ifioctl_common; if (ifp->if_stop == NULL) ifp->if_stop = if_nullstop; sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd); if (!STAILQ_EMPTY(&domains)) if_attachdomain1(ifp); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); if (ifp->if_slowtimo != NULL) { struct if_slowtimo_data *isd; isd = kmem_zalloc(sizeof(*isd), KM_SLEEP); mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK); callout_init(&isd->isd_ch, CALLOUT_MPSAFE); callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp); isd->isd_ifp = ifp; ifp->if_slowtimo_data = isd; if_slowtimo_intr(ifp); sysctl_watchdog_setup(ifp); } if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit) ifp->if_transmit = if_transmit; IFNET_GLOBAL_LOCK(); TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list); IFNET_WRITER_INSERT_TAIL(ifp); IFNET_GLOBAL_UNLOCK(); } /* * The if_percpuq framework * * It allows network device drivers to execute the network stack * in softint (so called softint-based if_input). It utilizes * softint and percpu ifqueue. It doesn't distribute any packets * between CPUs, unlike pktqueue(9). 
* * Currently we support two options for device drivers to apply the framework: * - Use it implicitly with less changes * - If you use if_attach in driver's _attach function and if_input in * driver's Rx interrupt handler, a packet is queued and a softint handles * the packet implicitly * - Use it explicitly in each driver (recommended) * - You can use if_percpuq_* directly in your driver * - In this case, you need to allocate struct if_percpuq in driver's softc * - See wm(4) as a reference implementation */ static void if_percpuq_softint(void *arg) { struct if_percpuq *ipq = arg; struct ifnet *ifp = ipq->ipq_ifp; struct mbuf *m; while ((m = if_percpuq_dequeue(ipq)) != NULL) { if_statinc(ifp, if_ipackets); bpf_mtap(ifp, m, BPF_D_IN); ifp->_if_input(ifp, m); } } static void if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) { struct ifqueue *const ifq = p; memset(ifq, 0, sizeof(*ifq)); ifq->ifq_maxlen = IFQ_MAXLEN; } struct if_percpuq * if_percpuq_create(struct ifnet *ifp) { struct if_percpuq *ipq; u_int flags = SOFTINT_NET; flags |= if_is_mpsafe(ifp) ? 
SOFTINT_MPSAFE : 0; ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP); ipq->ipq_ifp = ifp; ipq->ipq_si = softint_establish(flags, if_percpuq_softint, ipq); ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue)); percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL); sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq); return ipq; } static struct mbuf * if_percpuq_dequeue(struct if_percpuq *ipq) { struct mbuf *m; struct ifqueue *ifq; const int s = splnet(); ifq = percpu_getref(ipq->ipq_ifqs); IF_DEQUEUE(ifq, m); percpu_putref(ipq->ipq_ifqs); splx(s); return m; } static void if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) { struct ifqueue *const ifq = p; IF_PURGE(ifq); } void if_percpuq_destroy(struct if_percpuq *ipq) { /* if_detach may already destroy it */ if (ipq == NULL) return; softint_disestablish(ipq->ipq_si); percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL); percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue)); kmem_free(ipq, sizeof(*ipq)); } void if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m) { struct ifqueue *ifq; KASSERT(ipq != NULL); const int s = splnet(); ifq = percpu_getref(ipq->ipq_ifqs); if (IF_QFULL(ifq)) { IF_DROP(ifq); percpu_putref(ipq->ipq_ifqs); m_freem(m); goto out; } IF_ENQUEUE(ifq, m); percpu_putref(ipq->ipq_ifqs); softint_schedule(ipq->ipq_si); out: splx(s); } static void if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused) { struct ifqueue *const ifq = p; int *sum = arg; *sum += ifq->ifq_drops; } static int sysctl_percpuq_drops_handler(SYSCTLFN_ARGS) { struct sysctlnode node; struct if_percpuq *ipq; int sum = 0; int error; node = *rnode; ipq = node.sysctl_data; percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum); node.sysctl_data = ∑ error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error != 0 || newp == NULL) return error; return 0; } static void sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname, struct if_percpuq *ipq) { const struct sysctlnode *cnode, 
*rnode; if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces", SYSCTL_DESCR("Per-interface controls"), NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, ifname, SYSCTL_DESCR("Interface controls"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "rcvq", SYSCTL_DESCR("Interface input queue controls"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; #ifdef NOTYET /* XXX Should show each per-CPU queue length? */ if (sysctl_createv(clog, 0, &rnode, &rnode, CTLFLAG_PERMANENT, CTLTYPE_INT, "len", SYSCTL_DESCR("Current input queue length"), sysctl_percpuq_len, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "maxlen", SYSCTL_DESCR("Maximum allowed input queue length"), sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; #endif if (sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT, CTLTYPE_INT, "drops", SYSCTL_DESCR("Total packets dropped due to full input queue"), sysctl_percpuq_drops_handler, 0, (void *)ipq, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; return; bad: printf("%s: could not attach sysctl nodes\n", ifname); return; } /* * The deferred if_start framework * * The common APIs to defer if_start to softint when if_start is requested * from a device driver running in hardware interrupt context. */ /* * Call ifp->if_start (or equivalent) in a dedicated softint for * deferred if_start. */ static void if_deferred_start_softint(void *arg) { struct if_deferred_start *ids = arg; struct ifnet *ifp = ids->ids_ifp; ids->ids_if_start(ifp); } /* * The default callback function for deferred if_start. 
*/ static void if_deferred_start_common(struct ifnet *ifp) { const int s = splnet(); if_start_lock(ifp); splx(s); } static inline bool if_snd_is_used(struct ifnet *ifp) { return ALTQ_IS_ENABLED(&ifp->if_snd) || ifp->if_transmit == if_transmit || ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit; } /* * Schedule deferred if_start. */ void if_schedule_deferred_start(struct ifnet *ifp) { KASSERT(ifp->if_deferred_start != NULL); if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd)) return; softint_schedule(ifp->if_deferred_start->ids_si); } /* * Create an instance of deferred if_start. A driver should call the function * only if the driver needs deferred if_start. Drivers can setup their own * deferred if_start function via 2nd argument. */ void if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *)) { struct if_deferred_start *ids; u_int flags = SOFTINT_NET; flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0; ids = kmem_zalloc(sizeof(*ids), KM_SLEEP); ids->ids_ifp = ifp; ids->ids_si = softint_establish(flags, if_deferred_start_softint, ids); if (func != NULL) ids->ids_if_start = func; else ids->ids_if_start = if_deferred_start_common; ifp->if_deferred_start = ids; } static void if_deferred_start_destroy(struct ifnet *ifp) { if (ifp->if_deferred_start == NULL) return; softint_disestablish(ifp->if_deferred_start->ids_si); kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start)); ifp->if_deferred_start = NULL; } /* * The common interface input routine that is called by device drivers, * which should be used only when the driver's rx handler already runs * in softint. */ void if_input(struct ifnet *ifp, struct mbuf *m) { KASSERT(ifp->if_percpuq == NULL); KASSERT(!cpu_intr_p()); if_statinc(ifp, if_ipackets); bpf_mtap(ifp, m, BPF_D_IN); ifp->_if_input(ifp, m); } /* * DEPRECATED. Use if_initialize and if_register instead. * See the above comment of if_initialize. 
* * Note that it implicitly enables if_percpuq to make drivers easy to * migrate softint-based if_input without much changes. If you don't * want to enable it, use if_initialize instead. */ void if_attach(ifnet_t *ifp) { if_initialize(ifp); ifp->if_percpuq = if_percpuq_create(ifp); if_register(ifp); } void if_attachdomain(void) { struct ifnet *ifp; const int bound = curlwp_bind(); int s = pserialize_read_enter(); IFNET_READER_FOREACH(ifp) { struct psref psref; psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); pserialize_read_exit(s); if_attachdomain1(ifp); s = pserialize_read_enter(); psref_release(&psref, &ifp->if_psref, ifnet_psref_class); } pserialize_read_exit(s); curlwp_bindx(bound); } static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; const int s = splsoftnet(); /* address family dependent data region */ memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata)); DOMAIN_FOREACH(dp) { if (dp->dom_ifattach != NULL) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } splx(s); } /* * Deactivate an interface. This points all of the procedure * handles at error stubs. May be called from interrupt context. */ void if_deactivate(struct ifnet *ifp) { const int s = splsoftnet(); ifp->if_output = if_nulloutput; ifp->_if_input = if_nullinput; ifp->if_start = if_nullstart; ifp->if_transmit = if_nulltransmit; ifp->if_ioctl = if_nullioctl; ifp->if_init = if_nullinit; ifp->if_stop = if_nullstop; if (ifp->if_slowtimo) ifp->if_slowtimo = if_nullslowtimo; ifp->if_drain = if_nulldrain; /* No more packets may be enqueued. 
*/ ifp->if_snd.ifq_maxlen = 0; splx(s); } bool if_is_deactivated(const struct ifnet *ifp) { return ifp->if_output == if_nulloutput; } void if_purgeaddrs(struct ifnet *ifp, int family, void (*purgeaddr)(struct ifaddr *)) { struct ifaddr *ifa, *nifa; int s; s = pserialize_read_enter(); for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) { nifa = IFADDR_READER_NEXT(ifa); if (ifa->ifa_addr->sa_family != family) continue; pserialize_read_exit(s); (*purgeaddr)(ifa); s = pserialize_read_enter(); } pserialize_read_exit(s); } #ifdef IFAREF_DEBUG static struct ifaddr **ifa_list; static int ifa_list_size; /* Depends on only one if_attach runs at once */ static void if_build_ifa_list(struct ifnet *ifp) { struct ifaddr *ifa; int i; KASSERT(ifa_list == NULL); KASSERT(ifa_list_size == 0); IFADDR_READER_FOREACH(ifa, ifp) ifa_list_size++; ifa_list = kmem_alloc(sizeof(*ifa) * ifa_list_size, KM_SLEEP); i = 0; IFADDR_READER_FOREACH(ifa, ifp) { ifa_list[i++] = ifa; ifaref(ifa); } } static void if_check_and_free_ifa_list(struct ifnet *ifp) { int i; struct ifaddr *ifa; if (ifa_list == NULL) return; for (i = 0; i < ifa_list_size; i++) { char buf[64]; ifa = ifa_list[i]; sockaddr_format(ifa->ifa_addr, buf, sizeof(buf)); if (ifa->ifa_refcnt > 1) { log(LOG_WARNING, "ifa(%s) still referenced (refcnt=%d)\n", buf, ifa->ifa_refcnt - 1); } else log(LOG_DEBUG, "ifa(%s) not referenced (refcnt=%d)\n", buf, ifa->ifa_refcnt - 1); ifafree(ifa); } kmem_free(ifa_list, sizeof(*ifa) * ifa_list_size); ifa_list = NULL; ifa_list_size = 0; } #endif /* * Detach an interface from the list of "active" interfaces, * freeing any resources as we go along. * * NOTE: This routine must be called with a valid thread context, * as it may block. 
 */
void
if_detach(struct ifnet *ifp)
{
	struct socket so;
	struct ifaddr *ifa;
#ifdef IFAREF_DEBUG
	struct ifaddr *last_ifa = NULL;
#endif
	struct domain *dp;
	const struct protosw *pr;
	int i, family, purged;

#ifdef IFAREF_DEBUG
	if_build_ifa_list(ifp);
#endif
	/*
	 * XXX It's kind of lame that we have to have the
	 * XXX socket structure...
	 */
	memset(&so, 0, sizeof(so));

	const int s = splnet();

	sysctl_teardown(&ifp->if_sysctl_log);

	IFNET_LOCK(ifp);

	/*
	 * Unset all queued link states and pretend a
	 * link state change is scheduled.
	 * This stops any more link state changes occurring for this
	 * interface while it's being detached so it's safe
	 * to drain the workqueue.
	 */
	IF_LINK_STATE_CHANGE_LOCK(ifp);
	ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
	ifp->if_link_scheduled = true;
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
	workqueue_wait(ifnet_link_state_wq, &ifp->if_link_work);

	if_deactivate(ifp);
	IFNET_UNLOCK(ifp);

	/*
	 * Unlink from the list and wait for all readers to leave
	 * from pserialize read sections.  Note that we can't do
	 * psref_target_destroy here.  See below.
	 */
	IFNET_GLOBAL_LOCK();
	ifindex2ifnet[ifp->if_index] = NULL;
	TAILQ_REMOVE(&ifnet_list, ifp, if_list);
	IFNET_WRITER_REMOVE(ifp);
	pserialize_perform(ifnet_psz);
	IFNET_GLOBAL_UNLOCK();

	/* Tear down the slowtimo (watchdog) machinery, if the driver had one. */
	if (ifp->if_slowtimo != NULL) {
		struct if_slowtimo_data *isd = ifp->if_slowtimo_data;

		mutex_enter(&isd->isd_lock);
		isd->isd_dying = true;
		mutex_exit(&isd->isd_lock);
		/* Drain both the callout and any queued watchdog work. */
		callout_halt(&isd->isd_ch, NULL);
		workqueue_wait(if_slowtimo_wq, &isd->isd_work);
		callout_destroy(&isd->isd_ch);
		mutex_destroy(&isd->isd_lock);
		kmem_free(isd, sizeof(*isd));
		ifp->if_slowtimo_data = NULL; /* paranoia */
		ifp->if_slowtimo = NULL;	/* paranoia */
	}
	if_deferred_start_destroy(ifp);

	/*
	 * Do an if_down() to give protocols a chance to do something.
	 */
	if_down_deactivated(ifp);

#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

#if NCARP > 0
	/* Remove the interface from any carp group it is a part of.  */
	if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
		carp_ifdetach(ifp);
#endif

	/*
	 * Rip all the addresses off the interface.  This should make
	 * all of the routes go away.
	 *
	 * pr_usrreq calls can remove an arbitrary number of ifaddrs
	 * from the list, including our "cursor", ifa.  For safety,
	 * and to honor the TAILQ abstraction, I just restart the
	 * loop after each removal.  Note that the loop will exit
	 * when all of the remaining ifaddrs belong to the AF_LINK
	 * family.  I am counting on the historical fact that at
	 * least one pr_usrreq in each address domain removes at
	 * least one ifaddr.
	 */
again:
	/*
	 * At this point, no other one tries to remove ifa in the list,
	 * so we don't need to take a lock or psref.  Avoid using
	 * IFADDR_READER_FOREACH to pass over an inspection of contract
	 * violations of pserialize.
	 */
	IFADDR_WRITER_FOREACH(ifa, ifp) {
		family = ifa->ifa_addr->sa_family;
#ifdef IFAREF_DEBUG
		printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
		    ifa, family, ifa->ifa_refcnt);
		if (last_ifa != NULL && ifa == last_ifa)
			panic("if_detach: loop detected");
		last_ifa = ifa;
#endif
		if (family == AF_LINK)
			continue;
		dp = pffinddomain(family);
		KASSERTMSG(dp != NULL, "no domain for AF %d", family);
		/*
		 * XXX These PURGEIF calls are redundant with the
		 * purge-all-families calls below, but are left in for
		 * now both to make a smaller change, and to avoid
		 * unplanned interactions with clearing of
		 * ifp->if_addrlist.
		 */
		purged = 0;
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++) {
			so.so_proto = pr;
			if (pr->pr_usrreqs) {
				(void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
				purged = 1;
			}
		}
		if (purged == 0) {
			/*
			 * XXX What's really the best thing to do
			 * XXX here?  --thorpej@NetBSD.org
			 */
			printf("if_detach: WARNING: AF %d not purged\n",
			    family);
			/* No protocol removed it; unlink it ourselves. */
			ifa_remove(ifp, ifa);
		}
		goto again;
	}

	if_free_sadl(ifp, 1);

	/* Only AF_LINK ifaddrs can remain at this point; remove them too. */
restart:
	IFADDR_WRITER_FOREACH(ifa, ifp) {
		family = ifa->ifa_addr->sa_family;
		KASSERT(family == AF_LINK);
		ifa_remove(ifp, ifa);
		goto restart;
	}

	/* Delete stray routes from the routing table. */
	for (i = 0; i <= AF_MAX; i++)
		rt_delete_matched_entries(i, if_delroute_matcher, ifp);

	DOMAIN_FOREACH(dp) {
		if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
		{
			void *p = ifp->if_afdata[dp->dom_family];
			/*
			 * NOTE(review): this inner check is redundant with
			 * the guard just above; kept as-is.
			 */
			if (p) {
				ifp->if_afdata[dp->dom_family] = NULL;
				(*dp->dom_ifdetach)(ifp, p);
			}
		}

		/*
		 * One would expect multicast memberships (INET and
		 * INET6) on UDP sockets to be purged by the PURGEIF
		 * calls above, but if all addresses were removed from
		 * the interface prior to destruction, the calls will
		 * not be made (e.g. ppp, for which pppd(8) generally
		 * removes addresses before destroying the interface).
		 * Because there is no invariant that multicast
		 * memberships only exist for interfaces with IPv4
		 * addresses, we must call PURGEIF regardless of
		 * addresses.  (Protocols which might store ifnet
		 * pointers are marked with PR_PURGEIF.)
		 */
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++) {
			so.so_proto = pr;
			if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF)
				(void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
		}
	}

	/*
	 * Must be done after the above pr_purgeif because if_psref may be
	 * still used in pr_purgeif.
	 */
	psref_target_destroy(&ifp->if_psref, ifnet_psref_class);
	PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry);

	pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
	(void)pfil_head_destroy(ifp->if_pfil);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

	IF_AFDATA_LOCK_DESTROY(ifp);

	/*
	 * remove packets that came from ifp, from software interrupt queues.
	 */
	DOMAIN_FOREACH(dp) {
		for (i = 0; i < __arraycount(dp->dom_ifqueues); i++) {
			struct ifqueue *iq = dp->dom_ifqueues[i];
			if (iq == NULL)
				break;
			dp->dom_ifqueues[i] = NULL;
			if_detach_queues(ifp, iq);
		}
	}

	/*
	 * IP queues have to be processed separately: net-queue barrier
	 * ensures that the packets are dequeued while a cross-call will
	 * ensure that the interrupts have completed. FIXME: not quite..
	 */
#ifdef INET
	pktq_barrier(ip_pktq);
#endif
#ifdef INET6
	if (in6_present)
		pktq_barrier(ip6_pktq);
#endif
	xc_barrier(0);

	if (ifp->if_percpuq != NULL) {
		if_percpuq_destroy(ifp->if_percpuq);
		ifp->if_percpuq = NULL;
	}

	mutex_obj_free(ifp->if_ioctl_lock);
	ifp->if_ioctl_lock = NULL;
	mutex_obj_free(ifp->if_snd.ifq_lock);
	if_stats_fini(ifp);
	KASSERT(!simplehook_has_hooks(ifp->if_linkstate_hooks));
	simplehook_destroy(ifp->if_linkstate_hooks);

	splx(s);

#ifdef IFAREF_DEBUG
	if_check_and_free_ifa_list(ifp);
#endif
}

/*
 * Unlink and free every mbuf (packet) on q whose recorded receive
 * interface is ifp, fixing up head/tail/len of the singly linked
 * ifqueue as we go.  Each dropped packet is counted via IF_DROP.
 */
static void
if_detach_queues(struct ifnet *ifp, struct ifqueue *q)
{
	struct mbuf *m, *prev, *next;

	prev = NULL;
	for (m = q->ifq_head; m != NULL; m = next) {
		KASSERT((m->m_flags & M_PKTHDR) != 0);

		next = m->m_nextpkt;
		if (m->m_pkthdr.rcvif_index != ifp->if_index) {
			prev = m;
			continue;
		}

		/* Unlink m; prev stays put since m is going away. */
		if (prev != NULL)
			prev->m_nextpkt = m->m_nextpkt;
		else
			q->ifq_head = m->m_nextpkt;
		if (q->ifq_tail == m)
			q->ifq_tail = prev;
		q->ifq_len--;

		m->m_nextpkt = NULL;
		m_freem(m);
		IF_DROP(q);
	}
}

/*
 * Callback for a radix tree walk to delete all references to an
 * ifnet.
 */
static int
if_delroute_matcher(struct rtentry *rt, void *v)
{
	struct ifnet *ifp = (struct ifnet *)v;

	if (rt->rt_ifp == ifp)
		return 1;
	else
		return 0;
}

/*
 * Create a clone network interface.
 */
static int
if_clone_create(const char *name)
{
	struct if_clone *ifc;
	int unit;
	struct ifnet *ifp;
	struct psref psref;

	KASSERT(mutex_owned(&if_clone_mtx));

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return EINVAL;

	/* Refuse to create a clone whose name is already in use. */
	ifp = if_get(name, &psref);
	if (ifp != NULL) {
		if_put(ifp, &psref);
		return EEXIST;
	}

	return (*ifc->ifc_create)(ifc, unit);
}

/*
 * Destroy a clone network interface.
 */
static int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	struct psref psref;
	int error;
	int (*if_ioctlfn)(struct ifnet *, u_long, void *);

	KASSERT(mutex_owned(&if_clone_mtx));

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return EINVAL;

	if (ifc->ifc_destroy == NULL)
		return EOPNOTSUPP;

	ifp = if_get(name, &psref);
	if (ifp == NULL)
		return ENXIO;

	/* We have to disable ioctls here */
	IFNET_LOCK(ifp);
	if_ioctlfn = ifp->if_ioctl;
	ifp->if_ioctl = if_nullioctl;
	IFNET_UNLOCK(ifp);

	/*
	 * We cannot call ifc_destroy with holding ifp.
	 * Releasing ifp here is safe thanks to if_clone_mtx.
	 */
	if_put(ifp, &psref);

	error = (*ifc->ifc_destroy)(ifp);

	if (error != 0) {
		/* We have to restore if_ioctl on error */
		IFNET_LOCK(ifp);
		ifp->if_ioctl = if_ioctlfn;
		IFNET_UNLOCK(ifp);
	}

	return error;
}

/*
 * True iff name consists solely of decimal digits, i.e. looks like a
 * bare unit number (vacuously true for an empty string).
 */
static bool
if_is_unit(const char *name)
{

	while (*name != '\0') {
		if (*name < '0' || *name > '9')
			return false;
		name++;
	}

	return true;
}

/*
 * Look up a network interface cloner.
 */
static struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	char *dp, ifname[IFNAMSIZ + 3];
	int unit;

	KASSERT(mutex_owned(&if_clone_mtx));

	/* "if_" prefix turns the driver name into a module name below. */
	strcpy(ifname, "if_");
	/* separate interface name from unit */
	/* TODO: search unit number from backward */
	for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ && *cp &&
	    !if_is_unit(cp);)
		*dp++ = *cp++;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return NULL;	/* No name or unit number */
	*dp++ = '\0';

again:
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strcmp(ifname + 3, ifc->ifc_name) == 0)
			break;
	}

	if (ifc == NULL) {
		int error;
		/* ifname[0] == '\0' marks "already tried autoload once". */
		if (*ifname == '\0')
			return NULL;
		mutex_exit(&if_clone_mtx);
		error = module_autoload(ifname, MODULE_CLASS_DRIVER);
		mutex_enter(&if_clone_mtx);
		if (error)
			return NULL;
		*ifname = '\0';
		goto again;
	}

	/* Parse the unit number, rejecting overflow and non-digits. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit >= INT_MAX / 10) {
			/* Bogus unit number. */
			return NULL;
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return ifc;
}

/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{

	mutex_enter(&if_clone_mtx);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
	mutex_exit(&if_clone_mtx);
}

/*
 * Unregister a network interface cloner.
 */
void
if_clone_detach(struct if_clone *ifc)
{

	mutex_enter(&if_clone_mtx);
	LIST_REMOVE(ifc, ifc_list);
	if_cloners_count--;
	mutex_exit(&if_clone_mtx);
}

/*
 * Provide list of interface cloners to userspace.
 */
int
if_clone_list(int buf_count, char *buffer, int *total)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	mutex_enter(&if_clone_mtx);
	*total = if_cloners_count;
	if ((dst = buffer) == NULL) {
		/* Just asking how many there are. */
		goto out;
	}

	if (buf_count < 0) {
		error = EINVAL;
		goto out;
	}

	count = (if_cloners_count < buf_count) ?
	    if_cloners_count : buf_count;

	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
		(void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
		/* strncpy does not guarantee termination; detect truncation. */
		if (outbuf[sizeof(outbuf) - 1] != '\0') {
			error = ENAMETOOLONG;
			goto out;
		}
		error = copyout(outbuf, dst, sizeof(outbuf));
		if (error != 0)
			break;
	}

out:
	mutex_exit(&if_clone_mtx);
	return error;
}

/* Initialize the psref target embedded in an ifaddr. */
void
ifa_psref_init(struct ifaddr *ifa)
{

	psref_target_init(&ifa->ifa_psref, ifa_psref_class);
}

/* Take a reference on an ifaddr. */
void
ifaref(struct ifaddr *ifa)
{

	atomic_inc_uint(&ifa->ifa_refcnt);
}

/*
 * Drop a reference on an ifaddr; free it when the last reference goes.
 * The conditional membars pair release/acquire around the refcount
 * decrement on platforms where atomics are not full barriers.
 */
void
ifafree(struct ifaddr *ifa)
{

	KASSERT(ifa != NULL);
	KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt);

#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (atomic_dec_uint_nv(&ifa->ifa_refcnt) != 0)
		return;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif
	free(ifa, M_IFADDR);
}

/* True iff the ifaddr is flagged as being destroyed. */
bool
ifa_is_destroying(struct ifaddr *ifa)
{

	return ISSET(ifa->ifa_flags, IFA_DESTROYING);
}

/* Append an ifaddr to ifp's address lists and take a list reference. */
void
ifa_insert(struct ifnet *ifp, struct ifaddr *ifa)
{

	ifa->ifa_ifp = ifp;

	/*
	 * Check MP-safety for IFEF_MPSAFE drivers.
	 * Check !IFF_RUNNING for initialization routines that normally don't
	 * take IFNET_LOCK but it's safe because there is no competitor.
	 * XXX there are false positive cases because IFF_RUNNING can be off on
	 * if_stop.
	 */
	KASSERT(!if_is_mpsafe(ifp) || !ISSET(ifp->if_flags, IFF_RUNNING) ||
	    IFNET_LOCKED(ifp));

	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
	IFADDR_ENTRY_INIT(ifa);
	IFADDR_WRITER_INSERT_TAIL(ifp, ifa);

	ifaref(ifa);
}

/*
 * Unlink an ifaddr from ifp's address lists, wait out readers
 * (NET_MPSAFE), and drop the list reference.
 */
void
ifa_remove(struct ifnet *ifp, struct ifaddr *ifa)
{

	KASSERT(ifa->ifa_ifp == ifp);
	/*
	 * Check MP-safety for IFEF_MPSAFE drivers.
	 * if_is_deactivated indicates ifa_remove is called from if_detach
	 * where it is safe even if IFNET_LOCK isn't held.
	 */
	KASSERT(!if_is_mpsafe(ifp) || if_is_deactivated(ifp) ||
	    IFNET_LOCKED(ifp));

	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
	IFADDR_WRITER_REMOVE(ifa);
#ifdef NET_MPSAFE
	IFNET_GLOBAL_LOCK();
	pserialize_perform(ifnet_psz);
	IFNET_GLOBAL_UNLOCK();
#endif

#ifdef NET_MPSAFE
	psref_target_destroy(&ifa->ifa_psref, ifa_psref_class);
#endif
	IFADDR_ENTRY_DESTROY(ifa);
	ifafree(ifa);
}

/* Acquire a psref on an ifaddr (caller must be in a read section). */
void
ifa_acquire(struct ifaddr *ifa, struct psref *psref)
{

	PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
	psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class);
}

/* Release a psref taken by ifa_acquire; NULL ifa is a no-op. */
void
ifa_release(struct ifaddr *ifa, struct psref *psref)
{

	if (ifa == NULL)
		return;

	psref_release(psref, &ifa->ifa_psref, ifa_psref_class);
}

/* True iff the current LWP holds a psref on the ifaddr. */
bool
ifa_held(struct ifaddr *ifa)
{

	return psref_held(&ifa->ifa_psref, ifa_psref_class);
}

/* Sockaddr equality via sockaddr_cmp. */
static inline int
equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
{

	return sockaddr_cmp(sa1, sa2) == 0;
}

/*
 * Locate an interface based on a complete address.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		IFADDR_READER_FOREACH(ifa, ifp) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (equal(addr, ifa->ifa_addr))
				return ifa;
			/* Also match the broadcast address, if any. */
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IP6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    equal(ifa->ifa_broadaddr, addr))
				return ifa;
		}
	}
	return NULL;
}

/* As ifa_ifwithaddr, but returns the result held with a psref. */
struct ifaddr *
ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref)
{
	struct ifaddr *ifa;
	int s = pserialize_read_enter();

	ifa = ifa_ifwithaddr(addr);
	if (ifa != NULL)
		ifa_acquire(ifa, psref);
	pserialize_read_exit(s);

	return ifa;
}

/*
 * Locate the point to point interface with a given destination address.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
			continue;
		IFADDR_READER_FOREACH(ifa, ifp) {
			if (ifa->ifa_addr->sa_family != addr->sa_family ||
			    ifa->ifa_dstaddr == NULL)
				continue;
			if (equal(addr, ifa->ifa_dstaddr))
				return ifa;
		}
	}

	return NULL;
}

/* As ifa_ifwithdstaddr, but returns the result held with a psref. */
struct ifaddr *
ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref)
{
	struct ifaddr *ifa;
	int s;

	s = pserialize_read_enter();
	ifa = ifa_ifwithdstaddr(addr);
	if (ifa != NULL)
		ifa_acquire(ifa, psref);
	pserialize_read_exit(s);

	return ifa;
}

/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa, *ifa_maybe = NULL;
	const struct sockaddr_dl *sdl;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	/* AF_LINK with a valid index resolves directly via ifindex2ifnet. */
	if (af == AF_LINK) {
		sdl = satocsdl(addr);
		if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
		    ifindex2ifnet[sdl->sdl_index] &&
		    !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) {
			return ifindex2ifnet[sdl->sdl_index]->if_dl;
		}
	}
#ifdef NETATALK
	if (af == AF_APPLETALK) {
		const struct sockaddr_at *sat, *sat2;
		sat = (const struct sockaddr_at *)addr;
		IFNET_READER_FOREACH(ifp) {
			if (if_is_deactivated(ifp))
				continue;
			ifa = at_ifawithnet((const struct sockaddr_at *)addr,
			    ifp);
			if (ifa == NULL)
				continue;
			sat2 = (struct sockaddr_at *)ifa->ifa_addr;
			if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
				return ifa; /* exact match */
			if (ifa_maybe == NULL) {
				/* else keep the if with the right range */
				ifa_maybe = ifa;
			}
		}
		return ifa_maybe;
	}
#endif
	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		IFADDR_READER_FOREACH(ifa, ifp) {
			const char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af ||
			    ifa->ifa_netmask == NULL)
 next:				continue;
			/*
			 * Compare addr with this address under the
			 * netmask, byte by byte up to the mask's length.
			 */
			cp = addr_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = (const char *)ifa->ifa_netmask +
			    ifa->ifa_netmask->sa_len;
			while (cp3 < cplim) {
				if ((*cp++ ^ *cp2++) & *cp3++) {
					/* want to continue for() loop */
					goto next;
				}
			}
			/* Prefer the most specific (longest) matching mask. */
			if (ifa_maybe == NULL ||
			    rt_refines(ifa->ifa_netmask,
			        ifa_maybe->ifa_netmask))
				ifa_maybe = ifa;
		}
	}

	return ifa_maybe;
}

/* As ifa_ifwithnet, but returns the result held with a psref. */
struct ifaddr *
ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref)
{
	struct ifaddr *ifa;
	int s;

	s = pserialize_read_enter();
	ifa = ifa_ifwithnet(addr);
	if (ifa != NULL)
		ifa_acquire(ifa, psref);
	pserialize_read_exit(s);

	return ifa;
}

/*
 * Find the interface of the address.
 */
struct ifaddr *
ifa_ifwithladdr(const struct sockaddr *addr)
{
	struct ifaddr *ia;

	/* Try exact, then point-to-point destination, then network match. */
	if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) ||
	    (ia = ifa_ifwithnet(addr)))
		return ia;
	return NULL;
}

/* As ifa_ifwithladdr, but returns the result held with a psref. */
struct ifaddr *
ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref)
{
	struct ifaddr *ifa;
	int s;

	s = pserialize_read_enter();
	ifa = ifa_ifwithladdr(addr);
	if (ifa != NULL)
		ifa_acquire(ifa, psref);
	pserialize_read_exit(s);

	return ifa;
}

/*
 * Find an interface using a specific address family
 */
struct ifaddr *
ifa_ifwithaf(int af)
{
	struct ifnet *ifp;
	struct ifaddr *ifa = NULL;
	int s;

	s = pserialize_read_enter();
	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		IFADDR_READER_FOREACH(ifa, ifp) {
			if (ifa->ifa_addr->sa_family == af)
				goto out;
		}
	}
out:
	pserialize_read_exit(s);
	return ifa;
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	const char *cplim;
	struct ifaddr *ifa_maybe = 0;
	u_int af = addr->sa_family;

	if (if_is_deactivated(ifp))
		return NULL;

	if (af >= AF_MAX)
		return NULL;

	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the last same-family address as a fallback. */
		ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			/* No mask: only exact or destination match counts. */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr &&
			     equal(addr, ifa->ifa_dstaddr)))
				return ifa;
			continue;
		}
		/* Masked comparison, as in ifa_ifwithnet above. */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len +
		    (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++) {
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		}
		if (cp3 == cplim)
			return ifa;
	}

	return ifa_maybe;
}

/* As ifaof_ifpforaddr, but returns the result held with a psref. */
struct ifaddr *
ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp,
    struct psref *psref)
{
	struct ifaddr *ifa;
	int s;

	s = pserialize_read_enter();
	ifa = ifaof_ifpforaddr(addr, ifp);
	if (ifa != NULL)
		ifa_acquire(ifa, psref);
	pserialize_read_exit(s);

	return ifa;
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
void
link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info)
{
	struct ifaddr *ifa;
	const struct sockaddr *dst;
	struct ifnet *ifp;
	struct psref psref;

	if (cmd != RTM_ADD || ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
		return;
	ifp = rt->rt_ifa->ifa_ifp;
	dst = rt_getkey(rt);
	if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) {
		rt_replace_ifa(rt, ifa);
		/* Re-dispatch unless it would recurse into ourselves. */
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt, info);
		ifa_release(ifa, &psref);
	}
}

/*
 * bitmask macros to manage a densely packed link_state change queue.
 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
 * LINK_STATE_UP(2) we need 2 bits for each state change.
 * As a state change to store is 0, treat all bits set as an unset item.
 */
#define LQ_ITEM_BITS 2
#define LQ_ITEM_MASK ((1 << LQ_ITEM_BITS) - 1)
#define LQ_MASK(i) (LQ_ITEM_MASK << (i) * LQ_ITEM_BITS)
#define LINK_STATE_UNSET LQ_ITEM_MASK
#define LQ_ITEM(q, i) (((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS)
#define LQ_STORE(q, i, v)						\
	do {								\
		(q) &= ~LQ_MASK((i));					\
		(q) |= (v) << (i) * LQ_ITEM_BITS;			\
	} while (0 /* CONSTCOND */)
#define LQ_MAX(q) ((sizeof((q)) * NBBY) / LQ_ITEM_BITS)
/* Dequeue the oldest item into v; the vacated top slot becomes UNSET. */
#define LQ_POP(q, v)							\
	do {								\
		(v) = LQ_ITEM((q), 0);					\
		(q) >>= LQ_ITEM_BITS;					\
		(q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \
	} while (0 /* CONSTCOND */)
/* Drop the oldest item and append v at the top (overflow handling). */
#define LQ_PUSH(q, v)							\
	do {								\
		(q) >>= LQ_ITEM_BITS;					\
		(q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;		\
	} while (0 /* CONSTCOND */)
/* Set i to the index of the first UNSET slot, or LQ_MAX if queue full. */
#define LQ_FIND_UNSET(q, i)						\
	for ((i) = 0; i < LQ_MAX((q)); (i)++) {				\
		if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET)		\
			break;						\
	}

/*
 * Handle a change in the interface link state and
 * queue notifications.
 */
void
if_link_state_change(struct ifnet *ifp, int link_state)
{
	int idx;

	/* Ensure change is to a valid state */
	switch (link_state) {
	case LINK_STATE_UNKNOWN:	/* FALLTHROUGH */
	case LINK_STATE_DOWN:		/* FALLTHROUGH */
	case LINK_STATE_UP:
		break;
	default:
#ifdef DEBUG
		printf("%s: invalid link state %d\n",
		    ifp->if_xname, link_state);
#endif
		return;
	}

	IF_LINK_STATE_CHANGE_LOCK(ifp);

	/* Find the last unset event in the queue. */
	LQ_FIND_UNSET(ifp->if_link_queue, idx);

	if (idx == 0) {
		/*
		 * There is no queue of link state changes.
		 * As we have the lock we can safely compare against the
		 * current link state and return if the same.
		 * Otherwise, if scheduled is true then the interface is being
		 * detached and the queue is being drained so we need
		 * to avoid queuing more work.
		 */
		if (ifp->if_link_state == link_state ||
		    ifp->if_link_scheduled)
			goto out;
	} else {
		/* Ensure link_state doesn't match the last queued state. */
		if (LQ_ITEM(ifp->if_link_queue, idx - 1) ==
		    (uint8_t)link_state)
			goto out;
	}

	/* Handle queue overflow. */
	if (idx == LQ_MAX(ifp->if_link_queue)) {
		uint8_t lost;

		/*
		 * The DOWN state must be protected from being pushed off
		 * the queue to ensure that userland will always be
		 * in a sane state.
		 * Because DOWN is protected, there is no need to protect
		 * UNKNOWN.
		 * It should be invalid to change from any other state to
		 * UNKNOWN anyway ...
		 */
		lost = LQ_ITEM(ifp->if_link_queue, 0);
		LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
		if (lost == LINK_STATE_DOWN) {
			/* Keep DOWN at the head; lose its successor instead. */
			lost = LQ_ITEM(ifp->if_link_queue, 0);
			LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
		}
		printf("%s: lost link state change %s\n",
		    ifp->if_xname,
		    lost == LINK_STATE_UP ? "UP" :
		    lost == LINK_STATE_DOWN ? "DOWN" :
		    "UNKNOWN");
	} else
		LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);

	if (ifp->if_link_scheduled)
		goto out;

	ifp->if_link_scheduled = true;
	workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);

out:
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
}

/*
 * Handle interface link state change notifications.
 */
static void
if_link_state_change_process(struct ifnet *ifp, int link_state)
{
	struct domain *dp;
	const int s = splnet();
	bool notify;

	KASSERT(!cpu_intr_p());

	IF_LINK_STATE_CHANGE_LOCK(ifp);

	/* Ensure the change is still valid. */
	if (ifp->if_link_state == link_state) {
		IF_LINK_STATE_CHANGE_UNLOCK(ifp);
		splx(s);
		return;
	}

#ifdef DEBUG
	log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
	    link_state == LINK_STATE_UP ? "UP" :
	    link_state == LINK_STATE_DOWN ? "DOWN" :
	    "UNKNOWN",
	    ifp->if_link_state == LINK_STATE_UP ? "UP" :
	    ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
	    "UNKNOWN");
#endif

	/*
	 * When going from UNKNOWN to UP, we need to mark existing
	 * addresses as tentative and restart DAD as we may have
	 * erroneously not found a duplicate.
	 *
	 * This needs to happen before rt_ifmsg to avoid a race where
	 * listeners would have an address and expect it to work right
	 * away.
	 */
	notify = (link_state == LINK_STATE_UP &&
	    ifp->if_link_state == LINK_STATE_UNKNOWN);
	ifp->if_link_state = link_state;
	/* The following routines may sleep so release the spin mutex */
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);

	KERNEL_LOCK_UNLESS_NET_MPSAFE();
	if (notify) {
		/* Fake a DOWN first to force DAD to restart (see above). */
		DOMAIN_FOREACH(dp) {
			if (dp->dom_if_link_state_change != NULL)
				dp->dom_if_link_state_change(ifp,
				    LINK_STATE_DOWN);
		}
	}

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);

	simplehook_dohooks(ifp->if_linkstate_hooks);

	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_link_state_change != NULL)
			dp->dom_if_link_state_change(ifp, link_state);
	}
	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
	splx(s);
}

/*
 * Process the interface link state change queue.
 */
static void
if_link_state_change_work(struct work *work, void *arg)
{
	struct ifnet *ifp = container_of(work, struct ifnet, if_link_work);
	uint8_t state;

	KERNEL_LOCK_UNLESS_NET_MPSAFE();
	const int s = splnet();

	/*
	 * Pop a link state change from the queue and process it.
	 * If there is nothing to process then if_detach() has been called.
	 * We keep if_link_scheduled = true so the queue can safely drain
	 * without more work being queued.
	 */
	IF_LINK_STATE_CHANGE_LOCK(ifp);
	LQ_POP(ifp->if_link_queue, state);
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
	if (state == LINK_STATE_UNSET)
		goto out;

	if_link_state_change_process(ifp, state);

	/* If there is a link state change to come, schedule it. */
	IF_LINK_STATE_CHANGE_LOCK(ifp);
	if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) {
		ifp->if_link_scheduled = true;
		workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work,
		    NULL);
	} else
		ifp->if_link_scheduled = false;
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);

out:
	splx(s);
	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
}

/* Register a hook run on every link state change of ifp. */
void *
if_linkstate_change_establish(struct ifnet *ifp, void (*fn)(void *), void *arg)
{
	khook_t *hk;

	hk = simplehook_establish(ifp->if_linkstate_hooks, fn, arg);

	return (void *)hk;
}

/* Unregister a hook installed by if_linkstate_change_establish. */
void
if_linkstate_change_disestablish(struct ifnet *ifp, void *vhook,
    kmutex_t *lock)
{

	simplehook_disestablish(ifp->if_linkstate_hooks, vhook, lock);
}

/*
 * Used to mark addresses on an interface as DETACHED or TENTATIVE
 * and thus start Duplicate Address Detection without changing the
 * real link state.
 */
void
if_domain_link_state_change(struct ifnet *ifp, int link_state)
{
	struct domain *dp;
	const int s = splnet();

	KERNEL_LOCK_UNLESS_NET_MPSAFE();

	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_link_state_change != NULL)
			dp->dom_if_link_state_change(ifp, link_state);
	}

	splx(s);
	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
}

/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(int req, struct rtentry *rt,
    __unused const struct rt_addrinfo *info)
{
	/*
	 * NOTE(review): info is marked __unused yet info->rti_flags is
	 * read below in the RTM_ADD case -- the annotation is stale and
	 * should probably be dropped; confirm against callers.
	 */
	struct ifnet *ifp = rt->rt_ifp;
	struct ifaddr *ifa, *lo0ifa;
	int s = pserialize_read_enter();

	switch (req) {
	case RTM_ADD:
		if ((rt->rt_flags & RTF_LOCAL) == 0)
			break;

		/* Local routes are reached through loopback. */
		rt->rt_ifp = lo0ifp;

		if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
			break;

		IFADDR_READER_FOREACH(ifa, ifp) {
			if (equal(rt_getkey(rt), ifa->ifa_addr))
				break;
		}
		if (ifa == NULL)
			break;

		/*
		 * Ensure lo0 has an address of the same family.
		 */
		IFADDR_READER_FOREACH(lo0ifa, lo0ifp) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if (lo0ifa == NULL)
			break;

		/*
		 * Make sure to set rt->rt_ifa to the interface
		 * address we are using, otherwise we will have trouble
		 * with source address selection.
		 */
		if (ifa != rt->rt_ifa)
			rt_replace_ifa(rt, ifa);
		break;
	case RTM_DELETE:
	default:
		break;
	}

	pserialize_read_exit(s);
}

/*
 * Common body of if_down_deactivated/if_down_locked: clear IFF_UP,
 * notify protocols (PRC_IFDOWN per address, dom_if_down per domain),
 * purge the send queue and announce via the routing socket.
 */
static void
_if_down(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct domain *dp;
	struct psref psref;

	ifp->if_flags &= ~IFF_UP;
	nanotime(&ifp->if_lastchange);

	const int bound = curlwp_bind();
	int s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		/* Hold ifa across pfctlinput, which may sleep. */
		ifa_acquire(ifa, &psref);
		pserialize_read_exit(s);

		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);

		s = pserialize_read_enter();
		ifa_release(ifa, &psref);
	}
	pserialize_read_exit(s);
	curlwp_bindx(bound);

	IFQ_PURGE(&ifp->if_snd);
#if NCARP > 0
	if (ifp->if_carp)
		carp_carpdev_state(ifp);
#endif
	rt_ifmsg(ifp);
	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_down)
			dp->dom_if_down(ifp);
	}
}

/* if_down variant used from if_detach, after if_deactivate has run. */
static void
if_down_deactivated(struct ifnet *ifp)
{

	KASSERT(if_is_deactivated(ifp));
	_if_down(ifp);
}

/* if_down variant for callers already holding IFNET_LOCK. */
void
if_down_locked(struct ifnet *ifp)
{

	KASSERT(IFNET_LOCKED(ifp));
	_if_down(ifp);
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_down_locked(ifp);
	IFNET_UNLOCK(ifp);
}

/*
 * Must be called with holding if_ioctl_lock.
 */
static void
if_up_locked(struct ifnet *ifp)
{
#ifdef notyet
	struct ifaddr *ifa;
#endif
	struct domain *dp;

	KASSERT(IFNET_LOCKED(ifp));
	KASSERT(!if_is_deactivated(ifp));

	ifp->if_flags |= IFF_UP;
	nanotime(&ifp->if_lastchange);
#ifdef notyet
	/* this has no effect on IP, and will kill all ISO connections XXX */
	IFADDR_READER_FOREACH(ifa, ifp)
		pfctlinput(PRC_IFUP, ifa->ifa_addr);
#endif
#if NCARP > 0
	if (ifp->if_carp)
		carp_carpdev_state(ifp);
#endif
	rt_ifmsg(ifp);
	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_up)
			dp->dom_if_up(ifp);
	}
}

/*
 * Handle interface slowtimo timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
static bool
if_slowtimo_countdown(struct ifnet *ifp)
{
	bool fire = false;
	const int s = splnet();

	/* if_timer is still protected by the big lock, not isd_lock. */
	KERNEL_LOCK(1, NULL);
	if (ifp->if_timer != 0 && --ifp->if_timer == 0)
		fire = true;
	KERNEL_UNLOCK_ONE(NULL);
	splx(s);

	return fire;
}

/*
 * Callout handler for the slowtimo timer: on expiry (or an explicit
 * sysctl trigger) queue the watchdog work once; otherwise re-arm.
 */
static void
if_slowtimo_intr(void *arg)
{
	struct ifnet *ifp = arg;
	struct if_slowtimo_data *isd = ifp->if_slowtimo_data;

	mutex_enter(&isd->isd_lock);
	if (!isd->isd_dying) {
		if (isd->isd_trigger ||
		    if_slowtimo_countdown(ifp)) {
			if (!isd->isd_queued) {
				isd->isd_queued = true;
				workqueue_enqueue(if_slowtimo_wq,
				    &isd->isd_work, NULL);
			}
		} else
			callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
	}
	mutex_exit(&isd->isd_lock);
}

/*
 * Workqueue handler: invoke the driver's if_slowtimo (watchdog) hook
 * in thread context, then re-arm the callout unless we are dying.
 */
static void
if_slowtimo_work(struct work *work, void *arg)
{
	struct if_slowtimo_data *isd =
	    container_of(work, struct if_slowtimo_data, isd_work);
	struct ifnet *ifp = isd->isd_ifp;
	const int s = splnet();

	KERNEL_LOCK(1, NULL);
	(*ifp->if_slowtimo)(ifp);
	KERNEL_UNLOCK_ONE(NULL);
	splx(s);

	mutex_enter(&isd->isd_lock);
	if (isd->isd_trigger) {
		isd->isd_trigger = false;
		printf("%s: watchdog triggered\n", ifp->if_xname);
	}
	isd->isd_queued = false;
	if (!isd->isd_dying)
		callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
	mutex_exit(&isd->isd_lock);
}

/*
 * Sysctl handler for net.interfaces.<ifname>.watchdog.trigger: writing
 * a non-zero value forces the watchdog to fire on the next callout.
 */
static int
sysctl_if_watchdog(SYSCTLFN_ARGS)
{
	struct sysctlnode node = *rnode;
	struct ifnet *ifp = node.sysctl_data;
	struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
	int arg = 0;
	int error;

	node.sysctl_data = &arg;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;
	if (arg) {
		mutex_enter(&isd->isd_lock);
		KASSERT(!isd->isd_dying);
		isd->isd_trigger = true;
		callout_schedule(&isd->isd_ch, 0);
		mutex_exit(&isd->isd_lock);
	}

	return 0;
}

/*
 * Create the per-interface watchdog sysctl subtree
 * (net.interfaces.<ifname>.watchdog.trigger).
 */
static void
sysctl_watchdog_setup(struct ifnet *ifp)
{
	struct sysctllog **clog = &ifp->if_sysctl_log;
	const struct sysctlnode *rnode;

	if (sysctl_createv(clog, 0, NULL, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
		SYSCTL_DESCR("Per-interface controls"),
		NULL, 0, NULL, 0,
		CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	if (sysctl_createv(clog, 0, &rnode, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
		SYSCTL_DESCR("Interface controls"),
		NULL, 0, NULL, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	if (sysctl_createv(clog, 0, &rnode, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
		SYSCTL_DESCR("Interface watchdog controls"),
		NULL, 0, NULL, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	if (sysctl_createv(clog, 0, &rnode, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
		SYSCTL_DESCR("Trigger watchdog timeout"),
		sysctl_if_watchdog, 0, (int *)ifp, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;

	return;

bad:
	printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_up_locked(ifp);
	IFNET_UNLOCK(ifp);
}

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc_locked(struct ifnet *ifp, int pswitch)
{
	int pcount, ret = 0;
	u_short nflags;

	KASSERT(IFNET_LOCKED(ifp));

	/* Remember the count so it can be rolled back on failure. */
	pcount = ifp->if_pcount;
	if (pswitch) {
		/*
		 * Allow the device to be "placed" into promiscuous
		 * mode even if it is not configured up.  It will
		 * consult IFF_PROMISC when it is brought up.
		 */
		if (ifp->if_pcount++ != 0)
			goto out;
		nflags = ifp->if_flags | IFF_PROMISC;
	} else {
		if (--ifp->if_pcount > 0)
			goto out;
		nflags = ifp->if_flags & ~IFF_PROMISC;
	}
	ret = if_flags_set(ifp, nflags);
	/* Restore interface state if not successful. */
	if (ret != 0) {
		ifp->if_pcount = pcount;
	}
out:
	return ret;
}

/* Locked wrapper around ifpromisc_locked. */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int e;

	IFNET_LOCK(ifp);
	e = ifpromisc_locked(ifp, pswitch);
	IFNET_UNLOCK(ifp);

	return e;
}

/*
 * if_ioctl(ifp, cmd, data)
 *
 *	Apply an ioctl command to the interface.  Returns 0 on success,
 *	nonzero errno(3) number on failure.
 *
 *	For SIOCADDMULTI/SIOCDELMULTI, caller need not hold locks -- it
 *	is the driver's responsibility to take any internal locks.
 *	(Kernel logic should generally invoke these only through
 *	if_mcast_op.)
 *
 *	For all other ioctls, caller must hold ifp->if_ioctl_lock,
 *	a.k.a. IFNET_LOCK.  May sleep.
 */
int
if_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{

	switch (cmd) {
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
	}

	return (*ifp->if_ioctl)(ifp, cmd, data);
}

/*
 * if_init(ifp)
 *
 *	Prepare the hardware underlying ifp to process packets
 *	according to its current configuration.  Returns 0 on success,
 *	nonzero errno(3) number on failure.
 *
 *	May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 *	IFNET_LOCK.
 */
int
if_init(struct ifnet *ifp)
{

	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);

	return (*ifp->if_init)(ifp);
}

/*
 * if_stop(ifp, disable)
 *
 *	Stop the hardware underlying ifp from processing packets.
 *
 *	If disable is true, ... XXX(?)
 *
 *	May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 *	IFNET_LOCK.
 */
void
if_stop(struct ifnet *ifp, int disable)
{

	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);

	(*ifp->if_stop)(ifp, disable);
}

/*
 * Map interface name to
 * interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;
	const char *cp = name;
	u_int unit = 0;
	u_int i;

	/*
	 * If the entire name is a number, treat it as an ifindex.
	 */
	for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) {
		unit = unit * 10 + (*cp - '0');
	}

	/*
	 * If the number took all of the name, then it's a valid ifindex.
	 */
	if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
		return if_byindex(unit);

	ifp = NULL;
	const int s = pserialize_read_enter();
	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		if (strcmp(ifp->if_xname, name) == 0)
			goto out;
	}
out:
	pserialize_read_exit(s);
	return ifp;
}

/*
 * Get a reference of an ifnet object by an interface name.
 * The returned reference is protected by psref(9). The caller
 * must release a returned reference by if_put after use.
 */
struct ifnet *
if_get(const char *name, struct psref *psref)
{
	struct ifnet *ifp;
	const char *cp = name;
	u_int unit = 0;
	u_int i;

	/*
	 * If the entire name is a number, treat it as an ifindex.
	 */
	for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) {
		unit = unit * 10 + (*cp - '0');
	}

	/*
	 * If the number took all of the name, then it's a valid ifindex.
	 */
	if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
		return if_get_byindex(unit, psref);

	ifp = NULL;
	const int s = pserialize_read_enter();
	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		if (strcmp(ifp->if_xname, name) == 0) {
			PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
			psref_acquire(psref, &ifp->if_psref,
			    ifnet_psref_class);
			goto out;
		}
	}
out:
	pserialize_read_exit(s);
	return ifp;
}

/*
 * Release a reference of an ifnet object given by if_get, if_get_byindex
 * or if_get_bylla.
 */
void
if_put(const struct ifnet *ifp, struct psref *psref)
{

	if (ifp == NULL)
		return;

	psref_release(psref, &ifp->if_psref, ifnet_psref_class);
}

/*
 * Return ifp having idx. Return NULL if not found.  Normally if_byindex
 * should be used.
 */
ifnet_t *
_if_byindex(u_int idx)
{

	return (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL;
}

/*
 * Return ifp having idx. Return NULL if not found or the found ifp is
 * already deactivated.
 */
ifnet_t *
if_byindex(u_int idx)
{
	ifnet_t *ifp;

	ifp = _if_byindex(idx);
	if (ifp != NULL && if_is_deactivated(ifp))
		ifp = NULL;

	return ifp;
}

/*
 * Get a reference of an ifnet object by an interface index.
* The returned reference is protected by psref(9). The caller * must release a returned reference by if_put after use. */ ifnet_t * if_get_byindex(u_int idx, struct psref *psref) { ifnet_t *ifp; const int s = pserialize_read_enter(); ifp = if_byindex(idx); if (__predict_true(ifp != NULL)) { PSREF_DEBUG_FILL_RETURN_ADDRESS(psref); psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); } pserialize_read_exit(s); return ifp; } ifnet_t * if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref) { ifnet_t *ifp; const int s = pserialize_read_enter(); IFNET_READER_FOREACH(ifp) { if (if_is_deactivated(ifp)) continue; if (ifp->if_addrlen != lla_len) continue; if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) { psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); break; } } pserialize_read_exit(s); return ifp; } /* * Note that it's safe only if the passed ifp is guaranteed to not be freed, * for example using pserialize or the ifp is already held or some other * object is held which guarantes the ifp to not be freed indirectly. */ void if_acquire(struct ifnet *ifp, struct psref *psref) { KASSERT(ifp->if_index != 0); psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); } bool if_held(struct ifnet *ifp) { return psref_held(&ifp->if_psref, ifnet_psref_class); } /* * Some tunnel interfaces can nest, e.g. IPv4 over IPv4 gif(4) tunnel over IPv4. * Check the tunnel nesting count. * Return > 0, if tunnel nesting count is more than limit. * Return 0, if tunnel nesting count is equal or less than limit. 
 */
int
if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, int limit)
{
	struct m_tag *mtag;
	int *count;

	/*
	 * The nesting depth rides on the packet itself as an mbuf tag;
	 * each tunnel traversal increments it.
	 */
	mtag = m_tag_find(m, PACKET_TAG_TUNNEL_INFO);
	if (mtag != NULL) {
		count = (int *)(mtag + 1);
		if (++(*count) > limit) {
			log(LOG_NOTICE,
			    "%s: recursively called too many times(%d)\n",
			    ifp->if_xname, *count);
			return EIO;
		}
	} else {
		/* First traversal: attach a fresh counter, starting at 0. */
		mtag = m_tag_get(PACKET_TAG_TUNNEL_INFO, sizeof(*count),
		    M_NOWAIT);
		if (mtag != NULL) {
			m_tag_prepend(m, mtag);
			count = (int *)(mtag + 1);
			*count = 0;
		} else {
			/* Best effort: without the tag we cannot track depth. */
			log(LOG_DEBUG,
			    "%s: m_tag_get() failed, recursion calls are not prevented.\n",
			    ifp->if_xname);
		}
	}

	return 0;
}

/* percpu(9) constructor: allocate one route cache + lock per CPU. */
static void
if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	tro->tr_ro = kmem_zalloc(sizeof(*tro->tr_ro), KM_SLEEP);
	tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}

/* percpu(9) destructor: release the per-CPU route cache and lock. */
static void
if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	rtcache_free(tro->tr_ro);
	kmem_free(tro->tr_ro, sizeof(*tro->tr_ro));
	mutex_obj_free(tro->tr_lock);
}

/* Allocate per-CPU route caches for a tunnel interface. */
percpu_t *
if_tunnel_alloc_ro_percpu(void)
{

	return percpu_create(sizeof(struct tunnel_ro),
	    if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
}

/* Free the per-CPU route caches allocated by if_tunnel_alloc_ro_percpu. */
void
if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
{

	percpu_free(ro_percpu, sizeof(struct tunnel_ro));
}

/* Invalidate one CPU's cached route, under its lock. */
static void
if_tunnel_rtcache_free_pc(void *p, void *arg __unused,
    struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	mutex_enter(tro->tr_lock);
	rtcache_free(tro->tr_ro);
	mutex_exit(tro->tr_lock);
}

/* Invalidate the cached routes on every CPU (e.g. after reconfiguration). */
void
if_tunnel_ro_percpu_rtcache_free(percpu_t *ro_percpu)
{

	percpu_foreach(ro_percpu, if_tunnel_rtcache_free_pc, NULL);
}

/*
 * Export a snapshot of interface statistics and static attributes
 * into *ifi; if zero_stats is true the volatile counters are reset.
 */
void
if_export_if_data(ifnet_t * const ifp, struct if_data *ifi, bool zero_stats)
{

	/* Collect the volatile stats first; this zeros *ifi.
*/ if_stats_to_if_data(ifp, ifi, zero_stats); ifi->ifi_type = ifp->if_type; ifi->ifi_addrlen = ifp->if_addrlen; ifi->ifi_hdrlen = ifp->if_hdrlen; ifi->ifi_link_state = ifp->if_link_state; ifi->ifi_mtu = ifp->if_mtu; ifi->ifi_metric = ifp->if_metric; ifi->ifi_baudrate = ifp->if_baudrate; ifi->ifi_lastchange = ifp->if_lastchange; } /* common */ int ifioctl_common(struct ifnet *ifp, u_long cmd, void *data) { struct ifreq *ifr; struct ifcapreq *ifcr; struct ifdatareq *ifdr; unsigned short flags; char *descr; int error; switch (cmd) { case SIOCSIFCAP: ifcr = data; if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0) return EINVAL; if (ifcr->ifcr_capenable == ifp->if_capenable) return 0; ifp->if_capenable = ifcr->ifcr_capenable; /* Pre-compute the checksum flags mask. */ ifp->if_csum_flags_tx = 0; ifp->if_csum_flags_rx = 0; if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx) ifp->if_csum_flags_tx |= M_CSUM_IPv4; if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx) ifp->if_csum_flags_rx |= M_CSUM_IPv4; if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx) ifp->if_csum_flags_tx |= M_CSUM_TCPv4; if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx) ifp->if_csum_flags_rx |= M_CSUM_TCPv4; if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx) ifp->if_csum_flags_tx |= M_CSUM_UDPv4; if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx) ifp->if_csum_flags_rx |= M_CSUM_UDPv4; if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx) ifp->if_csum_flags_tx |= M_CSUM_TCPv6; if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx) ifp->if_csum_flags_rx |= M_CSUM_TCPv6; if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx) ifp->if_csum_flags_tx |= M_CSUM_UDPv6; if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx) ifp->if_csum_flags_rx |= M_CSUM_UDPv6; if (ifp->if_capenable & IFCAP_TSOv4) ifp->if_csum_flags_tx |= M_CSUM_TSOv4; if (ifp->if_capenable & IFCAP_TSOv6) ifp->if_csum_flags_tx |= M_CSUM_TSOv6; #if NBRIDGE > 0 if (ifp->if_bridge != NULL) bridge_calc_csum_flags(ifp->if_bridge); #endif if (ifp->if_flags & IFF_UP) return ENETRESET; return 0; case SIOCSIFFLAGS: ifr = data; 
/* * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up * and if_down aren't MP-safe yet, so we must hold the lock. */ KERNEL_LOCK_IF_IFP_MPSAFE(ifp); if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) { const int s = splsoftnet(); if_down_locked(ifp); splx(s); } if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { const int s = splsoftnet(); if_up_locked(ifp); splx(s); } KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp); flags = (ifp->if_flags & IFF_CANTCHANGE) | (ifr->ifr_flags &~ IFF_CANTCHANGE); if (ifp->if_flags != flags) { ifp->if_flags = flags; /* Notify that the flags have changed. */ rt_ifmsg(ifp); } break; case SIOCGIFFLAGS: ifr = data; ifr->ifr_flags = ifp->if_flags; break; case SIOCGIFMETRIC: ifr = data; ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr = data; ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFDLT: ifr = data; ifr->ifr_dlt = ifp->if_dlt; break; case SIOCGIFCAP: ifcr = data; ifcr->ifcr_capabilities = ifp->if_capabilities; ifcr->ifcr_capenable = ifp->if_capenable; break; case SIOCSIFMETRIC: ifr = data; ifp->if_metric = ifr->ifr_metric; break; case SIOCGIFDATA: ifdr = data; if_export_if_data(ifp, &ifdr->ifdr_data, false); break; case SIOCGIFINDEX: ifr = data; ifr->ifr_index = ifp->if_index; break; case SIOCZIFDATA: ifdr = data; if_export_if_data(ifp, &ifdr->ifdr_data, true); getnanotime(&ifp->if_lastchange); break; case SIOCSIFMTU: ifr = data; if (ifp->if_mtu == ifr->ifr_mtu) break; ifp->if_mtu = ifr->ifr_mtu; return ENETRESET; case SIOCSIFDESCR: error = kauth_authorize_network(kauth_cred_get(), KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd), NULL); if (error) return error; ifr = data; if (ifr->ifr_buflen > IFDESCRSIZE) return ENAMETOOLONG; if (ifr->ifr_buf == NULL || ifr->ifr_buflen == 0) { /* unset description */ descr = NULL; } else { descr = kmem_zalloc(IFDESCRSIZE, KM_SLEEP); /* * copy (IFDESCRSIZE - 1) bytes to ensure * terminating nul */ error = copyin(ifr->ifr_buf, descr, 
IFDESCRSIZE - 1); if (error) { kmem_free(descr, IFDESCRSIZE); return error; } } if (ifp->if_description != NULL) kmem_free(ifp->if_description, IFDESCRSIZE); ifp->if_description = descr; break; case SIOCGIFDESCR: ifr = data; descr = ifp->if_description; if (descr == NULL) return ENOMSG; if (ifr->ifr_buflen < IFDESCRSIZE) return EINVAL; error = copyout(descr, ifr->ifr_buf, IFDESCRSIZE); if (error) return error; break; default: return ENOTTY; } return 0; } int ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp) { struct if_addrprefreq *ifap = (struct if_addrprefreq *)data; struct ifaddr *ifa; const struct sockaddr *any, *sa; union { struct sockaddr sa; struct sockaddr_storage ss; } u, v; int s, error = 0; switch (cmd) { case SIOCSIFADDRPREF: error = kauth_authorize_network(kauth_cred_get(), KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd), NULL); if (error) return error; break; case SIOCGIFADDRPREF: break; default: return EOPNOTSUPP; } /* sanity checks */ if (data == NULL || ifp == NULL) { panic("invalid argument to %s", __func__); /*NOTREACHED*/ } /* address must be specified on ADD and DELETE */ sa = sstocsa(&ifap->ifap_addr); if (sa->sa_family != sofamily(so)) return EINVAL; if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len) return EINVAL; sockaddr_externalize(&v.sa, sizeof(v.ss), sa); s = pserialize_read_enter(); IFADDR_READER_FOREACH(ifa, ifp) { if (ifa->ifa_addr->sa_family != sa->sa_family) continue; sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr); if (sockaddr_cmp(&u.sa, &v.sa) == 0) break; } if (ifa == NULL) { error = EADDRNOTAVAIL; goto out; } switch (cmd) { case SIOCSIFADDRPREF: ifa->ifa_preference = ifap->ifap_preference; goto out; case SIOCGIFADDRPREF: /* fill in the if_laddrreq structure */ (void)sockaddr_copy(sstosa(&ifap->ifap_addr), sizeof(ifap->ifap_addr), ifa->ifa_addr); ifap->ifap_preference = ifa->ifa_preference; goto out; default: error = EOPNOTSUPP; } out: 
pserialize_read_exit(s); return error; } /* * Interface ioctls. */ static int doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) { struct ifnet *ifp; struct ifreq *ifr; int error = 0; u_long ocmd = cmd; u_short oif_flags; struct ifreq ifrb; struct oifreq *oifr = NULL; int r; struct psref psref; bool do_if43_post = false; bool do_ifm80_post = false; switch (cmd) { case SIOCGIFCONF: return ifconf(cmd, data); case SIOCINITIFADDR: return EPERM; default: MODULE_HOOK_CALL(uipc_syscalls_40_hook, (cmd, data), enosys(), error); if (error != ENOSYS) return error; MODULE_HOOK_CALL(uipc_syscalls_50_hook, (l, cmd, data), enosys(), error); if (error != ENOSYS) return error; error = 0; break; } ifr = data; /* Pre-conversion */ MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), error); if (cmd != ocmd) { oifr = data; data = ifr = &ifrb; IFREQO2N_43(oifr, ifr); do_if43_post = true; } MODULE_HOOK_CALL(ifmedia_80_pre_hook, (ifr, &cmd, &do_ifm80_post), enosys(), error); switch (cmd) { case SIOCIFCREATE: case SIOCIFDESTROY: { const int bound = curlwp_bind(); if (l != NULL) { ifp = if_get(ifr->ifr_name, &psref); error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd), NULL); if (ifp != NULL) if_put(ifp, &psref); if (error != 0) { curlwp_bindx(bound); return error; } } KERNEL_LOCK_UNLESS_NET_MPSAFE(); mutex_enter(&if_clone_mtx); r = (cmd == SIOCIFCREATE) ? 
if_clone_create(ifr->ifr_name) : if_clone_destroy(ifr->ifr_name); mutex_exit(&if_clone_mtx); KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); curlwp_bindx(bound); return r; } case SIOCIFGCLONERS: { struct if_clonereq *req = (struct if_clonereq *)data; return if_clone_list(req->ifcr_count, req->ifcr_buffer, &req->ifcr_total); } } if ((cmd & IOC_IN) == 0 || IOCPARM_LEN(cmd) < sizeof(ifr->ifr_name)) return EINVAL; const int bound = curlwp_bind(); ifp = if_get(ifr->ifr_name, &psref); if (ifp == NULL) { curlwp_bindx(bound); return ENXIO; } switch (cmd) { case SIOCALIFADDR: case SIOCDLIFADDR: case SIOCSIFADDRPREF: case SIOCSIFFLAGS: case SIOCSIFCAP: case SIOCSIFMETRIC: case SIOCZIFDATA: case SIOCSIFMTU: case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSLIFPHYADDR: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSETHERCAP: case SIOCSIFMEDIA: case SIOCSDRVSPEC: case SIOCG80211: case SIOCS80211: case SIOCS80211NWID: case SIOCS80211NWKEY: case SIOCS80211POWER: case SIOCS80211BSSID: case SIOCS80211CHANNEL: case SIOCSLINKSTR: if (l != NULL) { error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd), NULL); if (error != 0) goto out; } } oif_flags = ifp->if_flags; KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp); IFNET_LOCK(ifp); error = if_ioctl(ifp, cmd, data); if (error != ENOTTY) ; else if (so->so_proto == NULL) error = EOPNOTSUPP; else { KERNEL_LOCK_IF_IFP_MPSAFE(ifp); MODULE_HOOK_CALL(if_ifioctl_43_hook, (so, ocmd, cmd, data, l), enosys(), error); if (error == ENOSYS) error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so, cmd, data, ifp); KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp); } if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) { if ((ifp->if_flags & IFF_UP) != 0) { const int s = splsoftnet(); if_up_locked(ifp); splx(s); } } /* Post-conversion */ if (do_ifm80_post && (error == 0)) MODULE_HOOK_CALL(ifmedia_80_post_hook, (ifr, cmd), enosys(), error); if (do_if43_post) IFREQN2O_43(oifr, ifr); 
IFNET_UNLOCK(ifp); KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp); out: if_put(ifp, &psref); curlwp_bindx(bound); return error; } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. * * Each record is a struct ifreq. Before the addition of * sockaddr_storage, the API rule was that sockaddr flavors that did * not fit would extend beyond the struct ifreq, with the next struct * ifreq starting sa_len beyond the struct sockaddr. Because the * union in struct ifreq includes struct sockaddr_storage, every kind * of sockaddr must fit. Thus, there are no longer any overlength * records. * * Records are added to the user buffer if they fit, and ifc_len is * adjusted to the length that was written. Thus, the user is only * assured of getting the complete list if ifc_len on return is at * least sizeof(struct ifreq) less than it was on entry. * * If the user buffer pointer is NULL, this routine copies no data and * returns the amount of space that would be needed. * * Invariants: * ifrp points to the next part of the user's buffer to be used. If * ifrp != NULL, space holds the number of bytes remaining that we may * write at ifrp. Otherwise, space holds the number of bytes that * would have been written had there been adequate space. 
*/ /*ARGSUSED*/ static int ifconf(u_long cmd, void *data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr, *ifrp = NULL; int space = 0, error = 0; const int sz = (int)sizeof(struct ifreq); const bool docopy = ifc->ifc_req != NULL; struct psref psref; if (docopy) { if (ifc->ifc_len < 0) return EINVAL; space = ifc->ifc_len; ifrp = ifc->ifc_req; } memset(&ifr, 0, sizeof(ifr)); const int bound = curlwp_bind(); int s = pserialize_read_enter(); IFNET_READER_FOREACH(ifp) { psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); pserialize_read_exit(s); (void)strncpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)); if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') { error = ENAMETOOLONG; goto release_exit; } if (IFADDR_READER_EMPTY(ifp)) { /* Interface with no addresses - send zero sockaddr. */ memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr)); if (!docopy) { space += sz; goto next; } if (space >= sz) { error = copyout(&ifr, ifrp, sz); if (error != 0) goto release_exit; ifrp++; space -= sz; } } s = pserialize_read_enter(); IFADDR_READER_FOREACH(ifa, ifp) { struct sockaddr *sa = ifa->ifa_addr; /* all sockaddrs must fit in sockaddr_storage */ KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru)); if (!docopy) { space += sz; continue; } memcpy(&ifr.ifr_space, sa, sa->sa_len); pserialize_read_exit(s); if (space >= sz) { error = copyout(&ifr, ifrp, sz); if (error != 0) goto release_exit; ifrp++; space -= sz; } s = pserialize_read_enter(); } pserialize_read_exit(s); next: s = pserialize_read_enter(); psref_release(&psref, &ifp->if_psref, ifnet_psref_class); } pserialize_read_exit(s); curlwp_bindx(bound); if (docopy) { KASSERT(0 <= space && space <= ifc->ifc_len); ifc->ifc_len -= space; } else { KASSERT(space >= 0); ifc->ifc_len = space; } return (0); release_exit: psref_release(&psref, &ifp->if_psref, ifnet_psref_class); curlwp_bindx(bound); return error; } int ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa) 
{ uint8_t len = sizeof(ifr->ifr_ifru.ifru_space); struct ifreq ifrb; struct oifreq *oifr = NULL; u_long ocmd = cmd; int hook; MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), hook); if (hook != ENOSYS) { if (cmd != ocmd) { oifr = (struct oifreq *)(void *)ifr; ifr = &ifrb; IFREQO2N_43(oifr, ifr); len = sizeof(oifr->ifr_addr); } } if (len < sa->sa_len) return EFBIG; memset(&ifr->ifr_addr, 0, len); sockaddr_copy(&ifr->ifr_addr, len, sa); if (cmd != ocmd) IFREQN2O_43(oifr, ifr); return 0; } /* * wrapper function for the drivers which doesn't have if_transmit(). */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; size_t pktlen = m->m_pkthdr.len; bool mcast = (m->m_flags & M_MCAST) != 0; const int s = splnet(); IFQ_ENQUEUE(&ifp->if_snd, m, error); if (error != 0) { /* mbuf is already freed */ goto out; } net_stat_ref_t nsr = IF_STAT_GETREF(ifp); if_statadd_ref(nsr, if_obytes, pktlen); if (mcast) if_statinc_ref(nsr, if_omcasts); IF_STAT_PUTREF(ifp); if ((ifp->if_flags & IFF_OACTIVE) == 0) if_start_lock(ifp); out: splx(s); return error; } int if_transmit_lock(struct ifnet *ifp, struct mbuf *m) { int error; kmsan_check_mbuf(m); #ifdef ALTQ KERNEL_LOCK(1, NULL); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { error = if_transmit(ifp, m); KERNEL_UNLOCK_ONE(NULL); } else { KERNEL_UNLOCK_ONE(NULL); error = (*ifp->if_transmit)(ifp, m); /* mbuf is already freed */ } #else /* !ALTQ */ error = (*ifp->if_transmit)(ifp, m); /* mbuf is already freed */ #endif /* !ALTQ */ return error; } /* * Queue message on interface, and start output if interface * not yet active. 
 */
int
ifq_enqueue(struct ifnet *ifp, struct mbuf *m)
{

	/* Thin wrapper; all the work happens in if_transmit_lock. */
	return if_transmit_lock(ifp, m);
}

/*
 * Queue message on interface, possibly using a second fast queue
 * (ifq).  The fast queue is bypassed when ALTQ is enabled on the
 * interface's send queue.  Consumes the mbuf in all cases: on a full
 * fast queue it is freed and the drop counted against if_snd.
 */
int
ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m)
{
	int error = 0;

	if (ifq != NULL
#ifdef ALTQ
	    && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
#endif
	    ) {
		if (IF_QFULL(ifq)) {
			IF_DROP(&ifp->if_snd);
			m_freem(m);
			if (error == 0)
				error = ENOBUFS;
		} else
			IF_ENQUEUE(ifq, m);
	} else
		/* IFQ_ENQUEUE frees the mbuf itself on failure. */
		IFQ_ENQUEUE(&ifp->if_snd, m, error);
	if (error != 0) {
		if_statinc(ifp, if_oerrors);
		return error;
	}

	return 0;
}

/*
 * Initialize an interface address: prefer the driver's if_initaddr
 * hook; otherwise fall back to SIOCSIFDSTADDR (for the !src case)
 * and then SIOCINITIFADDR via if_ioctl.  Caller holds IFNET_LOCK.
 */
int
if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src)
{
	int rc;

	KASSERT(IFNET_LOCKED(ifp));
	if (ifp->if_initaddr != NULL)
		rc = (*ifp->if_initaddr)(ifp, ifa, src);
	else if (src ||
	    (rc = if_ioctl(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY)
		rc = if_ioctl(ifp, SIOCINITIFADDR, ifa);

	return rc;
}

/*
 * Decide whether Duplicate Address Detection should be performed on
 * this interface: 0 = skip DAD, 1 = do DAD.
 */
int
if_do_dad(struct ifnet *ifp)
{

	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
		return 0;

	switch (ifp->if_type) {
	case IFT_FAITH:
		/*
		 * These interfaces do not have the IFF_LOOPBACK flag,
		 * but loop packets back.  We do not have to do DAD on
		 * such interfaces.  We should even omit it, because
		 * loop-backed responses would confuse the DAD
		 * procedure.
		 */
		return 0;
	default:
		/*
		 * Our DAD routine requires the interface up and running.
		 * However, some interfaces can be up before the RUNNING
		 * status.  Additionally, users may try to assign addresses
		 * before the interface becomes up (or running).
		 * We simply skip DAD in such a case as a work around.
		 * XXX: we should rather mark "tentative" on such addresses,
		 * and do DAD after the interface becomes ready.
		 */
		if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
		    (IFF_UP | IFF_RUNNING))
			return 0;

		return 1;
	}
}

/*
 * if_flags_set(ifp, flags)
 *
 *	Ask ifp to change ifp->if_flags to flags, as if with the
 *	SIOCSIFFLAGS ioctl command.
 *
 *	May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 *	IFNET_LOCK.
*/ int if_flags_set(ifnet_t *ifp, const u_short flags) { int rc; KASSERT(IFNET_LOCKED(ifp)); if (ifp->if_setflags != NULL) rc = (*ifp->if_setflags)(ifp, flags); else { u_short cantflags, chgdflags; struct ifreq ifr; chgdflags = ifp->if_flags ^ flags; cantflags = chgdflags & IFF_CANTCHANGE; if (cantflags != 0) ifp->if_flags ^= cantflags; /* * Traditionally, we do not call if_ioctl after * setting/clearing only IFF_PROMISC if the interface * isn't IFF_UP. Uphold that tradition. */ if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0) return 0; memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = flags & ~IFF_CANTCHANGE; rc = if_ioctl(ifp, SIOCSIFFLAGS, &ifr); if (rc != 0 && cantflags != 0) ifp->if_flags ^= cantflags; } return rc; } /* * if_mcast_op(ifp, cmd, sa) * * Apply a multicast command, SIOCADDMULTI/SIOCDELMULTI, to the * interface. Returns 0 on success, nonzero errno(3) number on * failure. * * May sleep. * * Use this, not if_ioctl, for the multicast commands. */ int if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa) { int rc; struct ifreq ifr; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; default: panic("invalid ifnet multicast command: 0x%lx", cmd); } ifreq_setaddr(cmd, &ifr, sa); rc = if_ioctl(ifp, cmd, &ifr); return rc; } static void sysctl_sndq_setup(struct sysctllog **clog, const char *ifname, struct ifaltq *ifq) { const struct sysctlnode *cnode, *rnode; if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces", SYSCTL_DESCR("Per-interface controls"), NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, ifname, SYSCTL_DESCR("Interface controls"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "sndq", SYSCTL_DESCR("Interface output queue controls"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if 
(sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT, CTLTYPE_INT, "len", SYSCTL_DESCR("Current output queue length"), NULL, 0, &ifq->ifq_len, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "maxlen", SYSCTL_DESCR("Maximum allowed output queue length"), NULL, 0, &ifq->ifq_maxlen, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; if (sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT, CTLTYPE_INT, "drops", SYSCTL_DESCR("Packets dropped due to full output queue"), NULL, 0, &ifq->ifq_drops, 0, CTL_CREATE, CTL_EOL) != 0) goto bad; return; bad: printf("%s: could not attach sysctl nodes\n", ifname); return; } #if defined(INET) || defined(INET6) #define SYSCTL_NET_PKTQ(q, cn, c) \ static int \ sysctl_net_##q##_##cn(SYSCTLFN_ARGS) \ { \ return sysctl_pktq_count(SYSCTLFN_CALL(rnode), q, c); \ } #if defined(INET) static int sysctl_net_ip_pktq_maxlen(SYSCTLFN_ARGS) { return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip_pktq); } SYSCTL_NET_PKTQ(ip_pktq, items, PKTQ_NITEMS) SYSCTL_NET_PKTQ(ip_pktq, drops, PKTQ_DROPS) #endif #if defined(INET6) static int sysctl_net_ip6_pktq_maxlen(SYSCTLFN_ARGS) { return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip6_pktq); } SYSCTL_NET_PKTQ(ip6_pktq, items, PKTQ_NITEMS) SYSCTL_NET_PKTQ(ip6_pktq, drops, PKTQ_DROPS) #endif static void sysctl_net_pktq_setup(struct sysctllog **clog, int pf) { sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL; const char *pfname = NULL, *ipname = NULL; int ipn = 0, qid = 0; switch (pf) { #if defined(INET) case PF_INET: len_func = sysctl_net_ip_pktq_items; maxlen_func = sysctl_net_ip_pktq_maxlen; drops_func = sysctl_net_ip_pktq_drops; pfname = "inet", ipn = IPPROTO_IP; ipname = "ip", qid = IPCTL_IFQ; break; #endif #if defined(INET6) case PF_INET6: len_func = sysctl_net_ip6_pktq_items; maxlen_func = sysctl_net_ip6_pktq_maxlen; drops_func = sysctl_net_ip6_pktq_drops; pfname = "inet6", ipn = IPPROTO_IPV6; ipname = "ip6", 
qid = IPV6CTL_IFQ; break; #endif default: KASSERT(false); } sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, pfname, NULL, NULL, 0, NULL, 0, CTL_NET, pf, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, ipname, NULL, NULL, 0, NULL, 0, CTL_NET, pf, ipn, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "ifq", SYSCTL_DESCR("Protocol input queue controls"), NULL, 0, NULL, 0, CTL_NET, pf, ipn, qid, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_QUAD, "len", SYSCTL_DESCR("Current input queue length"), len_func, 0, NULL, 0, CTL_NET, pf, ipn, qid, IFQCTL_LEN, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "maxlen", SYSCTL_DESCR("Maximum allowed input queue length"), maxlen_func, 0, NULL, 0, CTL_NET, pf, ipn, qid, IFQCTL_MAXLEN, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops", SYSCTL_DESCR("Packets dropped due to full input queue"), drops_func, 0, NULL, 0, CTL_NET, pf, ipn, qid, IFQCTL_DROPS, CTL_EOL); } #endif /* INET || INET6 */ static int if_sdl_sysctl(SYSCTLFN_ARGS) { struct ifnet *ifp; const struct sockaddr_dl *sdl; struct psref psref; int error = 0; if (namelen != 1) return EINVAL; const int bound = curlwp_bind(); ifp = if_get_byindex(name[0], &psref); if (ifp == NULL) { error = ENODEV; goto out0; } sdl = ifp->if_sadl; if (sdl == NULL) { *oldlenp = 0; goto out1; } if (oldp == NULL) { *oldlenp = sdl->sdl_alen; goto out1; } if (*oldlenp >= sdl->sdl_alen) *oldlenp = sdl->sdl_alen; error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen], oldp, *oldlenp); out1: if_put(ifp, &psref); out0: curlwp_bindx(bound); return error; } static void if_sysctl_setup(struct sysctllog **clog) { const struct sysctlnode *rnode = NULL; sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "sdl", SYSCTL_DESCR("Get active link-layer address"), if_sdl_sysctl, 0, NULL, 0, CTL_NET, 
CTL_CREATE, CTL_EOL); #if defined(INET) sysctl_net_pktq_setup(NULL, PF_INET); #endif #ifdef INET6 if (in6_present) sysctl_net_pktq_setup(NULL, PF_INET6); #endif } |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | /* $NetBSD: in6_print.c,v 1.1 2014/12/02 19:36:58 christos Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */
#include <sys/cdefs.h>
#include <sys/types.h>

#ifdef _KERNEL
__KERNEL_RCSID(0, "$NetBSD: in6_print.c,v 1.1 2014/12/02 19:36:58 christos Exp $");
#include <sys/systm.h>
#else
__RCSID("$NetBSD: in6_print.c,v 1.1 2014/12/02 19:36:58 christos Exp $");
#include <stdio.h>
#define s6_addr32 __u6_addr.__u6_addr32
static const uint8_t hexdigits[] = "0123456789abcdef";
#endif
#include <netinet/in.h>

/*
 * Format the IPv6 address *ia6 into buf (capacity len), using the
 * usual "::" zero-run compression and suppressing leading zeros in
 * each 16-bit group.  IPv4-mapped addresses are printed as
 * "::ffff:a.b.c.d" via in_print.  Like snprintf, output is truncated
 * to len (always NUL-terminated when len > 0) while the returned
 * length keeps counting, so the result may exceed len on truncation.
 */
int
in6_print(char *buf, size_t len, const struct in6_addr *ia6)
{
	int i;
	char *bp;
	char *cp, *ecp;
	const uint16_t *a;
	const uint8_t *d;
	/*
	 * dcolon state: 0 = no "::" emitted yet, 1 = currently inside
	 * the compressed zero run, 2 = "::" already used, no more
	 * compression allowed.
	 */
	int dcolon = 0;

	if (IN6_IS_ADDR_V4MAPPED(ia6)) {
		char buf4[INET_ADDRSTRLEN];
		struct in_addr ia = { .s_addr = ia6->s6_addr32[3] };
		in_print(buf4, sizeof(buf4), &ia);
		return snprintf(buf, len, "::ffff:%s", buf4);
	}

	/* Append one char; past the end we only count (cp keeps advancing). */
#define ADDC(c) do { \
		if (cp >= ecp) {\
			cp++; \
		} else \
			*cp++ = (char)(c); \
	} while (/*CONSTCOND*/0)
	/*
	 * Append one hex digit, dropping it again if it is a leading
	 * '0' (bp marks one past where a leading zero would sit).
	 */
#define ADDX(v) do { \
		uint8_t n = hexdigits[(v)]; \
		ADDC(n); \
		if (cp == bp && n == '0') \
			cp--; \
	} while (/*CONSTCOND*/0)

	cp = buf;
	ecp = buf + len;
	/*
	 * NOTE(review): walks the address as eight 16-bit groups via a
	 * uint16_t pointer; groups compare against 0 regardless of
	 * byte order, and digits are emitted per-byte below.
	 */
	a = (const uint16_t *)ia6;
	for (i = 0; i < 8; i++) {
		if (dcolon == 1) {
			if (*a == 0) {
				/* Still inside the compressed zero run. */
				if (i == 7)
					ADDC(':');
				a++;
				continue;
			} else
				dcolon = 2;
		}
		if (*a == 0) {
			if (dcolon == 0 && *(a + 1) == 0) {
				/* Start "::" compression (leading "::" needs
				 * an extra ':'). */
				if (i == 0)
					ADDC(':');
				ADDC(':');
				dcolon = 1;
			} else {
				/* Lone zero group: printed as "0:". */
				ADDC('0');
				ADDC(':');
			}
			a++;
			continue;
		}
		d = (const u_char *)a;
		bp = cp + 1;
		ADDX((u_int)*d >> 4);
		ADDX(*d & 0xf);
		d++;
		ADDX((u_int)*d >> 4);
		ADDX(*d & 0xf);
		ADDC(':');
		a++;
	}
	/* Drop the trailing ':' left by the loop. */
	if (cp > buf)
		--cp;
	/* NUL-terminate, clamping into the buffer if we overran it. */
	if (ecp > buf) {
		if (cp < ecp)
			*cp = '\0';
		else
			*--ecp = '\0';
	}
	return (int)(cp - buf);
}

/*
 * Format a sockaddr_in6 into buf: just the address when the port is
 * zero, otherwise "[address]:port" with the port in host byte order.
 * Returns the snprintf-style length.
 */
int
sin6_print(char *buf, size_t len, const void *v)
{
	const struct sockaddr_in6 *sin6 = v;
	const struct in6_addr *ia6 = &sin6->sin6_addr;
	char abuf[INET6_ADDRSTRLEN];

	if (!sin6->sin6_port)
		return in6_print(buf, len, ia6);
	in6_print(abuf, sizeof(abuf), ia6);
	return snprintf(buf, len, "[%s]:%hu", abuf, ntohs(sin6->sin6_port));
}
|
| 437 437 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 | /* $NetBSD: bus_private.h,v 1.16 2022/01/22 15:10:32 skrll Exp $ */ /* NetBSD: bus.h,v 1.8 2005/03/09 19:04:46 matt Exp */ /*- * Copyright (c) 1996, 1997, 1998, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #if !defined(_X86_BUS_PRIVATE_H_) #define _X86_BUS_PRIVATE_H_ /* * Cookie used for bounce buffers. A pointer to one of these it stashed in * the DMA map. */ struct x86_bus_dma_cookie { int id_flags; /* flags; see below */ /* * Information about the original buffer used during * DMA map syncs. Note that origibuflen is only used * for ID_BUFTYPE_LINEAR. */ void *id_origbuf; /* pointer to orig buffer if bouncing */ bus_size_t id_origbuflen; /* ...and size */ int id_buftype; /* type of buffer */ void *id_bouncebuf; /* pointer to the bounce buffer */ bus_size_t id_bouncebuflen; /* ...and size */ int id_nbouncesegs; /* number of valid bounce segs */ bus_dma_segment_t id_bouncesegs[0]; /* array of bounce buffer physical memory segments */ }; /* id_flags */ #define X86_DMA_MIGHT_NEED_BOUNCE 0x01 /* may need bounce buffers */ #define X86_DMA_HAS_BOUNCE 0x02 /* has bounce buffers */ #define X86_DMA_IS_BOUNCING 0x04 /* is bouncing current xfer */ /* id_buftype */ #define X86_DMA_BUFTYPE_INVALID 0 #define X86_DMA_BUFTYPE_LINEAR 1 #define X86_DMA_BUFTYPE_MBUF 2 #define X86_DMA_BUFTYPE_UIO 3 #define X86_DMA_BUFTYPE_RAW 4 /* * default address translation macros, which are appropriate where * paddr_t == bus_addr_t. 
*/ #if !defined(_BUS_PHYS_TO_BUS) #define _BUS_PHYS_TO_BUS(pa) ((bus_addr_t)(pa)) #endif /* !defined(_BUS_PHYS_TO_BUS) */ #if !defined(_BUS_BUS_TO_PHYS) #define _BUS_BUS_TO_PHYS(ba) ((paddr_t)(ba)) #endif /* !defined(_BUS_BUS_TO_PHYS) */ #if !defined(_BUS_VM_PAGE_TO_BUS) #define _BUS_VM_PAGE_TO_BUS(pg) _BUS_PHYS_TO_BUS(VM_PAGE_TO_PHYS(pg)) #endif /* !defined(_BUS_VM_PAGE_TO_BUS) */ #if !defined(_BUS_BUS_TO_VM_PAGE) #define _BUS_BUS_TO_VM_PAGE(ba) PHYS_TO_VM_PAGE(ba) #endif /* !defined(_BUS_BUS_TO_VM_PAGE) */ #if !defined(_BUS_PMAP_ENTER) #define _BUS_PMAP_ENTER(pmap, va, ba, prot, flags) \ pmap_enter(pmap, va, ba, prot, flags) #endif /* _BUS_PMAP_ENTER */ #if !defined(_BUS_VIRT_TO_BUS) #include <uvm/uvm_extern.h> static __inline bus_addr_t _bus_virt_to_bus(struct pmap *, vaddr_t); #define _BUS_VIRT_TO_BUS(pm, va) _bus_virt_to_bus((pm), (va)) static __inline bus_addr_t _bus_virt_to_bus(struct pmap *pm, vaddr_t va) { paddr_t pa; if (!pmap_extract(pm, va, &pa)) { panic("_bus_virt_to_bus"); } return _BUS_PHYS_TO_BUS(pa); } #endif /* !defined(_BUS_VIRT_TO_BUS) */ /* * by default, the end address of RAM visible on bus is the same as the * largest physical address. */ #ifndef _BUS_AVAIL_END #define _BUS_AVAIL_END (avail_end - 1) #endif struct x86_bus_dma_tag { bus_dma_tag_t bdt_super; /* bdt_present: bitmap indicating overrides present (1) in *this* tag, * bdt_exists: bitmap indicating overrides present (1) in *this* tag * or in an ancestor's tag (follow bdt_super to ancestors) */ uint64_t bdt_present; uint64_t bdt_exists; const struct bus_dma_overrides *bdt_ov; void *bdt_ctx; /* * The `bounce threshold' is checked while we are loading * the DMA map. If the physical address of the segment * exceeds the threshold, an error will be returned. The * caller can then take whatever action is necessary to * bounce the transfer. If this value is 0, it will be * ignored. 
*/ int _tag_needs_free; bus_addr_t _bounce_thresh; bus_addr_t _bounce_alloc_lo; bus_addr_t _bounce_alloc_hi; int (*_may_bounce)(bus_dma_tag_t, bus_dmamap_t, int, int *); }; #endif /* !defined(_X86_BUS_PRIVATE_H_) */ |
| 7 6 6 6 6 7 7 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 | /* $NetBSD: spkr.c,v 1.23 2022/03/31 19:30:15 pgoyette Exp $ */ /* * Copyright (c) 1990 Eric S. Raymond (esr@snark.thyrsus.com) * Copyright (c) 1990 Andrew A. Chernov (ache@astral.msk.su) * Copyright (c) 1990 Lennart Augustsson (lennart@augustsson.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Eric S. Raymond * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * spkr.c -- device driver for console speaker on 80386 * * v1.1 by Eric S. Raymond (esr@snark.thyrsus.com) Feb 1990 * modified for 386bsd by Andrew A. Chernov <ache@astral.msk.su> * 386bsd only clean version, all SYSV stuff removed * use hz value from param.c */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: spkr.c,v 1.23 2022/03/31 19:30:15 pgoyette Exp $"); #if defined(_KERNEL_OPT) #include "wsmux.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/errno.h> #include <sys/device.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/uio.h> #include <sys/proc.h> #include <sys/ioctl.h> #include <sys/conf.h> #include <sys/bus.h> #include <dev/spkrio.h> #include <dev/spkrvar.h> #include <dev/wscons/wsconsio.h> #include <dev/wscons/wsbellvar.h> #include <dev/wscons/wsbellmuxvar.h> #include "ioconf.h" dev_type_open(spkropen); dev_type_close(spkrclose); dev_type_write(spkrwrite); dev_type_ioctl(spkrioctl); const struct cdevsw spkr_cdevsw = { .d_open = spkropen, .d_close = spkrclose, .d_read = noread, .d_write = spkrwrite, .d_ioctl = spkrioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER }; static void playinit(struct spkr_softc *); static void playtone(struct spkr_softc *, int, int, int); static void playstring(struct spkr_softc *, const char *, size_t); /**************** PLAY STRING INTERPRETER BEGINS HERE 
********************** * * Play string interpretation is modelled on IBM BASIC 2.0's PLAY statement; * M[LNS] are missing and the ~ synonym and octave-tracking facility is added. * String play is not interruptible except possibly at physical block * boundaries. */ /* * Magic number avoidance... */ #define SECS_PER_MIN 60 /* seconds per minute */ #define WHOLE_NOTE 4 /* quarter notes per whole note */ #define MIN_VALUE 64 /* the most we can divide a note by */ #define DFLT_VALUE 4 /* default value (quarter-note) */ #define FILLTIME 8 /* for articulation, break note in parts */ #define STACCATO 6 /* 6/8 = 3/4 of note is filled */ #define NORMAL 7 /* 7/8ths of note interval is filled */ #define LEGATO 8 /* all of note interval is filled */ #define DFLT_OCTAVE 4 /* default octave */ #define MIN_TEMPO 32 /* minimum tempo */ #define DFLT_TEMPO 120 /* default tempo */ #define MAX_TEMPO 255 /* max tempo */ #define NUM_MULT 3 /* numerator of dot multiplier */ #define DENOM_MULT 2 /* denominator of dot multiplier */ /* letter to half-tone: A B C D E F G */ static const int notetab[8] = { 9, 11, 0, 2, 4, 5, 7 }; /* * This is the American Standard A440 Equal-Tempered scale with frequencies * rounded to nearest integer. Thank Goddess for the good ol' CRC Handbook... * our octave 0 is standard octave 2. 
*/ #define OCTAVE_NOTES 12 /* semitones per octave */ static const int pitchtab[] = { /* C C# D D# E F F# G G# A A# B*/ /* 0 */ 65, 69, 73, 78, 82, 87, 93, 98, 103, 110, 117, 123, /* 1 */ 131, 139, 147, 156, 165, 175, 185, 196, 208, 220, 233, 247, /* 2 */ 262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494, /* 3 */ 523, 554, 587, 622, 659, 698, 740, 784, 831, 880, 932, 988, /* 4 */ 1047, 1109, 1175, 1245, 1319, 1397, 1480, 1568, 1661, 1760, 1865, 1975, /* 5 */ 2093, 2217, 2349, 2489, 2637, 2794, 2960, 3136, 3322, 3520, 3729, 3951, /* 6 */ 4186, 4435, 4698, 4978, 5274, 5588, 5920, 6272, 6644, 7040, 7459, 7902, }; #define NOCTAVES (int)(__arraycount(pitchtab) / OCTAVE_NOTES) static void playinit(struct spkr_softc *sc) { sc->sc_octave = DFLT_OCTAVE; sc->sc_whole = (hz * SECS_PER_MIN * WHOLE_NOTE) / DFLT_TEMPO; sc->sc_fill = NORMAL; sc->sc_value = DFLT_VALUE; sc->sc_octtrack = false; sc->sc_octprefix = true;/* act as though there was an initial O(n) */ } #define SPKRPRI (PZERO - 1) /* Rest for given number of ticks */ static void rest(struct spkr_softc *sc, int ticks) { #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: rest for %d ticks\n", __func__, ticks); #endif /* SPKRDEBUG */ KASSERT(ticks > 0); tsleep(sc->sc_dev, SPKRPRI | PCATCH, device_xname(sc->sc_dev), ticks); } /* * Play tone of proper duration for current rhythm signature. * note indicates "O0C" = 0, "O0C#" = 1, "O0D" = 2, ... , and * -1 indiacates a rest. * val indicates the length, "L4" = 4, "L8" = 8. * sustain indicates the number of subsequent dots that extend the sound * by one a half. 
*/ static void playtone(struct spkr_softc *sc, int note, int val, int sustain) { int whole; int total; int sound; int silence; /* this weirdness avoids floating-point arithmetic */ whole = sc->sc_whole; for (; sustain; sustain--) { whole *= NUM_MULT; val *= DENOM_MULT; } /* Total length in tick */ total = whole / val; if (note == -1) { #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: rest for %d ticks\n", __func__, total); #endif /* SPKRDEBUG */ if (total != 0) rest(sc, total); return; } /* * Rest 1/8 (if NORMAL) or 3/8 (if STACCATO) in tick. * silence should be rounded down. */ silence = total * (FILLTIME - sc->sc_fill) / FILLTIME; sound = total - silence; #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: note %d for %d ticks, rest for %d ticks\n", __func__, note, sound, silence); #endif /* SPKRDEBUG */ if (sound != 0) (*sc->sc_tone)(sc->sc_dev, pitchtab[note], sound); if (silence != 0) rest(sc, silence); } /* interpret and play an item from a notation string */ static void playstring(struct spkr_softc *sc, const char *cp, size_t slen) { int pitch; int lastpitch = OCTAVE_NOTES * DFLT_OCTAVE; #define GETNUM(cp, v) \ for (v = 0; slen > 0 && isdigit((unsigned char)cp[1]); ) { \ v = v * 10 + (*++cp - '0'); \ slen--; \ } for (; slen--; cp++) { int sustain, timeval, tempo; char c = toupper((unsigned char)*cp); #ifdef SPKRDEBUG if (0x20 <= c && c < 0x7f) { device_printf(sc->sc_dev, "%s: '%c'\n", __func__, c); } else { device_printf(sc->sc_dev, "%s: (0x%x)\n", __func__, c); } #endif /* SPKRDEBUG */ switch (c) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': /* compute pitch */ pitch = notetab[c - 'A'] + sc->sc_octave * OCTAVE_NOTES; /* this may be followed by an accidental sign */ if (slen > 0 && (cp[1] == '#' || cp[1] == '+')) { ++pitch; ++cp; slen--; } else if (slen > 0 && cp[1] == '-') { --pitch; ++cp; slen--; } /* * If octave-tracking mode is on, and there has been no * octave- setting prefix, find the version of the * current letter note * closest 
to the last * regardless of octave. */ if (sc->sc_octtrack && !sc->sc_octprefix) { int d = abs(pitch - lastpitch); if (d > abs(pitch + OCTAVE_NOTES - lastpitch)) { if (sc->sc_octave < NOCTAVES - 1) { ++sc->sc_octave; pitch += OCTAVE_NOTES; } } if (d > abs(pitch - OCTAVE_NOTES - lastpitch)) { if (sc->sc_octave > 0) { --sc->sc_octave; pitch -= OCTAVE_NOTES; } } } sc->sc_octprefix = false; lastpitch = pitch; /* * ...which may in turn be followed by an override * time value */ GETNUM(cp, timeval); if (timeval <= 0 || timeval > MIN_VALUE) timeval = sc->sc_value; /* ...and/or sustain dots */ for (sustain = 0; slen > 0 && cp[1] == '.'; cp++) { slen--; sustain++; } /* time to emit the actual tone */ playtone(sc, pitch, timeval, sustain); break; case 'O': if (slen > 0 && (cp[1] == 'N' || cp[1] == 'n')) { sc->sc_octprefix = sc->sc_octtrack = false; ++cp; slen--; } else if (slen > 0 && (cp[1] == 'L' || cp[1] == 'l')) { sc->sc_octtrack = true; ++cp; slen--; } else { GETNUM(cp, sc->sc_octave); if (sc->sc_octave >= NOCTAVES) sc->sc_octave = DFLT_OCTAVE; sc->sc_octprefix = true; } break; case '>': if (sc->sc_octave < NOCTAVES - 1) sc->sc_octave++; sc->sc_octprefix = true; break; case '<': if (sc->sc_octave > 0) sc->sc_octave--; sc->sc_octprefix = true; break; case 'N': GETNUM(cp, pitch); for (sustain = 0; slen > 0 && cp[1] == '.'; cp++) { slen--; sustain++; } playtone(sc, pitch - 1, sc->sc_value, sustain); break; case 'L': GETNUM(cp, sc->sc_value); if (sc->sc_value <= 0 || sc->sc_value > MIN_VALUE) sc->sc_value = DFLT_VALUE; break; case 'P': case '~': /* this may be followed by an override time value */ GETNUM(cp, timeval); if (timeval <= 0 || timeval > MIN_VALUE) timeval = sc->sc_value; for (sustain = 0; slen > 0 && cp[1] == '.'; cp++) { slen--; sustain++; } playtone(sc, -1, timeval, sustain); break; case 'T': GETNUM(cp, tempo); if (tempo < MIN_TEMPO || tempo > MAX_TEMPO) tempo = DFLT_TEMPO; sc->sc_whole = (hz * SECS_PER_MIN * WHOLE_NOTE) / tempo; break; case 'M': if (slen > 0 
&& (cp[1] == 'N' || cp[1] == 'n')) { sc->sc_fill = NORMAL; ++cp; slen--; } else if (slen > 0 && (cp[1] == 'L' || cp[1] == 'l')) { sc->sc_fill = LEGATO; ++cp; slen--; } else if (slen > 0 && (cp[1] == 'S' || cp[1] == 's')) { sc->sc_fill = STACCATO; ++cp; slen--; } break; } } } /******************* UNIX DRIVER HOOKS BEGIN HERE **************************/ #define spkrenter(d) device_lookup_private(&spkr_cd, d) /* * Attaches spkr. Specify tone function with the following specification: * * void * tone(device_t self, u_int pitch, u_int tick) * plays a beep with specified parameters. * The argument 'pitch' specifies the pitch of a beep in Hz. The argument * 'tick' specifies the period of a beep in tick(9). This function waits * to finish playing the beep and then halts it. * If the pitch is zero, it halts all sound if any (for compatibility * with the past confused specifications, but there should be no sound at * this point). And it returns immediately, without waiting ticks. So * you cannot use this as a rest. * If the tick is zero, it returns immediately. */ void spkr_attach(device_t self, void (*tone)(device_t, u_int, u_int)) { struct spkr_softc *sc = device_private(self); #ifdef SPKRDEBUG aprint_debug("%s: entering for unit %d\n", __func__, device_unit(self)); #endif /* SPKRDEBUG */ sc->sc_dev = self; sc->sc_tone = tone; sc->sc_inbuf = NULL; sc->sc_wsbelldev = NULL; spkr_rescan(self, NULL, NULL); } int spkr_detach(device_t self, int flags) { struct spkr_softc *sc = device_private(self); int rc; #ifdef SPKRDEBUG aprint_debug("%s: entering for unit %d\n", __func__, device_unit(self)); #endif /* SPKRDEBUG */ if (sc == NULL) return ENXIO; /* XXXNS If speaker never closes, we cannot complete the detach. 
*/ while ((flags & DETACH_FORCE) != 0 && sc->sc_inbuf != NULL) kpause("spkrwait", TRUE, 1, NULL); if (sc->sc_inbuf != NULL) return EBUSY; rc = config_detach_children(self, flags); return rc; } /* ARGSUSED */ int spkr_rescan(device_t self, const char *iattr, const int *locators) { #if NWSMUX > 0 struct spkr_softc *sc = device_private(self); struct wsbelldev_attach_args a; if (sc->sc_wsbelldev == NULL) { a.accesscookie = sc; sc->sc_wsbelldev = config_found(self, &a, wsbelldevprint, CFARGS_NONE); } #endif return 0; } int spkr_childdet(device_t self, device_t child) { struct spkr_softc *sc = device_private(self); if (sc->sc_wsbelldev == child) sc->sc_wsbelldev = NULL; return 0; } int spkropen(dev_t dev, int flags, int mode, struct lwp *l) { struct spkr_softc *sc = spkrenter(minor(dev)); #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: entering\n", __func__); #endif /* SPKRDEBUG */ if (sc == NULL) return ENXIO; if (sc->sc_inbuf != NULL) return EBUSY; sc->sc_inbuf = malloc(DEV_BSIZE, M_DEVBUF, M_WAITOK); playinit(sc); return 0; } int spkrwrite(dev_t dev, struct uio *uio, int flags) { struct spkr_softc *sc = spkrenter(minor(dev)); #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: entering with length = %zu\n", __func__, uio->uio_resid); #endif /* SPKRDEBUG */ if (sc == NULL) return ENXIO; if (sc->sc_inbuf == NULL) return EINVAL; size_t n = uimin(DEV_BSIZE, uio->uio_resid); int error = uiomove(sc->sc_inbuf, n, uio); if (error) return error; playstring(sc, sc->sc_inbuf, n); return 0; } int spkrclose(dev_t dev, int flags, int mode, struct lwp *l) { struct spkr_softc *sc = spkrenter(minor(dev)); #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: entering\n", __func__); #endif /* SPKRDEBUG */ if (sc == NULL) return ENXIO; if (sc->sc_inbuf == NULL) return EINVAL; sc->sc_tone(sc->sc_dev, 0, 0); free(sc->sc_inbuf, M_DEVBUF); sc->sc_inbuf = NULL; return 0; } /* * Play tone specified by tp. * tp->frequency is the frequency (0 means a rest). 
* tp->duration is the length in tick (returns immediately if 0). */ static void playonetone(struct spkr_softc *sc, tone_t *tp) { if (tp->duration <= 0) return; if (tp->frequency == 0) rest(sc, tp->duration); else (*sc->sc_tone)(sc->sc_dev, tp->frequency, tp->duration); } int spkrioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { struct spkr_softc *sc = spkrenter(minor(dev)); tone_t *tp; tone_t ttp; int error; #ifdef SPKRDEBUG device_printf(sc->sc_dev, "%s: entering with cmd = %lx\n", __func__, cmd); #endif /* SPKRDEBUG */ if (sc == NULL) return ENXIO; if (sc->sc_inbuf == NULL) return EINVAL; switch (cmd) { case SPKRTONE: playonetone(sc, data); return 0; case SPKRTUNE: for (tp = *(void **)data;; tp++) { error = copyin(tp, &ttp, sizeof(tone_t)); if (error) return(error); if (ttp.duration == 0) break; playonetone(sc, &ttp); } return 0; case SPKRGETVOL: if (data != NULL) *(u_int *)data = sc->sc_vol; return 0; case SPKRSETVOL: if (data != NULL && *(u_int *)data <= 100) sc->sc_vol = *(u_int *)data; return 0; default: return ENOTTY; } } #ifdef _MODULE #include "ioconf.c" #endif MODULE(MODULE_CLASS_DRIVER, spkr, "audio" /* and/or pcppi */ ); int spkr_modcmd(modcmd_t cmd, void *arg) { int error = 0; #ifdef _MODULE devmajor_t bmajor, cmajor; #endif switch(cmd) { case MODULE_CMD_INIT: #ifdef _MODULE bmajor = cmajor = -1; error = devsw_attach(spkr_cd.cd_name, NULL, &bmajor, &spkr_cdevsw, &cmajor); if (error) break; error = config_init_component(cfdriver_ioconf_spkr, cfattach_ioconf_spkr, cfdata_ioconf_spkr); if (error) { devsw_detach(NULL, &spkr_cdevsw); } #endif break; case MODULE_CMD_FINI: #ifdef _MODULE error = config_fini_component(cfdriver_ioconf_spkr, cfattach_ioconf_spkr, cfdata_ioconf_spkr); if (error == 0) devsw_detach(NULL, &spkr_cdevsw); #endif break; default: error = ENOTTY; break; } return error; } |
| 8 8 8 8 8 8 5 4 5 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | /* $NetBSD: strlcat.c,v 1.4 2013/01/23 07:57:27 matt Exp $ */ /* $OpenBSD: strlcat.c,v 1.10 2003/04/12 21:56:39 millert Exp $ */ /* * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND TODD C. MILLER DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL TODD C. MILLER BE LIABLE * FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #if !defined(_KERNEL) && !defined(_STANDALONE) #if HAVE_NBTOOL_CONFIG_H #include "nbtool_config.h" #endif #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) __RCSID("$NetBSD: strlcat.c,v 1.4 2013/01/23 07:57:27 matt Exp $"); #endif /* LIBC_SCCS and not lint */ #ifdef _LIBC #include "namespace.h" #endif #include <sys/types.h> #include <assert.h> #include <string.h> #ifdef _LIBC # ifdef __weak_alias __weak_alias(strlcat, _strlcat) # endif #endif #else #include <lib/libkern/libkern.h> #endif /* !_KERNEL && !_STANDALONE */ #if !HAVE_STRLCAT /* * Appends src to string dst of size siz (unlike strncat, siz is the * full size of dst, not space left). At most siz-1 characters * will be copied. 
Always NUL terminates (unless siz <= strlen(dst)). * Returns strlen(src) + MIN(siz, strlen(initial dst)). * If retval >= siz, truncation occurred. */ size_t strlcat(char *dst, const char *src, size_t siz) { #if 1 char *d = dst; const char *s = src; size_t n = siz; size_t dlen; _DIAGASSERT(dst != NULL); _DIAGASSERT(src != NULL); /* Find the end of dst and adjust bytes left but don't go past end */ while (n-- != 0 && *d != '\0') d++; dlen = d - dst; n = siz - dlen; if (n == 0) return(dlen + strlen(s)); while (*s != '\0') { if (n != 1) { *d++ = *s; n--; } s++; } *d = '\0'; return(dlen + (s - src)); /* count does not include NUL */ #else _DIAGASSERT(dst != NULL); _DIAGASSERT(src != NULL); /* * Find length of string in dst (maxing out at siz). */ size_t dlen = strnlen(dst, siz); /* * Copy src into any remaining space in dst (truncating if needed). * Note strlcpy(dst, src, 0) returns strlen(src). */ return dlen + strlcpy(dst + dlen, src, siz - dlen); #endif } #endif |
| 1 50 50 47 3 1 2 49 17 17 17 17 17 17 12 12 12 12 15 5 5 5 11 11 10 10 6 6 4 4 17 17 12 14 2 1 1 2 2 19 19 19 19 3 4 15 17 3 3 3 1 17 2 58 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 
1393 | /* $NetBSD: udp_usrreq.c,v 1.261 2021/02/19 14:51:59 christos Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ /* * UDP protocol implementation. * Per RFC 768, August, 1980. 
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.261 2021/02/19 14:51:59 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_inet.h"
#include "opt_ipsec.h"
#include "opt_inet_csum.h"
#include "opt_mbuftrace.h"
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/once.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/udp_private.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip6_private.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/udp6_var.h>
#include <netinet6/udp6_private.h>
#endif

#ifndef INET6
/* NOTE(review): ip6.h pulled in even for INET-only kernels — presumably
 * for shared definitions used below; confirm before removing. */
#include <netinet/ip6.h>
#endif

#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/esp.h>
#endif

/* Compute/verify UDP checksums; exported via sysctl "checksum" below. */
int	udpcksum = 1;
/* Also verify checksums on loopback interfaces; sysctl "do_loopback_cksum". */
int	udp_do_loopback_cksum = 0;

/* Table of all IPv4 UDP protocol control blocks. */
struct	inpcbtable udbtable;

/* Per-CPU UDP statistics, UDP_NSTATS counters wide. */
percpu_t *udpstat_percpu;

#ifdef INET
#ifdef IPSEC
static int udp4_espinudp(struct mbuf **, int);
#endif
static void udp4_sendup(struct mbuf *, int, struct sockaddr *,
    struct socket *);
static int udp4_realinput(struct sockaddr_in *, struct sockaddr_in *,
    struct mbuf **, int);
static int udp4_input_checksum(struct mbuf *, const struct udphdr *, int,
    int);
#endif

#ifdef INET
static void udp_notify (struct inpcb *, int);
#endif

#ifndef UDBHASHSIZE
#define UDBHASHSIZE	128
#endif
/* PCB hash table size; tunable at build time via UDBHASHSIZE. */
int	udbhashsize = UDBHASHSIZE;

/*
 * For send - really max datagram size; for receive - 40 1K datagrams.
 */
static int udp_sendspace = 9216;
static int udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));

#ifdef MBUFTRACE
/* mbuf ownership tracking: whole protocol, receive path, transmit path. */
struct mowner udp_mowner = MOWNER_INIT("udp", "");
struct mowner udp_rx_mowner = MOWNER_INIT("udp", "rx");
struct mowner udp_tx_mowner = MOWNER_INIT("udp", "tx");
#endif

#ifdef UDP_CSUM_COUNTERS
#include <sys/device.h>

#if defined(INET)
/* Event counters for the possible checksum-verification outcomes. */
struct evcnt udp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum bad");
struct evcnt udp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum ok");
struct evcnt udp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum data");
struct evcnt udp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "swcsum");

EVCNT_ATTACH_STATIC(udp_hwcsum_bad);
EVCNT_ATTACH_STATIC(udp_hwcsum_ok);
EVCNT_ATTACH_STATIC(udp_hwcsum_data);
EVCNT_ATTACH_STATIC(udp_swcsum);
#endif /* defined(INET) */

#define UDP_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++
#else
#define UDP_CSUM_COUNTER_INCR(ev)	/* nothing */
#endif /* UDP_CSUM_COUNTERS */

static void sysctl_net_inet_udp_setup(struct sysctllog **);

/*
 * One-time UDP initialization: set up the PCB table, allocate the
 * per-CPU statistics block, and register mbuf owners.  Invoked
 * exactly once via RUN_ONCE() from udp_init_common().
 */
static int
do_udpinit(void)
{

	in_pcbinit(&udbtable, udbhashsize, udbhashsize);
	udpstat_percpu = percpu_alloc(sizeof(uint64_t) * UDP_NSTATS);

	MOWNER_ATTACH(&udp_tx_mowner);
	MOWNER_ATTACH(&udp_rx_mowner);
	MOWNER_ATTACH(&udp_mowner);

	return 0;
}

/*
 * Idempotent initialization entry point: may be called more than
 * once; only the first call performs the work (ONCE_DECL/RUN_ONCE).
 */
void
udp_init_common(void)
{
	static ONCE_DECL(doudpinit);

	RUN_ONCE(&doudpinit, do_udpinit);
}

/*
 * Protocol initialization hook: create the sysctl subtree and run
 * the common one-time setup.
 */
void
udp_init(void)
{

	sysctl_net_inet_udp_setup(NULL);
	udp_init_common();
}

/*
 * Checksum extended UDP header and data.
 * Dispatch on address family; returns 0 if the checksum is good,
 * -1 if it is bad.
 */
int
udp_input_checksum(int af, struct mbuf *m, const struct udphdr *uh,
    int iphlen, int len)
{

	switch (af) {
#ifdef INET
	case AF_INET:
		return udp4_input_checksum(m, uh, iphlen, len);
#endif
#ifdef INET6
	case AF_INET6:
		return udp6_input_checksum(m, uh, iphlen, len);
#endif
	}
#ifdef DIAGNOSTIC
	panic("udp_input_checksum: unknown af %d", af);
#endif
	/* NOTREACHED */
	return -1;
}

#ifdef INET

/*
 * Checksum extended UDP header and data.
 */
static int
udp4_input_checksum(struct mbuf *m, const struct udphdr *uh,
    int iphlen, int len)
{

	/*
	 * XXX it's better to record and check if this mbuf is
	 * already checked.
	 */

	/* A transmitted sum of 0 means "no checksum" for IPv4 UDP. */
	if (uh->uh_sum == 0)
		return 0;

	/*
	 * Mask the packet's checksum flags down to what the receiving
	 * interface claims to support, plus the result bits, and act
	 * on the resulting hardware-offload state.
	 */
	switch (m->m_pkthdr.csum_flags &
	    ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_UDPv4) |
	    M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
	case M_CSUM_UDPv4|M_CSUM_TCP_UDP_BAD:
		/* Hardware verified the checksum and found it bad. */
		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_bad);
		goto badcsum;

	case M_CSUM_UDPv4|M_CSUM_DATA: {
		/*
		 * Hardware produced a raw data sum; fold in the
		 * pseudo-header ourselves when the hardware did not.
		 */
		u_int32_t hw_csum = m->m_pkthdr.csum_data;
		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_data);
		if (m->m_pkthdr.csum_flags & M_CSUM_NO_PSEUDOHDR) {
			const struct ip *ip = mtod(m, const struct ip *);

			hw_csum = in_cksum_phdr(ip->ip_src.s_addr,
			    ip->ip_dst.s_addr,
			    htons(hw_csum + len + IPPROTO_UDP));
		}
		if ((hw_csum ^ 0xffff) != 0)
			goto badcsum;
		break;
	}

	case M_CSUM_UDPv4:
		/* Checksum was okay. */
		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_ok);
		break;

	default:
		/*
		 * Need to compute it ourselves.  Maybe skip checksum
		 * on loopback interfaces.
		 */
		if (__predict_true(!(m_get_rcvif_NOMPSAFE(m)->if_flags &
		    IFF_LOOPBACK) || udp_do_loopback_cksum)) {
			UDP_CSUM_COUNTER_INCR(&udp_swcsum);
			if (in4_cksum(m, IPPROTO_UDP, iphlen, len) != 0)
				goto badcsum;
		}
		break;
	}

	return 0;

badcsum:
	UDP_STATINC(UDP_STAT_BADSUM);
	return -1;
}

/*
 * IPv4 UDP input, called from the IP layer.  "off" is the offset of
 * the UDP header within the packet; "proto" is not used in this
 * function's body.  Consumes the mbuf on all paths.
 */
void
udp_input(struct mbuf *m, int off, int proto)
{
	struct sockaddr_in src, dst;
	struct ip *ip;
	struct udphdr *uh;
	int iphlen = off;
	int len;
	int n;
	u_int16_t ip_len;

	MCLAIM(m, &udp_rx_mowner);
	UDP_STATINC(UDP_STAT_IPACKETS);

	/*
	 * Get IP and UDP header together in first mbuf.
	 */
	ip = mtod(m, struct ip *);
	M_REGION_GET(uh, struct udphdr *, m, iphlen, sizeof(struct udphdr));
	if (uh == NULL) {
		UDP_STATINC(UDP_STAT_HDROPS);
		return;
	}

	/*
	 * Enforce alignment requirements that are violated in
	 * some cases, see kern/50766 for details.
 */
	if (ACCESSIBLE_POINTER(uh, struct udphdr) == 0) {
		/* Realign by copying the headers up into a fresh mbuf. */
		m = m_copyup(m, iphlen + sizeof(struct udphdr), 0);
		if (m == NULL) {
			UDP_STATINC(UDP_STAT_HDROPS);
			return;
		}
		ip = mtod(m, struct ip *);
		uh = (struct udphdr *)(mtod(m, char *) + iphlen);
	}
	KASSERT(ACCESSIBLE_POINTER(uh, struct udphdr));

	/* destination port of 0 is illegal, based on RFC768. */
	if (uh->uh_dport == 0)
		goto bad;

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 */
	ip_len = ntohs(ip->ip_len);
	len = ntohs((u_int16_t)uh->uh_ulen);
	if (len < sizeof(struct udphdr)) {
		UDP_STATINC(UDP_STAT_BADLEN);
		goto bad;
	}
	if (ip_len != iphlen + len) {
		if (ip_len < iphlen + len) {
			UDP_STATINC(UDP_STAT_BADLEN);
			goto bad;
		}
		/* Trim trailing bytes beyond the UDP length. */
		m_adj(m, iphlen + len - ip_len);
	}

	/*
	 * Checksum extended UDP header and data.
	 */
	if (udp4_input_checksum(m, uh, iphlen, len))
		goto badcsum;

	/* construct source and dst sockaddrs. */
	sockaddr_in_init(&src, &ip->ip_src, uh->uh_sport);
	sockaddr_in_init(&dst, &ip->ip_dst, uh->uh_dport);

	/* n = number of sockets the datagram was delivered to. */
	if ((n = udp4_realinput(&src, &dst, &m, iphlen)) == -1) {
		UDP_STATINC(UDP_STAT_HDROPS);
		return;
	}
	if (m == NULL) {
		/*
		 * packet has been processed by ESP stuff -
		 * e.g. dropped NAT-T-keep-alive-packet ...
		 */
		return;
	}

	/* Re-fetch headers: udp4_realinput() may have replaced the mbuf. */
	ip = mtod(m, struct ip *);
	M_REGION_GET(uh, struct udphdr *, m, iphlen, sizeof(struct udphdr));
	if (uh == NULL) {
		UDP_STATINC(UDP_STAT_HDROPS);
		return;
	}
	/* XXX Re-enforce alignment?
 */

#ifdef INET6
	/*
	 * Also offer the datagram to v4-mapped IPv6 sockets, for
	 * multicast destinations or when no IPv4 socket matched.
	 */
	if (IN_MULTICAST(ip->ip_dst.s_addr) || n == 0) {
		struct sockaddr_in6 src6, dst6;

		memset(&src6, 0, sizeof(src6));
		src6.sin6_family = AF_INET6;
		src6.sin6_len = sizeof(struct sockaddr_in6);
		in6_in_2_v4mapin6(&ip->ip_src, &src6.sin6_addr);
		src6.sin6_port = uh->uh_sport;
		memset(&dst6, 0, sizeof(dst6));
		dst6.sin6_family = AF_INET6;
		dst6.sin6_len = sizeof(struct sockaddr_in6);
		in6_in_2_v4mapin6(&ip->ip_dst, &dst6.sin6_addr);
		dst6.sin6_port = uh->uh_dport;

		n += udp6_realinput(AF_INET, &src6, &dst6, &m, iphlen);
	}
#endif

	if (n == 0) {
		/* No listener: count it; for unicast, send port unreach. */
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			UDP_STATINC(UDP_STAT_NOPORTBCAST);
			goto bad;
		}
		UDP_STATINC(UDP_STAT_NOPORT);
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		m = NULL;	/* icmp_error() disposed of the mbuf */
	}

bad:
	if (m)
		m_freem(m);
	return;

badcsum:
	m_freem(m);
}
#endif

#ifdef INET
/*
 * Deliver a copy of datagram "m" (payload starting at "off") to
 * socket "so", tagging it with source address "src" and any control
 * information the socket has requested.  The original mbuf is left
 * untouched for further deliveries.
 */
static void
udp4_sendup(struct mbuf *m, int off /* offset of data portion */,
    struct sockaddr *src, struct socket *so)
{
	struct mbuf *opts = NULL;
	struct mbuf *n;
	struct inpcb *inp;

	KASSERT(so != NULL);
	KASSERT(so->so_proto->pr_domain->dom_family == AF_INET);
	inp = sotoinpcb(so);
	KASSERT(inp != NULL);

#if defined(IPSEC)
	/* Drop (with admin-prohibited ICMP) if IPsec policy rejects it. */
	if (ipsec_used && ipsec_in_reject(m, inp)) {
		if ((n = m_copypacket(m, M_DONTWAIT)) != NULL)
			icmp_error(n, ICMP_UNREACH,
			    ICMP_UNREACH_ADMIN_PROHIBIT, 0, 0);
		return;
	}
#endif

	if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
		if (inp->inp_flags & INP_CONTROLOPTS ||
		    SOOPT_TIMESTAMP(so->so_options)) {
			struct ip *ip = mtod(n, struct ip *);
			ip_savecontrol(inp, &opts, ip, n);
		}

		m_adj(n, off);
		if (sbappendaddr(&so->so_rcv, src, n, opts) == 0) {
			/* Receive buffer full: drop the copy. */
			m_freem(n);
			if (opts)
				m_freem(opts);
			UDP_STATINC(UDP_STAT_FULLSOCK);
			soroverflow(so);
		} else
			sorwakeup(so);
	}
}
#endif

#ifdef INET
/*
 * Deliver an inbound IPv4 UDP datagram to every matching PCB.
 * Returns the number of sockets the datagram was delivered to, or
 * -1 on error (in which case *mp may have been freed or replaced).
 */
static int
udp4_realinput(struct sockaddr_in *src, struct sockaddr_in *dst,
    struct mbuf **mp, int off /* offset of udphdr */)
{
	u_int16_t *sport, *dport;
	int rcvcnt;
	struct in_addr *src4, *dst4;
	struct inpcb_hdr *inph;
	struct inpcb *inp;
	struct mbuf *m = *mp;

	rcvcnt = 0;
	off += sizeof(struct udphdr);
	/* now, offset of payload */

	if (src->sin_family != AF_INET || dst->sin_family != AF_INET)
		goto bad;

	src4 = &src->sin_addr;
	sport = &src->sin_port;
	dst4 = &dst->sin_addr;
	dport = &dst->sin_port;

	if (IN_MULTICAST(dst4->s_addr) ||
	    in_broadcast(*dst4, m_get_rcvif_NOMPSAFE(m))) {
		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * KAME note: traditionally we dropped udpiphdr from mbuf here.
		 * we need udpiphdr for IPsec processing so we do that later.
		 */
		/*
		 * Locate pcb(s) for datagram.  Linear walk of the whole
		 * PCB queue; every matching socket gets a copy.
		 */
		TAILQ_FOREACH(inph, &udbtable.inpt_queue, inph_queue) {
			inp = (struct inpcb *)inph;
			if (inp->inp_af != AF_INET)
				continue;

			if (inp->inp_lport != *dport)
				continue;
			if (!in_nullhost(inp->inp_laddr)) {
				/* Bound to a specific local address. */
				if (!in_hosteq(inp->inp_laddr, *dst4))
					continue;
			}
			if (!in_nullhost(inp->inp_faddr)) {
				/* Connected: peer address/port must match. */
				if (!in_hosteq(inp->inp_faddr, *src4) ||
				    inp->inp_fport != *sport)
					continue;
			}

			udp4_sendup(m, off, (struct sockaddr *)src,
			    inp->inp_socket);
			rcvcnt++;

			/*
			 * Don't look for additional matches if this one does
			 * not have either the SO_REUSEPORT or SO_REUSEADDR
			 * socket options set.  This heuristic avoids searching
			 * through all pcbs in the common case of a non-shared
			 * port.  It assumes that an application will never
			 * clear these options after setting them.
			 */
			if ((inp->inp_socket->so_options &
			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
				break;
		}
	} else {
		/*
		 * Locate pcb for datagram.  Prefer a fully-connected
		 * match; fall back to a wildcard-bound socket.
		 */
		inp = in_pcblookup_connect(&udbtable, *src4, *sport, *dst4,
		    *dport, 0);
		if (inp == 0) {
			UDP_STATINC(UDP_STAT_PCBHASHMISS);
			inp = in_pcblookup_bind(&udbtable, *dst4, *dport);
			if (inp == 0)
				return rcvcnt;
		}

#ifdef IPSEC
		/* Handle ESP over UDP */
		if (inp->inp_flags & INP_ESPINUDP) {
			switch (udp4_espinudp(mp, off)) {
			case -1: /* Error, m was freed */
				rcvcnt = -1;
				goto bad;

			case 1: /* ESP over UDP */
				rcvcnt++;
				goto bad;

			case 0: /* plain UDP */
			default: /* Unexpected */
				/*
				 * Normal UDP processing will take place,
				 * m may have changed.
				 */
				m = *mp;
				break;
			}
		}
#endif

		/*
		 * Give any protocol stacked over UDP a chance to
		 * consume the packet before normal delivery.
		 */
		if (inp->inp_overudp_cb != NULL) {
			int ret;
			ret = inp->inp_overudp_cb(mp, off, inp->inp_socket,
			    sintosa(src), inp->inp_overudp_arg);
			switch (ret) {
			case -1: /* Error, m was freed */
				rcvcnt = -1;
				goto bad;

			case 1: /* Foo over UDP */
				KASSERT(*mp == NULL);
				rcvcnt++;
				goto bad;

			case 0: /* plain UDP */
			default: /* Unexpected */
				/*
				 * Normal UDP processing will take place,
				 * m may have changed.
				 */
				m = *mp;
				break;
			}
		}

		/*
		 * Check the minimum TTL for socket.
		 */
		if (mtod(m, struct ip *)->ip_ttl < inp->inp_ip_minttl)
			goto bad;

		udp4_sendup(m, off, (struct sockaddr *)src, inp->inp_socket);
		rcvcnt++;
	}

bad:
	return rcvcnt;
}
#endif

#ifdef INET
/*
 * Notify a udp user of an asynchronous error;
 * just wake up so that he can collect error status.
 */
static void
udp_notify(struct inpcb *inp, int errno)
{

	/* Record the error on the socket and wake both directions. */
	inp->inp_socket->so_error = errno;
	sorwakeup(inp->inp_socket);
	sowwakeup(inp->inp_socket);
}

/*
 * Control input: translate an ICMP-originated condition "cmd" into
 * notifications on the matching UDP PCBs.  "v" points at the inner
 * IP header of the offending packet (or is ignored for redirects
 * and host-dead conditions).
 */
void *
udp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
{
	struct ip *ip = v;
	struct udphdr *uh;
	void (*notify)(struct inpcb *, int) = udp_notify;
	int errno;

	if (sa->sa_family != AF_INET ||
	    sa->sa_len != sizeof(struct sockaddr_in))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	errno = inetctlerrmap[cmd];
	if (PRC_IS_REDIRECT(cmd)) {
		/* Redirect: update routes on all PCBs, no per-port match. */
		notify = in_rtchange;
		ip = NULL;
	} else if (cmd == PRC_HOSTDEAD) {
		ip = NULL;
	} else if (errno == 0) {
		return NULL;
	}
	if (ip) {
		/* Notify only the PCB matching the embedded 4-tuple. */
		uh = (struct udphdr *)((char *)ip + (ip->ip_hl << 2));
		in_pcbnotify(&udbtable, satocsin(sa)->sin_addr, uh->uh_dport,
		    ip->ip_src, uh->uh_sport, errno, notify);

		/* XXX mapped address case */
	} else {
		in_pcbnotifyall(&udbtable, satocsin(sa)->sin_addr, errno,
		    notify);
	}
	return NULL;
}

/*
 * Socket-option processing.  Only UDP_ENCAP at level IPPROTO_UDP is
 * handled here (toggling ESP-in-UDP decapsulation); anything else is
 * handed down to the IPv4/IPv6 layer.
 */
int
udp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
	int s;
	int error = 0;
	struct inpcb *inp;
	int family;
	int optval;

	family = so->so_proto->pr_domain->dom_family;

	s = splsoftnet();
	switch (family) {
#ifdef INET
	case PF_INET:
		if (sopt->sopt_level != IPPROTO_UDP) {
			error = ip_ctloutput(op, so, sopt);
			goto end;
		}
		break;
#endif
#ifdef INET6
	case PF_INET6:
		if (sopt->sopt_level != IPPROTO_UDP) {
			error = ip6_ctloutput(op, so, sopt);
			goto end;
		}
		break;
#endif
	default:
		error = EAFNOSUPPORT;
		goto end;
	}

	switch (op) {
	case PRCO_SETOPT:
		inp = sotoinpcb(so);

		switch (sopt->sopt_name) {
		case UDP_ENCAP:
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;

			switch(optval) {
			case 0:
				inp->inp_flags &= ~INP_ESPINUDP;
				break;

			case UDP_ENCAP_ESPINUDP:
				inp->inp_flags |= INP_ESPINUDP;
				break;

			default:
				error = EINVAL;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	default:
		/* Only setsockopt() is handled at the UDP level. */
		error = EINVAL;
		break;
	}

end:
	splx(s);
	return error;
}

/*
 * Prepend UDP and IP headers to "m" and hand the datagram to
 * ip_output() on the PCB's route.  Consumes "m" and "control" on
 * all paths.
 */
int
udp_output(struct mbuf *m, struct inpcb *inp, struct mbuf *control,
    struct lwp *l)
{
	struct udpiphdr *ui;
	struct route *ro;
	struct
ip_pktopts pktopts;
	kauth_cred_t cred;
	int len = m->m_pkthdr.len;
	int error, flags = 0;

	MCLAIM(m, &udp_tx_mowner);

	/*
	 * Calculate data length and get a mbuf
	 * for UDP and IP headers.
	 */
	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto release;
	}

	/*
	 * Compute the packet length of the IP header, and
	 * punt if the length looks bogus.
	 */
	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
		error = EMSGSIZE;
		goto release;
	}

	/* Credentials come from the sending lwp when there is one. */
	if (l == NULL)
		cred = NULL;
	else
		cred = l->l_cred;

	/* Setup IP outgoing packet options */
	memset(&pktopts, 0, sizeof(pktopts));
	error = ip_setpktopts(control, &pktopts, &flags, inp, cred);
	if (error != 0)
		goto release;

	if (control != NULL) {
		m_freem(control);
		control = NULL;
	}

	/*
	 * Fill in mbuf with extended UDP header
	 * and addresses and length put into network format.
	 */
	ui = mtod(m, struct udpiphdr *);
	ui->ui_pr = IPPROTO_UDP;
	ui->ui_src = pktopts.ippo_laddr.sin_addr;
	ui->ui_dst = inp->inp_faddr;
	ui->ui_sport = inp->inp_lport;
	ui->ui_dport = inp->inp_fport;
	ui->ui_ulen = htons((u_int16_t)len + sizeof(struct udphdr));

	ro = &inp->inp_route;

	/*
	 * Set up checksum and output datagram.
	 */
	if (udpcksum) {
		/*
		 * XXX Cache pseudo-header checksum part for
		 * XXX "connected" UDP sockets.
		 */
		/*
		 * Store the pseudo-header sum; the interface (or software
		 * fallback) finishes the UDP checksum via csum_flags.
		 */
		ui->ui_sum = in_cksum_phdr(ui->ui_src.s_addr,
		    ui->ui_dst.s_addr,
		    htons((u_int16_t)len + sizeof(struct udphdr) +
			IPPROTO_UDP));
		m->m_pkthdr.csum_flags = M_CSUM_UDPv4;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	} else
		ui->ui_sum = 0;
	((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
	((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl;	/* XXX */
	((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos;	/* XXX */
	UDP_STATINC(UDP_STAT_OPACKETS);

	flags |= inp->inp_socket->so_options & (SO_DONTROUTE|SO_BROADCAST);
	return ip_output(m, inp->inp_options, ro, flags, pktopts.ippo_imo,
	    inp);

release:
	if (control != NULL)
		m_freem(control);
	m_freem(m);
	return error;
}

/*
 * PRU attach: allocate a PCB in udbtable and reserve default socket
 * buffer space; set the default TTL on the PCB.
 */
static int
udp_attach(struct socket *so, int proto)
{
	struct inpcb *inp;
	int error;

	KASSERT(sotoinpcb(so) == NULL);

	/* Assign the lock (must happen even if we will error out). */
	sosetlock(so);

#ifdef MBUFTRACE
	so->so_mowner = &udp_mowner;
	so->so_rcv.sb_mowner = &udp_rx_mowner;
	so->so_snd.sb_mowner = &udp_tx_mowner;
#endif
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		error = soreserve(so, udp_sendspace, udp_recvspace);
		if (error) {
			return error;
		}
	}

	error = in_pcballoc(so, &udbtable);
	if (error) {
		return error;
	}
	inp = sotoinpcb(so);
	inp->inp_ip.ip_ttl = ip_defttl;
	KASSERT(solocked(so));

	return error;
}

/*
 * PRU detach: tear down the PCB allocated in udp_attach().
 */
static void
udp_detach(struct socket *so)
{
	struct inpcb *inp;

	KASSERT(solocked(so));
	inp = sotoinpcb(so);
	KASSERT(inp != NULL);
	in_pcbdetach(inp);
}

/*
 * accept(2) is never valid on a UDP socket; reaching here is a bug.
 */
static int
udp_accept(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	panic("udp_accept");

	return EOPNOTSUPP;
}

/*
 * bind(2): bind the PCB to the requested local address and port.
 */
static int
udp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
	int error = 0;
	int s;

	KASSERT(solocked(so));
	KASSERT(inp != NULL);
	KASSERT(nam != NULL);

	s = splsoftnet();
	error = in_pcbbind(inp, sin, l);
	splx(s);

	return error;
}

static int
udp_listen(struct socket *so, struct lwp *l)
{
	KASSERT(solocked(so));
return EOPNOTSUPP;	/* listen(2) is meaningless on a datagram socket */
}

/*
 * connect(2): fix the peer address so plain send(2) can be used.
 */
static int
udp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct inpcb *inp = sotoinpcb(so);
	int error = 0;
	int s;

	KASSERT(solocked(so));
	KASSERT(inp != NULL);
	KASSERT(nam != NULL);

	s = splsoftnet();
	error = in_pcbconnect(inp, (struct sockaddr_in *)nam, l);
	if (! error)
		soisconnected(so);
	splx(s);
	return error;
}

static int
udp_connect2(struct socket *so, struct socket *so2)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * disconnect: clear the peer address and return the PCB to the
 * bound state so the socket can be re-connected or used with sendto.
 */
static int
udp_disconnect(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	int s;

	KASSERT(solocked(so));
	KASSERT(inp != NULL);

	s = splsoftnet();
	/*soisdisconnected(so);*/
	so->so_state &= ~SS_ISCONNECTED;	/* XXX */
	in_pcbdisconnect(inp);
	inp->inp_laddr = zeroin_addr;		/* XXX */
	in_pcbstate(inp, INP_BOUND);		/* XXX */
	splx(s);

	return 0;
}

/*
 * shutdown(2): disallow further sends on the socket.
 */
static int
udp_shutdown(struct socket *so)
{
	int s;

	KASSERT(solocked(so));

	s = splsoftnet();
	socantsendmore(so);
	splx(s);

	return 0;
}

/*
 * abort is never valid on a UDP socket; reaching here is a bug.
 */
static int
udp_abort(struct socket *so)
{
	KASSERT(solocked(so));

	panic("udp_abort");

	return EOPNOTSUPP;
}

/*
 * ioctl(2): all UDP ioctls are generic inet interface controls.
 */
static int
udp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
{
	return in_control(so, cmd, nam, ifp);
}

static int
udp_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));

	/* stat: don't bother with a blocksize.
*/
	return 0;
}

/*
 * getpeername(2): copy out the connected peer's address.
 */
static int
udp_peeraddr(struct socket *so, struct sockaddr *nam)
{
	int s;

	KASSERT(solocked(so));
	KASSERT(sotoinpcb(so) != NULL);
	KASSERT(nam != NULL);

	s = splsoftnet();
	in_setpeeraddr(sotoinpcb(so), (struct sockaddr_in *)nam);
	splx(s);

	return 0;
}

/*
 * getsockname(2): copy out the socket's local address.
 */
static int
udp_sockaddr(struct socket *so, struct sockaddr *nam)
{
	int s;

	KASSERT(solocked(so));
	KASSERT(sotoinpcb(so) != NULL);
	KASSERT(nam != NULL);

	s = splsoftnet();
	in_setsockaddr(sotoinpcb(so), (struct sockaddr_in *)nam);
	splx(s);

	return 0;
}

static int
udp_rcvd(struct socket *so, int flags, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

static int
udp_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * send(2)/sendto(2): for an explicit destination ("nam"),
 * temporarily connect the PCB, transmit, then restore the previous
 * unconnected state.  Consumes "m" and "control" on all paths.
 */
int
udp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct lwp *l)
{
	struct inpcb *inp = sotoinpcb(so);
	int error = 0;
	struct in_addr laddr;			/* XXX */
	int s;

	KASSERT(solocked(so));
	KASSERT(inp != NULL);
	KASSERT(m != NULL);

	memset(&laddr, 0, sizeof laddr);

	s = splsoftnet();
	if (nam) {
		/* Remember the local address so it can be restored. */
		laddr = inp->inp_laddr;		/* XXX */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			goto die;
		}
		error = in_pcbconnect(inp, (struct sockaddr_in *)nam, l);
		if (error)
			goto die;
	} else {
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
			goto die;
		}
	}
	error = udp_output(m, inp, control, l);
	m = NULL;	/* udp_output() consumed m and control */
	control = NULL;
	if (nam) {
		/* Undo the temporary connect from above. */
		in_pcbdisconnect(inp);
		inp->inp_laddr = laddr;		/* XXX */
		in_pcbstate(inp, INP_BOUND);	/* XXX */
	}
die:
	if (m != NULL)
		m_freem(m);
	if (control != NULL)
		m_freem(control);

	splx(s);
	return error;
}

static int
udp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	/* No out-of-band data on UDP; free the caller's mbufs. */
	m_freem(m);
	m_freem(control);

	return EOPNOTSUPP;
}

/*
 * Purge PCBs and protocol state referencing an interface that is
 * being detached.
 */
static int
udp_purgeif(struct socket *so, struct ifnet *ifp)
{
	int s;

	s = splsoftnet();
	mutex_enter(softnet_lock);
	in_pcbpurgeif0(&udbtable, ifp);
#ifdef NET_MPSAFE
	mutex_exit(softnet_lock);
#endif
	in_purgeif(ifp);
#ifdef NET_MPSAFE
mutex_enter(softnet_lock);
#endif
	in_pcbpurgeif(&udbtable, ifp);
	mutex_exit(softnet_lock);
	splx(s);

	return 0;
}

/*
 * Export the per-CPU UDP statistics via sysctl.
 */
static int
sysctl_net_inet_udp_stats(SYSCTLFN_ARGS)
{

	return (NETSTAT_SYSCTL(udpstat_percpu, UDP_NSTATS));
}

/*
 * Sysctl for udp variables.
 */
static void
sysctl_net_inet_udp_setup(struct sysctllog **clog)
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "inet", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "udp",
		       SYSCTL_DESCR("UDPv4 related settings"),
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "checksum",
		       SYSCTL_DESCR("Compute UDP checksums"),
		       NULL, 0, &udpcksum, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_CHECKSUM,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "sendspace",
		       SYSCTL_DESCR("Default UDP send buffer size"),
		       NULL, 0, &udp_sendspace, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_SENDSPACE,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "recvspace",
		       SYSCTL_DESCR("Default UDP receive buffer size"),
		       NULL, 0, &udp_recvspace, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_RECVSPACE,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "do_loopback_cksum",
		       SYSCTL_DESCR("Perform UDP checksum on loopback"),
		       NULL, 0, &udp_do_loopback_cksum, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_LOOPBACKCKSUM,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "pcblist",
		       SYSCTL_DESCR("UDP protocol control block list"),
		       sysctl_inpcblist, 0, &udbtable, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, CTL_CREATE,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("UDP statistics"),
		       sysctl_net_inet_udp_stats, 0, NULL, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_STATS,
		       CTL_EOL);
}
#endif
void
udp_statinc(u_int stat)
{

	/* Bump one of the per-CPU UDP statistics counters. */
	KASSERT(stat < UDP_NSTATS);
	UDP_STATINC(stat);
}

#if defined(INET) && defined(IPSEC)
/*
 * Handle ESP-in-UDP packets (RFC3948).
 *
 * We need to distinguish between ESP packets and IKE packets. We do so by
 * looking at the Non-ESP marker. If IKE, we process the UDP packet as usual.
 * Otherwise, ESP, we invoke IPsec.
 *
 * Returns:
 *     1 if the packet was processed
 *     0 if normal UDP processing should take place
 *    -1 if an error occurred and m was freed
 */
static int
udp4_espinudp(struct mbuf **mp, int off)
{
	const size_t skip = sizeof(struct udphdr);
	size_t len;
	uint8_t *data;
	size_t minlen;
	size_t iphdrlen;
	struct ip *ip;
	struct m_tag *tag;
	struct udphdr *udphdr;
	u_int16_t sport, dport;
	struct mbuf *m = *mp;
	uint32_t *marker;

	/*
	 * Make sure the IP+UDP headers plus the start of the ESP
	 * payload are contiguous in the first mbuf, clamped to the
	 * actual packet length.
	 */
	minlen = off + sizeof(struct esp);
	if (minlen > m->m_pkthdr.len)
		minlen = m->m_pkthdr.len;

	if (m->m_len < minlen) {
		if ((*mp = m_pullup(m, minlen)) == NULL) {
			return -1;
		}
		m = *mp;
	}

	len = m->m_len - off;
	data = mtod(m, uint8_t *) + off;

	/* Ignore keepalive packets. */
	if ((len == 1) && (*data == 0xff)) {
		m_freem(m);
		*mp = NULL; /* avoid any further processing by caller */
		return 1;
	}

	/* Handle Non-ESP marker (32bit). If zero, then IKE. */
	marker = (uint32_t *)data;
	if (len <= sizeof(uint32_t))
		return 0;
	if (marker[0] == 0)
		return 0;

	/*
	 * Get the UDP ports. They are handled in network order
	 * everywhere in the IPSEC_NAT_T code.
	 */
	udphdr = (struct udphdr *)((char *)data - skip);
	sport = udphdr->uh_sport;
	dport = udphdr->uh_dport;

	/*
	 * Remove the UDP header, plus a possible marker. IP header
	 * length is iphdrlen.
	 *
	 * Before:
	 *   <--- off --->
	 *   +----+------+-----+
	 *   | IP |  UDP | ESP |
	 *   +----+------+-----+
	 *        <-skip->
	 * After:
	 *   +----+-----+
	 *   | IP | ESP |
	 *   +----+-----+
	 *   <-skip->
	 */
	iphdrlen = off - sizeof(struct udphdr);
	memmove(mtod(m, char *) + skip, mtod(m, void *), iphdrlen);
	m_adj(m, skip);

	/* The IP header moved by 'skip' bytes; fix length and protocol. */
	ip = mtod(m, struct ip *);
	ip->ip_len = htons(ntohs(ip->ip_len) - skip);
	ip->ip_p = IPPROTO_ESP;

	/*
	 * We have modified the packet - it is now ESP, so we should not
	 * return to UDP processing.
	 *
	 * Add a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember the source
	 * UDP port. This is required if we want to select the right SPD
	 * for multiple hosts behind same NAT.
	 */
	if ((tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
	    sizeof(sport) + sizeof(dport), M_DONTWAIT)) == NULL) {
		m_freem(m);
		return -1;
	}
	((u_int16_t *)(tag + 1))[0] = sport;
	((u_int16_t *)(tag + 1))[1] = dport;
	m_tag_prepend(m, tag);

	if (ipsec_used)
		ipsec4_common_input(m, iphdrlen, IPPROTO_ESP);
	else
		m_freem(m);

	/* We handled it, it shouldn't be handled by UDP */
	*mp = NULL; /* avoid free by caller ... */
	return 1;
}
#endif

PR_WRAP_USRREQS(udp)

#define	udp_attach	udp_attach_wrapper
#define	udp_detach	udp_detach_wrapper
#define	udp_accept	udp_accept_wrapper
#define	udp_bind	udp_bind_wrapper
#define	udp_listen	udp_listen_wrapper
#define	udp_connect	udp_connect_wrapper
#define	udp_connect2	udp_connect2_wrapper
#define	udp_disconnect	udp_disconnect_wrapper
#define	udp_shutdown	udp_shutdown_wrapper
#define	udp_abort	udp_abort_wrapper
#define	udp_ioctl	udp_ioctl_wrapper
#define	udp_stat	udp_stat_wrapper
#define	udp_peeraddr	udp_peeraddr_wrapper
#define	udp_sockaddr	udp_sockaddr_wrapper
#define	udp_rcvd	udp_rcvd_wrapper
#define	udp_recvoob	udp_recvoob_wrapper
#define	udp_send	udp_send_wrapper
#define	udp_sendoob	udp_sendoob_wrapper
#define	udp_purgeif	udp_purgeif_wrapper

/* User-request dispatch table for UDPv4 sockets (wrapped entry points). */
const struct pr_usrreqs udp_usrreqs = {
	.pr_attach	= udp_attach,
	.pr_detach	= udp_detach,
	.pr_accept	= udp_accept,
	.pr_bind	= udp_bind,
	.pr_listen	= udp_listen,
	.pr_connect	= udp_connect,
	.pr_connect2	= udp_connect2,
	.pr_disconnect	= udp_disconnect,
	.pr_shutdown	= udp_shutdown,
	.pr_abort	= udp_abort,
	.pr_ioctl	= udp_ioctl,
	.pr_stat	= udp_stat,
	.pr_peeraddr	= udp_peeraddr,
	.pr_sockaddr	= udp_sockaddr,
	.pr_rcvd	= udp_rcvd,
	.pr_recvoob	= udp_recvoob,
	.pr_send	= udp_send,
	.pr_sendoob	= udp_sendoob,
	.pr_purgeif	= udp_purgeif,
};
| 28 28 28 28 28 28 28 28 28 28 28 18 18 18 17 18 17 1 1 1 17 1 17 16 8 17 2 17 16 2 2 2 1 2 15 2 17 17 17 19 20 20 19 5 14 1 16 16 16 15 15 14 1 13 13 13 13 13 13 10 9 3 2 9 8 1 1 10 10 10 10 9 7 9 10 10 10 3 19 18 19 19 19 19 19 3 19 19 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 27 27 27 8 8 3 2 2 2 2 2 2 2 2 27 3 3 3 3 3 3 1 1 1 1 3 10 10 10 2 2 2 10 10 22 60 13 61 21 10 61 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 | /* $NetBSD: in6_pcb.c,v 
1.169 2022/07/29 07:35:16 knakahara Exp $ */ /* $KAME: in6_pcb.c,v 1.84 2001/02/08 18:02:08 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: in6_pcb.c,v 1.169 2022/07/29 07:35:16 knakahara Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_ipsec.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/ioctl.h> #include <sys/errno.h> #include <sys/time.h> #include <sys/proc.h> #include <sys/kauth.h> #include <sys/domain.h> #include <sys/once.h> #include <net/if.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> #include <netinet/ip6.h> #include <netinet/portalgo.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_pcb.h> #include <netinet6/scope6_var.h> #include "faith.h" #ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/ipsec6.h> #include <netipsec/key.h> #endif /* IPSEC */ #include <netinet/tcp_vtw.h> const struct in6_addr zeroin6_addr; #define IN6PCBHASH_PORT(table, lport) \ &(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash] #define IN6PCBHASH_BIND(table, laddr, lport) \ &(table)->inpt_bindhashtbl[ \ (((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \ (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3]) + ntohs(lport)) & \ (table)->inpt_bindhash] #define IN6PCBHASH_CONNECT(table, faddr, fport, laddr, lport) \ &(table)->inpt_bindhashtbl[ \ ((((faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^ \ (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3]) + ntohs(fport)) + \ (((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \ (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3]) + \ ntohs(lport))) & (table)->inpt_bindhash] int ip6_anonportmin = IPV6PORT_ANONMIN; int ip6_anonportmax = IPV6PORT_ANONMAX; int ip6_lowportmin = IPV6PORT_RESERVEDMIN; int ip6_lowportmax = IPV6PORT_RESERVEDMAX; static struct pool in6pcb_pool; static int in6pcb_poolinit(void) { pool_init(&in6pcb_pool, 
sizeof(struct in6pcb), 0, 0, 0, "in6pcbpl", NULL, IPL_SOFTNET);
	return 0;
}

/*
 * Initialize an IPv6 PCB table: set up the generic inpcb table and
 * seed the last-used port, and make sure the in6pcb pool exists
 * (created once, on first call).
 */
void
in6_pcbinit(struct inpcbtable *table, int bindhashsize, int connecthashsize)
{
	static ONCE_DECL(control);

	in_pcbinit(table, bindhashsize, connecthashsize);
	table->inpt_lastport = (u_int16_t)ip6_anonportmax;

	RUN_ONCE(&control, in6pcb_poolinit);
}

/*
 * Allocate and initialize a new in6pcb for socket 'so' and insert it
 * into table 'v', in the ATTACHED state.  Returns 0 or ENOBUFS.
 */
int
in6_pcballoc(struct socket *so, void *v)
{
	struct inpcbtable *table = v;
	struct in6pcb *in6p;
	int s;

	KASSERT(so->so_proto->pr_domain->dom_family == AF_INET6);

	in6p = pool_get(&in6pcb_pool, PR_NOWAIT);
	if (in6p == NULL)
		return (ENOBUFS);
	memset((void *)in6p, 0, sizeof(*in6p));
	in6p->in6p_af = AF_INET6;
	in6p->in6p_table = table;
	in6p->in6p_socket = so;
	in6p->in6p_hops = -1;	/* use kernel default */
	in6p->in6p_icmp6filt = NULL;
	in6p->in6p_portalgo = PORTALGO_DEFAULT;
	in6p->in6p_bindportonsend = false;
#if defined(IPSEC)
	if (ipsec_enabled) {
		int error = ipsec_init_pcbpolicy(so, &in6p->in6p_sp);
		if (error != 0) {
			pool_put(&in6pcb_pool, in6p);
			return error;
		}
		in6p->in6p_sp->sp_inph = (struct inpcb_hdr *)in6p;
	}
#endif /* IPSEC */
	s = splsoftnet();
	/* Insert on the global queue and the local-port hash chain. */
	TAILQ_INSERT_HEAD(&table->inpt_queue, (struct inpcb_hdr*)in6p,
	    inph_queue);
	LIST_INSERT_HEAD(IN6PCBHASH_PORT(table, in6p->in6p_lport),
	    &in6p->in6p_head, inph_lhash);
	in6_pcbstate(in6p, IN6P_ATTACHED);
	splx(s);
	if (ip6_v6only)
		in6p->in6p_flags |= IN6P_IPV6_V6ONLY;
	so->so_pcb = (void *)in6p;
	return (0);
}

/*
 * Bind address from sin6 to in6p.
 */
static int
in6_pcbbind_addr(struct in6pcb *in6p, struct sockaddr_in6 *sin6, struct lwp *l)
{
	int error;
	int s;

	/*
	 * We should check the family, but old programs
	 * incorrectly fail to initialize it.
*/
	if (sin6->sin6_family != AF_INET6)
		return (EAFNOSUPPORT);

#ifndef INET
	/* Without INET, a v4-mapped address can never be usable here. */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
		return (EADDRNOTAVAIL);
#endif

	if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
		return (error);

	s = pserialize_read_enter();
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		/* v4-mapped binds are only allowed on dual-stack sockets. */
		if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if (sin6->sin6_addr.s6_addr32[3]) {
			struct sockaddr_in sin;

			memset(&sin, 0, sizeof(sin));
			sin.sin_len = sizeof(sin);
			sin.sin_family = AF_INET;
			bcopy(&sin6->sin6_addr.s6_addr32[3],
			    &sin.sin_addr, sizeof(sin.sin_addr));
			if (!IN_MULTICAST(sin.sin_addr.s_addr)) {
				struct ifaddr *ifa;
				ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
				if (ifa == NULL &&
				    (in6p->in6p_flags & IN6P_BINDANY) == 0) {
					error = EADDRNOTAVAIL;
					goto out;
				}
			}
		}
	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		// succeed
	} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
		struct ifaddr *ifa = NULL;

		if ((in6p->in6p_flags & IN6P_FAITH) == 0) {
			/* The address must be configured on some interface. */
			ifa = ifa_ifwithaddr(sin6tosa(sin6));
			if (ifa == NULL &&
			    (in6p->in6p_flags & IN6P_BINDANY) == 0) {
				error = EADDRNOTAVAIL;
				goto out;
			}
		}

		/*
		 * bind to an anycast address might accidentally
		 * cause sending a packet with an anycast source
		 * address, so we forbid it.
		 *
		 * We should allow to bind to a deprecated address,
		 * since the application dare to use it.
		 * But, can we assume that they are careful enough
		 * to check if the address is deprecated or not?
		 * Maybe, as a safeguard, we should have a setsockopt
		 * flag to control the bind(2) behavior against
		 * deprecated addresses (default: forbid bind(2)).
		 */
		if (ifa &&
		    ifatoia6(ifa)->ia6_flags &
		    (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED)) {
			error = EADDRNOTAVAIL;
			goto out;
		}
	}
	in6p->in6p_laddr = sin6->sin6_addr;
	error = 0;
out:
	pserialize_read_exit(s);
	return error;
}

/*
 * Bind port from sin6 to in6p.
*/
static int
in6_pcbbind_port(struct in6pcb *in6p, struct sockaddr_in6 *sin6, struct lwp *l)
{
	struct inpcbtable *table = in6p->in6p_table;
	struct socket *so = in6p->in6p_socket;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error;

	/* Wildcard matching applies unless the socket asked for reuse. */
	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
	   ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
	    (so->so_options & SO_ACCEPTCONN) == 0))
		wild = 1;

	if (sin6->sin6_port != 0) {
		enum kauth_network_req req;

#ifndef IPNOPRIVPORTS
		/* Binding a reserved port requires extra privilege. */
		if (ntohs(sin6->sin6_port) < IPV6PORT_RESERVED)
			req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
		else
#endif /* IPNOPRIVPORTS */
			req = KAUTH_REQ_NETWORK_BIND_PORT;

		error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_BIND,
		    req, so, sin6, NULL);
		if (error)
			return (EACCES);
	}

	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		/*
		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
		 * allow complete duplication of binding if
		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
		 * and a multicast address is bound on both
		 * new and duplicated sockets.
		 */
		if (so->so_options & (SO_REUSEADDR | SO_REUSEPORT))
			reuseport = SO_REUSEADDR|SO_REUSEPORT;
	}

	if (sin6->sin6_port != 0) {
		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
#ifdef INET
			/* Check the IPv4 port space for a v4-mapped bind. */
			struct inpcb *t;
			struct vestigial_inpcb vestige;

			t = in_pcblookup_port(table,
			    *(struct in_addr *)&sin6->sin6_addr.s6_addr32[3],
			    sin6->sin6_port, wild, &vestige);
			if (t && (reuseport & t->inp_socket->so_options) == 0)
				return (EADDRINUSE);
			if (!t
			    && vestige.valid
			    && !(reuseport && vestige.reuse_port))
				return EADDRINUSE;
#else
			return (EADDRNOTAVAIL);
#endif
		}

		{
			/* Check the IPv6 port space. */
			struct in6pcb *t;
			struct vestigial_inpcb vestige;

			t = in6_pcblookup_port(table, &sin6->sin6_addr,
			    sin6->sin6_port, wild, &vestige);
			if (t && (reuseport & t->in6p_socket->so_options) == 0)
				return (EADDRINUSE);
			if (!t
			    && vestige.valid
			    && !(reuseport && vestige.reuse_port))
				return EADDRINUSE;
		}
	}

	if (sin6->sin6_port == 0) {
		/* Pick an ephemeral port. */
		int e;
		e = in6_pcbsetport(sin6, in6p, l);
		if (e != 0)
			return (e);
	} else {
		in6p->in6p_lport = sin6->sin6_port;
		in6_pcbstate(in6p, IN6P_BOUND);
	}

	/* Rehash on the local-port chain for the (possibly new) port. */
	LIST_REMOVE(&in6p->in6p_head, inph_lhash);
	LIST_INSERT_HEAD(IN6PCBHASH_PORT(table, in6p->in6p_lport),
	    &in6p->in6p_head, inph_lhash);

	return (0);
}

/*
 * Bind a local address and/or port to the PCB.  A NULL sin6 binds
 * to "any".  Returns 0 or an errno.
 */
int
in6_pcbbind(void *v, struct sockaddr_in6 *sin6, struct lwp *l)
{
	struct in6pcb *in6p = v;
	struct sockaddr_in6 lsin6;
	int error;

	if (in6p->in6p_af != AF_INET6)
		return (EINVAL);

	/*
	 * If we already have a local port or a local address it means we're
	 * bound.
	 */
	if (in6p->in6p_lport || !(IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) ||
	    (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr) &&
	     in6p->in6p_laddr.s6_addr32[3] == 0)))
		return (EINVAL);

	if (NULL != sin6) {
		/* We were provided a sockaddr_in6 to use. */
		if (sin6->sin6_len != sizeof(*sin6))
			return (EINVAL);
	} else {
		/* We always bind to *something*, even if it's "anything". */
		lsin6 = *((const struct sockaddr_in6 *)
		    in6p->in6p_socket->so_proto->pr_domain->dom_sa_any);
		sin6 = &lsin6;
	}

	/* Bind address. */
	error = in6_pcbbind_addr(in6p, sin6, l);
	if (error)
		return (error);

	/* Bind port.
*/
	error = in6_pcbbind_port(in6p, sin6, l);
	if (error) {
		/*
		 * Reset the address here to "any" so we don't "leak" the
		 * in6pcb.
		 */
		in6p->in6p_laddr = in6addr_any;

		return (error);
	}

#if 0
	in6p->in6p_flowinfo = 0;	/* XXX */
#endif
	return (0);
}

/*
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin6.
 * If don't have a local address for this socket yet,
 * then pick one.
 */
int
in6_pcbconnect(void *v, struct sockaddr_in6 *sin6, struct lwp *l)
{
	struct in6pcb *in6p = v;
	struct in6_addr *in6a = NULL;
	struct in6_addr ia6;
	struct ifnet *ifp = NULL;	/* outgoing interface */
	int error = 0;
	int scope_ambiguous = 0;
#ifdef INET
	struct in6_addr mapped;
#endif
	struct sockaddr_in6 tmp;
	struct vestigial_inpcb vestige;
	struct psref psref;
	int bound;

	(void)&in6a;				/* XXX fool gcc */

	if (in6p->in6p_af != AF_INET6)
		return (EINVAL);

	/* Validate the destination sockaddr. */
	if (sin6->sin6_len != sizeof(*sin6))
		return (EINVAL);
	if (sin6->sin6_family != AF_INET6)
		return (EAFNOSUPPORT);
	if (sin6->sin6_port == 0)
		return (EADDRNOTAVAIL);

	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
	    in6p->in6p_socket->so_type == SOCK_STREAM)
		return EADDRNOTAVAIL;

	if (sin6->sin6_scope_id == 0 && !ip6_use_defzone)
		scope_ambiguous = 1;
	if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
		return(error);

	/* sanity check for mapped address case */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
			return EINVAL;
		if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
			in6p->in6p_laddr.s6_addr16[5] = htons(0xffff);
		if (!IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
			return EINVAL;
	} else {
		if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
			return EINVAL;
	}

	/* protect *sin6 from overwrites */
	tmp = *sin6;
	sin6 = &tmp;

	bound = curlwp_bind();
	/* Source address selection.
	 */
	if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr) &&
	    in6p->in6p_laddr.s6_addr32[3] == 0) {
#ifdef INET
		/* v4-mapped destination: select an IPv4 source address. */
		struct sockaddr_in sin;
		struct in_ifaddr *ia4;
		struct psref _psref;

		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		memcpy(&sin.sin_addr, &sin6->sin6_addr.s6_addr32[3],
		    sizeof(sin.sin_addr));
		ia4 = in_selectsrc(&sin, &in6p->in6p_route,
		    in6p->in6p_socket->so_options, NULL, &error, &_psref);
		if (ia4 == NULL) {
			if (error == 0)
				error = EADDRNOTAVAIL;
			curlwp_bindx(bound);
			return (error);
		}
		memset(&mapped, 0, sizeof(mapped));
		mapped.s6_addr16[5] = htons(0xffff);
		memcpy(&mapped.s6_addr32[3], &IA_SIN(ia4)->sin_addr,
		    sizeof(IA_SIN(ia4)->sin_addr));
		ia4_release(ia4, &_psref);
		in6a = &mapped;
#else
		curlwp_bindx(bound);
		return EADDRNOTAVAIL;
#endif
	} else {
		/*
		 * XXX: in6_selectsrc might replace the bound local address
		 * with the address specified by setsockopt(IPV6_PKTINFO).
		 * Is it the intended behavior?
		 */
		error = in6_selectsrc(sin6, in6p->in6p_outputopts,
		    in6p->in6p_moptions, &in6p->in6p_route, &in6p->in6p_laddr,
		    &ifp, &psref, &ia6);
		if (error == 0)
			in6a = &ia6;
		if (ifp && scope_ambiguous &&
		    (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
			if_put(ifp, &psref);
			curlwp_bindx(bound);
			return error;
		}

		if (in6a == NULL) {
			if_put(ifp, &psref);
			curlwp_bindx(bound);
			if (error == 0)
				error = EADDRNOTAVAIL;
			return error;
		}
	}

	/* Choose the hop limit from the outgoing interface or route. */
	if (ifp != NULL) {
		in6p->in6p_ip6.ip6_hlim = (u_int8_t)in6_selecthlim(in6p, ifp);
		if_put(ifp, &psref);
	} else
		in6p->in6p_ip6.ip6_hlim = (u_int8_t)in6_selecthlim_rt(in6p);
	curlwp_bindx(bound);

	/* Refuse the connect if the 5-tuple is already in use. */
	if (in6_pcblookup_connect(in6p->in6p_table, &sin6->sin6_addr,
	    sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) ?
in6a : &in6p->in6p_laddr, in6p->in6p_lport, 0,
	    &vestige) || vestige.valid)
		return (EADDRINUSE);
	if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) ||
	    (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr) &&
	     in6p->in6p_laddr.s6_addr32[3] == 0))
	{
		if (in6p->in6p_lport == 0) {
			error = in6_pcbbind(in6p, NULL, l);
			if (error != 0)
				return error;
		}
		in6p->in6p_laddr = *in6a;
	}
	in6p->in6p_faddr = sin6->sin6_addr;
	in6p->in6p_fport = sin6->sin6_port;

	/* Late bind, if needed */
	if (in6p->in6p_bindportonsend) {
		struct sockaddr_in6 lsin = *((const struct sockaddr_in6 *)
		    in6p->in6p_socket->so_proto->pr_domain->dom_sa_any);
		lsin.sin6_addr = in6p->in6p_laddr;
		lsin.sin6_port = 0;

		if ((error = in6_pcbbind_port(in6p, &lsin, l)) != 0)
			return error;
	}

	in6_pcbstate(in6p, IN6P_CONNECTED);
	/* Pick a fresh flow label if auto-flowlabel is enabled. */
	in6p->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
	if (ip6_auto_flowlabel)
		in6p->in6p_flowinfo |=
		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
#if defined(IPSEC)
	if (ipsec_enabled && in6p->in6p_socket->so_type == SOCK_STREAM)
		ipsec_pcbconn(in6p->in6p_sp);
#endif
	return (0);
}

/*
 * Drop the foreign address/port and return the PCB to the BOUND state.
 * Detaches the PCB entirely if the socket no longer has a file reference.
 */
void
in6_pcbdisconnect(struct in6pcb *in6p)
{
	memset((void *)&in6p->in6p_faddr, 0, sizeof(in6p->in6p_faddr));
	in6p->in6p_fport = 0;
	in6_pcbstate(in6p, IN6P_BOUND);
	in6p->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
#if defined(IPSEC)
	if (ipsec_enabled)
		ipsec_pcbdisconn(in6p->in6p_sp);
#endif
	if (in6p->in6p_socket->so_state & SS_NOFDREF)
		in6_pcbdetach(in6p);
}

/*
 * Tear down the PCB: unlink it from the table, release its options,
 * routes and multicast state, free the socket, and return the PCB to
 * the pool.
 */
void
in6_pcbdetach(struct in6pcb *in6p)
{
	struct socket *so = in6p->in6p_socket;
	int s;

	if (in6p->in6p_af != AF_INET6)
		return;

#if defined(IPSEC)
	if (ipsec_enabled)
		ipsec_delete_pcbpolicy(in6p);
#endif
	so->so_pcb = NULL;

	s = splsoftnet();
	in6_pcbstate(in6p, IN6P_ATTACHED);
	LIST_REMOVE(&in6p->in6p_head, inph_lhash);
	TAILQ_REMOVE(&in6p->in6p_table->inpt_queue, &in6p->in6p_head,
	    inph_queue);
	splx(s);

	if (in6p->in6p_options) {
		m_freem(in6p->in6p_options);
	}
	if (in6p->in6p_outputopts != NULL) {
		ip6_clearpktopts(in6p->in6p_outputopts, -1);
		free(in6p->in6p_outputopts, M_IP6OPT);
	}
	rtcache_free(&in6p->in6p_route);
	ip6_freemoptions(in6p->in6p_moptions);
	ip_freemoptions(in6p->in6p_v4moptions);
	sofree(so);			/* drops the socket's lock */

	pool_put(&in6pcb_pool, in6p);
	mutex_enter(softnet_lock);	/* reacquire it */
}

/* Fill in *sin6 with the PCB's local address and port. */
void
in6_setsockaddr(struct in6pcb *in6p, struct sockaddr_in6 *sin6)
{

	if (in6p->in6p_af != AF_INET6)
		return;

	sockaddr_in6_init(sin6, &in6p->in6p_laddr, in6p->in6p_lport, 0, 0);
	(void)sa6_recoverscope(sin6); /* XXX: should catch errors */
}

/* Fill in *sin6 with the PCB's foreign (peer) address and port. */
void
in6_setpeeraddr(struct in6pcb *in6p, struct sockaddr_in6 *sin6)
{

	if (in6p->in6p_af != AF_INET6)
		return;

	sockaddr_in6_init(sin6, &in6p->in6p_faddr, in6p->in6p_fport, 0, 0);
	(void)sa6_recoverscope(sin6); /* XXX: should catch errors */
}

/*
 * Pass some notification to all connections of a protocol
 * associated with address dst. The local address and/or port numbers
 * may be specified to limit the search. The "usual action" will be
 * taken, depending on the ctlinput cmd. The caller must filter any
 * cmds that are uninteresting (e.g., no error in the map).
 * Call the protocol specific routine (if any) to report
 * any errors for each matching socket.
 *
 * Must be called at splsoftnet.
 *
 * Note: src (4th arg) carries the flowlabel value on the original IPv6
 * header, in sin6_flowinfo member.
 */
int
in6_pcbnotify(struct inpcbtable *table, const struct sockaddr *dst,
    u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd,
    void *cmdarg, void (*notify)(struct in6pcb *, int))
{
	struct inpcb_hdr *inph;
	struct sockaddr_in6 sa6_src;
	const struct sockaddr_in6 *sa6_dst;
	u_int16_t fport = fport_arg, lport = lport_arg;
	int errno;
	int nmatch = 0;
	u_int32_t flowinfo;

	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
		return 0;

	sa6_dst = (const struct sockaddr_in6 *)dst;
	if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
		return 0;

	/*
	 * note that src can be NULL when we get notify by local fragmentation.
	 */
	sa6_src = (src == NULL) ?
sa6_any : *(const struct sockaddr_in6 *)src;
	flowinfo = sa6_src.sin6_flowinfo;

	/*
	 * Redirects go to all references to the destination,
	 * and use in6_rtchange to invalidate the route cache.
	 * Dead host indications: also use in6_rtchange to invalidate
	 * the cache, and deliver the error to all the sockets.
	 * Otherwise, if we have knowledge of the local port and address,
	 * deliver only to that socket.
	 */
	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
		fport = 0;
		lport = 0;
		memset((void *)&sa6_src.sin6_addr, 0,
		    sizeof(sa6_src.sin6_addr));

		if (cmd != PRC_HOSTDEAD)
			notify = in6_rtchange;
	}

	errno = inet6ctlerrmap[cmd];
	TAILQ_FOREACH(inph, &table->inpt_queue, inph_queue) {
		struct in6pcb *in6p = (struct in6pcb *)inph;
		struct rtentry *rt = NULL;

		if (in6p->in6p_af != AF_INET6)
			continue;

		/*
		 * Under the following condition, notify of redirects
		 * to the pcb, without making address matches against inpcb.
		 * - redirect notification is arrived.
		 * - the inpcb is unconnected.
		 * - the inpcb is caching !RTF_HOST routing entry.
		 * - the ICMPv6 notification is from the gateway cached in the
		 *   inpcb.  i.e. ICMPv6 notification is from nexthop gateway
		 *   the inpcb used very recently.
		 *
		 * This is to improve interaction between netbsd/openbsd
		 * redirect handling code, and inpcb route cache code.
		 * without the clause, !RTF_HOST routing entry (which carries
		 * gateway used by inpcb right before the ICMPv6 redirect)
		 * will be cached forever in unconnected inpcb.
		 *
		 * There still is a question regarding to what is TRT:
		 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be
		 *   generated on packet output.  inpcb will always cache
		 *   RTF_HOST routing entry so there's no need for the clause
		 *   (ICMPv6 redirect will update RTF_HOST routing entry,
		 *   and inpcb is caching it already).
		 *   However, bsdi/freebsd are vulnerable to local DoS attacks
		 *   due to the cloned routing entries.
		 * - Specwise, "destination cache" is mentioned in RFC2461.
		 *   Jinmei says that it implies bsdi/freebsd behavior, itojun
		 *   is not really convinced.
		 * - Having hiwat/lowat on # of cloned host route (redirect/
		 *   pmtud) may be a good idea.  netbsd/openbsd has it.  see
		 *   icmp6_mtudisc_update().
		 */
		if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) &&
		    IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
		    (rt = rtcache_validate(&in6p->in6p_route)) != NULL &&
		    !(rt->rt_flags & RTF_HOST)) {
			const struct sockaddr_in6 *dst6;

			dst6 = (const struct sockaddr_in6 *)
			    rtcache_getdst(&in6p->in6p_route);
			if (dst6 == NULL)
				;
			else if (IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr,
			    &sa6_dst->sin6_addr)) {
				rtcache_unref(rt, &in6p->in6p_route);
				goto do_notify;
			}
		}
		rtcache_unref(rt, &in6p->in6p_route);

		/*
		 * If the error designates a new path MTU for a destination
		 * and the application (associated with this socket) wanted to
		 * know the value, notify. Note that we notify for all
		 * disconnected sockets if the corresponding application
		 * wanted. This is because some UDP applications keep sending
		 * sockets disconnected.
		 * XXX: should we avoid to notify the value to TCP sockets?
		 */
		if (cmd == PRC_MSGSIZE && (in6p->in6p_flags & IN6P_MTU) != 0 &&
		    (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) ||
		     IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
		     &sa6_dst->sin6_addr))) {
			ip6_notify_pmtu(in6p, (const struct sockaddr_in6 *)dst,
			    (u_int32_t *)cmdarg);
		}

		/*
		 * Detect if we should notify the error. If no source and
		 * destination ports are specified, but non-zero flowinfo and
		 * local address match, notify the error. This is the case
		 * when the error is delivered with an encrypted buffer
		 * by ESP. Otherwise, just compare addresses and ports
		 * as usual.
		 */
		if (lport == 0 && fport == 0 && flowinfo &&
		    in6p->in6p_socket != NULL &&
		    flowinfo == (in6p->in6p_flowinfo & IPV6_FLOWLABEL_MASK) &&
		    IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &sa6_src.sin6_addr))
			goto do_notify;
		else if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
					     &sa6_dst->sin6_addr) ||
			 in6p->in6p_socket == NULL ||
			 (lport && in6p->in6p_lport != lport) ||
			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
			  !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
					      &sa6_src.sin6_addr)) ||
			 (fport && in6p->in6p_fport != fport))
			continue;

	  do_notify:
		if (notify)
			(*notify)(in6p, errno);
		nmatch++;
	}
	return nmatch;
}

/*
 * Interface-detach cleanup pass: clear multicast state that refers
 * to the interface being removed, for every IPv6 PCB in the table.
 */
void
in6_pcbpurgeif0(struct inpcbtable *table, struct ifnet *ifp)
{
	struct inpcb_hdr *inph;
	struct ip6_moptions *im6o;
	struct in6_multi_mship *imm, *nimm;

	KASSERT(ifp != NULL);

	TAILQ_FOREACH(inph, &table->inpt_queue, inph_queue) {
		struct in6pcb *in6p = (struct in6pcb *)inph;
		bool need_unlock = false;

		if (in6p->in6p_af != AF_INET6)
			continue;

		/* The caller holds either one of in6ps' lock */
		if (!in6p_locked(in6p)) {
			in6p_lock(in6p);
			need_unlock = true;
		}

		im6o = in6p->in6p_moptions;
		if (im6o) {
			/*
			 * Unselect the outgoing interface if it is being
			 * detached.
			 */
			if (im6o->im6o_multicast_if_index == ifp->if_index)
				im6o->im6o_multicast_if_index = 0;

			/*
			 * Drop multicast group membership if we joined
			 * through the interface being detached.
			 * XXX controversial - is it really legal for kernel
			 * to force this?
 */
		/*
		 * Walk memberships with the _SAFE variant because
		 * LIST_REMOVE unlinks the current element.
		 */
		LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships,
		    i6mm_chain, nimm) {
			if (imm->i6mm_maddr->in6m_ifp == ifp) {
				LIST_REMOVE(imm, i6mm_chain);
				in6_leavegroup(imm);
			}
		}
	}

	/* Also drop IPv4 memberships held via a mapped-address socket. */
	in_purgeifmcast(in6p->in6p_v4moptions, ifp);

	if (need_unlock)
		in6p_unlock(in6p);
	}
}

/*
 * Invalidate the cached route of every IPv6 PCB whose route points at
 * the interface being detached, forcing a fresh route allocation on the
 * next output.  Entries of other address families are skipped.
 */
void
in6_pcbpurgeif(struct inpcbtable *table, struct ifnet *ifp)
{
	struct rtentry *rt;
	struct inpcb_hdr *inph;

	TAILQ_FOREACH(inph, &table->inpt_queue, inph_queue) {
		struct in6pcb *in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;
		if ((rt = rtcache_validate(&in6p->in6p_route)) != NULL &&
		    rt->rt_ifp == ifp) {
			/*
			 * Drop our reference before in6_rtchange frees
			 * the cache entry.
			 */
			rtcache_unref(rt, &in6p->in6p_route);
			in6_rtchange(in6p, 0);
		} else
			/* rtcache_unref tolerates rt == NULL here. */
			rtcache_unref(rt, &in6p->in6p_route);
	}
}

/*
 * Check for alternatives when higher level complains
 * about service problems. For now, invalidate cached
 * routing information. If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in6_losing(struct in6pcb *in6p)
{
	struct rtentry *rt;
	struct rt_addrinfo info;

	if (in6p->in6p_af != AF_INET6)
		return;

	/* Nothing to do without a valid cached route. */
	if ((rt = rtcache_validate(&in6p->in6p_route)) == NULL)
		return;

	/* Announce the loss to routing-socket listeners. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rtcache_getdst(&in6p->in6p_route);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
	if (rt->rt_flags & RTF_DYNAMIC) {
		int error;
		struct rtentry *nrt;

		/* Redirect-created route: delete it outright. */
		error = rtrequest(RTM_DELETE, rt_getkey(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &nrt);
		rtcache_unref(rt, &in6p->in6p_route);
		if (error == 0)
			rt_free(nrt);
	} else
		rtcache_unref(rt, &in6p->in6p_route);
	/*
	 * A new route can be allocated
	 * the next time output is attempted.
	 */
	rtcache_free(&in6p->in6p_route);
}

/*
 * After a routing change, flush old routing.  A new route can be
 * allocated the next time output is attempted.
 */
void
in6_rtchange(struct in6pcb *in6p, int errno)
{
	if (in6p->in6p_af != AF_INET6)
		return;
	rtcache_free(&in6p->in6p_route);
	/*
	 * A new route can be allocated the next time
	 * output is attempted.
	 */
}

/*
 * Find the best PCB bound to local port lport_arg/address laddr6.
 *
 * "Best" is the entry with the fewest wildcard mismatches (a lower
 * wildcard score wins; 0 is an exact match and ends the search).
 * When lookup_wildcard is zero, only exact matches are accepted.
 * If vp is non-NULL, vestigial (time-wait) entries are also consulted;
 * a vestigial winner is copied into *vp and NULL is returned with
 * vp->valid set by the vestige layer.
 */
struct in6pcb *
in6_pcblookup_port(struct inpcbtable *table, struct in6_addr *laddr6,
    u_int lport_arg, int lookup_wildcard, struct vestigial_inpcb *vp)
{
	struct inpcbhead *head;
	struct inpcb_hdr *inph;
	struct in6pcb *in6p, *match = NULL;
	/* 3 exceeds any reachable score, so the first candidate wins. */
	int matchwild = 3, wildcard;
	u_int16_t lport = lport_arg;

	if (vp)
		vp->valid = 0;

	head = IN6PCBHASH_PORT(table, lport);
	LIST_FOREACH(inph, head, inph_lhash) {
		in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;

		if (in6p->in6p_lport != lport)
			continue;
		wildcard = 0;
		/* A v6-only socket never matches v4-mapped traffic. */
		if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) {
			if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
				continue;
		}
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr))
			wildcard++;
		if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) {
			if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
				continue;
			if (!IN6_IS_ADDR_V4MAPPED(laddr6))
				continue;
			/* duplicate of IPv4 logic */
			wildcard = 0;
			/* compare only the embedded IPv4 word [3] */
			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr) &&
			    in6p->in6p_faddr.s6_addr32[3])
				wildcard++;
			if (!in6p->in6p_laddr.s6_addr32[3]) {
				if (laddr6->s6_addr32[3])
					wildcard++;
			} else {
				if (!laddr6->s6_addr32[3])
					wildcard++;
				else {
					if (in6p->in6p_laddr.s6_addr32[3] !=
					    laddr6->s6_addr32[3])
						continue;
				}
			}
		} else if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
			if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
				if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
					continue;
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
				wildcard++;
		} else {
			if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
				if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
					continue;
			}
			if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
				wildcard++;
			else {
				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
				    laddr6))
					continue;
			}
		}
		if (wildcard && !lookup_wildcard)
			continue;
		if (wildcard < matchwild) {
			match = in6p;
			matchwild = wildcard;
			if (matchwild == 0)
				break;	/* exact match; can't do better */
		}
	}
	/* An exact live match beats any vestigial entry. */
	if (match && matchwild == 0)
		return match;

	if (vp && table->vestige && table->vestige->init_ports6) {
		struct vestigial_inpcb better;
		void *state;

		/* Scan time-wait entries with the same scoring scheme. */
		state = (*table->vestige->init_ports6)(laddr6,
		    lport_arg, lookup_wildcard);
		while (table->vestige
		       && (*table->vestige->next_port6)(state, vp)) {
			if (vp->lport != lport)
				continue;
			wildcard = 0;
			if (!IN6_IS_ADDR_UNSPECIFIED(&vp->faddr.v6))
				wildcard++;
			if (IN6_IS_ADDR_UNSPECIFIED(&vp->laddr.v6)) {
				if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
					wildcard++;
			} else {
				if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
					if (vp->v6only)
						continue;
				}
				if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
					wildcard++;
				else {
					if (!IN6_ARE_ADDR_EQUAL(&vp->laddr.v6, laddr6))
						continue;
				}
			}
			if (wildcard && !lookup_wildcard)
				continue;
			if (wildcard < matchwild) {
				/* remember the candidate by value */
				better = *vp;
				match  = (void*)&better;
				matchwild = wildcard;
				if (matchwild == 0)
					break;
			}
		}
		if (match) {
			if (match != (void*)&better)
				return match;
			else {
				/* vestigial winner: report via *vp, not a PCB */
				*vp = better;
				return 0;
			}
		}
	}
	return (match);
}

/*
 * WARNING: return value (rtentry) could be IPv4 one if in6pcb is connected to
 * IPv4 mapped address.
 */
struct rtentry *
in6_pcbrtentry(struct in6pcb *in6p)
{
	struct rtentry *rt;
	struct route *ro;
	/* view the cached destination as v4 or v6 without casts */
	union {
		const struct sockaddr *sa;
		const struct sockaddr_in6 *sa6;
#ifdef INET
		const struct sockaddr_in *sa4;
#endif
	} cdst;

	ro = &in6p->in6p_route;

	if (in6p->in6p_af != AF_INET6)
		return (NULL);

	/* Drop the cached route if it no longer matches the foreign addr. */
	cdst.sa = rtcache_getdst(ro);
	if (cdst.sa == NULL)
		;
#ifdef INET
	else if (cdst.sa->sa_family == AF_INET) {
		KASSERT(IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr));
		if (cdst.sa4->sin_addr.s_addr != in6p->in6p_faddr.s6_addr32[3])
			rtcache_free(ro);
	}
#endif
	else {
		if (!IN6_ARE_ADDR_EQUAL(&cdst.sa6->sin6_addr,
		    &in6p->in6p_faddr))
			rtcache_free(ro);
	}
	if ((rt = rtcache_validate(ro)) == NULL)
		rt = rtcache_update(ro, 1);
#ifdef INET
	if (rt == NULL && IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) {
		union {
			struct sockaddr		dst;
			struct sockaddr_in	dst4;
		} u;
		struct in_addr addr;

		/* v4-mapped peer: cache an IPv4 route to the embedded addr */
		addr.s_addr = in6p->in6p_faddr.s6_addr32[3];

		sockaddr_in_init(&u.dst4, &addr, 0);
		if (rtcache_setdst(ro, &u.dst) != 0)
			return NULL;

		rt = rtcache_init(ro);
	} else
#endif
	if (rt == NULL && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
		union {
			struct sockaddr		dst;
			struct sockaddr_in6	dst6;
		} u;

		sockaddr_in6_init(&u.dst6, &in6p->in6p_faddr, 0, 0, 0);
		if (rtcache_setdst(ro, &u.dst) != 0)
			return NULL;

		rt = rtcache_init(ro);
	}
	return rt;
}

/*
 * Release the reference taken by in6_pcbrtentry().
 */
void
in6_pcbrtentry_unref(struct rtentry *rt, struct in6pcb *in6p)
{
	rtcache_unref(rt, &in6p->in6p_route);
}

/*
 * Exact-match lookup of a connected PCB by the full 4-tuple
 * (faddr6, fport_arg, laddr6, lport_arg).  Falls back to the vestigial
 * (time-wait) table when vp is non-NULL; a vestigial hit fills *vp and
 * returns NULL.
 */
struct in6pcb *
in6_pcblookup_connect(struct inpcbtable *table, const struct in6_addr *faddr6,
    u_int fport_arg, const struct in6_addr *laddr6, u_int lport_arg,
    int faith, struct vestigial_inpcb *vp)
{
	struct inpcbhead *head;
	struct inpcb_hdr *inph;
	struct in6pcb *in6p;
	u_int16_t fport = fport_arg, lport = lport_arg;

	if (vp)
		vp->valid = 0;

	head = IN6PCBHASH_CONNECT(table, faddr6, fport, laddr6, lport);
	LIST_FOREACH(inph, head, inph_hash) {
		in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;

		/* find exact match on both source and dest */
		if (in6p->in6p_fport != fport)
			continue;
		if (in6p->in6p_lport != lport)
			continue;
		if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, faddr6))
			continue;
		if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, laddr6))
			continue;
		/* v6-only sockets never match v4-mapped endpoints */
		if ((IN6_IS_ADDR_V4MAPPED(laddr6) ||
		     IN6_IS_ADDR_V4MAPPED(faddr6)) &&
		    (in6p->in6p_flags & IN6P_IPV6_V6ONLY))
			continue;
		return in6p;
	}
	if (vp && table->vestige) {
		if ((*table->vestige->lookup6)(faddr6, fport_arg,
					       laddr6, lport_arg, vp))
			return NULL;
	}
	return NULL;
}

/*
 * Lookup of a listening PCB for (laddr6, lport_arg) in three passes of
 * decreasing specificity: exact local address, then (INET) the
 * v4-mapped any-address ::ffff:0.0.0.0, then the IPv6 unspecified
 * address.  A hit is moved to the front of its hash chain.
 */
struct in6pcb *
in6_pcblookup_bind(struct inpcbtable *table, const struct in6_addr *laddr6,
	u_int lport_arg, int faith)
{
	struct inpcbhead *head;
	struct inpcb_hdr *inph;
	struct in6pcb *in6p;
	u_int16_t lport = lport_arg;
#ifdef INET
	struct in6_addr zero_mapped;
#endif

	head = IN6PCBHASH_BIND(table, laddr6, lport);
	LIST_FOREACH(inph, head, inph_hash) {
		in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;

		if (faith && (in6p->in6p_flags & IN6P_FAITH) == 0)
			continue;
		if (in6p->in6p_fport != 0)
			continue;
		if (in6p->in6p_lport != lport)
			continue;
		if (IN6_IS_ADDR_V4MAPPED(laddr6) &&
		    (in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, laddr6))
			goto out;
	}
#ifdef INET
	if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
		/* second pass: ::ffff:0.0.0.0 (v4-mapped INADDR_ANY) */
		memset(&zero_mapped, 0, sizeof(zero_mapped));
		zero_mapped.s6_addr16[5] = 0xffff;
		head = IN6PCBHASH_BIND(table, &zero_mapped, lport);
		LIST_FOREACH(inph, head, inph_hash) {
			in6p = (struct in6pcb *)inph;
			if (in6p->in6p_af != AF_INET6)
				continue;

			if (faith && (in6p->in6p_flags & IN6P_FAITH) == 0)
				continue;
			if (in6p->in6p_fport != 0)
				continue;
			if (in6p->in6p_lport != lport)
				continue;
			if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
				continue;
			if (IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &zero_mapped))
				goto out;
		}
	}
#endif
	/* last pass: sockets bound to the IPv6 unspecified address */
	head = IN6PCBHASH_BIND(table, &zeroin6_addr, lport);
	LIST_FOREACH(inph, head, inph_hash) {
		in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;

		if (faith && (in6p->in6p_flags & IN6P_FAITH) == 0)
			continue;
		if (in6p->in6p_fport != 0)
			continue;
		if (in6p->in6p_lport != lport)
			continue;
		if (IN6_IS_ADDR_V4MAPPED(laddr6) &&
		    (in6p->in6p_flags & IN6P_IPV6_V6ONLY) != 0)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &zeroin6_addr))
			goto out;
	}
	return (NULL);

out:
	/* move-to-front heuristic for the hash chain */
	inph = &in6p->in6p_head;
	if (inph != LIST_FIRST(head)) {
		LIST_REMOVE(inph, inph_hash);
		LIST_INSERT_HEAD(head, inph, inph_hash);
	}
	return in6p;
}

/*
 * Record a PCB state transition, rehashing the PCB into the bind or
 * connect hash as appropriate for the new state.
 */
void
in6_pcbstate(struct in6pcb *in6p, int state)
{

	if (in6p->in6p_af != AF_INET6)
		return;

	/* unhook from the old chain before rehashing */
	if (in6p->in6p_state > IN6P_ATTACHED)
		LIST_REMOVE(&in6p->in6p_head, inph_hash);

	switch (state) {
	case IN6P_BOUND:
		LIST_INSERT_HEAD(IN6PCBHASH_BIND(in6p->in6p_table,
		    &in6p->in6p_laddr, in6p->in6p_lport), &in6p->in6p_head,
		    inph_hash);
		break;
	case IN6P_CONNECTED:
		LIST_INSERT_HEAD(IN6PCBHASH_CONNECT(in6p->in6p_table,
		    &in6p->in6p_faddr, in6p->in6p_fport,
		    &in6p->in6p_laddr, in6p->in6p_lport), &in6p->in6p_head,
		    inph_hash);
		break;
	}

	in6p->in6p_state = state;
}
| 338 16 16 525 525 526 526 360 360 360 360 3 3 1 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 | /* $NetBSD: ufs_quota.c,v 1.117 2014/06/28 22:27:51 dholland Exp $ */ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 * 
The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ufs_quota.c,v 1.117 2014/06/28 22:27:51 dholland Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" #endif #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/file.h> #include <sys/proc.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/kauth.h> #include <sys/quotactl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_quota.h> kmutex_t dqlock; kcondvar_t dqcv; const char *quotatypes[MAXQUOTAS] = INITQFNAMES; /* * Code pertaining to management of the in-core dquot data structures. */ #define DQHASH(dqvp, id) \ (((((long)(dqvp)) >> 8) + id) & dqhash) static LIST_HEAD(dqhashhead, dquot) *dqhashtbl; static u_long dqhash; static pool_cache_t dquot_cache; static int quota_handle_cmd_stat(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_idtypestat(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_objtypestat(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_get(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_put(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_cursorget(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_del(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_quotaon(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_quotaoff(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_cursoropen(struct mount *, struct lwp *, struct quotactl_args *args); static int quota_handle_cmd_cursorclose(struct mount *, struct lwp *, struct quotactl_args *args); static int 
quota_handle_cmd_cursorskipidtype(struct mount *, struct lwp *,
    struct quotactl_args *args);
static int quota_handle_cmd_cursoratend(struct mount *, struct lwp *,
    struct quotactl_args *args);
static int quota_handle_cmd_cursorrewind(struct mount *, struct lwp *,
    struct quotactl_args *args);

/*
 * Initialize the quota fields of an inode.
 */
void
ufsquota_init(struct inode *ip)
{
	int i;

	/* no dquot attached yet for any quota type */
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
}

/*
 * Release the quota fields from an inode.
 */
void
ufsquota_free(struct inode *ip)
{
	int i;

	/* drop each dquot reference and reset the slot */
	for (i = 0; i < MAXQUOTAS; i++) {
		dqrele(ITOV(ip), ip->i_dquot[i]);
		ip->i_dquot[i] = NODQUOT;
	}
}

/*
 * Update disk usage, and take corrective action.
 *
 * change is the signed block-count delta; dispatches to the quota v1
 * or v2 implementation depending on the mount flags.  Returns 0 when
 * neither quota flavor is active (or for snapshot inodes).
 */
int
chkdq(struct inode *ip, int64_t change, kauth_cred_t cred, int flags)
{
	/* do not track snapshot usage, or we will deadlock */
	if ((ip->i_flags & SF_SNAPSHOT) != 0)
		return 0;

#ifdef QUOTA
	if (ip->i_ump->um_flags & UFS_QUOTA)
		return chkdq1(ip, change, cred, flags);
#endif
#ifdef QUOTA2
	if (ip->i_ump->um_flags & UFS_QUOTA2) 
		return chkdq2(ip, change, cred, flags);
#endif
	return 0;
}

/*
 * Check the inode limit, applying corrective action.
*/ int chkiq(struct inode *ip, int32_t change, kauth_cred_t cred, int flags) { /* do not track snapshot usage, or we will deadlock */ if ((ip->i_flags & SF_SNAPSHOT) != 0) return 0; #ifdef QUOTA if (ip->i_ump->um_flags & UFS_QUOTA) return chkiq1(ip, change, cred, flags); #endif #ifdef QUOTA2 if (ip->i_ump->um_flags & UFS_QUOTA2) return chkiq2(ip, change, cred, flags); #endif return 0; } int quota_handle_cmd(struct mount *mp, struct lwp *l, struct quotactl_args *args) { int error = 0; switch (args->qc_op) { case QUOTACTL_STAT: error = quota_handle_cmd_stat(mp, l, args); break; case QUOTACTL_IDTYPESTAT: error = quota_handle_cmd_idtypestat(mp, l, args); break; case QUOTACTL_OBJTYPESTAT: error = quota_handle_cmd_objtypestat(mp, l, args); break; case QUOTACTL_QUOTAON: error = quota_handle_cmd_quotaon(mp, l, args); break; case QUOTACTL_QUOTAOFF: error = quota_handle_cmd_quotaoff(mp, l, args); break; case QUOTACTL_GET: error = quota_handle_cmd_get(mp, l, args); break; case QUOTACTL_PUT: error = quota_handle_cmd_put(mp, l, args); break; case QUOTACTL_CURSORGET: error = quota_handle_cmd_cursorget(mp, l, args); break; case QUOTACTL_DEL: error = quota_handle_cmd_del(mp, l, args); break; case QUOTACTL_CURSOROPEN: error = quota_handle_cmd_cursoropen(mp, l, args); break; case QUOTACTL_CURSORCLOSE: error = quota_handle_cmd_cursorclose(mp, l, args); break; case QUOTACTL_CURSORSKIPIDTYPE: error = quota_handle_cmd_cursorskipidtype(mp, l, args); break; case QUOTACTL_CURSORATEND: error = quota_handle_cmd_cursoratend(mp, l, args); break; case QUOTACTL_CURSORREWIND: error = quota_handle_cmd_cursorrewind(mp, l, args); break; default: panic("Invalid quotactl operation %d\n", args->qc_op); } return error; } static int quota_handle_cmd_stat(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); struct quotastat *info; KASSERT(args->qc_op == QUOTACTL_STAT); info = args->u.stat.qc_info; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return 
EOPNOTSUPP; #ifdef QUOTA if (ump->um_flags & UFS_QUOTA) { strcpy(info->qs_implname, "ufs/ffs quota v1"); info->qs_numidtypes = MAXQUOTAS; /* XXX no define for this */ info->qs_numobjtypes = 2; info->qs_restrictions = 0; info->qs_restrictions |= QUOTA_RESTRICT_NEEDSQUOTACHECK; info->qs_restrictions |= QUOTA_RESTRICT_UNIFORMGRACE; info->qs_restrictions |= QUOTA_RESTRICT_32BIT; } else #endif #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { strcpy(info->qs_implname, "ufs/ffs quota v2"); info->qs_numidtypes = MAXQUOTAS; info->qs_numobjtypes = N_QL; info->qs_restrictions = 0; } else #endif return EOPNOTSUPP; return 0; } static int quota_handle_cmd_idtypestat(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); int idtype; struct quotaidtypestat *info; const char *name; KASSERT(args->qc_op == QUOTACTL_IDTYPESTAT); idtype = args->u.idtypestat.qc_idtype; info = args->u.idtypestat.qc_info; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return EOPNOTSUPP; /* * These are the same for both QUOTA and QUOTA2. */ switch (idtype) { case QUOTA_IDTYPE_USER: name = "user"; break; case QUOTA_IDTYPE_GROUP: name = "group"; break; default: return EINVAL; } strlcpy(info->qis_name, name, sizeof(info->qis_name)); return 0; } static int quota_handle_cmd_objtypestat(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); int objtype; struct quotaobjtypestat *info; const char *name; int isbytes; KASSERT(args->qc_op == QUOTACTL_OBJTYPESTAT); objtype = args->u.objtypestat.qc_objtype; info = args->u.objtypestat.qc_info; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return EOPNOTSUPP; /* * These are the same for both QUOTA and QUOTA2. 
*/ switch (objtype) { case QUOTA_OBJTYPE_BLOCKS: name = "block"; isbytes = 1; break; case QUOTA_OBJTYPE_FILES: name = "file"; isbytes = 0; break; default: return EINVAL; } strlcpy(info->qos_name, name, sizeof(info->qos_name)); info->qos_isbytes = isbytes; return 0; } /* XXX shouldn't all this be in kauth ? */ static int quota_get_auth(struct mount *mp, struct lwp *l, uid_t id) { /* The user can always query about his own quota. */ if (id == kauth_cred_geteuid(l->l_cred)) return 0; return kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, KAUTH_ARG(id), NULL); } static int quota_handle_cmd_get(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); int error; const struct quotakey *qk; struct quotaval *qv; KASSERT(args->qc_op == QUOTACTL_GET); qk = args->u.get.qc_key; qv = args->u.get.qc_val; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return EOPNOTSUPP; error = quota_get_auth(mp, l, qk->qk_id); if (error != 0) return error; #ifdef QUOTA if (ump->um_flags & UFS_QUOTA) { error = quota1_handle_cmd_get(ump, qk, qv); } else #endif #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_get(ump, qk, qv); } else #endif panic("quota_handle_cmd_get: no support ?"); if (error != 0) return error; return error; } static int quota_handle_cmd_put(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); const struct quotakey *qk; const struct quotaval *qv; id_t kauth_id; int error; KASSERT(args->qc_op == QUOTACTL_PUT); qk = args->u.put.qc_key; qv = args->u.put.qc_val; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return EOPNOTSUPP; kauth_id = qk->qk_id; if (kauth_id == QUOTA_DEFAULTID) { kauth_id = 0; } error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_MANAGE, mp, KAUTH_ARG(kauth_id), NULL); if (error != 0) { return error; } #ifdef QUOTA if (ump->um_flags & UFS_QUOTA) error = 
quota1_handle_cmd_put(ump, qk, qv); else #endif #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_put(ump, qk, qv); } else #endif panic("quota_handle_cmd_get: no support ?"); if (error == ENOENT) { error = 0; } return error; } static int quota_handle_cmd_del(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); const struct quotakey *qk; id_t kauth_id; int error; KASSERT(args->qc_op == QUOTACTL_DEL); qk = args->u.del.qc_key; kauth_id = qk->qk_id; if (kauth_id == QUOTA_DEFAULTID) { kauth_id = 0; } if ((ump->um_flags & UFS_QUOTA2) == 0) return EOPNOTSUPP; /* avoid whitespace changes */ { error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_MANAGE, mp, KAUTH_ARG(kauth_id), NULL); if (error != 0) goto err; #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_del(ump, qk); } else #endif panic("quota_handle_cmd_get: no support ?"); if (error && error != ENOENT) goto err; } return 0; err: return error; } static int quota_handle_cmd_cursorget(struct mount *mp, struct lwp *l, struct quotactl_args *args) { struct ufsmount *ump = VFSTOUFS(mp); int error; KASSERT(args->qc_op == QUOTACTL_CURSORGET); if ((ump->um_flags & UFS_QUOTA2) == 0) return EOPNOTSUPP; error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, NULL, NULL); if (error) return error; #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { struct quotakcursor *cursor = args->u.cursorget.qc_cursor; struct quotakey *keys = args->u.cursorget.qc_keys; struct quotaval *vals = args->u.cursorget.qc_vals; unsigned maxnum = args->u.cursorget.qc_maxnum; unsigned *ret = args->u.cursorget.qc_ret; error = quota2_handle_cmd_cursorget(ump, cursor, keys, vals, maxnum, ret); } else #endif panic("quota_handle_cmd_cursorget: no support ?"); return error; } static int quota_handle_cmd_cursoropen(struct mount *mp, struct lwp *l, struct quotactl_args *args) { #ifdef QUOTA2 
struct ufsmount *ump = VFSTOUFS(mp); struct quotakcursor *cursor = args->u.cursoropen.qc_cursor; #endif int error; KASSERT(args->qc_op == QUOTACTL_CURSOROPEN); error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, NULL, NULL); if (error) return error; #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_cursoropen(ump, cursor); } else #endif error = EOPNOTSUPP; return error; } static int quota_handle_cmd_cursorclose(struct mount *mp, struct lwp *l, struct quotactl_args *args) { #ifdef QUOTA2 struct ufsmount *ump = VFSTOUFS(mp); struct quotakcursor *cursor = args->u.cursorclose.qc_cursor; #endif int error; KASSERT(args->qc_op == QUOTACTL_CURSORCLOSE); error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, NULL, NULL); if (error) return error; #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_cursorclose(ump, cursor); } else #endif error = EOPNOTSUPP; return error; } static int quota_handle_cmd_cursorskipidtype(struct mount *mp, struct lwp *l, struct quotactl_args *args) { #ifdef QUOTA2 struct ufsmount *ump = VFSTOUFS(mp); struct quotakcursor *cursor = args->u.cursorskipidtype.qc_cursor; int idtype = args->u.cursorskipidtype.qc_idtype; #endif int error; KASSERT(args->qc_op == QUOTACTL_CURSORSKIPIDTYPE); #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_cursorskipidtype(ump, cursor, idtype); } else #endif error = EOPNOTSUPP; return error; } static int quota_handle_cmd_cursoratend(struct mount *mp, struct lwp *l, struct quotactl_args *args) { #ifdef QUOTA2 struct ufsmount *ump = VFSTOUFS(mp); struct quotakcursor *cursor = args->u.cursoratend.qc_cursor; unsigned *ret = args->u.cursoratend.qc_ret; #endif int error; KASSERT(args->qc_op == QUOTACTL_CURSORATEND); #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_cursoratend(ump, cursor, ret); } else #endif error = EOPNOTSUPP; return error; } 
/*
 * Reset a quota cursor to the beginning of the record set.
 */
static int
quota_handle_cmd_cursorrewind(struct mount *mp, struct lwp *l,
    struct quotactl_args *args)
{
#ifdef QUOTA2
	struct ufsmount *ump = VFSTOUFS(mp);
	struct quotakcursor *cursor = args->u.cursorrewind.qc_cursor;
#endif
	int error;

	KASSERT(args->qc_op == QUOTACTL_CURSORREWIND);

#ifdef QUOTA2
	if (ump->um_flags & UFS_QUOTA2) {
		error = quota2_handle_cmd_cursorrewind(ump, cursor);
	} else
#endif
		error = EOPNOTSUPP;

	return error;
}

/*
 * Enable (QUOTA1-style) quotas using the named quota file.
 * Refused with EBUSY on QUOTA2 filesystems, where quotas are
 * controlled by on-disk state rather than quotaon/quotaoff.
 * Requires the ONOFF kauth privilege.
 */
static int
quota_handle_cmd_quotaon(struct mount *mp, struct lwp *l,
    struct quotactl_args *args)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	int error;

	KASSERT(args->qc_op == QUOTACTL_QUOTAON);

	if ((ump->um_flags & UFS_QUOTA2) != 0)
		return EBUSY;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA,
	    KAUTH_REQ_SYSTEM_FS_QUOTA_ONOFF, mp, NULL, NULL);
	if (error != 0) {
		return error;
	}

#ifdef QUOTA
	int idtype = args->u.quotaon.qc_idtype;
	const char *qfile = args->u.quotaon.qc_quotafile;
	error = quota1_handle_cmd_quotaon(l, ump, idtype, qfile);
#else
	error = EOPNOTSUPP;
#endif

	return error;
}

/*
 * Disable (QUOTA1-style) quotas for the given id type.
 * Not supported on QUOTA2 filesystems.  Requires the ONOFF
 * kauth privilege.
 */
static int
quota_handle_cmd_quotaoff(struct mount *mp, struct lwp *l,
    struct quotactl_args *args)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	int error;

	KASSERT(args->qc_op == QUOTACTL_QUOTAOFF);

	if ((ump->um_flags & UFS_QUOTA2) != 0)
		return EOPNOTSUPP;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA,
	    KAUTH_REQ_SYSTEM_FS_QUOTA_ONOFF, mp, NULL, NULL);
	if (error != 0) {
		return error;
	}

#ifdef QUOTA
	int idtype = args->u.quotaoff.qc_idtype;
	error = quota1_handle_cmd_quotaoff(l, ump, idtype);
#else
	error = EOPNOTSUPP;
#endif

	return error;
}

/*
 * Initialize the quota system.
 */
void
dqinit(void)
{

	mutex_init(&dqlock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&dqcv, "quota");
	/* Hash table sized from desiredvnodes; dqhash becomes the mask. */
	dqhashtbl = hashinit(desiredvnodes, HASH_LIST, true, &dqhash);
	dquot_cache = pool_cache_init(sizeof(struct dquot), 0, 0, 0, "ufsdq",
	    NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * Resize the dquot hash table (e.g. after desiredvnodes changes) and
 * rehash every cached dquot into the new table under dqlock.
 */
void
dqreinit(void)
{
	struct dquot *dq;
	struct dqhashhead *oldhash, *hash;
	struct vnode *dqvp;
	u_long oldmask, mask, hashval;
	int i;

	hash = hashinit(desiredvnodes, HASH_LIST, true, &mask);
	mutex_enter(&dqlock);
	oldhash = dqhashtbl;
	oldmask = dqhash;
	dqhashtbl = hash;
	dqhash = mask;
	for (i = 0; i <= oldmask; i++) {
		while ((dq = LIST_FIRST(&oldhash[i])) != NULL) {
			dqvp = dq->dq_ump->um_quotas[dq->dq_type];
			LIST_REMOVE(dq, dq_hash);
			hashval = DQHASH(dqvp, dq->dq_id);
			LIST_INSERT_HEAD(&dqhashtbl[hashval], dq, dq_hash);
		}
	}
	mutex_exit(&dqlock);
	/* Old table is empty now; free it outside the lock. */
	hashdone(oldhash, HASH_LIST, oldmask);
}

/*
 * Free resources held by quota system.
 */
void
dqdone(void)
{

	pool_cache_destroy(dquot_cache);
	hashdone(dqhashtbl, HASH_LIST, dqhash);
	cv_destroy(&dqcv);
	mutex_destroy(&dqlock);
}

/*
 * Set up the quotas for an inode.
 *
 * This routine completely defines the semantics of quotas.
 * If other criterion want to be used to establish quotas, the
 * MAXQUOTAS value in quotas.h should be increased, and the
 * additional dquots set up here.
 */
int
getinoquota(struct inode *ip)
{
	struct ufsmount *ump = ip->i_ump;
	struct vnode *vp = ITOV(ip);
	int i, error;
	u_int32_t ino_ids[MAXQUOTAS];

	/*
	 * To avoid deadlocks never update quotas for quota files
	 * on the same file system
	 */
	for (i = 0; i < MAXQUOTAS; i++)
		if (vp == ump->um_quotas[i])
			return 0;

	ino_ids[USRQUOTA] = ip->i_uid;
	ino_ids[GRPQUOTA] = ip->i_gid;
	for (i = 0; i < MAXQUOTAS; i++) {
		/*
		 * If the file id changed the quota needs update.
		 */
		if (ip->i_dquot[i] != NODQUOT &&
		    ip->i_dquot[i]->dq_id != ino_ids[i]) {
			dqrele(ITOV(ip), ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
		/*
		 * Set up the quota based on file id.
		 * ENODEV means that quotas are not enabled.
		 */
		if (ip->i_dquot[i] == NODQUOT &&
		    (error = dqget(vp, ino_ids[i], ump, i, &ip->i_dquot[i])) &&
		    error != ENODEV)
			return (error);
	}
	return 0;
}

/*
 * Obtain a dquot structure for the specified identifier and quota file
 * reading the information from the file if necessary.
 */
int
dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type,
    struct dquot **dqp)
{
	struct dquot *dq, *ndq;
	struct dqhashhead *dqh;
	struct vnode *dqvp;
	int error = 0; /* XXX gcc */

	/* Lock to see an up to date value for QTF_CLOSING. */
	mutex_enter(&dqlock);
	if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) {
		mutex_exit(&dqlock);
		*dqp = NODQUOT;
		return (ENODEV);
	}
	dqvp = ump->um_quotas[type];
#ifdef QUOTA
	if (ump->um_flags & UFS_QUOTA) {
		/* QUOTA1: also refuse while the quota file is closing. */
		if (dqvp == NULLVP || (ump->umq1_qflags[type] & QTF_CLOSING)) {
			mutex_exit(&dqlock);
			*dqp = NODQUOT;
			return (ENODEV);
		}
	}
#endif
#ifdef QUOTA2
	if (ump->um_flags & UFS_QUOTA2) {
		if (dqvp == NULLVP) {
			mutex_exit(&dqlock);
			*dqp = NODQUOT;
			return (ENODEV);
		}
	}
#endif
	KASSERT(dqvp != vp);
	/*
	 * Check the cache first.
	 */
	dqh = &dqhashtbl[DQHASH(dqvp, id)];
	LIST_FOREACH(dq, dqh, dq_hash) {
		if (dq->dq_id != id ||
		    dq->dq_ump->um_quotas[dq->dq_type] != dqvp)
			continue;
		KASSERT(dq->dq_cnt > 0);
		dqref(dq);
		mutex_exit(&dqlock);
		*dqp = dq;
		return (0);
	}
	/*
	 * Not in cache, allocate a new one.
	 */
	mutex_exit(&dqlock);
	/* Allocate outside dqlock: pool_cache_get may sleep (PR_WAITOK). */
	ndq = pool_cache_get(dquot_cache, PR_WAITOK);
	/*
	 * Initialize the contents of the dquot structure.
	 */
	memset((char *)ndq, 0, sizeof *ndq);
	ndq->dq_flags = 0;
	ndq->dq_id = id;
	ndq->dq_ump = ump;
	ndq->dq_type = type;
	mutex_init(&ndq->dq_interlock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&dqlock);
	/* Re-look-up: the hash may have gained this entry while unlocked. */
	dqh = &dqhashtbl[DQHASH(dqvp, id)];
	LIST_FOREACH(dq, dqh, dq_hash) {
		if (dq->dq_id != id ||
		    dq->dq_ump->um_quotas[dq->dq_type] != dqvp)
			continue;
		/*
		 * Another thread beat us allocating this dquot.
		 */
		KASSERT(dq->dq_cnt > 0);
		dqref(dq);
		mutex_exit(&dqlock);
		/* Discard our unused, freshly initialized copy. */
		mutex_destroy(&ndq->dq_interlock);
		pool_cache_put(dquot_cache, ndq);
		*dqp = dq;
		return 0;
	}
	dq = ndq;
	LIST_INSERT_HEAD(dqh, dq, dq_hash);
	dqref(dq);
	/* Hold the per-dquot interlock across the file read below. */
	mutex_enter(&dq->dq_interlock);
	mutex_exit(&dqlock);
#ifdef QUOTA
	if (ump->um_flags & UFS_QUOTA)
		error = dq1get(dqvp, id, ump, type, dq);
#endif
#ifdef QUOTA2
	if (ump->um_flags & UFS_QUOTA2)
		error = dq2get(dqvp, id, ump, type, dq);
#endif
	/*
	 * I/O error in reading quota file, release
	 * quota structure and reflect problem to caller.
	 */
	if (error) {
		mutex_enter(&dqlock);
		LIST_REMOVE(dq, dq_hash);
		mutex_exit(&dqlock);
		mutex_exit(&dq->dq_interlock);
		dqrele(vp, dq);
		*dqp = NODQUOT;
		return (error);
	}
	mutex_exit(&dq->dq_interlock);
	*dqp = dq;
	return (0);
}

/*
 * Obtain a reference to a dquot.
 */
void
dqref(struct dquot *dq)
{

	KASSERT(mutex_owned(&dqlock));
	dq->dq_cnt++;
	KASSERT(dq->dq_cnt > 0);
}

/*
 * Release a reference to a dquot.
 */
void
dqrele(struct vnode *vp, struct dquot *dq)
{

	if (dq == NODQUOT)
		return;
	mutex_enter(&dq->dq_interlock);
	/*
	 * Loop: if we hold the last reference and the dquot is dirty
	 * (DQ_MOD), sync it to disk first, then re-check — the sync
	 * drops dqlock, so the state may have changed.
	 */
	for (;;) {
		mutex_enter(&dqlock);
		if (dq->dq_cnt > 1) {
			dq->dq_cnt--;
			mutex_exit(&dqlock);
			mutex_exit(&dq->dq_interlock);
			return;
		}
		if ((dq->dq_flags & DQ_MOD) == 0)
			break;
		mutex_exit(&dqlock);
#ifdef QUOTA
		if (dq->dq_ump->um_flags & UFS_QUOTA)
			(void) dq1sync(vp, dq);
#endif
#ifdef QUOTA2
		if (dq->dq_ump->um_flags & UFS_QUOTA2)
			(void) dq2sync(vp, dq);
#endif
	}
	/* Last reference and clean: unhash and free the dquot. */
	KASSERT(dq->dq_cnt == 1 && (dq->dq_flags & DQ_MOD) == 0);
	LIST_REMOVE(dq, dq_hash);
	mutex_exit(&dqlock);
	mutex_exit(&dq->dq_interlock);
	mutex_destroy(&dq->dq_interlock);
	pool_cache_put(dquot_cache, dq);
}

/*
 * Sync all dirty quota information for a mount to disk, dispatching
 * to the QUOTA1 or QUOTA2 implementation as appropriate.
 */
int
qsync(struct mount *mp)
{
	struct ufsmount *ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (ump->um_flags & UFS_QUOTA)
		return q1sync(mp);
#endif
#ifdef QUOTA2
	if (ump->um_flags & UFS_QUOTA2)
		return q2sync(mp);
#endif
	return 0;
}

#ifdef DIAGNOSTIC
/*
 * Check the hash chains for stray dquot's.
 */
void
dqflush(struct vnode *vp)
{
	struct dquot *dq;
	int i;

	/* Walk every hash chain; no cached dquot may still name vp. */
	mutex_enter(&dqlock);
	for (i = 0; i <= dqhash; i++)
		LIST_FOREACH(dq, &dqhashtbl[i], dq_hash)
			KASSERT(dq->dq_ump->um_quotas[dq->dq_type] != vp);
	mutex_exit(&dqlock);
}
#endif
| 30 30 837 60 60 60 836 836 836 835 834 835 835 835 834 147 143 838 838 837 142 2 2 835 834 24 24 832 833 470 762 834 832 834 531 528 529 468 735 733 730 733 362 403 403 403 404 403 383 379 404 404 403 403 1 404 244 403 403 403 403 1 363 458 457 458 458 458 458 458 457 250 443 458 1 1 1 837 836 836 837 836 837 837 143 836 461 2 463 837 837 3 837 46 46 837 60 60 60 59 60 836 835 835 835 773 398 839 838 838 834 835 838 839 838 835 129 837 833 833 835 832 147 145 145 838 837 837 837 140 140 140 140 837 837 147 147 147 147 147 145 2 2 2 837 838 837 414 838 838 37 36 37 37 37 37 2 36 7 7 32 32 462 463 13 13 12 12 72 29 28 29 28 28 1 464 80 834 834 835 129 129 129 129 129 835 835 834 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 
1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 
1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 
2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 | /* $NetBSD: vfs_bio.c,v 1.303 2022/03/30 14:54:29 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Doran, and by Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94 */ /*- * Copyright (c) 1994 Christopher G. Demetriou * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94 */ /* * The buffer cache subsystem. * * Some references: * Bach: The Design of the UNIX Operating System (Prentice Hall, 1986) * Leffler, et al.: The Design and Implementation of the 4.3BSD * UNIX Operating System (Addison Welley, 1989) * * Locking * * There are three locks: * - bufcache_lock: protects global buffer cache state. * - BC_BUSY: a long term per-buffer lock. * - buf_t::b_objlock: lock on completion (biowait vs biodone). * * For buffers associated with vnodes (a most common case) b_objlock points * to the vnode_t::v_interlock. Otherwise, it points to generic buffer_lock. 
* * Lock order: * bufcache_lock -> * buf_t::b_objlock */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.303 2022/03/30 14:54:29 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_bufcache.h" #include "opt_dtrace.h" #include "opt_biohist.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/buf.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/resourcevar.h> #include <sys/sysctl.h> #include <sys/conf.h> #include <sys/kauth.h> #include <sys/fstrans.h> #include <sys/intr.h> #include <sys/cpu.h> #include <sys/wapbl.h> #include <sys/bitops.h> #include <sys/cprng.h> #include <sys/sdt.h> #include <uvm/uvm.h> /* extern struct uvm uvm */ #include <miscfs/specfs/specdev.h> SDT_PROVIDER_DEFINE(io); SDT_PROBE_DEFINE4(io, kernel, , bbusy__start, "struct buf *"/*bp*/, "bool"/*intr*/, "int"/*timo*/, "kmutex_t *"/*interlock*/); SDT_PROBE_DEFINE5(io, kernel, , bbusy__done, "struct buf *"/*bp*/, "bool"/*intr*/, "int"/*timo*/, "kmutex_t *"/*interlock*/, "int"/*error*/); SDT_PROBE_DEFINE0(io, kernel, , getnewbuf__start); SDT_PROBE_DEFINE1(io, kernel, , getnewbuf__done, "struct buf *"/*bp*/); SDT_PROBE_DEFINE3(io, kernel, , getblk__start, "struct vnode *"/*vp*/, "daddr_t"/*blkno*/, "int"/*size*/); SDT_PROBE_DEFINE4(io, kernel, , getblk__done, "struct vnode *"/*vp*/, "daddr_t"/*blkno*/, "int"/*size*/, "struct buf *"/*bp*/); SDT_PROBE_DEFINE2(io, kernel, , brelse, "struct buf *"/*bp*/, "int"/*set*/); SDT_PROBE_DEFINE1(io, kernel, , wait__start, "struct buf *"/*bp*/); SDT_PROBE_DEFINE1(io, kernel, , wait__done, "struct buf *"/*bp*/); #ifndef BUFPAGES # define BUFPAGES 0 #endif #ifdef BUFCACHE # if (BUFCACHE < 5) || (BUFCACHE > 95) # error BUFCACHE is not between 5 and 95 # endif #else # define BUFCACHE 15 #endif u_int nbuf; /* desired number of buffer headers */ u_int bufpages = BUFPAGES; /* optional hardwired count */ u_int bufcache = BUFCACHE; /* max % of RAM to use for buffer cache */ /* * 
Definitions for the buffer free lists. */ #define BQUEUES 3 /* number of free buffer queues */ #define BQ_LOCKED 0 /* super-blocks &c */ #define BQ_LRU 1 /* lru, useful buffers */ #define BQ_AGE 2 /* rubbish */ struct bqueue { TAILQ_HEAD(, buf) bq_queue; uint64_t bq_bytes; buf_t *bq_marker; }; static struct bqueue bufqueues[BQUEUES] __cacheline_aligned; /* Function prototypes */ static void buf_setwm(void); static int buf_trim(void); static void *bufpool_page_alloc(struct pool *, int); static void bufpool_page_free(struct pool *, void *); static buf_t *bio_doread(struct vnode *, daddr_t, int, int); static buf_t *getnewbuf(int, int, int); static int buf_lotsfree(void); static int buf_canrelease(void); static u_long buf_mempoolidx(u_long); static u_long buf_roundsize(u_long); static void *buf_alloc(size_t); static void buf_mrelease(void *, size_t); static void binsheadfree(buf_t *, struct bqueue *); static void binstailfree(buf_t *, struct bqueue *); #ifdef DEBUG static int checkfreelist(buf_t *, struct bqueue *, int); #endif static void biointr(void *); static void biodone2(buf_t *); static void sysctl_kern_buf_setup(void); static void sysctl_vm_buf_setup(void); /* Initialization for biohist */ #include <sys/biohist.h> BIOHIST_DEFINE(biohist); void biohist_init(void) { BIOHIST_INIT(biohist, BIOHIST_SIZE); } /* * Definitions for the buffer hash lists. */ #define BUFHASH(dvp, lbn) \ (&bufhashtbl[(((long)(dvp) >> 8) + (int)(lbn)) & bufhash]) LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; u_long bufhash; static int bufhash_stats(struct hashstat_sysctl *, bool); static kcondvar_t needbuffer_cv; /* * Buffer queue lock. */ kmutex_t bufcache_lock __cacheline_aligned; kmutex_t buffer_lock __cacheline_aligned; /* Software ISR for completed transfers. */ static void *biodone_sih; /* Buffer pool for I/O buffers. 
 */
static pool_cache_t buf_cache;
static pool_cache_t bufio_cache;

#define MEMPOOL_INDEX_OFFSET	(ilog2(DEV_BSIZE))	/* smallest pool is 512 bytes */
#define NMEMPOOLS		(ilog2(MAXBSIZE) - MEMPOOL_INDEX_OFFSET + 1)

__CTASSERT((1 << (NMEMPOOLS + MEMPOOL_INDEX_OFFSET - 1)) == MAXBSIZE);

/* Buffer memory pools */
static struct pool bmempools[NMEMPOOLS];

static struct vm_map *buf_map;

/*
 * Buffer memory pool allocator.
 */
static void *
bufpool_page_alloc(struct pool *pp, int flags)
{

	/* Wired MAXBSIZE-aligned chunks from buf_map; nonblocking
	 * unless the pool asked for PR_WAITOK. */
	return (void *)uvm_km_alloc(buf_map,
	    MAXBSIZE, MAXBSIZE,
	    ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT|UVM_KMF_TRYLOCK)
	    | UVM_KMF_WIRED);
}

static void
bufpool_page_free(struct pool *pp, void *v)
{

	uvm_km_free(buf_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
}

static struct pool_allocator bufmempool_allocator = {
	.pa_alloc = bufpool_page_alloc,
	.pa_free = bufpool_page_free,
	.pa_pagesz = MAXBSIZE,
};

/* Buffer memory management variables */
u_long bufmem_valimit;
u_long bufmem_hiwater;
u_long bufmem_lowater;
u_long bufmem;

/*
 * MD code can call this to set a hard limit on the amount
 * of virtual memory used by the buffer cache.
 */
int
buf_setvalimit(vsize_t sz)
{

	/* We need to accommodate at least NMEMPOOLS of MAXBSIZE each */
	if (sz < NMEMPOOLS * MAXBSIZE)
		return EINVAL;

	bufmem_valimit = sz;
	return 0;
}

/*
 * Derive the buffer-cache high and low watermarks from buf_memcalc().
 * The low watermark is 1/8 (BUFMEM_WMSHIFT) of the high watermark.
 */
static void
buf_setwm(void)
{

	bufmem_hiwater = buf_memcalc();
	/* lowater is approx. 2% of memory (with bufcache = 15) */
#define	BUFMEM_WMSHIFT	3
#define	BUFMEM_HIWMMIN	(64 * 1024 << BUFMEM_WMSHIFT)
	if (bufmem_hiwater < BUFMEM_HIWMMIN)
		/* Ensure a reasonable minimum value */
		bufmem_hiwater = BUFMEM_HIWMMIN;
	bufmem_lowater = bufmem_hiwater >> BUFMEM_WMSHIFT;
}

#ifdef DEBUG
int debug_verify_freelist = 0;
/*
 * Debug helper: return nonzero iff bp's membership on freelist dp
 * matches the expectation `ison`.  Always "passes" unless
 * debug_verify_freelist is enabled.
 */
static int
checkfreelist(buf_t *bp, struct bqueue *dp, int ison)
{
	buf_t *b;

	if (!debug_verify_freelist)
		return 1;

	TAILQ_FOREACH(b, &dp->bq_queue, b_freelist) {
		if (b == bp)
			return ison ? 1 : 0;
	}

	return ison ? 0 : 1;
}
#endif

/*
 * Insq/Remq for the buffer hash lists.
 * Call with buffer queue locked.
 */
static void
binsheadfree(buf_t *bp, struct bqueue *dp)
{

	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT(bp->b_freelistindex == -1);
	TAILQ_INSERT_HEAD(&dp->bq_queue, bp, b_freelist);
	dp->bq_bytes += bp->b_bufsize;
	/* Remember which queue we are on, as an index into bufqueues[]. */
	bp->b_freelistindex = dp - bufqueues;
}

static void
binstailfree(buf_t *bp, struct bqueue *dp)
{

	KASSERT(mutex_owned(&bufcache_lock));
	KASSERTMSG(bp->b_freelistindex == -1, "double free of buffer? "
	    "bp=%p, b_freelistindex=%d\n", bp, bp->b_freelistindex);
	TAILQ_INSERT_TAIL(&dp->bq_queue, bp, b_freelist);
	dp->bq_bytes += bp->b_bufsize;
	bp->b_freelistindex = dp - bufqueues;
}

/*
 * Remove a buffer from whichever free queue it is on.
 * Called with bufcache_lock held.
 */
void
bremfree(buf_t *bp)
{
	struct bqueue *dp;
	int bqidx = bp->b_freelistindex;

	KASSERT(mutex_owned(&bufcache_lock));

	KASSERT(bqidx != -1);
	dp = &bufqueues[bqidx];
	KDASSERT(checkfreelist(bp, dp, 1));
	KASSERT(dp->bq_bytes >= bp->b_bufsize);
	TAILQ_REMOVE(&dp->bq_queue, bp, b_freelist);
	dp->bq_bytes -= bp->b_bufsize;

	/* For the sysctl helper. */
	if (bp == dp->bq_marker)
		dp->bq_marker = NULL;

#if defined(DIAGNOSTIC)
	bp->b_freelistindex = -1;
#endif /* defined(DIAGNOSTIC) */
}

/*
 * note that for some ports this is used by pmap bootstrap code to
 * determine kva size.
 */
u_long
buf_memcalc(void)
{
	u_long n;
	vsize_t mapsz = 0;

	/*
	 * Determine the upper bound of memory to use for buffers.
	 *
	 *	- If bufpages is specified, use that as the number
	 *	  pages.
	 *
	 *	- Otherwise, use bufcache as the percentage of
	 *	  physical memory.
	 */
	if (bufpages != 0) {
		n = bufpages;
	} else {
		/* Clamp the tunable into its documented 5..95% range. */
		if (bufcache < 5) {
			printf("forcing bufcache %d -> 5", bufcache);
			bufcache = 5;
		}
		if (bufcache > 95) {
			printf("forcing bufcache %d -> 95", bufcache);
			bufcache = 95;
		}
		if (buf_map != NULL)
			mapsz = vm_map_max(buf_map) - vm_map_min(buf_map);
		n = calc_cache_size(mapsz, bufcache,
		    (buf_map != kernel_map) ? 100 : BUFCACHE_VA_MAXPCT)
		    / PAGE_SIZE;
	}

	n <<= PAGE_SHIFT;
	if (bufmem_valimit != 0 && n > bufmem_valimit)
		n = bufmem_valimit;

	return (n);
}

/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit(void)
{
	struct bqueue *dp;
	int use_std;
	u_int i;

	biodone_vfs = biodone;

	mutex_init(&bufcache_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&buffer_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&needbuffer_cv, "needbuf");

	/* Honor any MD virtual-address limit via a dedicated submap. */
	if (bufmem_valimit != 0) {
		vaddr_t minaddr = 0, maxaddr;
		buf_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
		    bufmem_valimit, 0, false, 0);
		if (buf_map == NULL)
			panic("bufinit: cannot allocate submap");
	} else
		buf_map = kernel_map;

	/*
	 * Initialize buffer cache memory parameters.
	 */
	bufmem = 0;
	buf_setwm();

	/* On "small" machines use small pool page sizes where possible */
	use_std = (physmem < atop(16*1024*1024));

	/*
	 * Also use them on systems that can map the pool pages using
	 * a direct-mapped segment.
	 */
#ifdef PMAP_MAP_POOLPAGE
	use_std = 1;
#endif

	buf_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0,
	    "bufpl", NULL, IPL_SOFTBIO, NULL, NULL, NULL);
	bufio_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0,
	    "biopl", NULL, IPL_BIO, NULL, NULL, NULL);

	/* One data pool per power-of-two size from DEV_BSIZE to MAXBSIZE. */
	for (i = 0; i < NMEMPOOLS; i++) {
		struct pool_allocator *pa;
		struct pool *pp = &bmempools[i];
		u_int size = 1 << (i + MEMPOOL_INDEX_OFFSET);
		char *name = kmem_alloc(8, KM_SLEEP); /* XXX: never freed */
		if (__predict_false(size >= 1048576))
			(void)snprintf(name, 8, "buf%um", size / 1048576);
		else if (__predict_true(size >= 1024))
			(void)snprintf(name, 8, "buf%uk", size / 1024);
		else
			(void)snprintf(name, 8, "buf%ub", size);
		pa = (size <= PAGE_SIZE && use_std)
			? &pool_allocator_nointr
			: &bufmempool_allocator;
		pool_init(pp, size, DEV_BSIZE, 0, 0, name, pa, IPL_NONE);
		pool_setlowat(pp, 1);
		pool_sethiwat(pp, 1);
	}

	/* Initialize the buffer queues */
	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) {
		TAILQ_INIT(&dp->bq_queue);
		dp->bq_bytes = 0;
	}

	/*
	 * Estimate hash table size based on the amount of memory we
	 * intend to use for the buffer cache. The average buffer
	 * size is dependent on our clients (i.e. filesystems).
	 *
	 * For now, use an empirical 3K per buffer.
	 */
	nbuf = (bufmem_hiwater / 1024) / 3;
	bufhashtbl = hashinit(nbuf, HASH_LIST, true, &bufhash);

	sysctl_kern_buf_setup();
	sysctl_vm_buf_setup();
	hashstat_register("bufhash", bufhash_stats);
}

/*
 * Second-stage initialization: establish the soft interrupt used to
 * run biodone completions.  Separate from bufinit() because softints
 * cannot be established that early in boot.
 */
void
bufinit2(void)
{

	biodone_sih = softint_establish(SOFTINT_BIO | SOFTINT_MPSAFE, biointr,
	    NULL);
	if (biodone_sih == NULL)
		panic("bufinit2: can't establish soft interrupt");
}

/*
 * Decide (probabilistically) whether a new buffer allocation is
 * allowed to grow the cache.  Returns nonzero to allow allocation.
 */
static int
buf_lotsfree(void)
{
	u_long guess;

	/* Always allocate if less than the low water mark. */
	if (bufmem < bufmem_lowater)
		return 1;

	/* Never allocate if greater than the high water mark. */
	if (bufmem > bufmem_hiwater)
		return 0;

	/* If there's anything on the AGE list, it should be eaten. */
	if (TAILQ_FIRST(&bufqueues[BQ_AGE].bq_queue) != NULL)
		return 0;

	/*
	 * The probability of getting a new allocation is inversely
	 * proportional to the current size of the cache above
	 * the low water mark. Divide the total first to avoid overflows
	 * in the product.
	 */
	guess = cprng_fast32() % 16;

	if ((bufmem_hiwater - bufmem_lowater) / 16 * guess >=
	    (bufmem - bufmem_lowater))
		return 1;

	/* Otherwise don't allocate. */
	return 0;
}

/*
 * Return estimate of bytes we think need to be
 * released to help resolve low memory conditions.
 *
 * => called with bufcache_lock held.
*/ static int buf_canrelease(void) { int pagedemand, ninvalid = 0; KASSERT(mutex_owned(&bufcache_lock)); if (bufmem < bufmem_lowater) return 0; if (bufmem > bufmem_hiwater) return bufmem - bufmem_hiwater; ninvalid += bufqueues[BQ_AGE].bq_bytes; pagedemand = uvmexp.freetarg - uvm_availmem(false); if (pagedemand < 0) return ninvalid; return MAX(ninvalid, MIN(2 * MAXBSIZE, MIN((bufmem - bufmem_lowater) / 16, pagedemand * PAGE_SIZE))); } /* * Buffer memory allocation helper functions */ static u_long buf_mempoolidx(u_long size) { u_int n = 0; size -= 1; size >>= MEMPOOL_INDEX_OFFSET; while (size) { size >>= 1; n += 1; } if (n >= NMEMPOOLS) panic("buf mem pool index %d", n); return n; } static u_long buf_roundsize(u_long size) { /* Round up to nearest power of 2 */ return (1 << (buf_mempoolidx(size) + MEMPOOL_INDEX_OFFSET)); } static void * buf_alloc(size_t size) { u_int n = buf_mempoolidx(size); void *addr; while (1) { addr = pool_get(&bmempools[n], PR_NOWAIT); if (addr != NULL) break; /* No memory, see if we can free some. If so, try again */ mutex_enter(&bufcache_lock); if (buf_drain(1) > 0) { mutex_exit(&bufcache_lock); continue; } if (curlwp == uvm.pagedaemon_lwp) { mutex_exit(&bufcache_lock); return NULL; } /* Wait for buffers to arrive on the LRU queue */ cv_timedwait(&needbuffer_cv, &bufcache_lock, hz / 4); mutex_exit(&bufcache_lock); } return addr; } static void buf_mrelease(void *addr, size_t size) { pool_put(&bmempools[buf_mempoolidx(size)], addr); } /* * bread()/breadn() helper. */ static buf_t * bio_doread(struct vnode *vp, daddr_t blkno, int size, int async) { buf_t *bp; struct mount *mp; bp = getblk(vp, blkno, size, 0, 0); /* * getblk() may return NULL if we are the pagedaemon. */ if (bp == NULL) { KASSERT(curlwp == uvm.pagedaemon_lwp); return NULL; } /* * If buffer does not have data valid, start a read. * Note that if buffer is BC_INVAL, getblk() won't return it. * Therefore, it's valid if its I/O has completed or been delayed. 
*/ if (!ISSET(bp->b_oflags, (BO_DONE | BO_DELWRI))) { /* Start I/O for the buffer. */ SET(bp->b_flags, B_READ | async); if (async) BIO_SETPRIO(bp, BPRIO_TIMELIMITED); else BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); VOP_STRATEGY(vp, bp); /* Pay for the read. */ curlwp->l_ru.ru_inblock++; } else if (async) brelse(bp, 0); if (vp->v_type == VBLK) mp = spec_node_getmountedfs(vp); else mp = vp->v_mount; /* * Collect statistics on synchronous and asynchronous reads. * Reads from block devices are charged to their associated * filesystem (if any). */ if (mp != NULL) { if (async == 0) mp->mnt_stat.f_syncreads++; else mp->mnt_stat.f_asyncreads++; } return (bp); } /* * Read a disk block. * This algorithm described in Bach (p.54). */ int bread(struct vnode *vp, daddr_t blkno, int size, int flags, buf_t **bpp) { buf_t *bp; int error; BIOHIST_FUNC(__func__); BIOHIST_CALLED(biohist); /* Get buffer for block. */ bp = *bpp = bio_doread(vp, blkno, size, 0); if (bp == NULL) return ENOMEM; /* Wait for the read to complete, and return result. */ error = biowait(bp); if (error == 0 && (flags & B_MODIFY) != 0) error = fscow_run(bp, true); if (error) { brelse(bp, 0); *bpp = NULL; } return error; } /* * Read-ahead multiple disk blocks. The first is sync, the rest async. * Trivial modification to the breada algorithm presented in Bach (p.55). */ int breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, int *rasizes, int nrablks, int flags, buf_t **bpp) { buf_t *bp; int error, i; BIOHIST_FUNC(__func__); BIOHIST_CALLED(biohist); bp = *bpp = bio_doread(vp, blkno, size, 0); if (bp == NULL) return ENOMEM; /* * For each of the read-ahead blocks, start a read, if necessary. */ mutex_enter(&bufcache_lock); for (i = 0; i < nrablks; i++) { /* If it's in the cache, just go on to next one. 
*/ if (incore(vp, rablks[i])) continue; /* Get a buffer for the read-ahead block */ mutex_exit(&bufcache_lock); (void) bio_doread(vp, rablks[i], rasizes[i], B_ASYNC); mutex_enter(&bufcache_lock); } mutex_exit(&bufcache_lock); /* Otherwise, we had to start a read for it; wait until it's valid. */ error = biowait(bp); if (error == 0 && (flags & B_MODIFY) != 0) error = fscow_run(bp, true); if (error) { brelse(bp, 0); *bpp = NULL; } return error; } /* * Block write. Described in Bach (p.56) */ int bwrite(buf_t *bp) { int rv, sync, wasdelayed; struct vnode *vp; struct mount *mp; BIOHIST_FUNC(__func__); BIOHIST_CALLARGS(biohist, "bp=%#jx", (uintptr_t)bp, 0, 0, 0); KASSERT(ISSET(bp->b_cflags, BC_BUSY)); KASSERT(!cv_has_waiters(&bp->b_done)); vp = bp->b_vp; /* * dholland 20160728 AFAICT vp==NULL must be impossible as it * will crash upon reaching VOP_STRATEGY below... see further * analysis on tech-kern. */ KASSERTMSG(vp != NULL, "bwrite given buffer with null vnode"); if (vp != NULL) { KASSERT(bp->b_objlock == vp->v_interlock); if (vp->v_type == VBLK) mp = spec_node_getmountedfs(vp); else mp = vp->v_mount; } else { mp = NULL; } if (mp && mp->mnt_wapbl) { if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) { bdwrite(bp); return 0; } } /* * Remember buffer type, to switch on it later. If the write was * synchronous, but the file system was mounted with MNT_ASYNC, * convert it to a delayed write. * XXX note that this relies on delayed tape writes being converted * to async, not sync writes (which is safe, but ugly). */ sync = !ISSET(bp->b_flags, B_ASYNC); if (sync && mp != NULL && ISSET(mp->mnt_flag, MNT_ASYNC)) { bdwrite(bp); return (0); } /* * Collect statistics on synchronous and asynchronous writes. * Writes to block devices are charged to their associated * filesystem (if any). */ if (mp != NULL) { if (sync) mp->mnt_stat.f_syncwrites++; else mp->mnt_stat.f_asyncwrites++; } /* * Pay for the I/O operation and make sure the buf is on the correct * vnode queue. 
*/ bp->b_error = 0; wasdelayed = ISSET(bp->b_oflags, BO_DELWRI); CLR(bp->b_flags, B_READ); if (wasdelayed) { mutex_enter(&bufcache_lock); mutex_enter(bp->b_objlock); CLR(bp->b_oflags, BO_DONE | BO_DELWRI); reassignbuf(bp, bp->b_vp); /* Wake anyone trying to busy the buffer via vnode's lists. */ cv_broadcast(&bp->b_busy); mutex_exit(&bufcache_lock); } else { curlwp->l_ru.ru_oublock++; mutex_enter(bp->b_objlock); CLR(bp->b_oflags, BO_DONE | BO_DELWRI); } if (vp != NULL) vp->v_numoutput++; mutex_exit(bp->b_objlock); /* Initiate disk write. */ if (sync) BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); else BIO_SETPRIO(bp, BPRIO_TIMELIMITED); VOP_STRATEGY(vp, bp); if (sync) { /* If I/O was synchronous, wait for it to complete. */ rv = biowait(bp); /* Release the buffer. */ brelse(bp, 0); return (rv); } else { return (0); } } int vn_bwrite(void *v) { struct vop_bwrite_args *ap = v; return (bwrite(ap->a_bp)); } /* * Delayed write. * * The buffer is marked dirty, but is not queued for I/O. * This routine should be used when the buffer is expected * to be modified again soon, typically a small write that * partially fills a buffer. * * NB: magnetic tapes cannot be delayed; they must be * written in the order that the writes are requested. * * Described in Leffler, et al. (pp. 208-213). */ void bdwrite(buf_t *bp) { BIOHIST_FUNC(__func__); BIOHIST_CALLARGS(biohist, "bp=%#jx", (uintptr_t)bp, 0, 0, 0); KASSERT(bp->b_vp == NULL || bp->b_vp->v_tag != VT_UFS || bp->b_vp->v_type == VBLK || ISSET(bp->b_flags, B_COWDONE)); KASSERT(ISSET(bp->b_cflags, BC_BUSY)); KASSERT(!cv_has_waiters(&bp->b_done)); /* If this is a tape block, write the block now. 
*/ if (bdev_type(bp->b_dev) == D_TAPE) { bawrite(bp); return; } if (wapbl_vphaswapbl(bp->b_vp)) { struct mount *mp = wapbl_vptomp(bp->b_vp); if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) { WAPBL_ADD_BUF(mp, bp); } } /* * If the block hasn't been seen before: * (1) Mark it as having been seen, * (2) Charge for the write, * (3) Make sure it's on its vnode's correct block list. */ KASSERT(bp->b_vp == NULL || bp->b_objlock == bp->b_vp->v_interlock); if (!ISSET(bp->b_oflags, BO_DELWRI)) { mutex_enter(&bufcache_lock); mutex_enter(bp->b_objlock); SET(bp->b_oflags, BO_DELWRI); curlwp->l_ru.ru_oublock++; reassignbuf(bp, bp->b_vp); /* Wake anyone trying to busy the buffer via vnode's lists. */ cv_broadcast(&bp->b_busy); mutex_exit(&bufcache_lock); } else { mutex_enter(bp->b_objlock); } /* Otherwise, the "write" is done, so mark and release the buffer. */ CLR(bp->b_oflags, BO_DONE); mutex_exit(bp->b_objlock); brelse(bp, 0); } /* * Asynchronous block write; just an asynchronous bwrite(). */ void bawrite(buf_t *bp) { KASSERT(ISSET(bp->b_cflags, BC_BUSY)); KASSERT(bp->b_vp != NULL); SET(bp->b_flags, B_ASYNC); VOP_BWRITE(bp->b_vp, bp); } /* * Release a buffer on to the free lists. * Described in Bach (p. 46). */ void brelsel(buf_t *bp, int set) { struct bqueue *bufq; struct vnode *vp; SDT_PROBE2(io, kernel, , brelse, bp, set); KASSERT(bp != NULL); KASSERT(mutex_owned(&bufcache_lock)); KASSERT(!cv_has_waiters(&bp->b_done)); SET(bp->b_cflags, set); KASSERT(ISSET(bp->b_cflags, BC_BUSY)); KASSERT(bp->b_iodone == NULL); /* Wake up any processes waiting for any buffer to become free. */ cv_signal(&needbuffer_cv); /* Wake up any proceeses waiting for _this_ buffer to become free */ if (ISSET(bp->b_cflags, BC_WANTED)) CLR(bp->b_cflags, BC_WANTED|BC_AGE); /* If it's clean clear the copy-on-write flag. 
*/ if (ISSET(bp->b_flags, B_COWDONE)) { mutex_enter(bp->b_objlock); if (!ISSET(bp->b_oflags, BO_DELWRI)) CLR(bp->b_flags, B_COWDONE); mutex_exit(bp->b_objlock); } /* * Determine which queue the buffer should be on, then put it there. */ /* If it's locked, don't report an error; try again later. */ if (ISSET(bp->b_flags, B_LOCKED)) bp->b_error = 0; /* If it's not cacheable, or an error, mark it invalid. */ if (ISSET(bp->b_cflags, BC_NOCACHE) || bp->b_error != 0) SET(bp->b_cflags, BC_INVAL); if (ISSET(bp->b_cflags, BC_VFLUSH)) { /* * This is a delayed write buffer that was just flushed to * disk. It is still on the LRU queue. If it's become * invalid, then we need to move it to a different queue; * otherwise leave it in its current position. */ CLR(bp->b_cflags, BC_VFLUSH); if (!ISSET(bp->b_cflags, BC_INVAL|BC_AGE) && !ISSET(bp->b_flags, B_LOCKED) && bp->b_error == 0) { KDASSERT(checkfreelist(bp, &bufqueues[BQ_LRU], 1)); goto already_queued; } else { bremfree(bp); } } KDASSERT(checkfreelist(bp, &bufqueues[BQ_AGE], 0)); KDASSERT(checkfreelist(bp, &bufqueues[BQ_LRU], 0)); KDASSERT(checkfreelist(bp, &bufqueues[BQ_LOCKED], 0)); if ((bp->b_bufsize <= 0) || ISSET(bp->b_cflags, BC_INVAL)) { /* * If it's invalid or empty, dissociate it from its vnode * and put on the head of the appropriate queue. */ if (ISSET(bp->b_flags, B_LOCKED)) { if (wapbl_vphaswapbl(vp = bp->b_vp)) { struct mount *mp = wapbl_vptomp(vp); KASSERT(bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone); WAPBL_REMOVE_BUF(mp, bp); } } mutex_enter(bp->b_objlock); CLR(bp->b_oflags, BO_DONE|BO_DELWRI); if ((vp = bp->b_vp) != NULL) { KASSERT(bp->b_objlock == vp->v_interlock); reassignbuf(bp, bp->b_vp); brelvp(bp); mutex_exit(vp->v_interlock); } else { KASSERT(bp->b_objlock == &buffer_lock); mutex_exit(bp->b_objlock); } /* We want to dispose of the buffer, so wake everybody. 
*/ cv_broadcast(&bp->b_busy); if (bp->b_bufsize <= 0) /* no data */ goto already_queued; else /* invalid data */ bufq = &bufqueues[BQ_AGE]; binsheadfree(bp, bufq); } else { /* * It has valid data. Put it on the end of the appropriate * queue, so that it'll stick around for as long as possible. * If buf is AGE, but has dependencies, must put it on last * bufqueue to be scanned, ie LRU. This protects against the * livelock where BQ_AGE only has buffers with dependencies, * and we thus never get to the dependent buffers in BQ_LRU. */ if (ISSET(bp->b_flags, B_LOCKED)) { /* locked in core */ bufq = &bufqueues[BQ_LOCKED]; } else if (!ISSET(bp->b_cflags, BC_AGE)) { /* valid data */ bufq = &bufqueues[BQ_LRU]; } else { /* stale but valid data */ bufq = &bufqueues[BQ_AGE]; } binstailfree(bp, bufq); } already_queued: /* Unlock the buffer. */ CLR(bp->b_cflags, BC_AGE|BC_BUSY|BC_NOCACHE); CLR(bp->b_flags, B_ASYNC); /* * Wake only the highest priority waiter on the lock, in order to * prevent a thundering herd: many LWPs simultaneously awakening and * competing for the buffer's lock. Testing in 2019 revealed this * to reduce contention on bufcache_lock tenfold during a kernel * compile. Here and elsewhere, when the buffer is changing * identity, being disposed of, or moving from one list to another, * we wake all lock requestors. */ if (bp->b_bufsize <= 0) { cv_broadcast(&bp->b_busy); buf_destroy(bp); #ifdef DEBUG memset((char *)bp, 0, sizeof(*bp)); #endif pool_cache_put(buf_cache, bp); } else cv_signal(&bp->b_busy); } void brelse(buf_t *bp, int set) { mutex_enter(&bufcache_lock); brelsel(bp, set); mutex_exit(&bufcache_lock); } /* * Determine if a block is in the cache. * Just look on what would be its hash chain. If it's there, return * a pointer to it, unless it's marked invalid. If it's marked invalid, * we normally don't return the buffer, unless the caller explicitly * wants us to. 
*/
buf_t *
incore(struct vnode *vp, daddr_t blkno)
{
	buf_t *bp;

	KASSERT(mutex_owned(&bufcache_lock));

	/* Search hash chain */
	LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    !ISSET(bp->b_cflags, BC_INVAL)) {
			KASSERT(bp->b_objlock == vp->v_interlock);
			return (bp);
		}
	}

	return (NULL);
}

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to insure that the
 * cached blocks be of the correct size.
 */
buf_t *
getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	int err, preserve;
	buf_t *bp;

	mutex_enter(&bufcache_lock);
	SDT_PROBE3(io, kernel, , getblk__start, vp, blkno, size);
 loop:
	bp = incore(vp, blkno);
	if (bp != NULL) {
		err = bbusy(bp, ((slpflag & PCATCH) != 0), slptimeo, NULL);
		if (err != 0) {
			if (err == EPASSTHROUGH)
				/* Buffer changed identity; look it up again. */
				goto loop;
			mutex_exit(&bufcache_lock);
			SDT_PROBE4(io, kernel, , getblk__done,
			    vp, blkno, size, NULL);
			return (NULL);
		}
		KASSERT(!cv_has_waiters(&bp->b_done));
#ifdef DIAGNOSTIC
		if (ISSET(bp->b_oflags, BO_DONE|BO_DELWRI) &&
		    bp->b_bcount < size && vp->v_type != VBLK)
			panic("getblk: block size invariant failed");
#endif
		bremfree(bp);
		preserve = 1;
	} else {
		if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL)
			goto loop;

		if (incore(vp, blkno) != NULL) {
			/* The block has come into memory in the meantime. */
			brelsel(bp, 0);
			goto loop;
		}

		LIST_INSERT_HEAD(BUFHASH(vp, blkno), bp, b_hash);
		bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = blkno;
		mutex_enter(vp->v_interlock);
		bgetvp(vp, bp);
		mutex_exit(vp->v_interlock);
		preserve = 0;
	}
	mutex_exit(&bufcache_lock);

	/*
	 * LFS can't track total size of B_LOCKED buffer (locked_queue_bytes)
	 * if we re-size buffers here.
	 */
	if (ISSET(bp->b_flags, B_LOCKED)) {
		KASSERT(bp->b_bufsize >= size);
	} else {
		if (allocbuf(bp, size, preserve)) {
			mutex_enter(&bufcache_lock);
			LIST_REMOVE(bp, b_hash);
			brelsel(bp, BC_INVAL);
			mutex_exit(&bufcache_lock);
			SDT_PROBE4(io, kernel, , getblk__done,
			    vp, blkno, size, NULL);
			return NULL;
		}
	}
	BIO_SETPRIO(bp, BPRIO_DEFAULT);
	SDT_PROBE4(io, kernel, , getblk__done, vp, blkno, size, bp);
	return (bp);
}

/*
 * Get an empty, disassociated buffer of given size.
 */
buf_t *
geteblk(int size)
{
	buf_t *bp;
	int error __diagused;

	mutex_enter(&bufcache_lock);
	while ((bp = getnewbuf(0, 0, 0)) == NULL)
		;

	SET(bp->b_cflags, BC_INVAL);
	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	mutex_exit(&bufcache_lock);
	BIO_SETPRIO(bp, BPRIO_DEFAULT);
	error = allocbuf(bp, size, 0);
	KASSERT(error == 0);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 *
 * If the buffer shrinks, data is lost, so it's up to the
 * caller to have written it out *first*; this routine will not
 * start a write.  If the buffer grows, it's the callers
 * responsibility to fill out the buffer's additional contents.
 */
int
allocbuf(buf_t *bp, int size, int preserve)
{
	void *addr;
	vsize_t oldsize, desired_size;
	int oldcount;
	int delta;

	desired_size = buf_roundsize(size);
	if (desired_size > MAXBSIZE)
		printf("allocbuf: buffer larger than MAXBSIZE requested");

	oldcount = bp->b_bcount;

	bp->b_bcount = size;

	oldsize = bp->b_bufsize;
	if (oldsize == desired_size) {
		/*
		 * Do not short cut the WAPBL resize, as the buffer length
		 * could still have changed and this would corrupt the
		 * tracking of the transaction length.
		 */
		goto out;
	}

	/*
	 * If we want a buffer of a different size, re-allocate the
	 * buffer's memory; copy old content only if needed.
	 */
	addr = buf_alloc(desired_size);
	if (addr == NULL)
		return ENOMEM;
	if (preserve)
		memcpy(addr, bp->b_data, MIN(oldsize,desired_size));
	if (bp->b_data != NULL)
		buf_mrelease(bp->b_data, oldsize);
	bp->b_data = addr;
	bp->b_bufsize = desired_size;

	/*
	 * Update overall buffer memory counter (protected by bufcache_lock)
	 */
	delta = (long)desired_size - (long)oldsize;

	mutex_enter(&bufcache_lock);
	if ((bufmem += delta) > bufmem_hiwater) {
		/*
		 * Need to trim overall memory usage.
		 */
		while (buf_canrelease()) {
			if (preempt_needed()) {
				mutex_exit(&bufcache_lock);
				preempt();
				mutex_enter(&bufcache_lock);
			}
			if (buf_trim() == 0)
				break;
		}
	}
	mutex_exit(&bufcache_lock);

 out:
	if (wapbl_vphaswapbl(bp->b_vp))
		WAPBL_RESIZE_BUF(wapbl_vptomp(bp->b_vp), bp, oldsize, oldcount);

	return 0;
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * Called with the buffer queues locked.
 * Return buffer locked.
 */
static buf_t *
getnewbuf(int slpflag, int slptimeo, int from_bufq)
{
	buf_t *bp;
	struct vnode *vp;
	struct mount *transmp = NULL;

	SDT_PROBE0(io, kernel, , getnewbuf__start);

 start:
	KASSERT(mutex_owned(&bufcache_lock));

	/*
	 * Get a new buffer from the pool.
	 */
	if (!from_bufq && buf_lotsfree()) {
		mutex_exit(&bufcache_lock);
		bp = pool_cache_get(buf_cache, PR_NOWAIT);
		if (bp != NULL) {
			memset((char *)bp, 0, sizeof(*bp));
			buf_init(bp);
			SET(bp->b_cflags, BC_BUSY);	/* mark buffer busy */
			mutex_enter(&bufcache_lock);
#if defined(DIAGNOSTIC)
			bp->b_freelistindex = -1;
#endif /* defined(DIAGNOSTIC) */
			SDT_PROBE1(io, kernel, , getnewbuf__done, bp);
			return (bp);
		}
		mutex_enter(&bufcache_lock);
	}

	KASSERT(mutex_owned(&bufcache_lock));
	if ((bp = TAILQ_FIRST(&bufqueues[BQ_AGE].bq_queue)) != NULL) {
		KASSERT(!ISSET(bp->b_oflags, BO_DELWRI));
	} else {
		TAILQ_FOREACH(bp, &bufqueues[BQ_LRU].bq_queue, b_freelist) {
			if (ISSET(bp->b_cflags, BC_VFLUSH) ||
			    !ISSET(bp->b_oflags, BO_DELWRI))
				break;
			if (fstrans_start_nowait(bp->b_vp->v_mount) == 0) {
				KASSERT(transmp == NULL);
				transmp = bp->b_vp->v_mount;
				break;
			}
		}
	}
	if (bp != NULL) {
		KASSERT(!ISSET(bp->b_cflags, BC_BUSY) ||
		    ISSET(bp->b_cflags, BC_VFLUSH));
		bremfree(bp);

		/* Buffer is no longer on free lists. */
		SET(bp->b_cflags, BC_BUSY);

		/* Wake anyone trying to lock the old identity. */
		cv_broadcast(&bp->b_busy);
	} else {
		/*
		 * XXX: !from_bufq should be removed.
		 */
		if (!from_bufq || curlwp != uvm.pagedaemon_lwp) {
			/* wait for a free buffer of any kind */
			if ((slpflag & PCATCH) != 0)
				(void)cv_timedwait_sig(&needbuffer_cv,
				    &bufcache_lock, slptimeo);
			else
				(void)cv_timedwait(&needbuffer_cv,
				    &bufcache_lock, slptimeo);
		}
		SDT_PROBE1(io, kernel, , getnewbuf__done, NULL);
		return (NULL);
	}

#ifdef DIAGNOSTIC
	if (bp->b_bufsize <= 0)
		panic("buffer %p: on queue but empty", bp);
#endif

	if (ISSET(bp->b_cflags, BC_VFLUSH)) {
		/*
		 * This is a delayed write buffer being flushed to disk.  Make
		 * sure it gets aged out of the queue when it's finished, and
		 * leave it off the LRU queue.
		 */
		CLR(bp->b_cflags, BC_VFLUSH);
		SET(bp->b_cflags, BC_AGE);
		goto start;
	}

	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
	KASSERT(!cv_has_waiters(&bp->b_done));

	/*
	 * If buffer was a delayed write, start it and return NULL
	 * (since we might sleep while starting the write).
	 */
	if (ISSET(bp->b_oflags, BO_DELWRI)) {
		/*
		 * This buffer has gone through the LRU, so make sure it gets
		 * reused ASAP.
		 */
		SET(bp->b_cflags, BC_AGE);
		mutex_exit(&bufcache_lock);
		bawrite(bp);
		KASSERT(transmp != NULL);
		fstrans_done(transmp);
		mutex_enter(&bufcache_lock);
		SDT_PROBE1(io, kernel, , getnewbuf__done, NULL);
		return (NULL);
	}

	KASSERT(transmp == NULL);

	vp = bp->b_vp;

	/* clear out various other fields */
	bp->b_cflags = BC_BUSY;
	bp->b_oflags = 0;
	bp->b_flags = 0;
	bp->b_dev = NODEV;
	bp->b_blkno = 0;
	bp->b_lblkno = 0;
	bp->b_rawblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_bcount = 0;

	LIST_REMOVE(bp, b_hash);

	/* Disassociate us from our vnode, if we had one... */
	if (vp != NULL) {
		mutex_enter(vp->v_interlock);
		brelvp(bp);
		mutex_exit(vp->v_interlock);
	}

	SDT_PROBE1(io, kernel, , getnewbuf__done, bp);
	return (bp);
}

/*
 * Invalidate the specified buffer if it exists.
 */
void
binvalbuf(struct vnode *vp, daddr_t blkno)
{
	buf_t *bp;
	int err;

	mutex_enter(&bufcache_lock);

 loop:
	bp = incore(vp, blkno);
	if (bp != NULL) {
		err = bbusy(bp, 0, 0, NULL);
		if (err == EPASSTHROUGH)
			goto loop;
		bremfree(bp);
		if (ISSET(bp->b_oflags, BO_DELWRI)) {
			/* Dirty: push it out, marked so it is not re-cached. */
			SET(bp->b_cflags, BC_NOCACHE);
			mutex_exit(&bufcache_lock);
			bwrite(bp);
		} else {
			brelsel(bp, BC_INVAL);
			mutex_exit(&bufcache_lock);
		}
	} else
		mutex_exit(&bufcache_lock);
}

/*
 * Attempt to free an aged buffer off the queues.
 * Called with queue lock held.
 * Returns the amount of buffer memory freed.
*/
static int
buf_trim(void)
{
	buf_t *bp;
	long size;

	KASSERT(mutex_owned(&bufcache_lock));

	/* Instruct getnewbuf() to get buffers off the queues */
	if ((bp = getnewbuf(PCATCH, 1, 1)) == NULL)
		return 0;

	KASSERT((bp->b_cflags & BC_WANTED) == 0);
	size = bp->b_bufsize;
	bufmem -= size;
	if (size > 0) {
		buf_mrelease(bp->b_data, size);
		bp->b_bcount = bp->b_bufsize = 0;
	}
	/* brelse() will return the buffer to the global buffer pool */
	brelsel(bp, 0);
	return size;
}

/*
 * buf_drain:
 *	Release up to n bytes of buffer memory, stopping at the low
 *	water mark.  Called with bufcache_lock held; returns the number
 *	of bytes actually freed.
 */
int
buf_drain(int n)
{
	int size = 0, sz;

	KASSERT(mutex_owned(&bufcache_lock));

	while (size < n && bufmem > bufmem_lowater) {
		sz = buf_trim();
		if (sz <= 0)
			break;
		size += sz;
	}

	return size;
}

/*
 * Wait for operations on the buffer to complete.
 * When they do, extract and return the I/O's error value.
 */
int
biowait(buf_t *bp)
{

	BIOHIST_FUNC(__func__);

	KASSERT(ISSET(bp->b_cflags, BC_BUSY));

	SDT_PROBE1(io, kernel, , wait__start, bp);

	mutex_enter(bp->b_objlock);

	BIOHIST_CALLARGS(biohist, "bp=%#jx, oflags=0x%jx, ret_addr=%#jx",
	    (uintptr_t)bp, bp->b_oflags,
	    (uintptr_t)__builtin_return_address(0), 0);

	while (!ISSET(bp->b_oflags, BO_DONE | BO_DELWRI)) {
		BIOHIST_LOG(biohist, "waiting bp=%#jx",
		    (uintptr_t)bp, 0, 0, 0);
		cv_wait(&bp->b_done, bp->b_objlock);
	}
	mutex_exit(bp->b_objlock);

	SDT_PROBE1(io, kernel, , wait__done, bp);

	BIOHIST_LOG(biohist, "return %jd", bp->b_error, 0, 0, 0);

	return bp->b_error;
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so.  Otherwise, awaken waiting processes.
 *
 * [ Leffler, et al., says on p.247:
 *	"This routine wakes up the blocked process, frees the buffer
 *	for an asynchronous write, or, for a request by the pagedaemon
 *	process, invokes a procedure specified in the buffer structure" ]
 *
 * In real life, the pagedaemon (or other system processes) wants
 * to do async stuff too, and doesn't want the buffer brelse()'d.
* (for swap pager, that puts swap buffers on the free lists (!!!), * for the vn device, that puts allocated buffers on the free lists!) */ void biodone(buf_t *bp) { int s; BIOHIST_FUNC(__func__); KASSERT(!ISSET(bp->b_oflags, BO_DONE)); if (cpu_intr_p()) { /* From interrupt mode: defer to a soft interrupt. */ s = splvm(); TAILQ_INSERT_TAIL(&curcpu()->ci_data.cpu_biodone, bp, b_actq); BIOHIST_CALLARGS(biohist, "bp=%#jx, softint scheduled", (uintptr_t)bp, 0, 0, 0); softint_schedule(biodone_sih); splx(s); } else { /* Process now - the buffer may be freed soon. */ biodone2(bp); } } SDT_PROBE_DEFINE1(io, kernel, , done, "struct buf *"/*bp*/); static void biodone2(buf_t *bp) { void (*callout)(buf_t *); SDT_PROBE1(io, kernel, ,done, bp); BIOHIST_FUNC(__func__); BIOHIST_CALLARGS(biohist, "bp=%#jx", (uintptr_t)bp, 0, 0, 0); mutex_enter(bp->b_objlock); /* Note that the transfer is done. */ if (ISSET(bp->b_oflags, BO_DONE)) panic("biodone2 already"); CLR(bp->b_flags, B_COWDONE); SET(bp->b_oflags, BO_DONE); BIO_SETPRIO(bp, BPRIO_DEFAULT); /* Wake up waiting writers. */ if (!ISSET(bp->b_flags, B_READ)) vwakeup(bp); if ((callout = bp->b_iodone) != NULL) { BIOHIST_LOG(biohist, "callout %#jx", (uintptr_t)callout, 0, 0, 0); /* Note callout done, then call out. */ KASSERT(!cv_has_waiters(&bp->b_done)); bp->b_iodone = NULL; mutex_exit(bp->b_objlock); (*callout)(bp); } else if (ISSET(bp->b_flags, B_ASYNC)) { /* If async, release. */ BIOHIST_LOG(biohist, "async", 0, 0, 0, 0); KASSERT(!cv_has_waiters(&bp->b_done)); mutex_exit(bp->b_objlock); brelse(bp, 0); } else { /* Otherwise just wake up waiters in biowait(). 
*/ BIOHIST_LOG(biohist, "wake-up", 0, 0, 0, 0); cv_broadcast(&bp->b_done); mutex_exit(bp->b_objlock); } } static void biointr(void *cookie) { struct cpu_info *ci; buf_t *bp; int s; BIOHIST_FUNC(__func__); BIOHIST_CALLED(biohist); ci = curcpu(); s = splvm(); while (!TAILQ_EMPTY(&ci->ci_data.cpu_biodone)) { KASSERT(curcpu() == ci); bp = TAILQ_FIRST(&ci->ci_data.cpu_biodone); TAILQ_REMOVE(&ci->ci_data.cpu_biodone, bp, b_actq); splx(s); BIOHIST_LOG(biohist, "bp=%#jx", (uintptr_t)bp, 0, 0, 0); biodone2(bp); s = splvm(); } splx(s); } static void sysctl_fillbuf(const buf_t *i, struct buf_sysctl *o) { const bool allowaddr = get_expose_address(curproc); memset(o, 0, sizeof(*o)); o->b_flags = i->b_flags | i->b_cflags | i->b_oflags; o->b_error = i->b_error; o->b_prio = i->b_prio; o->b_dev = i->b_dev; o->b_bufsize = i->b_bufsize; o->b_bcount = i->b_bcount; o->b_resid = i->b_resid; COND_SET_VALUE(o->b_addr, PTRTOUINT64(i->b_data), allowaddr); o->b_blkno = i->b_blkno; o->b_rawblkno = i->b_rawblkno; COND_SET_VALUE(o->b_iodone, PTRTOUINT64(i->b_iodone), allowaddr); COND_SET_VALUE(o->b_proc, PTRTOUINT64(i->b_proc), allowaddr); COND_SET_VALUE(o->b_vp, PTRTOUINT64(i->b_vp), allowaddr); COND_SET_VALUE(o->b_saveaddr, PTRTOUINT64(i->b_saveaddr), allowaddr); o->b_lblkno = i->b_lblkno; } static int sysctl_dobuf(SYSCTLFN_ARGS) { buf_t *bp; struct buf_sysctl bs; struct bqueue *bq; char *dp; u_int i, op, arg; size_t len, needed, elem_size, out_size; int error, elem_count, retries; if (namelen == 1 && name[0] == CTL_QUERY) return (sysctl_query(SYSCTLFN_CALL(rnode))); if (namelen != 4) return (EINVAL); retries = 100; retry: dp = oldp; len = (oldp != NULL) ? *oldlenp : 0; op = name[0]; arg = name[1]; elem_size = name[2]; elem_count = name[3]; out_size = MIN(sizeof(bs), elem_size); /* * at the moment, these are just "placeholders" to make the * API for retrieving kern.buf data more extensible in the * future. * * XXX kern.buf currently has "netbsd32" issues. 
hopefully * these will be resolved at a later point. */ if (op != KERN_BUF_ALL || arg != KERN_BUF_ALL || elem_size < 1 || elem_count < 0) return (EINVAL); if (oldp == NULL) { /* count only, don't run through the buffer queues */ needed = pool_cache_nget(buf_cache) - pool_cache_nput(buf_cache); *oldlenp = (needed + KERN_BUFSLOP) * elem_size; return 0; } error = 0; needed = 0; sysctl_unlock(); mutex_enter(&bufcache_lock); for (i = 0; i < BQUEUES; i++) { bq = &bufqueues[i]; TAILQ_FOREACH(bp, &bq->bq_queue, b_freelist) { bq->bq_marker = bp; if (len >= elem_size && elem_count > 0) { sysctl_fillbuf(bp, &bs); mutex_exit(&bufcache_lock); error = copyout(&bs, dp, out_size); mutex_enter(&bufcache_lock); if (error) break; if (bq->bq_marker != bp) { /* * This sysctl node is only for * statistics. Retry; if the * queue keeps changing, then * bail out. */ if (retries-- == 0) { error = EAGAIN; break; } mutex_exit(&bufcache_lock); sysctl_relock(); goto retry; } dp += elem_size; len -= elem_size; } needed += elem_size; if (elem_count > 0 && elem_count != INT_MAX) elem_count--; } if (error != 0) break; } mutex_exit(&bufcache_lock); sysctl_relock(); *oldlenp = needed; return (error); } static int sysctl_bufvm_update(SYSCTLFN_ARGS) { int error, rv; struct sysctlnode node; unsigned int temp_bufcache; unsigned long temp_water; /* Take a copy of the supplied node and its data */ node = *rnode; if (node.sysctl_data == &bufcache) { node.sysctl_data = &temp_bufcache; temp_bufcache = *(unsigned int *)rnode->sysctl_data; } else { node.sysctl_data = &temp_water; temp_water = *(unsigned long *)rnode->sysctl_data; } /* Update the copy */ error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return (error); if (rnode->sysctl_data == &bufcache) { if (temp_bufcache > 100) return (EINVAL); bufcache = temp_bufcache; buf_setwm(); } else if (rnode->sysctl_data == &bufmem_lowater) { if (bufmem_hiwater - temp_water < 16) return (EINVAL); bufmem_lowater = temp_water; } else if 
(rnode->sysctl_data == &bufmem_hiwater) { if (temp_water - bufmem_lowater < 16) return (EINVAL); bufmem_hiwater = temp_water; } else return (EINVAL); /* Drain until below new high water mark */ sysctl_unlock(); mutex_enter(&bufcache_lock); while (bufmem > bufmem_hiwater) { rv = buf_drain((bufmem - bufmem_hiwater) / (2 * 1024)); if (rv <= 0) break; } mutex_exit(&bufcache_lock); sysctl_relock(); return 0; } static struct sysctllog *vfsbio_sysctllog; static void sysctl_kern_buf_setup(void) { sysctl_createv(&vfsbio_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "buf", SYSCTL_DESCR("Kernel buffer cache information"), sysctl_dobuf, 0, NULL, 0, CTL_KERN, KERN_BUF, CTL_EOL); } static void sysctl_vm_buf_setup(void) { sysctl_createv(&vfsbio_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "bufcache", SYSCTL_DESCR("Percentage of physical memory to use for " "buffer cache"), sysctl_bufvm_update, 0, &bufcache, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(&vfsbio_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_LONG, "bufmem", SYSCTL_DESCR("Amount of kernel memory used by buffer " "cache"), NULL, 0, &bufmem, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(&vfsbio_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_LONG, "bufmem_lowater", SYSCTL_DESCR("Minimum amount of kernel memory to " "reserve for buffer cache"), sysctl_bufvm_update, 0, &bufmem_lowater, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(&vfsbio_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_LONG, "bufmem_hiwater", SYSCTL_DESCR("Maximum amount of kernel memory to use " "for buffer cache"), sysctl_bufvm_update, 0, &bufmem_hiwater, 0, CTL_VM, CTL_CREATE, CTL_EOL); } static int bufhash_stats(struct hashstat_sysctl *hs, bool fill) { buf_t *bp; uint64_t chain; strlcpy(hs->hash_name, "bufhash", sizeof(hs->hash_name)); strlcpy(hs->hash_desc, "buffer hash", sizeof(hs->hash_desc)); if (!fill) return 0; hs->hash_size = 
bufhash + 1; for (size_t i = 0; i < hs->hash_size; i++) { chain = 0; mutex_enter(&bufcache_lock); LIST_FOREACH(bp, &bufhashtbl[i], b_hash) { chain++; } mutex_exit(&bufcache_lock); if (chain > 0) { hs->hash_used++; hs->hash_items += chain; if (chain > hs->hash_maxchain) hs->hash_maxchain = chain; } preempt_point(); } return 0; } #ifdef DEBUG /* * Print out statistics on the current allocation of the buffer pool. * Can be enabled to print out on every ``sync'' by setting "syncprt" * in vfs_syscalls.c using sysctl. */ void vfs_bufstats(void) { int i, j, count; buf_t *bp; struct bqueue *dp; int counts[MAXBSIZE / MIN_PAGE_SIZE + 1]; static const char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE" }; for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { count = 0; memset(counts, 0, sizeof(counts)); TAILQ_FOREACH(bp, &dp->bq_queue, b_freelist) { counts[bp->b_bufsize / PAGE_SIZE]++; count++; } printf("%s: total-%d", bname[i], count); for (j = 0; j <= MAXBSIZE / PAGE_SIZE; j++) if (counts[j] != 0) printf(", %d-%d", j * PAGE_SIZE, counts[j]); printf("\n"); } } #endif /* DEBUG */ /* ------------------------------ */ buf_t * getiobuf(struct vnode *vp, bool waitok) { buf_t *bp; bp = pool_cache_get(bufio_cache, (waitok ? PR_WAITOK : PR_NOWAIT)); if (bp == NULL) return bp; buf_init(bp); if ((bp->b_vp = vp) != NULL) { bp->b_objlock = vp->v_interlock; } else { KASSERT(bp->b_objlock == &buffer_lock); } return bp; } void putiobuf(buf_t *bp) { buf_destroy(bp); pool_cache_put(bufio_cache, bp); } /* * nestiobuf_iodone: b_iodone callback for nested buffers. */ void nestiobuf_iodone(buf_t *bp) { buf_t *mbp = bp->b_private; int error; int donebytes; KASSERT(bp->b_bcount <= bp->b_bufsize); KASSERT(mbp != bp); error = bp->b_error; if (bp->b_error == 0 && (bp->b_bcount < bp->b_bufsize || bp->b_resid > 0)) { /* * Not all got transferred, raise an error. We have no way to * propagate these conditions to mbp. 
		 */
		error = EIO;
	}

	donebytes = bp->b_bufsize;

	putiobuf(bp);
	nestiobuf_done(mbp, donebytes, error);
}

/*
 * nestiobuf_setup: setup a "nested" buffer.
 *
 * => 'mbp' is a "master" buffer which is being divided into sub pieces.
 * => 'bp' should be a buffer allocated by getiobuf.
 * => 'offset' is a byte offset in the master buffer.
 * => 'size' is a size in bytes of this nested buffer.
 */
void
nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size)
{
	/* Propagate only the transfer-mode flags from the master. */
	const int b_pass = mbp->b_flags & (B_READ|B_PHYS|B_RAW|B_MEDIA_FLAGS);
	struct vnode *vp = mbp->b_vp;

	KASSERT(mbp->b_bcount >= offset + size);
	bp->b_vp = vp;
	bp->b_dev = mbp->b_dev;
	bp->b_objlock = mbp->b_objlock;
	bp->b_cflags = BC_BUSY;
	bp->b_flags = B_ASYNC | b_pass;
	bp->b_iodone = nestiobuf_iodone;
	bp->b_data = (char *)mbp->b_data + offset;
	bp->b_resid = bp->b_bcount = size;
	bp->b_bufsize = bp->b_bcount;
	bp->b_private = mbp;
	BIO_COPYPRIO(bp, mbp);
	/* Writes through a vnode must be accounted in v_numoutput. */
	if (BUF_ISWRITE(bp) && vp != NULL) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}
}

/*
 * nestiobuf_done: propagate completion to the master buffer.
 *
 * => 'donebytes' specifies how many bytes in the 'mbp' is completed.
 * => 'error' is an errno(2) that 'donebytes' has been completed with.
 */
void
nestiobuf_done(buf_t *mbp, int donebytes, int error)
{

	if (donebytes == 0) {
		return;
	}
	mutex_enter(mbp->b_objlock);
	KASSERT(mbp->b_resid >= donebytes);
	mbp->b_resid -= donebytes;
	if (error)
		mbp->b_error = error;
	if (mbp->b_resid == 0) {
		/* On error, report the whole transfer as unfinished. */
		if (mbp->b_error)
			mbp->b_resid = mbp->b_bcount;
		mutex_exit(mbp->b_objlock);
		biodone(mbp);
	} else
		mutex_exit(mbp->b_objlock);
}

/*
 * Initialize a buffer to a known clean state.
 */
void
buf_init(buf_t *bp)
{

	cv_init(&bp->b_busy, "biolock");
	cv_init(&bp->b_done, "biowait");
	bp->b_dev = NODEV;
	bp->b_error = 0;
	bp->b_flags = 0;
	bp->b_cflags = 0;
	bp->b_oflags = 0;
	bp->b_objlock = &buffer_lock;
	bp->b_iodone = NULL;
	bp->b_dev = NODEV;
	bp->b_vnbufs.le_next = NOLIST;
	BIO_SETPRIO(bp, BPRIO_DEFAULT);
}

/*
 * Tear down what buf_init() set up.
 */
void
buf_destroy(buf_t *bp)
{

	cv_destroy(&bp->b_done);
	cv_destroy(&bp->b_busy);
}

/*
 * Try to mark a buffer busy.  Returns 0 on success, EDEADLK if called
 * from the pagedaemon while the buffer is busy, EPASSTHROUGH (or an
 * interrupted-sleep errno) when the caller must look the buffer up
 * again and retry.  Called with bufcache_lock held.
 */
int
bbusy(buf_t *bp, bool intr, int timo, kmutex_t *interlock)
{
	int error;

	KASSERT(mutex_owned(&bufcache_lock));

	SDT_PROBE4(io, kernel, , bbusy__start, bp, intr, timo, interlock);

	if ((bp->b_cflags & BC_BUSY) != 0) {
		if (curlwp == uvm.pagedaemon_lwp) {
			error = EDEADLK;
			goto out;
		}
		bp->b_cflags |= BC_WANTED;
		if (interlock != NULL)
			mutex_exit(interlock);
		if (intr) {
			error = cv_timedwait_sig(&bp->b_busy,
			    &bufcache_lock, timo);
		} else {
			error = cv_timedwait(&bp->b_busy,
			    &bufcache_lock, timo);
		}
		/*
		 * At this point the buffer may be gone: don't touch it
		 * again. The caller needs to find it again and retry.
		 */
		if (interlock != NULL)
			mutex_enter(interlock);
		if (error == 0)
			error = EPASSTHROUGH;
	} else {
		bp->b_cflags |= BC_BUSY;
		error = 0;
	}

out:
	SDT_PROBE5(io, kernel, , bbusy__done,
	    bp, intr, timo, interlock, error);
	return error;
}

/*
 * Nothing outside this file should really need to know about nbuf,
 * but a few things still want to read it, so give them a way to do that.
 */
u_int
buf_nbuf(void)
{

	return nbuf;
}
|
| 37 38 33 38 38 38 38 38 38 38 38 38 38 12 12 12 12 12 12 12 12 38 38 37 38 38 38 38 38 37 38 38 38 38 38 38 37 38 38 38 38 38 38 37 38 38 38 38 38 37 166 166 166 166 166 166 166 166 166 166 166 166 166 166 166 5 5 5 5 1 1 1 1 5 5 1 1 5 5 5 5 5 166 166 166 166 168 168 168 168 5 5 166 166 166 166 166 12 12 12 12 5 12 166 5 5 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 166 166 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 
931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 | /* $NetBSD: vhci.c,v 1.27 2022/03/12 15:30:51 riastradh Exp $ */ /* * Copyright (c) 2019-2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Maxime Villard. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vhci.c,v 1.27 2022/03/12 15:30:51 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_usb.h"
#endif

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kcov.h>

#include <machine/endian.h>

#include "ioconf.h"

#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
#include <dev/usb/usbdivar.h>
#include <dev/usb/usbroothub.h>
#include <dev/usb/vhci.h>

/* Debug printf: compiled out entirely unless VHCI_DEBUG is defined. */
#ifdef VHCI_DEBUG
#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
#else
#define DPRINTF(fmt, ...) __nothing
#endif

static usbd_status vhci_open(struct usbd_pipe *);
static void vhci_softintr(void *);

static struct usbd_xfer *vhci_allocx(struct usbd_bus *, unsigned int);
static void vhci_freex(struct usbd_bus *, struct usbd_xfer *);
static void vhci_get_lock(struct usbd_bus *, kmutex_t **);
static int vhci_roothub_ctrl(struct usbd_bus *, usb_device_request_t *,
    void *, int);

/* Bus-level entry points handed to the USB stack. */
static const struct usbd_bus_methods vhci_bus_methods = {
	.ubm_open = vhci_open,
	.ubm_softint = vhci_softintr,
	.ubm_dopoll = NULL,
	.ubm_allocx = vhci_allocx,
	.ubm_freex = vhci_freex,
	.ubm_getlock = vhci_get_lock,
	.ubm_rhctrl = vhci_roothub_ctrl,
};

static usbd_status vhci_device_ctrl_transfer(struct usbd_xfer *);
static usbd_status vhci_device_ctrl_start(struct usbd_xfer *);
static void vhci_device_ctrl_abort(struct usbd_xfer *);
static void vhci_device_ctrl_close(struct usbd_pipe *);
static void vhci_device_ctrl_cleartoggle(struct usbd_pipe *);
static void vhci_device_ctrl_done(struct usbd_xfer *);

/* Pipe methods for control endpoints of virtual devices. */
static const struct usbd_pipe_methods vhci_device_ctrl_methods = {
	.upm_init = NULL,
	.upm_fini = NULL,
	.upm_transfer = vhci_device_ctrl_transfer,
	.upm_start = vhci_device_ctrl_start,
	.upm_abort = vhci_device_ctrl_abort,
	.upm_close = vhci_device_ctrl_close,
	.upm_cleartoggle = vhci_device_ctrl_cleartoggle,
	.upm_done = vhci_device_ctrl_done,
};

static usbd_status vhci_root_intr_transfer(struct usbd_xfer *);
static usbd_status vhci_root_intr_start(struct usbd_xfer *);
static void vhci_root_intr_abort(struct usbd_xfer *);
static void vhci_root_intr_close(struct usbd_pipe *);
static void vhci_root_intr_cleartoggle(struct usbd_pipe *);
static void vhci_root_intr_done(struct usbd_xfer *);

/* Pipe methods for the root hub's interrupt (status change) endpoint. */
static const struct usbd_pipe_methods vhci_root_intr_methods = {
	.upm_init = NULL,
	.upm_fini = NULL,
	.upm_transfer = vhci_root_intr_transfer,
	.upm_start = vhci_root_intr_start,
	.upm_abort = vhci_root_intr_abort,
	.upm_close = vhci_root_intr_close,
	.upm_cleartoggle = vhci_root_intr_cleartoggle,
	.upm_done = vhci_root_intr_done,
};

/*
 * There are three structures to understand: vxfers, packets, and ports.
 *
 * Each xfer from the point of view of the USB stack is a vxfer from the point
 * of view of vHCI.
 *
 * A vxfer has a linked list containing a maximum of two packets: a request
 * packet and possibly a data packet. Packets basically contain data exchanged
 * between the Host and the virtual USB device. A packet is linked to both a
 * vxfer and a port.
 *
 * A port is an abstraction of an actual USB port. Each virtual USB device gets
 * connected to a port. A port has two lists:
 *  - The Usb-To-Host list, containing packets to be fetched from the USB
 *    device and provided to the host.
 *  - The Host-To-Usb list, containing packets to be sent from the Host to the
 *    USB device.
 * Request packets are always in the H->U direction. Data packets however can
 * be in both the H->U and U->H directions.
 *
 * With read() and write() operations on /dev/vhci, userland respectively
 * "fetches" and "sends" packets from or to the virtual USB device, which
 * respectively means reading/inserting packets in the H->U and U->H lists on
 * the port where the virtual USB device is connected.
* * +------------------------------------------------+ * | USB Stack | * +---------------------^--------------------------+ * | * +---------------------V--------------------------+ * | +----------------+ +-------------+ | * | | Request Packet | | Data Packet | Xfer | * | +-------|--------+ +----|---^----+ | * +---------|------------------|---|---------------+ * | | | * | +--------------+ | * | | | * +---------|---|------------------|---------------+ * | +---V---V---+ +---------|-+ | * | | H->U List | | U->H List | vHCI Port | * | +-----|-----+ +-----^-----+ | * +-----------|----------------|-------------------+ * | | * +-----------|----------------|-------------------+ * | +-----V-----+ +-----|-----+ | * | | read() | | write() | vHCI FD | * | +-----------+ +-----------+ | * +------------------------------------------------+ */ struct vhci_xfer; typedef struct vhci_packet { /* General. */ TAILQ_ENTRY(vhci_packet) portlist; TAILQ_ENTRY(vhci_packet) xferlist; struct vhci_xfer *vxfer; bool utoh; uint8_t addr; /* Type. */ struct { bool req:1; bool res:1; bool dat:1; } type; /* Exposed for FD operations. */ uint8_t *buf; size_t size; size_t cursor; } vhci_packet_t; typedef TAILQ_HEAD(, vhci_packet) vhci_packet_list_t; #define VHCI_NADDRS 16 /* maximum supported by USB */ typedef struct { kmutex_t lock; int status; int change; struct { vhci_packet_list_t usb_to_host; vhci_packet_list_t host_to_usb; } endpoints[VHCI_NADDRS]; } vhci_port_t; typedef struct { struct usbd_pipe pipe; } vhci_pipe_t; typedef struct vhci_xfer { /* General. */ struct usbd_xfer xfer; /* Port where the xfer occurs. */ vhci_port_t *port; /* Packets in the xfer. */ size_t npkts; vhci_packet_list_t pkts; /* Header storage. */ vhci_request_t reqbuf; vhci_response_t resbuf; /* Used for G/C. 
	 */
	TAILQ_ENTRY(vhci_xfer) freelist;
} vhci_xfer_t;

typedef TAILQ_HEAD(, vhci_xfer) vhci_xfer_list_t;

#define VHCI_INDEX2PORT(idx)	(idx)
#define VHCI_NPORTS	8	/* above 8, update TODO-bitmap */
#define VHCI_NBUSES	8

typedef struct {
	device_t sc_dev;

	struct usbd_bus sc_bus;
	bool sc_dying;
	kmutex_t sc_lock;

	/*
	 * Intr Root. Used to attach the devices.
	 */
	struct usbd_xfer *sc_intrxfer;

	/*
	 * The ports. Zero is for the roothub, one and beyond for the USB
	 * devices.
	 */
	size_t sc_nports;
	vhci_port_t sc_port[VHCI_NPORTS];

	device_t sc_child; /* /dev/usb# device */
} vhci_softc_t;

/* Per-open-file state for /dev/vhci: the currently selected port/addr. */
typedef struct {
	u_int port;
	uint8_t addr;
	vhci_softc_t *softc;
} vhci_fd_t;

extern struct cfdriver vhci_cd;

/* -------------------------------------------------------------------------- */

/*
 * Build the packets for a control transfer (request, optional response
 * header, optional data) and queue them on both the vxfer and the port.
 * Called with the port lock held.
 */
static void
vhci_pkt_ctrl_create(vhci_port_t *port, struct usbd_xfer *xfer, bool utoh,
    uint8_t addr)
{
	vhci_xfer_t *vxfer = (vhci_xfer_t *)xfer;
	vhci_packet_list_t *reqlist, *reslist, *datlist = NULL;
	vhci_packet_t *req, *res = NULL, *dat = NULL;
	size_t npkts = 0;

	/* Request packet. */
	reqlist = &port->endpoints[addr].host_to_usb;
	req = kmem_zalloc(sizeof(*req), KM_SLEEP);
	req->vxfer = vxfer;
	req->utoh = false;
	req->addr = addr;
	req->type.req = true;
	req->buf = (uint8_t *)&vxfer->reqbuf;
	req->size = sizeof(vxfer->reqbuf);
	req->cursor = 0;
	npkts++;

	/* Init the request buffer. */
	memset(&vxfer->reqbuf, 0, sizeof(vxfer->reqbuf));
	vxfer->reqbuf.type = VHCI_REQ_CTRL;
	memcpy(&vxfer->reqbuf.u.ctrl, &xfer->ux_request,
	    sizeof(xfer->ux_request));

	/* Response packet. */
	if (utoh && (xfer->ux_length > 0)) {
		reslist = &port->endpoints[addr].usb_to_host;
		res = kmem_zalloc(sizeof(*res), KM_SLEEP);
		res->vxfer = vxfer;
		res->utoh = true;
		res->addr = addr;
		res->type.res = true;
		res->buf = (uint8_t *)&vxfer->resbuf;
		res->size = sizeof(vxfer->resbuf);
		res->cursor = 0;
		npkts++;
	}

	/* Data packet.
	 */
	if (xfer->ux_length > 0) {
		/* Direction of the data stage follows the request. */
		if (utoh) {
			datlist = &port->endpoints[addr].usb_to_host;
		} else {
			datlist = &port->endpoints[addr].host_to_usb;
		}
		dat = kmem_zalloc(sizeof(*dat), KM_SLEEP);
		dat->vxfer = vxfer;
		dat->utoh = utoh;
		dat->addr = addr;
		dat->type.dat = true;
		dat->buf = xfer->ux_buf;
		dat->size = xfer->ux_length;
		dat->cursor = 0;
		npkts++;
	}

	/* Insert in the xfer. */
	vxfer->port = port;
	vxfer->npkts = npkts;
	TAILQ_INIT(&vxfer->pkts);
	TAILQ_INSERT_TAIL(&vxfer->pkts, req, xferlist);
	if (res != NULL)
		TAILQ_INSERT_TAIL(&vxfer->pkts, res, xferlist);
	if (dat != NULL)
		TAILQ_INSERT_TAIL(&vxfer->pkts, dat, xferlist);

	/* Insert in the port. */
	KASSERT(mutex_owned(&port->lock));
	TAILQ_INSERT_TAIL(reqlist, req, portlist);
	if (res != NULL)
		TAILQ_INSERT_TAIL(reslist, res, portlist);
	if (dat != NULL)
		TAILQ_INSERT_TAIL(datlist, dat, portlist);
}

/*
 * Unlink a packet from its port and its vxfer and free it.  Called with
 * the port lock held.  The vxfer itself is not freed here; it is later
 * completed/freed by the caller once npkts reaches zero.
 */
static void
vhci_pkt_destroy(vhci_softc_t *sc, vhci_packet_t *pkt)
{
	vhci_xfer_t *vxfer = pkt->vxfer;
	vhci_port_t *port = vxfer->port;
	vhci_packet_list_t *pktlist;

	KASSERT(mutex_owned(&port->lock));

	/* Remove from the port. */
	if (pkt->utoh) {
		pktlist = &port->endpoints[pkt->addr].usb_to_host;
	} else {
		pktlist = &port->endpoints[pkt->addr].host_to_usb;
	}
	TAILQ_REMOVE(pktlist, pkt, portlist);

	/* Remove from the xfer. */
	TAILQ_REMOVE(&vxfer->pkts, pkt, xferlist);
	kmem_free(pkt, sizeof(*pkt));

	/* Unref.
	 */
	KASSERT(vxfer->npkts > 0);
	vxfer->npkts--;
	if (vxfer->npkts > 0)
		return;
	KASSERT(TAILQ_FIRST(&vxfer->pkts) == NULL);
}

/* -------------------------------------------------------------------------- */

/*
 * ubm_open: select pipe methods for a newly opened pipe, depending on
 * whether it targets the root hub or a virtual device endpoint.  Only
 * control pipes are supported on virtual devices.
 */
static usbd_status
vhci_open(struct usbd_pipe *pipe)
{
	struct usbd_device *dev = pipe->up_dev;
	struct usbd_bus *bus = dev->ud_bus;
	usb_endpoint_descriptor_t *ed = pipe->up_endpoint->ue_edesc;
	vhci_softc_t *sc = bus->ub_hcpriv;
	uint8_t addr = dev->ud_addr;

	if (sc->sc_dying)
		return USBD_IOERROR;

	DPRINTF("%s: called, type=%d\n", __func__,
	    UE_GET_XFERTYPE(ed->bmAttributes));

	if (addr == bus->ub_rhaddr) {
		switch (ed->bEndpointAddress) {
		case USB_CONTROL_ENDPOINT:
			DPRINTF("%s: roothub_ctrl\n", __func__);
			pipe->up_methods = &roothub_ctrl_methods;
			break;
		case UE_DIR_IN | USBROOTHUB_INTR_ENDPT:
			DPRINTF("%s: root_intr\n", __func__);
			pipe->up_methods = &vhci_root_intr_methods;
			break;
		default:
			DPRINTF("%s: inval\n", __func__);
			return USBD_INVAL;
		}
	} else {
		switch (UE_GET_XFERTYPE(ed->bmAttributes)) {
		case UE_CONTROL:
			pipe->up_methods = &vhci_device_ctrl_methods;
			break;
		case UE_INTERRUPT:
		case UE_BULK:
		default:
			/*
			 * NOTE(review): unsupported endpoint types report
			 * USBD_NOMEM, not USBD_INVAL — looks intentional but
			 * worth confirming.
			 */
			goto bad;
		}
	}

	return USBD_NORMAL_COMPLETION;

bad:
	return USBD_NOMEM;
}

/* ubm_softint: nothing to do, transfers complete synchronously elsewhere. */
static void
vhci_softintr(void *v)
{
	DPRINTF("%s: called\n", __func__);
}

/* ubm_allocx: allocate a zeroed vxfer wrapping a usbd_xfer. */
static struct usbd_xfer *
vhci_allocx(struct usbd_bus *bus, unsigned int nframes)
{
	vhci_xfer_t *vxfer;

	vxfer = kmem_zalloc(sizeof(*vxfer), KM_SLEEP);
#ifdef DIAGNOSTIC
	vxfer->xfer.ux_state = XFER_BUSY;
#endif
	return (struct usbd_xfer *)vxfer;
}

/* ubm_freex: free a vxfer; it must hold no packets any more. */
static void
vhci_freex(struct usbd_bus *bus, struct usbd_xfer *xfer)
{
	vhci_xfer_t *vxfer = (vhci_xfer_t *)xfer;

	KASSERT(vxfer->npkts == 0);
	KASSERT(TAILQ_FIRST(&vxfer->pkts) == NULL);

#ifdef DIAGNOSTIC
	vxfer->xfer.ux_state = XFER_FREE;
#endif
	kmem_free(vxfer, sizeof(*vxfer));
}

/* ubm_getlock: expose the softc lock to the USB stack. */
static void
vhci_get_lock(struct usbd_bus *bus, kmutex_t **lock)
{
	vhci_softc_t *sc = bus->ub_hcpriv;

	*lock = &sc->sc_lock;
}

/*
 * ubm_rhctrl: handle root hub control requests that usbroothub does not
 * handle generically.
 */
static int
vhci_roothub_ctrl(struct usbd_bus *bus, usb_device_request_t *req, void
    *buf, int buflen)
{
	vhci_softc_t *sc = bus->ub_hcpriv;
	vhci_port_t *port;
	usb_hub_descriptor_t hubd;
	uint16_t len, value, index;
	int totlen = 0;

	len = UGETW(req->wLength);
	value = UGETW(req->wValue);
	index = UGETW(req->wIndex);

	/* Dispatch on (bRequest, bmRequestType) packed into one value. */
#define C(x,y) ((x) | ((y) << 8))
	switch (C(req->bRequest, req->bmRequestType)) {
	case C(UR_GET_DESCRIPTOR, UT_READ_DEVICE):
		switch (value) {
		case C(0, UDESC_DEVICE): {
			usb_device_descriptor_t devd;

			/* Patch vendor/product IDs into the stock descriptor. */
			totlen = uimin(buflen, sizeof(devd));
			memcpy(&devd, buf, totlen);
			USETW(devd.idVendor, 0);
			USETW(devd.idProduct, 0);
			memcpy(buf, &devd, totlen);
			break;
		}
#define sd ((usb_string_descriptor_t *)buf)
		case C(1, UDESC_STRING):
			/* Vendor */
			totlen = usb_makestrdesc(sd, len, "NetBSD");
			break;
		case C(2, UDESC_STRING):
			/* Product */
			totlen = usb_makestrdesc(sd, len, "VHCI root hub");
			break;
#undef sd
		default:
			/* default from usbroothub */
			return buflen;
		}
		break;

	case C(UR_SET_FEATURE, UT_WRITE_CLASS_OTHER):
		switch (value) {
		case UHF_PORT_RESET:
			if (index < 1 || index >= sc->sc_nports) {
				return -1;
			}
			port = &sc->sc_port[VHCI_INDEX2PORT(index)];
			port->status |= UPS_C_PORT_RESET;
			break;
		case UHF_PORT_POWER:
			break;
		default:
			return -1;
		}
		break;

	/* Hub requests.
	 */
	case C(UR_CLEAR_FEATURE, UT_WRITE_CLASS_DEVICE):
		break;
	case C(UR_CLEAR_FEATURE, UT_WRITE_CLASS_OTHER):
		if (index < 1 || index >= sc->sc_nports) {
			return -1;
		}
		port = &sc->sc_port[VHCI_INDEX2PORT(index)];
		switch (value) {
		case UHF_PORT_ENABLE:
			port->status &= ~UPS_PORT_ENABLED;
			break;
		case UHF_C_PORT_ENABLE:
			port->change |= UPS_C_PORT_ENABLED;
			break;
		default:
			return -1;
		}
		break;
	case C(UR_GET_DESCRIPTOR, UT_READ_CLASS_DEVICE):
		/* Hub descriptor: report our port count (minus roothub). */
		totlen = uimin(buflen, sizeof(hubd));
		memcpy(&hubd, buf, totlen);
		hubd.bNbrPorts = sc->sc_nports - 1;
		hubd.bDescLength = USB_HUB_DESCRIPTOR_SIZE;
		totlen = uimin(totlen, hubd.bDescLength);
		memcpy(buf, &hubd, totlen);
		break;
	case C(UR_GET_STATUS, UT_READ_CLASS_DEVICE):
		/* XXX The other HCs do this */
		memset(buf, 0, len);
		totlen = len;
		break;
	case C(UR_GET_STATUS, UT_READ_CLASS_OTHER): {
		usb_port_status_t ps;

		if (index < 1 || index >= sc->sc_nports) {
			return -1;
		}
		port = &sc->sc_port[VHCI_INDEX2PORT(index)];
		USETW(ps.wPortStatus, port->status);
		USETW(ps.wPortChange, port->change);
		totlen = uimin(len, sizeof(ps));
		memcpy(buf, &ps, totlen);
		break;
	}
	default:
		/* default from usbroothub */
		return buflen;
	}

	return totlen;
}

/* -------------------------------------------------------------------------- */

static usbd_status
vhci_device_ctrl_transfer(struct usbd_xfer *xfer)
{
	DPRINTF("%s: called\n", __func__);

	/* Pipe isn't running, start first */
	return vhci_device_ctrl_start(SIMPLEQ_FIRST(&xfer->ux_pipe->up_queue));
}

/*
 * Queue a control transfer as packets on the target port.  Returns
 * USBD_IN_PROGRESS on success; completion happens when userland drains
 * the packets via read()/write() on /dev/vhci.
 */
static usbd_status
vhci_device_ctrl_start(struct usbd_xfer *xfer)
{
	usb_endpoint_descriptor_t *ed = xfer->ux_pipe->up_endpoint->ue_edesc;
	usb_device_request_t *req = &xfer->ux_request;
	struct usbd_device *dev = xfer->ux_pipe->up_dev;
	vhci_softc_t *sc = xfer->ux_bus->ub_hcpriv;
	vhci_port_t *port;
	bool isread = (req->bmRequestType & UT_READ) != 0;
	uint8_t addr = UE_GET_ADDR(ed->bEndpointAddress);
	int portno, ret;

	KASSERT(addr == 0);
	KASSERT(xfer->ux_rqflags & URQ_REQUEST);
	KASSERT(dev->ud_myhsport != NULL);
	portno =
dev->ud_myhsport->up_portno;

	DPRINTF("%s: type=0x%02x, len=%d, isread=%d, portno=%d\n",
	    __func__, req->bmRequestType, UGETW(req->wLength), isread, portno);

	KASSERT(sc->sc_bus.ub_usepolling || mutex_owned(&sc->sc_lock));

	if (sc->sc_dying)
		return USBD_IOERROR;

	port = &sc->sc_port[portno];

	mutex_enter(&port->lock);
	if (port->status & UPS_PORT_ENABLED) {
		xfer->ux_status = USBD_IN_PROGRESS;
		vhci_pkt_ctrl_create(port, xfer, isread, addr);
		ret = USBD_IN_PROGRESS;
	} else {
		ret = USBD_IOERROR;
	}
	mutex_exit(&port->lock);

	return ret;
}

/*
 * Abort an in-progress control transfer: destroy its queued packets and
 * complete it as cancelled.  Called with the softc lock held.
 */
static void
vhci_device_ctrl_abort(struct usbd_xfer *xfer)
{
	vhci_xfer_t *vxfer = (vhci_xfer_t *)xfer;
	vhci_softc_t *sc = xfer->ux_bus->ub_hcpriv;
	vhci_port_t *port = vxfer->port;
	vhci_packet_t *pkt;

	DPRINTF("%s: called\n", __func__);

	KASSERT(mutex_owned(&sc->sc_lock));

	callout_halt(&xfer->ux_callout, &sc->sc_lock);

	/* If anyone else beat us, we're done. */
	KASSERT(xfer->ux_status != USBD_CANCELLED);
	if (xfer->ux_status != USBD_IN_PROGRESS)
		return;

	mutex_enter(&port->lock);
	while (vxfer->npkts > 0) {
		pkt = TAILQ_FIRST(&vxfer->pkts);
		KASSERT(pkt != NULL);
		vhci_pkt_destroy(sc, pkt);
	}
	KASSERT(TAILQ_FIRST(&vxfer->pkts) == NULL);
	mutex_exit(&port->lock);

	xfer->ux_status = USBD_CANCELLED;
	usb_transfer_complete(xfer);
	KASSERT(mutex_owned(&sc->sc_lock));
}

static void
vhci_device_ctrl_close(struct usbd_pipe *pipe)
{
	DPRINTF("%s: called\n", __func__);
}

static void
vhci_device_ctrl_cleartoggle(struct usbd_pipe *pipe)
{
	DPRINTF("%s: called\n", __func__);
}

static void
vhci_device_ctrl_done(struct usbd_xfer *xfer)
{
	DPRINTF("%s: called\n", __func__);
}

/* -------------------------------------------------------------------------- */

static usbd_status
vhci_root_intr_transfer(struct usbd_xfer *xfer)
{
	DPRINTF("%s: called\n", __func__);

	/* Pipe isn't running, start first */
	return vhci_root_intr_start(SIMPLEQ_FIRST(&xfer->ux_pipe->up_queue));
}

/*
 * Park the root hub's interrupt xfer; it is completed later when a port
 * changes state (attach/detach).
 */
static usbd_status
vhci_root_intr_start(struct usbd_xfer *xfer)
{
	vhci_softc_t *sc = xfer->ux_bus->ub_hcpriv;
	DPRINTF("%s: called, len=%zu\n", __func__, (size_t)xfer->ux_length);

	KASSERT(sc->sc_bus.ub_usepolling || mutex_owned(&sc->sc_lock));

	if (sc->sc_dying)
		return USBD_IOERROR;

	/* Only one root intr xfer may be pending at a time. */
	KASSERT(sc->sc_intrxfer == NULL);
	sc->sc_intrxfer = xfer;
	xfer->ux_status = USBD_IN_PROGRESS;

	return USBD_IN_PROGRESS;
}

/*
 * Cancel the pending root hub interrupt xfer, if it is still ours.
 */
static void
vhci_root_intr_abort(struct usbd_xfer *xfer)
{
	vhci_softc_t *sc = xfer->ux_bus->ub_hcpriv;

	DPRINTF("%s: called\n", __func__);

	KASSERT(mutex_owned(&sc->sc_lock));
	KASSERT(xfer->ux_pipe->up_intrxfer == xfer);

	/* If xfer has already completed, nothing to do here. */
	if (sc->sc_intrxfer == NULL)
		return;

	/*
	 * Otherwise, sc->sc_intrxfer had better be this transfer.
	 * Cancel it.
	 */
	KASSERT(sc->sc_intrxfer == xfer);
	KASSERT(xfer->ux_status == USBD_IN_PROGRESS);
	xfer->ux_status = USBD_CANCELLED;
	usb_transfer_complete(xfer);
}

static void
vhci_root_intr_close(struct usbd_pipe *pipe)
{
	vhci_softc_t *sc __diagused = pipe->up_dev->ud_bus->ub_hcpriv;

	DPRINTF("%s: called\n", __func__);

	KASSERT(mutex_owned(&sc->sc_lock));

	/*
	 * Caller must guarantee the xfer has completed first, by
	 * closing the pipe only after normal completion or an abort.
	 */
	KASSERT(sc->sc_intrxfer == NULL);
}

static void
vhci_root_intr_cleartoggle(struct usbd_pipe *pipe)
{
	DPRINTF("%s: called\n", __func__);
}

static void
vhci_root_intr_done(struct usbd_xfer *xfer)
{
	vhci_softc_t *sc = xfer->ux_bus->ub_hcpriv;

	KASSERT(mutex_owned(&sc->sc_lock));

	/* Claim the xfer so it doesn't get completed again.
	 */
	KASSERT(sc->sc_intrxfer == xfer);
	KASSERT(xfer->ux_status != USBD_IN_PROGRESS);
	sc->sc_intrxfer = NULL;
}

/* -------------------------------------------------------------------------- */

/*
 * Userland ioctl: attach a virtual device to the fd's port.  Marks the
 * port connected/enabled and completes the pending root intr xfer so
 * uhub notices the change.
 */
static void
vhci_usb_attach(vhci_fd_t *vfd)
{
	vhci_softc_t *sc = vfd->softc;
	vhci_port_t *port;
	struct usbd_xfer *xfer;
	u_char *p;

	port = &sc->sc_port[vfd->port];

	mutex_enter(&sc->sc_lock);

	mutex_enter(&port->lock);
	port->status = UPS_CURRENT_CONNECT_STATUS | UPS_PORT_ENABLED |
	    UPS_PORT_POWER;
	port->change = UPS_C_CONNECT_STATUS | UPS_C_PORT_RESET;
	mutex_exit(&port->lock);

	xfer = sc->sc_intrxfer;

	if (xfer == NULL) {
		goto done;
	}

	KASSERT(xfer->ux_status == USBD_IN_PROGRESS);

	/*
	 * Mark our port as having changed state. Uhub will then fetch
	 * status/change and see it needs to perform an attach.
	 */
	p = xfer->ux_buf;
	memset(p, 0, xfer->ux_length);
	p[0] = __BIT(vfd->port); /* TODO-bitmap */
	xfer->ux_actlen = xfer->ux_length;
	xfer->ux_status = USBD_NORMAL_COMPLETION;
	usb_transfer_complete(xfer);

done:
	mutex_exit(&sc->sc_lock);
}

/*
 * Destroy every packet queued on the port, in both directions, and
 * terminate the affected xfers with USBD_TIMEOUT.  Called with the
 * softc and port locks held.
 */
static void
vhci_port_flush(vhci_softc_t *sc, vhci_port_t *port)
{
	vhci_packet_list_t *pktlist;
	vhci_packet_t *pkt, *nxt;
	vhci_xfer_list_t vxferlist;
	vhci_xfer_t *vxfer;
	uint8_t addr;

	KASSERT(mutex_owned(&sc->sc_lock));
	KASSERT(mutex_owned(&port->lock));

	TAILQ_INIT(&vxferlist);

	for (addr = 0; addr < VHCI_NADDRS; addr++) {
		/* Drop all the packets in the H->U direction. */
		pktlist = &port->endpoints[addr].host_to_usb;
		TAILQ_FOREACH_SAFE(pkt, pktlist, portlist, nxt) {
			vxfer = pkt->vxfer;
			KASSERT(vxfer->xfer.ux_status == USBD_IN_PROGRESS);
			vhci_pkt_destroy(sc, pkt);
			if (vxfer->npkts == 0)
				TAILQ_INSERT_TAIL(&vxferlist, vxfer, freelist);
		}
		KASSERT(TAILQ_FIRST(pktlist) == NULL);

		/* Drop all the packets in the U->H direction.
		 */
		pktlist = &port->endpoints[addr].usb_to_host;
		TAILQ_FOREACH_SAFE(pkt, pktlist, portlist, nxt) {
			vxfer = pkt->vxfer;
			KASSERT(vxfer->xfer.ux_status == USBD_IN_PROGRESS);
			vhci_pkt_destroy(sc, pkt);
			if (vxfer->npkts == 0)
				TAILQ_INSERT_TAIL(&vxferlist, vxfer, freelist);
		}
		KASSERT(TAILQ_FIRST(pktlist) == NULL);

		/* Terminate all the xfers collected. */
		while ((vxfer = TAILQ_FIRST(&vxferlist)) != NULL) {
			struct usbd_xfer *xfer = &vxfer->xfer;
			TAILQ_REMOVE(&vxferlist, vxfer, freelist);

			xfer->ux_status = USBD_TIMEOUT;
			usb_transfer_complete(xfer);
		}
	}
}

/*
 * Userland ioctl (and close path): detach the virtual device from the
 * fd's port, flushing all pending packets and notifying uhub through
 * the root intr xfer.
 */
static void
vhci_usb_detach(vhci_fd_t *vfd)
{
	vhci_softc_t *sc = vfd->softc;
	vhci_port_t *port;
	struct usbd_xfer *xfer;
	u_char *p;

	port = &sc->sc_port[vfd->port];

	mutex_enter(&sc->sc_lock);

	xfer = sc->sc_intrxfer;
	if (xfer == NULL) {
		goto done;
	}

	KASSERT(xfer->ux_status == USBD_IN_PROGRESS);

	mutex_enter(&port->lock);

	port->status = 0;
	port->change = UPS_C_CONNECT_STATUS | UPS_C_PORT_RESET;

	/*
	 * Mark our port as having changed state. Uhub will then fetch
	 * status/change and see it needs to perform a detach.
	 */
	p = xfer->ux_buf;
	memset(p, 0, xfer->ux_length);
	p[0] = __BIT(vfd->port); /* TODO-bitmap */
	xfer->ux_actlen = xfer->ux_length;
	xfer->ux_status = USBD_NORMAL_COMPLETION;
	usb_transfer_complete(xfer);

	vhci_port_flush(sc, port);

	mutex_exit(&port->lock);
done:
	mutex_exit(&sc->sc_lock);
}

/*
 * VHCI_IOC_GET_INFO: report the fd's current port/address and the
 * port's status.
 */
static int
vhci_get_info(vhci_fd_t *vfd, struct vhci_ioc_get_info *args)
{
	vhci_softc_t *sc = vfd->softc;
	vhci_port_t *port;

	port = &sc->sc_port[vfd->port];

	args->nports = VHCI_NPORTS;
	args->port = vfd->port;
	mutex_enter(&port->lock);
	args->status = port->status;
	mutex_exit(&port->lock);
	args->addr = vfd->addr;

	return 0;
}

/*
 * VHCI_IOC_SET_PORT: select which port this fd operates on.  Port 0 is
 * the root hub and may not be selected.
 */
static int
vhci_set_port(vhci_fd_t *vfd, struct vhci_ioc_set_port *args)
{
	vhci_softc_t *sc = vfd->softc;

	if (args->port == 0 || args->port >= sc->sc_nports)
		return EINVAL;

	vfd->port = args->port;

	return 0;
}

/*
 * VHCI_IOC_SET_ADDR: select which endpoint address this fd operates on.
 */
static int
vhci_set_addr(vhci_fd_t *vfd, struct vhci_ioc_set_addr *args)
{
	if (args->addr >= VHCI_NADDRS)
		return EINVAL;

	vfd->addr = args->addr;

	return 0;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(vhci_fd_open);

/* Only open() is routed through cdevsw; everything else goes via fileops. */
const struct cdevsw vhci_cdevsw = {
	.d_open = vhci_fd_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int vhci_fd_ioctl(file_t *, u_long, void *);
static int vhci_fd_close(file_t *);
static int vhci_fd_read(struct file *, off_t *, struct uio *,
    kauth_cred_t, int);
static int vhci_fd_write(struct file *, off_t *, struct uio *,
    kauth_cred_t, int);

const struct fileops vhci_fileops = {
	.fo_read = vhci_fd_read,
	.fo_write = vhci_fd_write,
	.fo_ioctl = vhci_fd_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = vhci_fd_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = NULL,
};

/*
 * open(2) on /dev/vhci: allocate per-open state (defaults to port 1,
 * endpoint 0) and clone a file descriptor bound to vhci_fileops.
 */
static int
vhci_fd_open(dev_t dev, int flags, int type,
    struct lwp *l)
{
	vhci_softc_t *sc;
	vhci_fd_t *vfd;
	struct file *fp;
	int error, fd;

	sc = device_lookup_private(&vhci_cd, minor(dev));
	if (sc == NULL)
		return EXDEV;

	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	/* Fresh fd state: defaults to port 1, endpoint address 0. */
	vfd = kmem_alloc(sizeof(*vfd), KM_SLEEP);
	vfd->port = 1;
	vfd->addr = 0;
	vfd->softc = sc;

	return fd_clone(fp, fd, flags, &vhci_fileops, vfd);
}

/*
 * Close handler: closing the fd detaches the simulated USB device
 * (terminating any in-flight transfers), then frees the per-fd state.
 */
static int
vhci_fd_close(file_t *fp)
{
	vhci_fd_t *vfd = fp->f_data;

	KASSERT(vfd != NULL);
	vhci_usb_detach(vfd);

	kmem_free(vfd, sizeof(*vfd));
	fp->f_data = NULL;

	return 0;
}

/*
 * Read handler: the user process (playing the USB device) fetches
 * host-to-usb packets queued on the fd's current port/address.  Data
 * is copied out packet by packet via pkt->cursor; a packet fully
 * consumed is destroyed, and transfers whose last packet was consumed
 * are completed with USBD_NORMAL_COMPLETION after port->lock is
 * dropped (sc_lock is taken around each completion).
 *
 * Returns ENOBUFS if the port is not enabled.
 */
static int
vhci_fd_read(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	vhci_fd_t *vfd = fp->f_data;
	vhci_softc_t *sc = vfd->softc;
	vhci_packet_list_t *pktlist;
	vhci_packet_t *pkt, *nxt;
	vhci_xfer_list_t vxferlist;
	vhci_xfer_t *vxfer;
	vhci_port_t *port;
	int error = 0;
	uint8_t *buf;
	size_t size;

	if (uio->uio_resid == 0)
		return 0;

	port = &sc->sc_port[vfd->port];
	pktlist = &port->endpoints[vfd->addr].host_to_usb;

	TAILQ_INIT(&vxferlist);

	mutex_enter(&port->lock);

	if (!(port->status & UPS_PORT_ENABLED)) {
		error = ENOBUFS;
		goto out;
	}

	TAILQ_FOREACH_SAFE(pkt, pktlist, portlist, nxt) {
		vxfer = pkt->vxfer;
		/* Resume from wherever the previous read left off. */
		buf = pkt->buf + pkt->cursor;

		KASSERT(pkt->size >= pkt->cursor);
		size = uimin(uio->uio_resid, pkt->size - pkt->cursor);

		KASSERT(vxfer->xfer.ux_status == USBD_IN_PROGRESS);

		error = uiomove(buf, size, uio);
		if (error) {
			DPRINTF("%s: error = %d\n", __func__, error);
			goto out;
		}

		pkt->cursor += size;

		if (pkt->cursor == pkt->size) {
			/* Packet fully consumed; maybe the transfer too. */
			vhci_pkt_destroy(sc, pkt);
			if (vxfer->npkts == 0) {
				TAILQ_INSERT_TAIL(&vxferlist, vxfer, freelist);
			}
		}
		if (uio->uio_resid == 0) {
			break;
		}
	}

out:
	mutex_exit(&port->lock);

	/* Complete collected transfers outside port->lock. */
	while ((vxfer = TAILQ_FIRST(&vxferlist)) != NULL) {
		struct usbd_xfer *xfer = &vxfer->xfer;
		TAILQ_REMOVE(&vxferlist, vxfer, freelist);

		mutex_enter(&sc->sc_lock);
		xfer->ux_actlen = xfer->ux_length;
		xfer->ux_status = USBD_NORMAL_COMPLETION;
		usb_transfer_complete(xfer);
		mutex_exit(&sc->sc_lock);
	}

	return error;
}

static int
vhci_fd_write(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	vhci_fd_t *vfd = fp->f_data;
	vhci_softc_t *sc = vfd->softc;
	vhci_packet_list_t *pktlist;
	vhci_packet_t *pkt, *nxt;
	vhci_xfer_list_t vxferlist;
	vhci_xfer_t *vxfer;
	vhci_port_t *port;
	int error = 0;
	uint8_t *buf;
	size_t pktsize, size;

	if (uio->uio_resid == 0)
		return 0;

	port = &sc->sc_port[vfd->port];
	pktlist = &port->endpoints[vfd->addr].usb_to_host;

	TAILQ_INIT(&vxferlist);

	mutex_enter(&port->lock);

	if (!(port->status & UPS_PORT_ENABLED)) {
		error = ENOBUFS;
		goto out;
	}

	TAILQ_FOREACH_SAFE(pkt, pktlist, portlist, nxt) {
		vxfer = pkt->vxfer;
		buf = pkt->buf + pkt->cursor;

		pktsize = pkt->size;
		/*
		 * For data packets, clamp to the result-buffer size so a
		 * short device reply terminates the packet early.
		 */
		if (pkt->type.dat)
			pktsize = ulmin(vxfer->resbuf.size, pktsize);

		KASSERT(pktsize >= pkt->cursor);
		size = uimin(uio->uio_resid, pktsize - pkt->cursor);

		KASSERT(vxfer->xfer.ux_status == USBD_IN_PROGRESS);

		error = uiomove(buf, size, uio);
		if (error) {
			DPRINTF("%s: error = %d\n", __func__, error);
			goto out;
		}

		pkt->cursor += size;

		if (pkt->cursor == pktsize) {
			/* Packet fully filled; maybe the transfer too. */
			vhci_pkt_destroy(sc, pkt);
			if (vxfer->npkts == 0) {
				TAILQ_INSERT_TAIL(&vxferlist, vxfer, freelist);
			}
		}
		if (uio->uio_resid == 0) {
			break;
		}
	}

out:
	mutex_exit(&port->lock);

	/* Complete collected transfers outside port->lock. */
	while ((vxfer = TAILQ_FIRST(&vxferlist)) != NULL) {
		struct usbd_xfer *xfer = &vxfer->xfer;
		TAILQ_REMOVE(&vxferlist, vxfer, freelist);

		mutex_enter(&sc->sc_lock);
		/* Actual length reflects a possibly short device reply. */
		xfer->ux_actlen = ulmin(vxfer->resbuf.size, xfer->ux_length);
		xfer->ux_status = USBD_NORMAL_COMPLETION;
		usb_transfer_complete(xfer);
		mutex_exit(&sc->sc_lock);
	}

	return error;
}

/*
 * ioctl handler: dispatch the VHCI control operations (query info,
 * select port/address, simulate attach/detach).
 */
static int
vhci_fd_ioctl(file_t *fp, u_long cmd, void *data)
{
	vhci_fd_t *vfd = fp->f_data;

	KASSERT(vfd != NULL);

	switch (cmd) {
	case VHCI_IOC_GET_INFO:
		return vhci_get_info(vfd, data);
	case VHCI_IOC_SET_PORT:
		return vhci_set_port(vfd, data);
	case VHCI_IOC_SET_ADDR:
		return vhci_set_addr(vfd, data);
	case VHCI_IOC_USB_ATTACH:
		vhci_usb_attach(vfd);
		return 0;
	case VHCI_IOC_USB_DETACH:
		vhci_usb_detach(vfd);
		return 0;
	default:
		return EINVAL;
	}
}
/* -------------------------------------------------------------------------- */

static int vhci_match(device_t, cfdata_t, void *);
static void vhci_attach(device_t, device_t, void *);
static int vhci_activate(device_t, enum devact);

CFATTACH_DECL_NEW(vhci, sizeof(vhci_softc_t), vhci_match, vhci_attach,
    NULL, vhci_activate);

/*
 * Pseudo-device entry point: register the cfattach and create
 * VHCI_NBUSES pseudo bus instances.  On registration failure the
 * cfdriver is detached again and the error reported.
 */
void
vhciattach(int nunits)
{
	struct cfdata *cf;
	int error;
	size_t i;

	error = config_cfattach_attach(vhci_cd.cd_name, &vhci_ca);
	if (error) {
		aprint_error("%s: unable to register cfattach\n",
		    vhci_cd.cd_name);
		(void)config_cfdriver_detach(&vhci_cd);
		return;
	}

	for (i = 0; i < VHCI_NBUSES; i++) {
		/* cfdata is never freed: pseudo-devices live forever. */
		cf = kmem_alloc(sizeof(*cf), KM_SLEEP);
		cf->cf_name = vhci_cd.cd_name;
		cf->cf_atname = vhci_cd.cd_name;
		cf->cf_unit = i;
		cf->cf_fstate = FSTATE_STAR;
		config_attach_pseudo(cf);
	}
}

/* Deactivation hook: just mark the softc dying. */
static int
vhci_activate(device_t self, enum devact act)
{
	vhci_softc_t *sc = device_private(self);

	switch (act) {
	case DVACT_DEACTIVATE:
		sc->sc_dying = 1;
		return 0;
	default:
		return EOPNOTSUPP;
	}
}

/* Pseudo-device: always matches. */
static int
vhci_match(device_t parent, cfdata_t match, void *aux)
{
	return 1;
}

/*
 * Attach: initialize the usbd bus glue (USB 2.0, no DMA), the softc
 * lock, every port's lock and per-address packet queues, register the
 * ports with kcov remote coverage, then attach the usb child bus.
 */
static void
vhci_attach(device_t parent, device_t self, void *aux)
{
	vhci_softc_t *sc = device_private(self);
	vhci_port_t *port;
	uint8_t addr;
	size_t i;

	sc->sc_dev = self;
	sc->sc_bus.ub_revision = USBREV_2_0;
	sc->sc_bus.ub_hctype = USBHCTYPE_VHCI;
	sc->sc_bus.ub_busnum = device_unit(self);
	sc->sc_bus.ub_usedma = false;
	sc->sc_bus.ub_methods = &vhci_bus_methods;
	sc->sc_bus.ub_pipesize = sizeof(vhci_pipe_t);
	sc->sc_bus.ub_hcpriv = sc;
	sc->sc_dying = false;
	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SOFTUSB);

	sc->sc_nports = VHCI_NPORTS;
	for (i = 0; i < sc->sc_nports; i++) {
		port = &sc->sc_port[i];
		mutex_init(&port->lock, MUTEX_DEFAULT, IPL_SOFTUSB);
		for (addr = 0; addr < VHCI_NADDRS; addr++) {
			TAILQ_INIT(&port->endpoints[addr].usb_to_host);
			TAILQ_INIT(&port->endpoints[addr].host_to_usb);
		}
		kcov_remote_register(KCOV_REMOTE_VHCI,
		    KCOV_REMOTE_VHCI_ID(sc->sc_bus.ub_busnum, i));
	}

	sc->sc_child = config_found(self, &sc->sc_bus, usbctlprint,
	    CFARGS_NONE);
}
| 53 14 64 63 24 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 | /* $NetBSD: if.h,v 1.300 2022/08/20 11:09:24 riastradh Exp $ */ /*- * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by William Studenmund and Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.3 (Berkeley) 2/9/95 */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #if !defined(_KERNEL) && !defined(_STANDALONE) #include <stdbool.h> #endif #include <sys/featuretest.h> /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 /* * Length of interface description, including terminating '\0'. 
*/ #define IFDESCRSIZE 64 #if defined(_NETBSD_SOURCE) #include <sys/socket.h> #include <sys/queue.h> #include <sys/mutex.h> #include <sys/hook.h> #include <net/dlt.h> #include <net/pfil.h> #ifdef _KERNEL #include <net/pktqueue.h> #include <sys/pslist.h> #include <sys/pserialize.h> #include <sys/psref.h> #include <sys/module_hook.h> #endif /* * Always include ALTQ glue here -- we use the ALTQ interface queue * structure even when ALTQ is not configured into the kernel so that * the size of struct ifnet does not changed based on the option. The * ALTQ queue structure is API-compatible with the legacy ifqueue. */ #include <altq/if_altq.h> /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with four parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of a internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating a interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. 
These routines live in the files if.c and route.c */ #include <sys/time.h> #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" #include "opt_gateway.h" #endif struct mbuf; struct proc; struct rtentry; struct socket; struct ether_header; struct ifaddr; struct ifnet; struct rt_addrinfo; #define IFNAMSIZ IF_NAMESIZE /* * Structure describing a `cloning' interface. */ struct if_clone { LIST_ENTRY(if_clone) ifc_list; /* on list of cloners */ const char *ifc_name; /* name of device, e.g. `gif' */ size_t ifc_namelen; /* length of name */ int (*ifc_create)(struct if_clone *, int); int (*ifc_destroy)(struct ifnet *); }; #define IF_CLONE_INITIALIZER(name, create, destroy) \ { { NULL, NULL }, name, sizeof(name) - 1, create, destroy } /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure defining statistics and other data kept regarding a network * interface. * * Only used for exporting data from the interface. */ struct if_data { /* generic interface information */ u_char ifi_type; /* ethernet, tokenring, etc. 
*/ u_char ifi_addrlen; /* media address length */ u_char ifi_hdrlen; /* media header length */ int ifi_link_state; /* current link state */ uint64_t ifi_mtu; /* maximum transmission unit */ uint64_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input, this interface */ uint64_t ifi_noproto; /* destined for unsupported protocol */ struct timespec ifi_lastchange;/* last operational state change */ }; /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Status bit descriptions for the various interface types. */ struct if_status_description { unsigned char ifs_type; unsigned char ifs_state; const char *ifs_string; }; #define LINK_STATE_DESC_MATCH(_ifs, _t, _s) \ (((_ifs)->ifs_type == (_t) || (_ifs)->ifs_type == 0) && \ (_ifs)->ifs_state == (_s)) #define LINK_STATE_DESCRIPTIONS { \ { IFT_ETHER, LINK_STATE_DOWN, "no carrier" }, \ { IFT_IEEE80211, LINK_STATE_DOWN, "no network" }, \ { IFT_PPP, LINK_STATE_DOWN, "no carrier" }, \ { IFT_CARP, LINK_STATE_DOWN, "backup" }, \ { IFT_CARP, LINK_STATE_UP, "master" }, \ { 0, LINK_STATE_UP, "active" }, \ { 0, LINK_STATE_UNKNOWN, "unknown" }, \ { 0, LINK_STATE_DOWN, "down" }, \ { 0, 0, NULL } \ } /* * Structure defining a queue for a network interface. 
*/ struct ifqueue { struct mbuf *ifq_head; struct mbuf *ifq_tail; int ifq_len; int ifq_maxlen; int ifq_drops; kmutex_t *ifq_lock; }; #ifdef _KERNEL #include <sys/percpu.h> #include <sys/callout.h> #include <sys/rwlock.h> #include <sys/workqueue.h> #endif /* _KERNEL */ /* * Structure defining a queue for a network interface. * * (Would like to call this struct ``if'', but C isn't PL/1.) */ TAILQ_HEAD(ifnet_head, ifnet); /* the actual queue head */ struct bridge_softc; struct bridge_iflist; struct callout; struct krwlock; struct if_percpuq; struct if_deferred_start; struct in6_multi; typedef unsigned short if_index_t; /* * Interface. Field markings and the corresponding locks: * * i: IFNET_LOCK (a.k.a., if_ioctl_lock) * q: ifq_lock (struct ifaltq) * a: if_afdata_lock * 6: in6_multilock (global lock) * :: unlocked, stable * ?: unknown, maybe unsafe * * Lock order: IFNET_LOCK => in6_multilock => if_afdata_lock => ifq_lock * Note that currently if_afdata_lock and ifq_lock aren't held * at the same time, but define the order anyway. * * Lock order of IFNET_LOCK with other locks: * softnet_lock => solock => IFNET_LOCK => ND6_LOCK, in_multilock */ typedef struct ifnet { void *if_softc; /* :: lower-level data for this if */ /* DEPRECATED. Keep it to avoid breaking kvm(3) users */ TAILQ_ENTRY(ifnet) if_list; /* i: all struct ifnets are chained */ TAILQ_HEAD(, ifaddr) if_addrlist; /* i: linked list of addresses per if */ char if_xname[IFNAMSIZ]; /* :: external name (name + unit) */ int if_pcount; /* i: number of promiscuous listeners */ struct bpf_if *if_bpf; /* :: packet filter structure */ if_index_t if_index; /* :: numeric abbreviation for this if */ short if_timer; /* ?: time 'til if_slowtimo called */ unsigned short if_flags; /* i: up/down, broadcast, etc. */ short if_extflags; /* :: if_output MP-safe, etc. */ u_char if_type; /* :: ethernet, tokenring, etc. */ u_char if_addrlen; /* :: media address length */ u_char if_hdrlen; /* :: media header length */ /* XXX audit :? 
fields here. */ int if_link_state; /* :? current link state */ uint64_t if_mtu; /* :? maximum transmission unit */ uint64_t if_metric; /* :? routing metric (external only) */ uint64_t if_baudrate; /* :? linespeed */ struct timespec if_lastchange; /* :? last operational state change */ #ifdef _KERNEL percpu_t *if_stats; /* :: statistics */ #else void *if_stats; /* opaque to user-space */ #endif /* _KERNEL */ /* * Procedure handles. If you add more of these, don't forget the * corresponding NULL stub in if.c. */ int (*if_output) /* :: output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, const struct rtentry *); void (*_if_input) /* :: input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); void (*if_start) /* :: initiate output routine */ (struct ifnet *); int (*if_transmit) /* :: output routine, must be MP-safe */ (struct ifnet *, struct mbuf *); int (*if_ioctl) /* :: ioctl routine */ (struct ifnet *, u_long, void *); int (*if_init) /* :: init routine */ (struct ifnet *); void (*if_stop) /* :: stop routine */ (struct ifnet *, int); void (*if_slowtimo) /* :: timer routine */ (struct ifnet *); #define if_watchdog if_slowtimo void (*if_drain) /* :: routine to release resources */ (struct ifnet *); void (*if_bpf_mtap) /* :: bpf routine */ (struct bpf_if *, struct mbuf *, u_int); struct ifaltq if_snd; /* q: output queue (includes altq) */ struct ifaddr *if_dl; /* i: identity of this interface. */ const struct sockaddr_dl *if_sadl; /* i: pointer to sockaddr_dl of if_dl */ /* * May be NULL. If not NULL, it is the address assigned * to the interface by the manufacturer, so it very likely * to be unique. It MUST NOT be deleted. It is highly * suitable for deriving the EUI64 for the interface. 
*/ struct ifaddr *if_hwdl; /* i: h/w identity */ const uint8_t *if_broadcastaddr; /* :: linklevel broadcast bytestring */ struct bridge_softc *if_bridge; /* i: bridge glue */ struct bridge_iflist *if_bridgeif; /* i: shortcut to interface list entry */ int if_dlt; /* :: data link type (<net/dlt.h>) */ pfil_head_t * if_pfil; /* :: filtering point */ uint64_t if_capabilities; /* i: interface capabilities */ uint64_t if_capenable; /* i: capabilities enabled */ union { void * carp_s; /* carp structure (used by !carp ifs) */ struct ifnet *carp_d;/* ptr to carpdev (used by carp ifs) */ } if_carp_ptr; /* ?: */ #define if_carp if_carp_ptr.carp_s #define if_carpdev if_carp_ptr.carp_d /* * These are pre-computed based on an interfaces enabled * capabilities, for speed elsewhere. */ int if_csum_flags_tx; /* i: M_CSUM_* flags for Tx */ int if_csum_flags_rx; /* i: M_CSUM_* flags for Rx */ void *if_afdata[AF_MAX]; /* a: */ struct mowner *if_mowner; /* ?: who owns mbufs for this interface */ void *if_lagg; /* :: lagg or agr structure */ void *if_npf_private;/* ?: associated NPF context */ /* * pf specific data, used only when #if NPF > 0. */ void *if_pf_kif; /* ?: pf interface abstraction */ void *if_pf_groups; /* ?: pf interface groups */ /* * During an ifnet's lifetime, it has only one if_index, but * and if_index is not sufficient to identify an ifnet * because during the lifetime of the system, many ifnets may occupy a * given if_index. Let us tell different ifnets at the same * if_index apart by their if_index_gen, a unique number that each ifnet * is assigned when it if_attach()s. Now, the kernel can use the * pair (if_index, if_index_gen) as a weak reference to an ifnet. */ uint64_t if_index_gen; /* :: generation number for the ifnet * at if_index: if two ifnets' index * and generation number are both the * same, they are the same ifnet. 
*/ struct sysctllog *if_sysctl_log; /* :: */ int (*if_initaddr) /* :: */ (struct ifnet *, struct ifaddr *, bool); int (*if_setflags) /* :: */ (struct ifnet *, const u_short); kmutex_t *if_ioctl_lock; /* :: */ char *if_description; /* i: interface description */ #ifdef _KERNEL /* XXX kvm(3) */ struct if_slowtimo_data *if_slowtimo_data; /* :: */ struct krwlock *if_afdata_lock;/* :: */ struct if_percpuq *if_percpuq; /* :: we should remove it in the future */ struct work if_link_work; /* q: linkage on link state work queue */ uint16_t if_link_queue; /* q: masked link state change queue */ /* q: is link state work scheduled? */ bool if_link_scheduled; struct pslist_entry if_pslist_entry;/* i: */ struct psref_target if_psref; /* :: */ struct pslist_head if_addr_pslist; /* i: */ struct if_deferred_start *if_deferred_start; /* :: */ /* XXX should be protocol independent */ LIST_HEAD(, in6_multi) if_multiaddrs; /* 6: */ khook_list_t *if_linkstate_hooks; /* :: */ #endif } ifnet_t; #include <net/if_stats.h> #define if_name(ifp) ((ifp)->if_xname) #define IFF_UP 0x0001 /* interface is up */ #define IFF_BROADCAST 0x0002 /* broadcast address valid */ #define IFF_DEBUG 0x0004 /* turn on debugging */ #define IFF_LOOPBACK 0x0008 /* is a loopback net */ #define IFF_POINTOPOINT 0x0010 /* interface is point-to-point link */ /* 0x0020 was IFF_NOTRAILERS */ #define IFF_RUNNING 0x0040 /* resources allocated */ #define IFF_NOARP 0x0080 /* no address resolution protocol */ #define IFF_PROMISC 0x0100 /* receive all packets */ #define IFF_ALLMULTI 0x0200 /* receive all multicast packets */ #define IFF_OACTIVE 0x0400 /* transmission in progress */ #define IFF_SIMPLEX 0x0800 /* can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_MULTICAST 0x8000 /* supports multicast */ #define IFEF_MPSAFE __BIT(0) /* handlers can run in parallel 
				   (see below) */

/*
 * The guidelines for converting an interface to IFEF_MPSAFE are as follows
 *
 * Enabling IFEF_MPSAFE on an interface suppresses taking KERNEL_LOCK when
 * calling the following handlers:
 * - if_start
 *   - Note that if_transmit is always called without KERNEL_LOCK
 * - if_output
 * - if_ioctl
 * - if_init
 * - if_stop
 *
 * This means that an interface with IFEF_MPSAFE must make the above handlers
 * MP-safe or take KERNEL_LOCK by itself inside handlers that aren't MP-safe
 * yet.
 *
 * There are some additional restrictions to access member variables of struct
 * ifnet:
 * - if_flags
 *   - Must be updated with holding IFNET_LOCK
 *   - You cannot use the flag in Tx/Rx paths anymore because there is no
 *     synchronization on the flag except for IFNET_LOCK
 *   - Note that IFNET_LOCK can't be taken in softint because it's known
 *     that it causes a deadlock
 *     - Some synchronization mechanisms such as pserialize_perform are called
 *       with IFNET_LOCK and also require context switches on every CPUs
 *       that mean softints finish so trying to take IFNET_LOCK in softint
 *       might block on IFNET_LOCK and prevent such synchronization mechanisms
 *       from being completed
 *     - Currently the deadlock occurs only if NET_MPSAFE is enabled, however,
 *       we should deal with the restriction because NET_MPSAFE will be enabled
 *       by default in the future
 * - if_watchdog and if_timer
 *   - The watchdog framework works only for non-IFEF_MPSAFE interfaces
 *     that rely on KERNEL_LOCK
 *   - Interfaces with IFEF_MPSAFE have to provide its own watchdog mechanism
 *     if needed
 *   - Keep if_watchdog NULL when calling if_attach
 */
#ifdef _KERNEL
/* True iff ifp's handlers may be called without KERNEL_LOCK. */
static __inline bool
if_is_mpsafe(struct ifnet *ifp)
{

	return ((ifp->if_extflags & IFEF_MPSAFE) != 0);
}

/*
 * Invoke cifp->if_output on ifp, wrapping the call in KERNEL_LOCK
 * unless cifp is marked IFEF_MPSAFE.
 */
static __inline int
if_output_lock(struct ifnet *cifp, struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *dst, const struct rtentry *rt)
{

	if (if_is_mpsafe(cifp)) {
		return (*cifp->if_output)(ifp, m, dst, rt);
	} else {
		int ret;

		KERNEL_LOCK(1, NULL);
		ret = (*cifp->if_output)(ifp, m, dst, rt);
		KERNEL_UNLOCK_ONE(NULL);
		return ret;
	}
}

/*
 * Invoke ifp->if_start, wrapping the call in KERNEL_LOCK unless ifp
 * is marked IFEF_MPSAFE.
 */
static __inline void
if_start_lock(struct ifnet *ifp)
{

	if (if_is_mpsafe(ifp)) {
		(*ifp->if_start)(ifp);
	} else {
		KERNEL_LOCK(1, NULL);
		(*ifp->if_start)(ifp);
		KERNEL_UNLOCK_ONE(NULL);
	}
}

/* Conditional KERNEL_LOCK helpers keyed on the per-interface flag. */
#define KERNEL_LOCK_IF_IFP_MPSAFE(ifp)					\
	do { if (if_is_mpsafe(ifp)) { KERNEL_LOCK(1, NULL); } } while (0)
#define KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp)				\
	do { if (if_is_mpsafe(ifp)) { KERNEL_UNLOCK_ONE(NULL); } } while (0)

#define KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp)				\
	do { if (!if_is_mpsafe(ifp)) { KERNEL_LOCK(1, NULL); } } while (0)
#define KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp)				\
	do { if (!if_is_mpsafe(ifp)) { KERNEL_UNLOCK_ONE(NULL); } } while (0)

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

/* XXX explore a better place to define */
#ifdef NET_MPSAFE

/* MP-safe networking: the big locks become no-ops (or invert). */
#define KERNEL_LOCK_UNLESS_NET_MPSAFE()		do { } while (0)
#define KERNEL_UNLOCK_UNLESS_NET_MPSAFE()	do { } while (0)

#define SOFTNET_LOCK_UNLESS_NET_MPSAFE()	do { } while (0)
#define SOFTNET_UNLOCK_UNLESS_NET_MPSAFE()	do { } while (0)
#define SOFTNET_LOCK_IF_NET_MPSAFE()					\
	do { mutex_enter(softnet_lock); } while (0)
#define SOFTNET_UNLOCK_IF_NET_MPSAFE()					\
	do { mutex_exit(softnet_lock); } while (0)

#else /* NET_MPSAFE */

/* Legacy locking: take KERNEL_LOCK / softnet_lock where requested. */
#define KERNEL_LOCK_UNLESS_NET_MPSAFE()					\
	do { KERNEL_LOCK(1, NULL); } while (0)
#define KERNEL_UNLOCK_UNLESS_NET_MPSAFE()				\
	do { KERNEL_UNLOCK_ONE(NULL); } while (0)

#define SOFTNET_LOCK_UNLESS_NET_MPSAFE()				\
	do { mutex_enter(softnet_lock); } while (0)
#define SOFTNET_UNLOCK_UNLESS_NET_MPSAFE()				\
	do { mutex_exit(softnet_lock); } while (0)
#define SOFTNET_LOCK_IF_NET_MPSAFE()		do { } while (0)
#define SOFTNET_UNLOCK_IF_NET_MPSAFE()		do { } while (0)

#endif /* NET_MPSAFE */

/* Acquire softnet_lock before KERNEL_LOCK; release in reverse order. */
#define SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE()				\
	do {								\
		SOFTNET_LOCK_UNLESS_NET_MPSAFE();			\
		KERNEL_LOCK_UNLESS_NET_MPSAFE();			\
	} while (0)

#define SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE()			\
	do {								\
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();			\
		SOFTNET_UNLOCK_UNLESS_NET_MPSAFE();
\ } while (0) #endif /* _KERNEL */ #define IFFBITS \ "\020\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5POINTOPOINT" \ "\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX" \ "\15LINK0\16LINK1\17LINK2\20MULTICAST" /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC) /* * Some convenience macros used for setting ifi_baudrate. */ #define IF_Kbps(x) ((x) * 1000ULL) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000ULL)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000ULL)) /* gigabits/sec. */ /* Capabilities that interfaces can advertise. */ /* 0x01 .. 0x40 were previously used */ #define IFCAP_TSOv4 0x00080 /* can do TCPv4 segmentation offload */ #define IFCAP_CSUM_IPv4_Rx 0x00100 /* can do IPv4 header checksums (Rx) */ #define IFCAP_CSUM_IPv4_Tx 0x00200 /* can do IPv4 header checksums (Tx) */ #define IFCAP_CSUM_TCPv4_Rx 0x00400 /* can do IPv4/TCP checksums (Rx) */ #define IFCAP_CSUM_TCPv4_Tx 0x00800 /* can do IPv4/TCP checksums (Tx) */ #define IFCAP_CSUM_UDPv4_Rx 0x01000 /* can do IPv4/UDP checksums (Rx) */ #define IFCAP_CSUM_UDPv4_Tx 0x02000 /* can do IPv4/UDP checksums (Tx) */ #define IFCAP_CSUM_TCPv6_Rx 0x04000 /* can do IPv6/TCP checksums (Rx) */ #define IFCAP_CSUM_TCPv6_Tx 0x08000 /* can do IPv6/TCP checksums (Tx) */ #define IFCAP_CSUM_UDPv6_Rx 0x10000 /* can do IPv6/UDP checksums (Rx) */ #define IFCAP_CSUM_UDPv6_Tx 0x20000 /* can do IPv6/UDP checksums (Tx) */ #define IFCAP_TSOv6 0x40000 /* can do TCPv6 segmentation offload */ #define IFCAP_LRO 0x80000 /* can do Large Receive Offload */ #define IFCAP_MASK 0xfff80 /* currently valid capabilities */ #define IFCAPBITS \ "\020" \ "\10TSO4" \ "\11IP4CSUM_Rx" \ "\12IP4CSUM_Tx" \ "\13TCP4CSUM_Rx" \ "\14TCP4CSUM_Tx" \ "\15UDP4CSUM_Rx" \ "\16UDP4CSUM_Tx" \ "\17TCP6CSUM_Rx" \ "\20TCP6CSUM_Tx" \ "\21UDP6CSUM_Rx" \ "\22UDP6CSUM_Tx" \ "\23TSO6" \ "\24LRO" \ #define IF_AFDATA_LOCK_INIT(ifp) \ do 
{(ifp)->if_afdata_lock = rw_obj_alloc();} while (0) #define IF_AFDATA_LOCK_DESTROY(ifp) rw_obj_free((ifp)->if_afdata_lock) #define IF_AFDATA_WLOCK(ifp) rw_enter((ifp)->if_afdata_lock, RW_WRITER) #define IF_AFDATA_RLOCK(ifp) rw_enter((ifp)->if_afdata_lock, RW_READER) #define IF_AFDATA_WUNLOCK(ifp) rw_exit((ifp)->if_afdata_lock) #define IF_AFDATA_RUNLOCK(ifp) rw_exit((ifp)->if_afdata_lock) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) rw_tryenter((ifp)->if_afdata_lock, RW_WRITER) #define IF_AFDATA_LOCK_ASSERT(ifp) \ KASSERT(rw_lock_held((ifp)->if_afdata_lock)) #define IF_AFDATA_RLOCK_ASSERT(ifp) \ KASSERT(rw_read_held((ifp)->if_afdata_lock)) #define IF_AFDATA_WLOCK_ASSERT(ifp) \ KASSERT(rw_write_held((ifp)->if_afdata_lock)) /* * Output queues (ifp->if_snd) and internetwork datagram level (pup level 1) * input routines have queues of messages stored on ifqueue structures * (defined above). Entries are added to and deleted from these structures * by these macros, which should be called with ipl raised to splnet(). 
*/ #define IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen) #define IF_DROP(ifq) ((ifq)->ifq_drops++) #define IF_ENQUEUE(ifq, m) do { \ (m)->m_nextpkt = 0; \ if ((ifq)->ifq_tail == 0) \ (ifq)->ifq_head = m; \ else \ (ifq)->ifq_tail->m_nextpkt = m; \ (ifq)->ifq_tail = m; \ (ifq)->ifq_len++; \ } while (/*CONSTCOND*/0) #define IF_PREPEND(ifq, m) do { \ (m)->m_nextpkt = (ifq)->ifq_head; \ if ((ifq)->ifq_tail == 0) \ (ifq)->ifq_tail = (m); \ (ifq)->ifq_head = (m); \ (ifq)->ifq_len++; \ } while (/*CONSTCOND*/0) #define IF_DEQUEUE(ifq, m) do { \ (m) = (ifq)->ifq_head; \ if (m) { \ if (((ifq)->ifq_head = (m)->m_nextpkt) == 0) \ (ifq)->ifq_tail = 0; \ (m)->m_nextpkt = 0; \ (ifq)->ifq_len--; \ } \ } while (/*CONSTCOND*/0) #define IF_POLL(ifq, m) ((m) = (ifq)->ifq_head) #define IF_PURGE(ifq) \ do { \ struct mbuf *__m0; \ \ for (;;) { \ IF_DEQUEUE((ifq), __m0); \ if (__m0 == NULL) \ break; \ else \ m_freem(__m0); \ } \ } while (/*CONSTCOND*/ 0) #define IF_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) #ifndef IFQ_MAXLEN #define IFQ_MAXLEN 256 #endif #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Structure defining statistics and other data kept regarding an address * on a network interface. */ struct ifaddr_data { int64_t ifad_inbytes; int64_t ifad_outbytes; }; /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. 
*/ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifaddr) ifa_list; /* list of addresses for interface */ struct ifaddr_data ifa_data; /* statistics on the address */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, const struct rt_addrinfo *); u_int ifa_flags; /* mostly rt_flags for cloning */ int ifa_refcnt; /* count of references */ int ifa_metric; /* cost of going out this interface */ struct ifaddr *(*ifa_getifa)(struct ifaddr *, const struct sockaddr *); uint32_t *ifa_seqno; int16_t ifa_preference; /* preference level for this address */ #ifdef _KERNEL struct pslist_entry ifa_pslist_entry; struct psref_target ifa_psref; #endif }; #define IFA_ROUTE RTF_UP /* (0x01) route installed */ #define IFA_DESTROYING 0x2 /* * Message format for use in obtaining information about interfaces from * sysctl and the routing socket. We need to force 64-bit alignment if we * aren't using compatibility definitions. */ #if !defined(_KERNEL) || !defined(COMPAT_RTSOCK) #define __align64 __aligned(sizeof(uint64_t)) #else #define __align64 #endif struct if_msghdr { u_short ifm_msglen __align64; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data __align64; /* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from sysctl and the routing socket. 
*/ struct ifa_msghdr { u_short ifam_msglen __align64; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ u_short ifam_index; /* index for associated ifp */ int ifam_flags; /* value of ifa_flags */ int ifam_addrs; /* like rtm_addrs */ pid_t ifam_pid; /* identify sender */ int ifam_addrflags; /* family specific address flags */ int ifam_metric; /* value of ifa_metric */ }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen __align64; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ #undef __align64 /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct sockaddr_storage ifru_space; short ifru_flags; int ifru_addrflags; int ifru_metric; int ifru_mtu; int ifru_dlt; u_int ifru_value; void * ifru_data; struct { uint32_t b_buflen; void *b_buf; } ifru_b; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #define ifr_space ifr_ifru.ifru_space /* sockaddr_storage */ #define ifr_flags ifr_ifru.ifru_flags /* flags */ #define ifr_addrflags ifr_ifru.ifru_addrflags /* addr flags */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_dlt ifr_ifru.ifru_dlt /* data link type (DLT_*) */ #define ifr_value ifr_ifru.ifru_value /* generic value */ #define ifr_media ifr_ifru.ifru_metric /* media options (overload) */ #define ifr_data ifr_ifru.ifru_data /* for use by interface * XXX deprecated */ #define ifr_buf ifr_ifru.ifru_b.b_buf /* new interface ioctls */ #define ifr_buflen ifr_ifru.ifru_b.b_buflen #define ifr_index ifr_ifru.ifru_value /* interface index, BSD */ #define ifr_ifindex ifr_index /* interface index, linux */ }; #ifdef _KERNEL #define ifreq_setdstaddr ifreq_setaddr #define ifreq_setbroadaddr ifreq_setaddr #define ifreq_getdstaddr ifreq_getaddr #define ifreq_getbroadaddr ifreq_getaddr static __inline const struct sockaddr * /*ARGSUSED*/ ifreq_getaddr(u_long cmd, const struct ifreq *ifr) { return &ifr->ifr_addr; } #endif /* _KERNEL */ struct ifcapreq { char ifcr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint64_t ifcr_capabilities; /* supported capabiliites */ uint64_t ifcr_capenable; /* capabilities enabled */ }; struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_dstaddr; #define ifra_broadaddr ifra_dstaddr struct sockaddr ifra_mask; }; struct ifdatareq { char ifdr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct if_data ifdr_data; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* IFMWD: current media options */ int ifm_mask; /* IFMWD: don't care mask */ int ifm_status; /* media status */ int ifm_active; /* IFMWD: active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* array of ifmedia word */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; #define IFLINKSTR_QUERYLEN 0x01 #define IFLINKSTR_UNSET 0x02 /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { void * ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * Structure for SIOC[AGD]LIFADDR */ struct if_laddrreq { char iflr_name[IFNAMSIZ]; unsigned int flags; #define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ #define IFLR_ACTIVE 0x4000 /* in/out: link-layer address activation */ #define IFLR_FACTORY 0x2000 /* in/out: factory link-layer address */ unsigned int prefixlen; /* in/out */ struct sockaddr_storage addr; /* in/out */ struct sockaddr_storage dstaddr; /* out */ }; /* * Structure for SIOC[SG]IFADDRPREF */ struct if_addrprefreq { char ifap_name[IFNAMSIZ]; int16_t ifap_preference; /* in/out */ struct sockaddr_storage ifap_addr; /* in/out */ }; #include <net/if_arp.h> #endif /* _NETBSD_SOURCE */ #ifdef _KERNEL #ifdef ALTQ #define IFQ_ENQUEUE(ifq, m, err) \ do { \ mutex_enter((ifq)->ifq_lock); \ if (ALTQ_IS_ENABLED(ifq)) \ 
ALTQ_ENQUEUE((ifq), (m), (err)); \ else { \ if (IF_QFULL(ifq)) { \ m_freem(m); \ (err) = ENOBUFS; \ } else { \ IF_ENQUEUE((ifq), (m)); \ (err) = 0; \ } \ } \ if ((err)) \ (ifq)->ifq_drops++; \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_DEQUEUE(ifq, m) \ do { \ mutex_enter((ifq)->ifq_lock); \ if (TBR_IS_ENABLED(ifq)) \ (m) = tbr_dequeue((ifq), ALTDQ_REMOVE); \ else if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_DEQUEUE((ifq), (m)); \ else \ IF_DEQUEUE((ifq), (m)); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_POLL(ifq, m) \ do { \ mutex_enter((ifq)->ifq_lock); \ if (TBR_IS_ENABLED(ifq)) \ (m) = tbr_dequeue((ifq), ALTDQ_POLL); \ else if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_POLL((ifq), (m)); \ else \ IF_POLL((ifq), (m)); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_PURGE(ifq) \ do { \ mutex_enter((ifq)->ifq_lock); \ if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_PURGE(ifq); \ else \ IF_PURGE(ifq); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_SET_READY(ifq) \ do { \ (ifq)->altq_flags |= ALTQF_READY; \ } while (/*CONSTCOND*/ 0) #define IFQ_CLASSIFY(ifq, m, af) \ do { \ KASSERT(((m)->m_flags & M_PKTHDR) != 0); \ mutex_enter((ifq)->ifq_lock); \ if (ALTQ_IS_ENABLED(ifq)) { \ if (ALTQ_NEEDS_CLASSIFY(ifq)) \ (m)->m_pkthdr.pattr_class = (*(ifq)->altq_classify) \ ((ifq)->altq_clfier, (m), (af)); \ (m)->m_pkthdr.pattr_af = (af); \ (m)->m_pkthdr.pattr_hdr = mtod((m), void *); \ } \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #else /* ! 
ALTQ */ #define IFQ_ENQUEUE(ifq, m, err) \ do { \ mutex_enter((ifq)->ifq_lock); \ if (IF_QFULL(ifq)) { \ m_freem(m); \ (err) = ENOBUFS; \ } else { \ IF_ENQUEUE((ifq), (m)); \ (err) = 0; \ } \ if (err) \ (ifq)->ifq_drops++; \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_DEQUEUE(ifq, m) \ do { \ mutex_enter((ifq)->ifq_lock); \ IF_DEQUEUE((ifq), (m)); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_POLL(ifq, m) \ do { \ mutex_enter((ifq)->ifq_lock); \ IF_POLL((ifq), (m)); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_PURGE(ifq) \ do { \ mutex_enter((ifq)->ifq_lock); \ IF_PURGE(ifq); \ mutex_exit((ifq)->ifq_lock); \ } while (/*CONSTCOND*/ 0) #define IFQ_SET_READY(ifq) /* nothing */ #define IFQ_CLASSIFY(ifq, m, af) /* nothing */ #endif /* ALTQ */ #define IFQ_LOCK_INIT(ifq) (ifq)->ifq_lock = \ mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET) #define IFQ_LOCK_DESTROY(ifq) mutex_obj_free((ifq)->ifq_lock) #define IFQ_LOCK(ifq) mutex_enter((ifq)->ifq_lock) #define IFQ_UNLOCK(ifq) mutex_exit((ifq)->ifq_lock) #define IFQ_IS_EMPTY(ifq) IF_IS_EMPTY(ifq) #define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++) #define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len) #define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++) #define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len)) #define IFQ_ENQUEUE_ISR(ifq, m, isr) \ do { \ IFQ_LOCK(inq); \ if (IF_QFULL(inq)) { \ IF_DROP(inq); \ IFQ_UNLOCK(inq); \ m_freem(m); \ } else { \ IF_ENQUEUE(inq, m); \ IFQ_UNLOCK(inq); \ schednetisr(isr); \ } \ } while (/*CONSTCOND*/ 0) #include <sys/mallocvar.h> MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); int ifreq_setaddr(u_long, struct ifreq *, const struct sockaddr *); struct ifnet *if_alloc(u_char); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); struct ifaddr *if_dl_create(const struct ifnet *, const struct sockaddr_dl **); void if_activate_sadl(struct ifnet *, struct ifaddr *, const struct sockaddr_dl *); void 
if_set_sadl(struct ifnet *, const void *, u_char, bool); void if_alloc_sadl(struct ifnet *); void if_free_sadl(struct ifnet *, int); void if_initialize(struct ifnet *); void if_register(struct ifnet *); void if_attach(struct ifnet *); /* Deprecated. Use if_initialize and if_register */ void if_attachdomain(void); void if_deactivate(struct ifnet *); bool if_is_deactivated(const struct ifnet *); void if_export_if_data(struct ifnet *, struct if_data *, bool); void if_purgeaddrs(struct ifnet *, int, void (*)(struct ifaddr *)); void if_detach(struct ifnet *); void if_down(struct ifnet *); void if_down_locked(struct ifnet *); void if_link_state_change(struct ifnet *, int); void if_domain_link_state_change(struct ifnet *, int); void if_up(struct ifnet *); void ifinit(void); void ifinit1(void); void ifinit_post(void); int ifaddrpref_ioctl(struct socket *, u_long, void *, struct ifnet *); extern int (*ifioctl)(struct socket *, u_long, void *, struct lwp *); int ifioctl_common(struct ifnet *, u_long, void *); int ifpromisc(struct ifnet *, int); int ifpromisc_locked(struct ifnet *, int); int if_addr_init(ifnet_t *, struct ifaddr *, bool); int if_do_dad(struct ifnet *); int if_mcast_op(ifnet_t *, const unsigned long, const struct sockaddr *); int if_flags_set(struct ifnet *, const u_short); int if_clone_list(int, char *, int *); int if_ioctl(struct ifnet *, u_long, void *); int if_init(struct ifnet *); void if_stop(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *if_get(const char *, struct psref *); ifnet_t *if_byindex(u_int); ifnet_t *_if_byindex(u_int); ifnet_t *if_get_byindex(u_int, struct psref *); ifnet_t *if_get_bylla(const void *, unsigned char, struct psref *); void if_put(const struct ifnet *, struct psref *); void if_acquire(struct ifnet *, struct psref *); #define if_release if_put int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, int); percpu_t *if_tunnel_alloc_ro_percpu(void); void if_tunnel_free_ro_percpu(percpu_t *); void 
if_tunnel_ro_percpu_rtcache_free(percpu_t *); struct tunnel_ro { struct route *tr_ro; kmutex_t *tr_lock; }; static inline void if_tunnel_get_ro(percpu_t *ro_percpu, struct route **ro, kmutex_t **lock) { struct tunnel_ro *tro; tro = percpu_getref(ro_percpu); *ro = tro->tr_ro; *lock = tro->tr_lock; mutex_enter(*lock); } static inline void if_tunnel_put_ro(percpu_t *ro_percpu, kmutex_t *lock) { mutex_exit(lock); percpu_putref(ro_percpu); } static __inline if_index_t if_get_index(const struct ifnet *ifp) { return ifp != NULL ? ifp->if_index : 0; } bool if_held(struct ifnet *); void if_input(struct ifnet *, struct mbuf *); struct if_percpuq * if_percpuq_create(struct ifnet *); void if_percpuq_destroy(struct if_percpuq *); void if_percpuq_enqueue(struct if_percpuq *, struct mbuf *); void if_deferred_start_init(struct ifnet *, void (*)(struct ifnet *)); void if_schedule_deferred_start(struct ifnet *); void ifa_insert(struct ifnet *, struct ifaddr *); void ifa_remove(struct ifnet *, struct ifaddr *); void ifa_psref_init(struct ifaddr *); void ifa_acquire(struct ifaddr *, struct psref *); void ifa_release(struct ifaddr *, struct psref *); bool ifa_held(struct ifaddr *); bool ifa_is_destroying(struct ifaddr *); void ifaref(struct ifaddr *); void ifafree(struct ifaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); struct ifaddr *ifa_ifwithaddr_psref(const struct sockaddr *, struct psref *); struct ifaddr *ifa_ifwithaf(int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *); struct ifaddr *ifa_ifwithdstaddr_psref(const struct sockaddr *, struct psref *); struct ifaddr *ifa_ifwithnet(const struct sockaddr *); struct ifaddr *ifa_ifwithnet_psref(const struct sockaddr *, struct psref *); struct ifaddr *ifa_ifwithladdr(const struct sockaddr *); struct ifaddr *ifa_ifwithladdr_psref(const struct sockaddr *, struct psref *); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); struct ifaddr *ifaof_ifpforaddr_psref(const struct sockaddr *, 
struct ifnet *, struct psref *); void link_rtrequest(int, struct rtentry *, const struct rt_addrinfo *); void p2p_rtrequest(int, struct rtentry *, const struct rt_addrinfo *); void if_clone_attach(struct if_clone *); void if_clone_detach(struct if_clone *); int if_transmit_lock(struct ifnet *, struct mbuf *); int ifq_enqueue(struct ifnet *, struct mbuf *); int ifq_enqueue2(struct ifnet *, struct ifqueue *, struct mbuf *); int loioctl(struct ifnet *, u_long, void *); void loopattach(int); void loopinit(void); int looutput(struct ifnet *, struct mbuf *, const struct sockaddr *, const struct rtentry *); void * if_linkstate_change_establish(struct ifnet *, void (*)(void *), void *); void if_linkstate_change_disestablish(struct ifnet *, void *, kmutex_t *); /* * These are exported because they're an easy way to tell if * an interface is going away without having to burn a flag. */ int if_nulloutput(struct ifnet *, struct mbuf *, const struct sockaddr *, const struct rtentry *); void if_nullinput(struct ifnet *, struct mbuf *); void if_nullstart(struct ifnet *); int if_nulltransmit(struct ifnet *, struct mbuf *); int if_nullioctl(struct ifnet *, u_long, void *); int if_nullinit(struct ifnet *); void if_nullstop(struct ifnet *, int); void if_nullslowtimo(struct ifnet *); #define if_nullwatchdog if_nullslowtimo void if_nulldrain(struct ifnet *); #else struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; #include <sys/cdefs.h> __BEGIN_DECLS unsigned int if_nametoindex(const char *); char * if_indextoname(unsigned int, char *); struct if_nameindex * if_nameindex(void); void if_freenameindex(struct if_nameindex *); __END_DECLS #endif /* _KERNEL */ /* XXX really ALTQ? 
*/ #ifdef _KERNEL #define IFADDR_FIRST(__ifp) TAILQ_FIRST(&(__ifp)->if_addrlist) #define IFADDR_NEXT(__ifa) TAILQ_NEXT((__ifa), ifa_list) #define IFADDR_FOREACH(__ifa, __ifp) TAILQ_FOREACH(__ifa, \ &(__ifp)->if_addrlist, ifa_list) #define IFADDR_FOREACH_SAFE(__ifa, __ifp, __nifa) \ TAILQ_FOREACH_SAFE(__ifa, \ &(__ifp)->if_addrlist, ifa_list, __nifa) #define IFADDR_EMPTY(__ifp) TAILQ_EMPTY(&(__ifp)->if_addrlist) #define IFADDR_ENTRY_INIT(__ifa) \ PSLIST_ENTRY_INIT((__ifa), ifa_pslist_entry) #define IFADDR_ENTRY_DESTROY(__ifa) \ PSLIST_ENTRY_DESTROY((__ifa), ifa_pslist_entry) #define IFADDR_READER_EMPTY(__ifp) \ (PSLIST_READER_FIRST(&(__ifp)->if_addr_pslist, struct ifaddr, \ ifa_pslist_entry) == NULL) #define IFADDR_READER_FIRST(__ifp) \ PSLIST_READER_FIRST(&(__ifp)->if_addr_pslist, struct ifaddr, \ ifa_pslist_entry) #define IFADDR_READER_NEXT(__ifa) \ PSLIST_READER_NEXT((__ifa), struct ifaddr, ifa_pslist_entry) #define IFADDR_READER_FOREACH(__ifa, __ifp) \ PSLIST_READER_FOREACH((__ifa), &(__ifp)->if_addr_pslist, struct ifaddr,\ ifa_pslist_entry) #define IFADDR_WRITER_INSERT_HEAD(__ifp, __ifa) \ PSLIST_WRITER_INSERT_HEAD(&(__ifp)->if_addr_pslist, (__ifa), \ ifa_pslist_entry) #define IFADDR_WRITER_REMOVE(__ifa) \ PSLIST_WRITER_REMOVE((__ifa), ifa_pslist_entry) #define IFADDR_WRITER_FOREACH(__ifa, __ifp) \ PSLIST_WRITER_FOREACH((__ifa), &(__ifp)->if_addr_pslist, struct ifaddr,\ ifa_pslist_entry) #define IFADDR_WRITER_NEXT(__ifp) \ PSLIST_WRITER_NEXT((__ifp), struct ifaddr, ifa_pslist_entry) #define IFADDR_WRITER_INSERT_AFTER(__ifp, __new) \ PSLIST_WRITER_INSERT_AFTER((__ifp), (__new), ifa_pslist_entry) #define IFADDR_WRITER_EMPTY(__ifp) \ (PSLIST_WRITER_FIRST(&(__ifp)->if_addr_pslist, struct ifaddr, \ ifa_pslist_entry) == NULL) #define IFADDR_WRITER_INSERT_TAIL(__ifp, __new) \ do { \ if (IFADDR_WRITER_EMPTY(__ifp)) { \ IFADDR_WRITER_INSERT_HEAD((__ifp), (__new)); \ } else { \ struct ifaddr *__ifa; \ IFADDR_WRITER_FOREACH(__ifa, (__ifp)) { \ if 
(IFADDR_WRITER_NEXT(__ifa) == NULL) {\ IFADDR_WRITER_INSERT_AFTER(__ifa,\ (__new)); \ break; \ } \ } \ } \ } while (0) #define IFNET_GLOBAL_LOCK() mutex_enter(&ifnet_mtx) #define IFNET_GLOBAL_UNLOCK() mutex_exit(&ifnet_mtx) #define IFNET_GLOBAL_LOCKED() mutex_owned(&ifnet_mtx) #define IFNET_READER_EMPTY() \ (PSLIST_READER_FIRST(&ifnet_pslist, struct ifnet, if_pslist_entry) == NULL) #define IFNET_READER_FIRST() \ PSLIST_READER_FIRST(&ifnet_pslist, struct ifnet, if_pslist_entry) #define IFNET_READER_NEXT(__ifp) \ PSLIST_READER_NEXT((__ifp), struct ifnet, if_pslist_entry) #define IFNET_READER_FOREACH(__ifp) \ PSLIST_READER_FOREACH((__ifp), &ifnet_pslist, struct ifnet, \ if_pslist_entry) #define IFNET_WRITER_INSERT_HEAD(__ifp) \ PSLIST_WRITER_INSERT_HEAD(&ifnet_pslist, (__ifp), if_pslist_entry) #define IFNET_WRITER_REMOVE(__ifp) \ PSLIST_WRITER_REMOVE((__ifp), if_pslist_entry) #define IFNET_WRITER_FOREACH(__ifp) \ PSLIST_WRITER_FOREACH((__ifp), &ifnet_pslist, struct ifnet, \ if_pslist_entry) #define IFNET_WRITER_NEXT(__ifp) \ PSLIST_WRITER_NEXT((__ifp), struct ifnet, if_pslist_entry) #define IFNET_WRITER_INSERT_AFTER(__ifp, __new) \ PSLIST_WRITER_INSERT_AFTER((__ifp), (__new), if_pslist_entry) #define IFNET_WRITER_EMPTY() \ (PSLIST_WRITER_FIRST(&ifnet_pslist, struct ifnet, if_pslist_entry) == NULL) #define IFNET_WRITER_INSERT_TAIL(__new) \ do { \ if (IFNET_WRITER_EMPTY()) { \ IFNET_WRITER_INSERT_HEAD(__new); \ } else { \ struct ifnet *__ifp; \ IFNET_WRITER_FOREACH(__ifp) { \ if (IFNET_WRITER_NEXT(__ifp) == NULL) { \ IFNET_WRITER_INSERT_AFTER(__ifp,\ (__new)); \ break; \ } \ } \ } \ } while (0) #define IFNET_LOCK(ifp) mutex_enter((ifp)->if_ioctl_lock) #define IFNET_UNLOCK(ifp) mutex_exit((ifp)->if_ioctl_lock) #define IFNET_LOCKED(ifp) mutex_owned((ifp)->if_ioctl_lock) #define IFNET_ASSERT_UNLOCKED(ifp) \ KDASSERT(mutex_ownable((ifp)->if_ioctl_lock)) extern struct pslist_head ifnet_pslist; extern kmutex_t ifnet_mtx; extern struct ifnet *lo0ifp; /* * ifq sysctl support */ 
/*
 * Export an interface queue's counters/limits via sysctl.  The terminal
 * node selects one of the IFQCTL_* values below; IFQCTL_MAXLEN is
 * presumably the only writable one (newp) -- NOTE(review): confirm
 * against the implementation, only the prototype is visible here.
 */
int	sysctl_ifq(int *name, u_int namelen, void *oldp,
	    size_t *oldlenp, void *newp, size_t newlen,
	    struct ifqueue *ifq);

/* symbolic names for terminal (per-protocol) CTL_IFQ_ nodes */
#define IFQCTL_LEN	1	/* current queue length */
#define IFQCTL_MAXLEN	2	/* maximum queue length */
#define IFQCTL_PEAK	3	/* peak queue length */
#define IFQCTL_DROPS	4	/* packets dropped because the queue was full */

/*
 * Hook for if_vlan - needed by if_agr
 */
MODULE_HOOK(if_vlan_vlan_input_hook, struct mbuf *,
    (struct ifnet *, struct mbuf *));

#endif /* _KERNEL */

#endif /* !_NET_IF_H_ */
/*	$NetBSD: time.h,v 1.80 2022/06/26 22:31:38 riastradh Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)time.h	8.5 (Berkeley) 5/4/95
 */

#ifndef _SYS_TIME_H_
#define _SYS_TIME_H_

#include <sys/featuretest.h>
#include <sys/types.h>

/*
 * Structure returned by gettimeofday(2) system call,
 * and used in other calls.
 */
struct timeval {
	time_t		tv_sec;		/* seconds */
	suseconds_t	tv_usec;	/* and microseconds */
};

#include <sys/timespec.h>

#if defined(_NETBSD_SOURCE)
/* Lossless: microseconds widen to nanoseconds. */
#define TIMEVAL_TO_TIMESPEC(tv, ts) do {				\
	(ts)->tv_sec = (tv)->tv_sec;					\
	(ts)->tv_nsec = (tv)->tv_usec * 1000;				\
} while (/*CONSTCOND*/0)
/* Lossy: sub-microsecond part of tv_nsec is truncated (rounds down). */
#define TIMESPEC_TO_TIMEVAL(tv, ts) do {				\
	(tv)->tv_sec = (ts)->tv_sec;					\
	(tv)->tv_usec = (suseconds_t)(ts)->tv_nsec / 1000;		\
} while (/*CONSTCOND*/0)

/*
 * Note: timezone is obsolete.  All timezone handling is now in
 * userland.  Its just here for back compatibility.
 */
struct timezone {
	int	tz_minuteswest;	/* minutes west of Greenwich */
	int	tz_dsttime;	/* type of dst correction */
};

/*
 * Operations on timevals.
 *
 * NB: as multi-evaluation macros, the arguments must be free of side
 * effects; timercmp only works for ==, !=, <, <=, >, >=.
 */
#define	timerclear(tvp)		(tvp)->tv_sec = (tvp)->tv_usec = 0L
#define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
#define	timercmp(tvp, uvp, cmp)						\
	(((tvp)->tv_sec == (uvp)->tv_sec) ?				\
	    ((tvp)->tv_usec cmp (uvp)->tv_usec) :			\
	    ((tvp)->tv_sec cmp (uvp)->tv_sec))
#define	timeradd(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec;	\
		if ((vvp)->tv_usec >= 1000000) {			\
			(vvp)->tv_sec++;				\
			(vvp)->tv_usec -= 1000000;			\
		}							\
	} while (/* CONSTCOND */ 0)
#define	timersub(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
		if ((vvp)->tv_usec < 0) {				\
			(vvp)->tv_sec--;				\
			(vvp)->tv_usec += 1000000;			\
		}							\
	} while (/* CONSTCOND */ 0)

/*
 * hide bintime for _STANDALONE because this header is used for hpcboot.exe,
 * which is built with compilers which don't recognize LL suffix.
 *	http://mail-index.NetBSD.org/tech-userlevel/2008/02/27/msg000181.html
 */
#if !defined(_STANDALONE)
/*
 * Binary fixed-point timestamp: sec + frac/2^64 seconds.  64 fractional
 * bits give sub-attosecond resolution; see the scale constants below.
 */
struct bintime {
	time_t	sec;
	uint64_t frac;
};

/* bt += x/2^64 seconds; unsigned wraparound of frac signals the carry. */
static __inline void
bintime_addx(struct bintime *bt, uint64_t x)
{
	uint64_t u;

	u = bt->frac;
	bt->frac += x;
	if (u > bt->frac)	/* frac wrapped: carry into sec */
		bt->sec++;
}

static __inline void
bintime_add(struct bintime *bt, const struct bintime *bt2)
{
	uint64_t u;

	u = bt->frac;
	bt->frac += bt2->frac;
	if (u > bt->frac)	/* frac wrapped: carry into sec */
		bt->sec++;
	bt->sec += bt2->sec;
}

static __inline void
bintime_sub(struct bintime *bt, const struct bintime *bt2)
{
	uint64_t u;

	u = bt->frac;
	bt->frac -= bt2->frac;
	if (u < bt->frac)	/* frac wrapped: borrow from sec */
		bt->sec--;
	bt->sec -= bt2->sec;
}

/* Multi-evaluation; only valid for ==, !=, <, <=, >, >=. */
#define	bintimecmp(bta, btb, cmp)					\
	(((bta)->sec == (btb)->sec) ?					\
	    ((bta)->frac cmp (btb)->frac) :				\
	    ((bta)->sec cmp (btb)->sec))

/*-
 * Background information:
 *
 * When converting between timestamps on parallel timescales of differing
 * resolutions it is historical and scientific practice to round down rather
 * than doing 4/5 rounding.
 *
 *   The date changes at midnight, not at noon.
 *
 *   Even at 15:59:59.999999999 it's not four'o'clock.
 *
 *   time_second ticks after N.999999999 not after N.4999999999
 */

/*
 * The magic numbers for converting ms/us/ns to fractions
 */

/* 1ms = (2^64) / 1000 */
#define	BINTIME_SCALE_MS	((uint64_t)18446744073709551ULL)

/* 1us = (2^64) / 1000000 */
#define	BINTIME_SCALE_US	((uint64_t)18446744073709ULL)

/* 1ns = (2^64) / 1000000000 */
#define	BINTIME_SCALE_NS	((uint64_t)18446744073ULL)

/* ns = frac * 10^9 / 2^64, computed on the top 32 fraction bits
 * (rounds down, per the policy above). */
static __inline void
bintime2timespec(const struct bintime *bt, struct timespec *ts)
{

	ts->tv_sec = bt->sec;
	ts->tv_nsec =
	    (long)((1000000000ULL * (uint32_t)(bt->frac >> 32)) >> 32);
}

static __inline void
timespec2bintime(const struct timespec *ts, struct bintime *bt)
{

	bt->sec = ts->tv_sec;
	bt->frac = (uint64_t)ts->tv_nsec * BINTIME_SCALE_NS;
}

/* us = frac * 10^6 / 2^64, on the top 32 fraction bits (rounds down). */
static __inline void
bintime2timeval(const struct bintime *bt, struct timeval *tv)
{

	tv->tv_sec = bt->sec;
	tv->tv_usec =
	    (suseconds_t)((1000000ULL * (uint32_t)(bt->frac >> 32)) >> 32);
}

static __inline void
timeval2bintime(const struct timeval *tv, struct bintime *bt)
{

	bt->sec = tv->tv_sec;
	bt->frac = (uint64_t)tv->tv_usec * BINTIME_SCALE_US;
}

static __inline struct bintime
ms2bintime(uint64_t ms)
{
	struct bintime bt;

	bt.sec = (time_t)(ms / 1000U);
	bt.frac = (uint64_t)(ms % 1000U) * BINTIME_SCALE_MS;

	return bt;
}

static __inline struct bintime
us2bintime(uint64_t us)
{
	struct bintime bt;

	bt.sec = (time_t)(us / 1000000U);
	bt.frac = (uint64_t)(us % 1000000U) * BINTIME_SCALE_US;

	return bt;
}

static __inline struct bintime
ns2bintime(uint64_t ns)
{
	struct bintime bt;

	bt.sec = (time_t)(ns / 1000000000U);
	bt.frac = (uint64_t)(ns % 1000000000U) * BINTIME_SCALE_NS;

	return bt;
}
#endif /* !defined(_STANDALONE) */

/*
 * Operations on timespecs.
 *
 * Multi-evaluation macros, like the timeval ones above; timespeccmp is
 * only valid for ==, !=, <, <=, >, >=.
 */
#define	timespecclear(tsp)						\
	(tsp)->tv_sec = (time_t)((tsp)->tv_nsec = 0L)
#define	timespecisset(tsp)	((tsp)->tv_sec || (tsp)->tv_nsec)
#define	timespeccmp(tsp, usp, cmp)					\
	(((tsp)->tv_sec == (usp)->tv_sec) ?				\
	    ((tsp)->tv_nsec cmp (usp)->tv_nsec) :			\
	    ((tsp)->tv_sec cmp (usp)->tv_sec))
#define	timespecadd(tsp, usp, vsp)					\
	do {								\
		(vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec;		\
		(vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec;	\
		if ((vsp)->tv_nsec >= 1000000000L) {			\
			(vsp)->tv_sec++;				\
			(vsp)->tv_nsec -= 1000000000L;			\
		}							\
	} while (/* CONSTCOND */ 0)
#define	timespecsub(tsp, usp, vsp)					\
	do {								\
		(vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec;		\
		(vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec;	\
		if ((vsp)->tv_nsec < 0) {				\
			(vsp)->tv_sec--;				\
			(vsp)->tv_nsec += 1000000000L;			\
		}							\
	} while (/* CONSTCOND */ 0)
#define timespec2ns(x) (((uint64_t)(x)->tv_sec) * 1000000000L + (x)->tv_nsec)

#ifdef _KERNEL
/* Overflow predicates for the add/sub macros above. */
bool timespecaddok(const struct timespec *, const struct timespec *) __pure;
bool timespecsubok(const struct timespec *, const struct timespec *) __pure;
#endif

#endif /* _NETBSD_SOURCE */

/*
 * Names of the interval timers, and structure
 * defining a timer setting.
 * NB: Must match the CLOCK_ constants below.
 */
#define	ITIMER_REAL		0
#define	ITIMER_VIRTUAL		1
#define	ITIMER_PROF		2
#define	ITIMER_MONOTONIC	3

struct	itimerval {
	struct	timeval it_interval;	/* timer interval */
	struct	timeval it_value;	/* current value */
};

/*
 * Structure defined by POSIX.1b to be like a itimerval, but with
 * timespecs.  Used in the timer_*() system calls.
 */
struct	itimerspec {
	struct	timespec it_interval;
	struct	timespec it_value;
};

#define	CLOCK_REALTIME	0
#define	CLOCK_VIRTUAL	1
#define	CLOCK_PROF	2
#define	CLOCK_MONOTONIC	3
#define CLOCK_THREAD_CPUTIME_ID		0x20000000
#define CLOCK_PROCESS_CPUTIME_ID	0x40000000

#if defined(_NETBSD_SOURCE)
#define	TIMER_RELTIME	0x0	/* relative timer */
#endif
#define	TIMER_ABSTIME	0x1	/* absolute timer */

#ifdef _KERNEL
#include <sys/timevar.h>
#else /* !_KERNEL */
#ifndef _STANDALONE
#if (_POSIX_C_SOURCE - 0) >= 200112L || \
    (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) || \
    (_XOPEN_SOURCE - 0) >= 500 || defined(_NETBSD_SOURCE)
#include <sys/select.h>
#endif

#include <sys/cdefs.h>
#include <time.h>

__BEGIN_DECLS
#ifndef __LIBC12_SOURCE__
/* __RENAME redirects to the time_t-widened (2009) symbol versions. */
#if (_POSIX_C_SOURCE - 0) >= 200112L || \
    defined(_XOPEN_SOURCE) || defined(_NETBSD_SOURCE)
int	getitimer(int, struct itimerval *) __RENAME(__getitimer50);
int	gettimeofday(struct timeval * __restrict, void *__restrict)
	    __RENAME(__gettimeofday50);
int	setitimer(int, const struct itimerval * __restrict,
	    struct itimerval * __restrict) __RENAME(__setitimer50);
int	utimes(const char *, const struct timeval [2]) __RENAME(__utimes50);
#endif /* _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE || _NETBSD_SOURCE */

#if defined(_NETBSD_SOURCE) || defined(HAVE_NBTOOL_CONFIG_H)
int	adjtime(const struct timeval *, struct timeval *)
	    __RENAME(__adjtime50);
int	futimes(int, const struct timeval [2]) __RENAME(__futimes50);
int	lutimes(const char *, const struct timeval [2]) __RENAME(__lutimes50);
int	settimeofday(const struct timeval * __restrict,
	    const void *__restrict) __RENAME(__settimeofday50);
#endif /* _NETBSD_SOURCE */
#endif /* __LIBC12_SOURCE__ */
__END_DECLS

#endif	/* !_STANDALONE */
#endif /* !_KERNEL */
#endif /* !_SYS_TIME_H_ */
| 5 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | /* $NetBSD: clock.h,v 1.4 2018/04/19 21:19:07 christos Exp $ */ /*- * Copyright (c) 1996 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Gordon W. Ross * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_CLOCK_H_ #define _SYS_CLOCK_H_ /* Some handy constants. 
*/ #define SECS_PER_MINUTE 60 #define SECS_PER_HOUR 3600 #define SECS_PER_DAY 86400 #define DAYS_PER_COMMON_YEAR 365 #define DAYS_PER_LEAP_YEAR 366 #define SECS_PER_COMMON_YEAR (SECS_PER_DAY * DAYS_PER_COMMON_YEAR) #define SECS_PER_LEAP_YEAR (SECS_PER_DAY * DAYS_PER_LEAP_YEAR) /* Traditional POSIX base year */ #define POSIX_BASE_YEAR 1970 /* Some handy functions */ static __inline int days_in_month(int m) { switch (m) { case 2: return 28; case 4: case 6: case 9: case 11: return 30; case 1: case 3: case 5: case 7: case 8: case 10: case 12: return 31; default: return -1; } } /* * This inline avoids some unnecessary modulo operations * as compared with the usual macro: * ( ((year % 4) == 0 && * (year % 100) != 0) || * ((year % 400) == 0) ) * It is otherwise equivalent. */ static __inline int is_leap_year(uint64_t year) { if ((year & 3) != 0) return 0; if (__predict_false((year % 100) != 0)) return 1; return __predict_false((year % 400) == 0); } static __inline int days_per_year(uint64_t year) { return is_leap_year(year) ? DAYS_PER_LEAP_YEAR : DAYS_PER_COMMON_YEAR; } #endif /* _SYS_CLOCK_H_ */ |
| 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 | /* $NetBSD: uhso.c,v 1.35 2021/06/13 09:27:20 mlelstv Exp $ */ /*- * Copyright (c) 2009 Iain Hibbert * Copyright (c) 2008 Fredrik Lindberg * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This driver originated as the hso module for FreeBSD written by * Fredrik Lindberg[1]. It has been rewritten almost completely for * NetBSD, and to support more devices with information extracted from * the Linux hso driver provided by Option N.V.[2] * * [1] http://www.shapeshifter.se/code/hso * [2] http://www.pharscape.org/hso.htm */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uhso.c,v 1.35 2021/06/13 09:27:20 mlelstv Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/conf.h> #include <sys/fcntl.h> #include <sys/kauth.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/mbuf.h> #include <sys/poll.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/tty.h> #include <sys/vnode.h> #include <sys/lwp.h> #include <net/bpf.h> #include <net/if.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/netisr.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> #include <dev/usb/usb.h> #include <dev/usb/usbcdc.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/umassvar.h> #include <dev/scsipi/scsi_disk.h> #include "usbdevs.h" #include "ioconf.h" #undef DPRINTF #ifdef UHSO_DEBUG /* * defined levels * 0 warnings only * 1 informational * 5 really chatty */ int uhso_debug = 0; #define DPRINTF(n, ...) 
do { \ if (uhso_debug >= (n)) { \ printf("%s: ", __func__); \ printf(__VA_ARGS__); \ } \ } while (/* CONSTCOND */0) #else #define DPRINTF(...) ((void)0) #endif /* * When first attached, the device class will be 0 and the modem * will attach as UMASS until a SCSI REZERO_UNIT command is sent, * in which case it will detach and reattach with device class set * to UDCLASS_VENDOR (0xff) and provide the serial interfaces. * * If autoswitch is set (the default) this will happen automatically. */ Static int uhso_autoswitch = 1; SYSCTL_SETUP(sysctl_hw_uhso_setup, "uhso sysctl setup") { const struct sysctlnode *node = NULL; sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT, CTLTYPE_NODE, "uhso", NULL, NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL); if (node == NULL) return; #ifdef UHSO_DEBUG sysctl_createv(clog, 0, &node, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "debug", SYSCTL_DESCR("uhso debug level (0, 1, 5)"), NULL, 0, &uhso_debug, sizeof(uhso_debug), CTL_CREATE, CTL_EOL); #endif sysctl_createv(clog, 0, &node, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "autoswitch", SYSCTL_DESCR("automatically switch device into modem mode"), NULL, 0, &uhso_autoswitch, sizeof(uhso_autoswitch), CTL_CREATE, CTL_EOL); } /* * The uhso modems have a number of interfaces providing a variety of * IO ports using the bulk endpoints, or multiplexed on the control * endpoints. We separate the ports by function and provide each with * a predictable index number used to construct the device minor number. * * The Network port is configured as a network interface rather than * a tty as it provides raw IPv4 packets. */ Static const char *uhso_port_name[] = { "Control", "Diagnostic", "Diagnostic2", "Application", "Application2", "GPS", "GPS Control", "PC Smartcard", "Modem", "MSD", /* "Modem Sharing Device" ? 
*/ "Voice", "Network", }; #define UHSO_PORT_CONTROL 0x00 #define UHSO_PORT_DIAG 0x01 #define UHSO_PORT_DIAG2 0x02 #define UHSO_PORT_APP 0x03 #define UHSO_PORT_APP2 0x04 #define UHSO_PORT_GPS 0x05 #define UHSO_PORT_GPS_CONTROL 0x06 #define UHSO_PORT_PCSC 0x07 #define UHSO_PORT_MODEM 0x08 #define UHSO_PORT_MSD 0x09 #define UHSO_PORT_VOICE 0x0a #define UHSO_PORT_NETWORK 0x0b #define UHSO_PORT_MAX __arraycount(uhso_port_name) #define UHSO_IFACE_MUX 0x20 #define UHSO_IFACE_BULK 0x40 #define UHSO_IFACE_IFNET 0x80 /* * The interface specification can sometimes be deduced from the device * type and interface number, or some modems support a vendor specific * way to read config info which we can translate to the port index. */ Static const uint8_t uhso_spec_default[] = { UHSO_IFACE_IFNET | UHSO_PORT_NETWORK | UHSO_IFACE_MUX, UHSO_IFACE_BULK | UHSO_PORT_DIAG, UHSO_IFACE_BULK | UHSO_PORT_MODEM, }; Static const uint8_t uhso_spec_icon321[] = { UHSO_IFACE_IFNET | UHSO_PORT_NETWORK | UHSO_IFACE_MUX, UHSO_IFACE_BULK | UHSO_PORT_DIAG2, UHSO_IFACE_BULK | UHSO_PORT_MODEM, UHSO_IFACE_BULK | UHSO_PORT_DIAG, }; Static const uint8_t uhso_spec_config[] = { 0, UHSO_IFACE_BULK | UHSO_PORT_DIAG, UHSO_IFACE_BULK | UHSO_PORT_GPS, UHSO_IFACE_BULK | UHSO_PORT_GPS_CONTROL, UHSO_IFACE_BULK | UHSO_PORT_APP, UHSO_IFACE_BULK | UHSO_PORT_APP2, UHSO_IFACE_BULK | UHSO_PORT_CONTROL, UHSO_IFACE_IFNET | UHSO_PORT_NETWORK, UHSO_IFACE_BULK | UHSO_PORT_MODEM, UHSO_IFACE_BULK | UHSO_PORT_MSD, UHSO_IFACE_BULK | UHSO_PORT_PCSC, UHSO_IFACE_BULK | UHSO_PORT_VOICE, }; struct uhso_dev { uint16_t vendor; uint16_t product; uint16_t type; }; #define UHSOTYPE_DEFAULT 1 #define UHSOTYPE_ICON321 2 #define UHSOTYPE_CONFIG 3 Static const struct uhso_dev uhso_devs[] = { { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GSICON72, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICON225, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GEHSUPA, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, 
USB_PRODUCT_OPTIONNV_GTHSUPA, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GSHSUPA, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GE40X1, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GE40X2, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GE40X3, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICON401, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GTM382, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GE40X4, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_GTHSUPAM, UHSOTYPE_CONFIG }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICONEDGE, UHSOTYPE_DEFAULT }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_MODHSXPA, UHSOTYPE_ICON321 }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICON321, UHSOTYPE_ICON321 }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICON322, UHSOTYPE_ICON321 }, { USB_VENDOR_OPTIONNV, USB_PRODUCT_OPTIONNV_ICON505, UHSOTYPE_CONFIG }, }; #define uhso_lookup(p, v) ((const struct uhso_dev *)usb_lookup(uhso_devs, (p), (v))) /* IO buffer sizes */ #define UHSO_MUX_WSIZE 64 #define UHSO_MUX_RSIZE 1024 #define UHSO_BULK_WSIZE 8192 #define UHSO_BULK_RSIZE 4096 #define UHSO_IFNET_MTU 1500 /* * Each IO port provided by the modem can be mapped to a network * interface (when hp_ifp != NULL) or a tty (when hp_tp != NULL) * which may be multiplexed and sharing interrupt and control endpoints * from an interface, or using the dedicated bulk endpoints. 
*/ struct uhso_port; struct uhso_softc; /* uhso callback functions return errno on failure */ typedef int (*uhso_callback)(struct uhso_port *); struct uhso_port { struct uhso_softc *hp_sc; /* master softc */ struct tty *hp_tp; /* tty pointer */ struct ifnet *hp_ifp; /* ifnet pointer */ unsigned int hp_flags; /* see below */ int hp_swflags; /* persistent tty flags */ int hp_status; /* modem status */ /* port type specific handlers */ uhso_callback hp_abort; /* abort any transfers */ uhso_callback hp_detach; /* detach port completely */ uhso_callback hp_init; /* init port (first open) */ uhso_callback hp_clean; /* clean port (last close) */ uhso_callback hp_write; /* write data */ usbd_callback hp_write_cb; /* write callback */ uhso_callback hp_read; /* read data */ usbd_callback hp_read_cb; /* read callback */ uhso_callback hp_control; /* set control lines */ struct usbd_interface *hp_ifh; /* interface handle */ unsigned int hp_index; /* usb request index */ int hp_iaddr; /* interrupt endpoint */ struct usbd_pipe *hp_ipipe; /* interrupt pipe */ void *hp_ibuf; /* interrupt buffer */ size_t hp_isize; /* allocated size */ int hp_raddr; /* bulk in endpoint */ struct usbd_pipe *hp_rpipe; /* bulk in pipe */ struct usbd_xfer *hp_rxfer; /* input xfer */ void *hp_rbuf; /* input buffer */ size_t hp_rlen; /* fill length */ size_t hp_rsize; /* allocated size */ int hp_waddr; /* bulk out endpoint */ struct usbd_pipe *hp_wpipe; /* bulk out pipe */ struct usbd_xfer *hp_wxfer; /* output xfer */ void *hp_wbuf; /* output buffer */ size_t hp_wlen; /* fill length */ size_t hp_wsize; /* allocated size */ struct mbuf *hp_mbuf; /* partial packet */ }; /* hp_flags */ #define UHSO_PORT_MUXPIPE __BIT(0) /* duplicate ipipe/ibuf references */ #define UHSO_PORT_MUXREADY __BIT(1) /* input is ready */ #define UHSO_PORT_MUXBUSY __BIT(2) /* read in progress */ struct uhso_softc { device_t sc_dev; /* self */ struct usbd_device *sc_udev; int sc_refcnt; struct uhso_port *sc_port[UHSO_PORT_MAX]; }; 
#define UHSO_CONFIG_NO 1 static int uhso_match(device_t, cfdata_t, void *); static void uhso_attach(device_t, device_t, void *); static int uhso_detach(device_t, int); CFATTACH_DECL_NEW(uhso, sizeof(struct uhso_softc), uhso_match, uhso_attach, uhso_detach, NULL); Static int uhso_switch_mode(struct usbd_device *); Static int uhso_get_iface_spec(struct usb_attach_arg *, uint8_t, uint8_t *); Static usb_endpoint_descriptor_t *uhso_get_endpoint(struct usbd_interface *, int, int); Static void uhso_mux_attach(struct uhso_softc *, struct usbd_interface *, int); Static int uhso_mux_abort(struct uhso_port *); Static int uhso_mux_detach(struct uhso_port *); Static int uhso_mux_init(struct uhso_port *); Static int uhso_mux_clean(struct uhso_port *); Static int uhso_mux_write(struct uhso_port *); Static int uhso_mux_read(struct uhso_port *); Static int uhso_mux_control(struct uhso_port *); Static void uhso_mux_intr(struct usbd_xfer *, void *, usbd_status); Static void uhso_bulk_attach(struct uhso_softc *, struct usbd_interface *, int); Static int uhso_bulk_abort(struct uhso_port *); Static int uhso_bulk_detach(struct uhso_port *); Static int uhso_bulk_init(struct uhso_port *); Static int uhso_bulk_clean(struct uhso_port *); Static int uhso_bulk_write(struct uhso_port *); Static int uhso_bulk_read(struct uhso_port *); Static int uhso_bulk_control(struct uhso_port *); Static void uhso_bulk_intr(struct usbd_xfer *, void *, usbd_status); Static void uhso_tty_attach(struct uhso_port *); Static void uhso_tty_detach(struct uhso_port *); Static void uhso_tty_read_cb(struct usbd_xfer *, void *, usbd_status); Static void uhso_tty_write_cb(struct usbd_xfer *, void *, usbd_status); static dev_type_open(uhso_tty_open); static dev_type_close(uhso_tty_close); static dev_type_read(uhso_tty_read); static dev_type_write(uhso_tty_write); static dev_type_ioctl(uhso_tty_ioctl); static dev_type_stop(uhso_tty_stop); static dev_type_tty(uhso_tty_tty); static dev_type_poll(uhso_tty_poll); const struct 
cdevsw uhso_cdevsw = { .d_open = uhso_tty_open, .d_close = uhso_tty_close, .d_read = uhso_tty_read, .d_write = uhso_tty_write, .d_ioctl = uhso_tty_ioctl, .d_stop = uhso_tty_stop, .d_tty = uhso_tty_tty, .d_poll = uhso_tty_poll, .d_mmap = nommap, .d_kqfilter = ttykqfilter, .d_discard = nodiscard, .d_flag = D_TTY }; Static int uhso_tty_init(struct uhso_port *); Static void uhso_tty_clean(struct uhso_port *); Static int uhso_tty_do_ioctl(struct uhso_port *, u_long, void *, int, struct lwp *); Static void uhso_tty_start(struct tty *); Static int uhso_tty_param(struct tty *, struct termios *); Static int uhso_tty_control(struct uhso_port *, u_long, int); #define UHSO_UNIT_MASK TTUNIT_MASK #define UHSO_PORT_MASK 0x0000f #define UHSO_DIALOUT_MASK TTDIALOUT_MASK #define UHSO_CALLUNIT_MASK TTCALLUNIT_MASK #define UHSOUNIT(x) (TTUNIT(x) >> 4) #define UHSOPORT(x) (TTUNIT(x) & UHSO_PORT_MASK) #define UHSODIALOUT(x) TTDIALOUT(x) #define UHSOMINOR(u, p) ((((u) << 4) & UHSO_UNIT_MASK) | ((p) & UHSO_UNIT_MASK)) Static void uhso_ifnet_attach(struct uhso_softc *, struct usbd_interface *, int); Static int uhso_ifnet_abort(struct uhso_port *); Static int uhso_ifnet_detach(struct uhso_port *); Static void uhso_ifnet_read_cb(struct usbd_xfer *, void *, usbd_status); Static void uhso_ifnet_input(struct ifnet *, struct mbuf **, uint8_t *, size_t); Static void uhso_ifnet_write_cb(struct usbd_xfer *, void *, usbd_status); Static int uhso_ifnet_ioctl(struct ifnet *, u_long, void *); Static int uhso_ifnet_init(struct uhso_port *); Static void uhso_ifnet_clean(struct uhso_port *); Static void uhso_ifnet_start(struct ifnet *); Static int uhso_ifnet_output(struct ifnet *, struct mbuf *, const struct sockaddr *, const struct rtentry *); /******************************************************************************* * * USB autoconfig * */ static int uhso_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; /* * don't claim this device if autoswitch is disabled * 
and it is not in modem mode already */ if (!uhso_autoswitch && uaa->uaa_class != UDCLASS_VENDOR) return UMATCH_NONE; if (uhso_lookup(uaa->uaa_vendor, uaa->uaa_product)) return UMATCH_VENDOR_PRODUCT; return UMATCH_NONE; } static void uhso_attach(device_t parent, device_t self, void *aux) { struct uhso_softc *sc = device_private(self); struct usb_attach_arg *uaa = aux; struct usbd_interface *ifh; char *devinfop; uint8_t count, i, spec; usbd_status status; DPRINTF(1, ": sc = %p, self=%p", sc, self); sc->sc_dev = self; sc->sc_udev = uaa->uaa_device; aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(uaa->uaa_device, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); status = usbd_set_config_no(sc->sc_udev, UHSO_CONFIG_NO, 1); if (status != USBD_NORMAL_COMPLETION) { aprint_error_dev(self, "failed to set configuration" ", err=%s\n", usbd_errstr(status)); return; } if (uaa->uaa_class != UDCLASS_VENDOR) { aprint_verbose_dev(self, "Switching device into modem mode..\n"); if (uhso_switch_mode(uaa->uaa_device) != 0) aprint_error_dev(self, "modem switch failed\n"); return; } count = 0; (void)usbd_interface_count(sc->sc_udev, &count); DPRINTF(1, "interface count %d\n", count); for (i = 0; i < count; i++) { status = usbd_device2interface_handle(sc->sc_udev, i, &ifh); if (status != USBD_NORMAL_COMPLETION) { aprint_error_dev(self, "could not get interface %d: %s\n", i, usbd_errstr(status)); return; } if (!uhso_get_iface_spec(uaa, i, &spec)) { aprint_error_dev(self, "could not get interface %d specification\n", i); return; } if (ISSET(spec, UHSO_IFACE_MUX)) uhso_mux_attach(sc, ifh, UHSOPORT(spec)); if (ISSET(spec, UHSO_IFACE_BULK)) uhso_bulk_attach(sc, ifh, UHSOPORT(spec)); if (ISSET(spec, UHSO_IFACE_IFNET)) uhso_ifnet_attach(sc, ifh, UHSOPORT(spec)); } if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } static 
int
uhso_detach(device_t self, int flags)
{
	struct uhso_softc *sc = device_private(self);
	struct uhso_port *hp;
	devmajor_t major;
	devminor_t minor;
	unsigned int i;
	int s;

	pmf_device_deregister(self);

	/* Abort in-flight IO on every port before tearing anything down. */
	for (i = 0; i < UHSO_PORT_MAX; i++) {
		hp = sc->sc_port[i];
		if (hp != NULL)
			(*hp->hp_abort)(hp);
	}

	/* Wait for pending transfers to drain (refcnt counts them). */
	s = splusb();
	if (sc->sc_refcnt-- > 0) {
		DPRINTF(1, "waiting for refcnt (%d)..\n", sc->sc_refcnt);
		usb_detach_waitold(sc->sc_dev);
	}
	splx(s);

	/*
	 * XXX the tty close routine increases/decreases refcnt causing
	 * XXX another usb_detach_wakeupold() does it matter, should these
	 * XXX be before the detach_wait? or before the abort?
	 */

	/* Nuke the vnodes for any open instances (calls close). */
	major = cdevsw_lookup_major(&uhso_cdevsw);
	minor = UHSOMINOR(device_unit(sc->sc_dev), 0);
	vdevgone(major, minor, minor + UHSO_PORT_MAX, VCHR);
	minor = UHSOMINOR(device_unit(sc->sc_dev), 0) | UHSO_DIALOUT_MASK;
	vdevgone(major, minor, minor + UHSO_PORT_MAX, VCHR);
	minor = UHSOMINOR(device_unit(sc->sc_dev), 0) | UHSO_CALLUNIT_MASK;
	vdevgone(major, minor, minor + UHSO_PORT_MAX, VCHR);

	/* Now free the per-port state. */
	for (i = 0; i < UHSO_PORT_MAX; i++) {
		hp = sc->sc_port[i];
		if (hp != NULL)
			(*hp->hp_detach)(hp);
	}

	usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev);

	return 0;
}

/*
 * Send SCSI REZERO_UNIT command to switch device into modem mode
 */
Static int
uhso_switch_mode(struct usbd_device *udev)
{
	umass_bbb_cbw_t cmd;
	usb_endpoint_descriptor_t *ed;
	struct usbd_interface *ifh;
	struct usbd_pipe *pipe;
	struct usbd_xfer *xfer;
	usbd_status status;

	status = usbd_device2interface_handle(udev, 0, &ifh);
	if (status != USBD_NORMAL_COMPLETION)
		return EIO;

	ed = uhso_get_endpoint(ifh, UE_BULK, UE_DIR_OUT);
	if (ed == NULL)
		return ENODEV;

	status = usbd_open_pipe(ifh, ed->bEndpointAddress, 0, &pipe);
	if (status != USBD_NORMAL_COMPLETION)
		return EIO;

	int error = usbd_create_xfer(pipe, sizeof(cmd), 0, 0, &xfer);
	if (error)
		return error;

	/* Build a mass-storage CBW wrapping the REZERO_UNIT CDB. */
	USETDW(cmd.dCBWSignature, CBWSIGNATURE);
	USETDW(cmd.dCBWTag, 1);
	USETDW(cmd.dCBWDataTransferLength, 0);
	cmd.bCBWFlags = CBWFLAGS_OUT;
	cmd.bCBWLUN = 0;
	cmd.bCDBLength = 6;
	memset(&cmd.CBWCDB, 0, CBWCDBLENGTH);
	cmd.CBWCDB[0] = SCSI_REZERO_UNIT;

	usbd_setup_xfer(xfer, NULL, &cmd, sizeof(cmd), USBD_SYNCHRONOUS,
	    USBD_DEFAULT_TIMEOUT, NULL);

	status = usbd_transfer(xfer);

	usbd_destroy_xfer(xfer);
	usbd_close_pipe(pipe);

	return status == USBD_NORMAL_COMPLETION ? 0 : EIO;
}

/*
 * Look up the interface specification byte for interface 'ifnum',
 * either from a static per-device-type table or (UHSOTYPE_CONFIG) by
 * fetching the vendor "Config Info" block from the device.  Returns
 * non-zero on success with *spec filled in.
 */
Static int
uhso_get_iface_spec(struct usb_attach_arg *uaa, uint8_t ifnum, uint8_t *spec)
{
	const struct uhso_dev *hd;
	uint8_t config[17];
	usb_device_request_t req;
	usbd_status status;

	hd = uhso_lookup(uaa->uaa_vendor, uaa->uaa_product);
	KASSERT(hd != NULL);

	switch (hd->type) {
	case UHSOTYPE_DEFAULT:
		if (ifnum >= __arraycount(uhso_spec_default))
			break;

		*spec = uhso_spec_default[ifnum];
		return 1;

	case UHSOTYPE_ICON321:
		if (ifnum >= __arraycount(uhso_spec_icon321))
			break;

		*spec = uhso_spec_icon321[ifnum];
		return 1;

	case UHSOTYPE_CONFIG:
		req.bmRequestType = UT_READ_VENDOR_DEVICE;
		req.bRequest = 0x86;	/* "Config Info" */
		USETW(req.wValue, 0);
		USETW(req.wIndex, 0);
		USETW(req.wLength, sizeof(config));

		status = usbd_do_request(uaa->uaa_device, &req, config);
		if (status != USBD_NORMAL_COMPLETION)
			break;

		if (ifnum >= __arraycount(config)
		    || config[ifnum] >= __arraycount(uhso_spec_config))
			break;

		*spec = uhso_spec_config[config[ifnum]];

		/*
		 * Apparently some modems also have a CRC bug that is
		 * indicated by ISSET(config[16], __BIT(0)) but we dont
		 * handle it at this time.
*/
		return 1;

	default:
		DPRINTF(0, "unknown interface type\n");
		break;
	}

	return 0;
}

/*
 * Return the first endpoint descriptor on 'ifh' matching the given
 * transfer type and direction, or NULL if none exists.
 */
Static usb_endpoint_descriptor_t *
uhso_get_endpoint(struct usbd_interface *ifh, int type, int dir)
{
	usb_endpoint_descriptor_t *ed;
	uint8_t count, i;

	count = 0;
	(void)usbd_endpoint_count(ifh, &count);

	for (i = 0; i < count; i++) {
		ed = usbd_interface2endpoint_descriptor(ifh, i);
		if (ed != NULL
		    && UE_GET_XFERTYPE(ed->bmAttributes) == type
		    && UE_GET_DIR(ed->bEndpointAddress) == dir)
			return ed;
	}

	return NULL;
}

/******************************************************************************
 *
 *	Multiplexed ports signal with the interrupt endpoint to indicate
 * when data is available for reading, and a separate request is made on
 * the control endpoint to read or write on each port.  The offsets in the
 * table below relate to bit numbers in the mux mask, identifying each port.
 */
Static const int uhso_mux_port[] = {
    UHSO_PORT_CONTROL,
    UHSO_PORT_APP,
    UHSO_PORT_PCSC,
    UHSO_PORT_GPS,
    UHSO_PORT_APP2,
};

/*
 * Attach one tty per bit set in the mux mask found in the interface's
 * class-specific descriptor.  All muxed ports share a single interrupt
 * pipe/buffer; only the first attached port owns them (see
 * UHSO_PORT_MUXPIPE below).
 */
Static void
uhso_mux_attach(struct uhso_softc *sc, struct usbd_interface *ifh, int index)
{
	usbd_desc_iter_t iter;
	const usb_descriptor_t *desc;
	usb_endpoint_descriptor_t *ed;
	struct usbd_pipe *pipe;
	struct uhso_port *hp;
	uint8_t *buf;
	size_t size;
	unsigned int i, mux, flags;
	int addr;
	usbd_status status;

	ed = uhso_get_endpoint(ifh, UE_INTERRUPT, UE_DIR_IN);
	if (ed == NULL) {
		aprint_error_dev(sc->sc_dev, "no interrupt endpoint\n");
		return;
	}

	addr = ed->bEndpointAddress;
	size = UGETW(ed->wMaxPacketSize);

	/*
	 * There should be an additional "Class Specific" descriptor on
	 * the mux interface containing a single byte with a bitmask of
	 * enabled ports.  We need to look through the device descriptor
	 * to find it and the port index is found from the uhso_mux_port
	 * array, above.
	 */
	usb_desc_iter_init(sc->sc_udev, &iter);

	/* skip past the current interface descriptor */
	iter.cur = (const uByte *)usbd_get_interface_descriptor(ifh);
	desc = usb_desc_iter_next(&iter);

	for (;;) {
		desc = usb_desc_iter_next(&iter);
		if (desc == NULL
		    || desc->bDescriptorType == UDESC_INTERFACE) {
			mux = 0;
			break; /* not found */
		}

		if (desc->bDescriptorType == UDESC_CS_INTERFACE
		    && desc->bLength == 3) {
			mux = ((const uint8_t *)desc)[2];
			break;
		}
	}

	DPRINTF(1, "addr=%d, size=%zd, mux=0x%02x\n", addr, size, mux);

	buf = kmem_alloc(size, KM_SLEEP);
	status = usbd_open_pipe_intr(ifh, addr, USBD_SHORT_XFER_OK, &pipe,
	    sc, buf, size, uhso_mux_intr, USBD_DEFAULT_INTERVAL);
	if (status != USBD_NORMAL_COMPLETION) {
		aprint_error_dev(sc->sc_dev,
		    "failed to open interrupt pipe: %s",
		    usbd_errstr(status));
		kmem_free(buf, size);
		return;
	}

	flags = 0;
	for (i = 0; i < __arraycount(uhso_mux_port); i++) {
		if (ISSET(mux, __BIT(i))) {
			if (sc->sc_port[uhso_mux_port[i]] != NULL) {
				aprint_error_dev(sc->sc_dev,
				    "mux port %d is duplicate!\n", i);
				continue;
			}

			hp = kmem_zalloc(sizeof(struct uhso_port), KM_SLEEP);
			sc->sc_port[uhso_mux_port[i]] = hp;

			hp->hp_sc = sc;
			hp->hp_index = i;
			hp->hp_ipipe = pipe;
			hp->hp_ibuf = buf;
			hp->hp_isize = size;
			hp->hp_flags = flags;
			hp->hp_abort = uhso_mux_abort;
			hp->hp_detach = uhso_mux_detach;
			hp->hp_init = uhso_mux_init;
			hp->hp_clean = uhso_mux_clean;
			hp->hp_write = uhso_mux_write;
			hp->hp_write_cb = uhso_tty_write_cb;
			hp->hp_read = uhso_mux_read;
			hp->hp_read_cb = uhso_tty_read_cb;
			hp->hp_control = uhso_mux_control;
			hp->hp_wsize = UHSO_MUX_WSIZE;
			hp->hp_rsize = UHSO_MUX_RSIZE;

			uhso_tty_attach(hp);

			aprint_normal_dev(sc->sc_dev,
			    "%s (port %d) attached as mux tty\n",
			    uhso_port_name[uhso_mux_port[i]],
			    uhso_mux_port[i]);

			/*
			 * As the pipe handle is stored in each mux, mark
			 * secondary references so they don't get released
			 */
			flags = UHSO_PORT_MUXPIPE;
		}
	}

	if (flags == 0) {
		/* for whatever reasons, nothing was attached */
		usbd_abort_pipe(pipe);
		usbd_close_pipe(pipe);
		kmem_free(buf, size);
	}
}

/* Abort all IO on a muxed port; the shared interrupt pipe is aborted
 * only by the port that owns it (no UHSO_PORT_MUXPIPE flag). */
Static int
uhso_mux_abort(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;

	DPRINTF(1, "hp=%p\n", hp);

	if (!ISSET(hp->hp_flags, UHSO_PORT_MUXPIPE))
		usbd_abort_pipe(hp->hp_ipipe);

	/* mux IO goes over the default (control) pipe */
	usbd_abort_default_pipe(sc->sc_udev);

	return (*hp->hp_clean)(hp);
}

/* Release a muxed port; the pipe owner also closes the shared
 * interrupt pipe and frees the shared buffer. */
Static int
uhso_mux_detach(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	if (!ISSET(hp->hp_flags, UHSO_PORT_MUXPIPE)) {
		DPRINTF(1, "interrupt pipe closed\n");
		usbd_abort_pipe(hp->hp_ipipe);
		usbd_close_pipe(hp->hp_ipipe);
		kmem_free(hp->hp_ibuf, hp->hp_isize);
	}

	uhso_tty_detach(hp);
	kmem_free(hp, sizeof(struct uhso_port));

	return 0;
}

/* Prepare a muxed port for use: reset mux state, fake DSR/DCD (the mux
 * has no real modem signals) and create xfers on the default pipe. */
Static int
uhso_mux_init(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	CLR(hp->hp_flags, UHSO_PORT_MUXBUSY | UHSO_PORT_MUXREADY);
	SET(hp->hp_status, TIOCM_DSR | TIOCM_CAR);

	struct uhso_softc *sc = hp->hp_sc;
	struct usbd_pipe *pipe0 = usbd_get_pipe0(sc->sc_udev);
	int error;

	error = usbd_create_xfer(pipe0, hp->hp_rsize, 0, 0, &hp->hp_rxfer);
	if (error)
		return error;
	hp->hp_rbuf = usbd_get_buffer(hp->hp_rxfer);

	error = usbd_create_xfer(pipe0, hp->hp_wsize, 0, 0, &hp->hp_wxfer);
	if (error)
		return error;
	hp->hp_wbuf = usbd_get_buffer(hp->hp_wxfer);

	return 0;
}

Static int
uhso_mux_clean(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	CLR(hp->hp_flags, UHSO_PORT_MUXREADY);
	CLR(hp->hp_status, TIOCM_DTR | TIOCM_DSR | TIOCM_CAR);

	return 0;
}

/* Write hp_wlen bytes via a class request on the default pipe; the
 * completion callback drops the refcount taken here. */
Static int
uhso_mux_write(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;
	usb_device_request_t req;
	usbd_status status;

	DPRINTF(5, "hp=%p, index=%d, wlen=%zd\n", hp, hp->hp_index,
	    hp->hp_wlen);

	req.bmRequestType = UT_WRITE_CLASS_INTERFACE;
	req.bRequest = UCDC_SEND_ENCAPSULATED_COMMAND;
	USETW(req.wValue, 0);
	USETW(req.wIndex, hp->hp_index);
	USETW(req.wLength, hp->hp_wlen);

	usbd_setup_default_xfer(hp->hp_wxfer, sc->sc_udev, hp,
	    USBD_NO_TIMEOUT, &req, hp->hp_wbuf, hp->hp_wlen, 0,
	    hp->hp_write_cb);

	status = usbd_transfer(hp->hp_wxfer);
	if (status != USBD_IN_PROGRESS) {
		DPRINTF(0, "non-normal status %s\n",
		    usbd_errstr(status));
		return EIO;
	}

	sc->sc_refcnt++;

	return 0;
}

/*
 * Start a read on a muxed port.  MUXBUSY serializes reads; MUXREADY is
 * set by the interrupt handler when the device signals pending data.
 */
Static int
uhso_mux_read(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;
	usb_device_request_t req;
	usbd_status status;

	CLR(hp->hp_flags, UHSO_PORT_MUXBUSY);

	/* nothing pending and nothing carried over: stop reading */
	if (hp->hp_rlen == 0 && !ISSET(hp->hp_flags, UHSO_PORT_MUXREADY))
		return 0;

	SET(hp->hp_flags, UHSO_PORT_MUXBUSY);
	CLR(hp->hp_flags, UHSO_PORT_MUXREADY);

	DPRINTF(5, "hp=%p, index=%d\n", hp, hp->hp_index);

	req.bmRequestType = UT_READ_CLASS_INTERFACE;
	req.bRequest = UCDC_GET_ENCAPSULATED_RESPONSE;
	USETW(req.wValue, 0);
	USETW(req.wIndex, hp->hp_index);
	USETW(req.wLength, hp->hp_rsize);

	usbd_setup_default_xfer(hp->hp_rxfer, sc->sc_udev, hp,
	    USBD_NO_TIMEOUT, &req, hp->hp_rbuf, hp->hp_rsize,
	    USBD_SHORT_XFER_OK, hp->hp_read_cb);

	status = usbd_transfer(hp->hp_rxfer);
	if (status != USBD_IN_PROGRESS) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		CLR(hp->hp_flags, UHSO_PORT_MUXBUSY);
		return EIO;
	}

	sc->sc_refcnt++;

	return 0;
}

/* No modem-control support on muxed ports. */
Static int
uhso_mux_control(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	return 0;
}

/*
 * Shared interrupt handler: the first byte is a bitmask of mux ports
 * with data pending.  Kick a read on each ready, open (DTR set) port
 * that is not already busy.
 */
Static void
uhso_mux_intr(struct usbd_xfer *xfer, void * p, usbd_status status)
{
	struct uhso_softc *sc = p;
	struct uhso_port *hp;
	uint32_t cc;
	uint8_t *buf;
	unsigned int i;

	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		return;
	}

	usbd_get_xfer_status(xfer, NULL, (void **)&buf, &cc, NULL);
	if (cc == 0)
		return;

	DPRINTF(5, "mux mask 0x%02x, cc=%u\n", buf[0], cc);

	for (i = 0; i < __arraycount(uhso_mux_port); i++) {
		if (!ISSET(buf[0], __BIT(i)))
			continue;

		DPRINTF(5, "mux %d port %d\n", i, uhso_mux_port[i]);

		hp = sc->sc_port[uhso_mux_port[i]];
		if (hp == NULL
		    || hp->hp_tp == NULL
		    || !ISSET(hp->hp_status, TIOCM_DTR))
			continue;

		SET(hp->hp_flags, UHSO_PORT_MUXREADY);
		if (ISSET(hp->hp_flags, UHSO_PORT_MUXBUSY))
			continue;

		uhso_mux_read(hp);
	}
}

/******************************************************************************
 *
 *	Bulk ports operate using the bulk endpoints on an
interface, though
 * the Modem port (at least) may have an interrupt endpoint that will pass
 * CDC Notification messages with the modem status.
 */

/*
 * Attach a tty backed by the interface's bulk-in/bulk-out endpoints.
 * The modem port additionally records its interrupt endpoint for
 * CDC serial-state notifications (see uhso_bulk_intr).
 */
Static void
uhso_bulk_attach(struct uhso_softc *sc, struct usbd_interface *ifh, int index)
{
	usb_endpoint_descriptor_t *ed;
	usb_interface_descriptor_t *id;
	struct uhso_port *hp;
	int in, out;

	ed = uhso_get_endpoint(ifh, UE_BULK, UE_DIR_IN);
	if (ed == NULL) {
		aprint_error_dev(sc->sc_dev, "bulk-in endpoint not found\n");
		return;
	}
	in = ed->bEndpointAddress;

	ed = uhso_get_endpoint(ifh, UE_BULK, UE_DIR_OUT);
	if (ed == NULL) {
		aprint_error_dev(sc->sc_dev, "bulk-out endpoint not found\n");
		return;
	}
	out = ed->bEndpointAddress;

	id = usbd_get_interface_descriptor(ifh);
	if (id == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "interface descriptor not found\n");
		return;
	}

	DPRINTF(1, "bulk endpoints in=%x, out=%x\n", in, out);

	if (sc->sc_port[index] != NULL) {
		aprint_error_dev(sc->sc_dev,
		    "bulk port %d is duplicate!\n", index);
		return;
	}

	hp = kmem_zalloc(sizeof(struct uhso_port), KM_SLEEP);
	sc->sc_port[index] = hp;

	hp->hp_sc = sc;
	hp->hp_ifh = ifh;
	hp->hp_index = id->bInterfaceNumber;
	hp->hp_raddr = in;
	hp->hp_waddr = out;
	hp->hp_abort = uhso_bulk_abort;
	hp->hp_detach = uhso_bulk_detach;
	hp->hp_init = uhso_bulk_init;
	hp->hp_clean = uhso_bulk_clean;
	hp->hp_write = uhso_bulk_write;
	hp->hp_write_cb = uhso_tty_write_cb;
	hp->hp_read = uhso_bulk_read;
	hp->hp_read_cb = uhso_tty_read_cb;
	hp->hp_control = uhso_bulk_control;
	hp->hp_wsize = UHSO_BULK_WSIZE;
	hp->hp_rsize = UHSO_BULK_RSIZE;

	/* only the modem port reports serial state via interrupt */
	if (index == UHSO_PORT_MODEM) {
		ed = uhso_get_endpoint(ifh, UE_INTERRUPT, UE_DIR_IN);
		if (ed != NULL) {
			hp->hp_iaddr = ed->bEndpointAddress;
			hp->hp_isize = UGETW(ed->wMaxPacketSize);
		}
	}

	uhso_tty_attach(hp);

	aprint_normal_dev(sc->sc_dev, "%s (port %d) attached as bulk tty\n",
	    uhso_port_name[index], index);
}

Static int
uhso_bulk_abort(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	return (*hp->hp_clean)(hp);
}

Static int
uhso_bulk_detach(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	uhso_tty_detach(hp);
	kmem_free(hp, sizeof(struct uhso_port));

	return 0;
}

/*
 * Open the (optional) interrupt pipe plus the bulk read/write pipes,
 * and create one xfer per direction.  On failure the caller is
 * expected to run hp_clean to release whatever was set up.
 */
Static int
uhso_bulk_init(struct uhso_port *hp)
{
	usbd_status status;

	DPRINTF(1, "hp=%p\n", hp);

	if (hp->hp_isize > 0) {
		hp->hp_ibuf = kmem_alloc(hp->hp_isize, KM_SLEEP);

		status = usbd_open_pipe_intr(hp->hp_ifh, hp->hp_iaddr,
		    USBD_SHORT_XFER_OK, &hp->hp_ipipe, hp, hp->hp_ibuf,
		    hp->hp_isize, uhso_bulk_intr, USBD_DEFAULT_INTERVAL);

		if (status != USBD_NORMAL_COMPLETION) {
			DPRINTF(0, "interrupt pipe open failed: %s\n",
			    usbd_errstr(status));
			return EIO;
		}
	}

	status = usbd_open_pipe(hp->hp_ifh, hp->hp_raddr, 0, &hp->hp_rpipe);
	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "read pipe open failed: %s\n",
		    usbd_errstr(status));
		return EIO;
	}

	status = usbd_open_pipe(hp->hp_ifh, hp->hp_waddr, 0, &hp->hp_wpipe);
	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "write pipe open failed: %s\n",
		    usbd_errstr(status));
		return EIO;
	}

	int error = usbd_create_xfer(hp->hp_rpipe, hp->hp_rsize,
	    0, 0, &hp->hp_rxfer);
	if (error)
		return error;
	hp->hp_rbuf = usbd_get_buffer(hp->hp_rxfer);

	error = usbd_create_xfer(hp->hp_wpipe, hp->hp_wsize,
	    0, 0, &hp->hp_wxfer);
	if (error)
		return error;
	hp->hp_wbuf = usbd_get_buffer(hp->hp_wxfer);

	return 0;
}

/*
 * Tear down pipes and xfers.  Bulk pipes are aborted first, then the
 * xfers destroyed, then the pipes closed (xfers belong to the pipes).
 */
Static int
uhso_bulk_clean(struct uhso_port *hp)
{

	DPRINTF(1, "hp=%p\n", hp);

	if (hp->hp_ipipe != NULL) {
		usbd_abort_pipe(hp->hp_ipipe);
		usbd_close_pipe(hp->hp_ipipe);
		hp->hp_ipipe = NULL;
	}

	if (hp->hp_ibuf != NULL) {
		kmem_free(hp->hp_ibuf, hp->hp_isize);
		hp->hp_ibuf = NULL;
	}

	if (hp->hp_rpipe != NULL) {
		usbd_abort_pipe(hp->hp_rpipe);
	}

	if (hp->hp_wpipe != NULL) {
		usbd_abort_pipe(hp->hp_wpipe);
	}

	if (hp->hp_rxfer != NULL) {
		usbd_destroy_xfer(hp->hp_rxfer);
		hp->hp_rxfer = NULL;
		hp->hp_rbuf = NULL;
	}

	if (hp->hp_wxfer != NULL) {
		usbd_destroy_xfer(hp->hp_wxfer);
		hp->hp_wxfer = NULL;
		hp->hp_wbuf = NULL;
	}

	if (hp->hp_rpipe != NULL) {
		usbd_close_pipe(hp->hp_rpipe);
		hp->hp_rpipe = NULL;
	}

	if (hp->hp_wpipe != NULL) {
		usbd_close_pipe(hp->hp_wpipe);
		hp->hp_wpipe = NULL;
	}

	return 0;
}

/* Submit hp_wlen bytes on the bulk-out pipe; refcount dropped by the
 * completion callback. */
Static int
uhso_bulk_write(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;
	usbd_status status;

	DPRINTF(5, "hp=%p, wlen=%zd\n", hp, hp->hp_wlen);

	usbd_setup_xfer(hp->hp_wxfer, hp, hp->hp_wbuf, hp->hp_wlen,
	    0, USBD_NO_TIMEOUT, hp->hp_write_cb);

	status = usbd_transfer(hp->hp_wxfer);
	if (status != USBD_IN_PROGRESS) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		return EIO;
	}

	sc->sc_refcnt++;

	return 0;
}

/* Queue a read on the bulk-in pipe; refcount dropped by the callback. */
Static int
uhso_bulk_read(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;
	usbd_status status;

	DPRINTF(5, "hp=%p\n", hp);

	usbd_setup_xfer(hp->hp_rxfer, hp, hp->hp_rbuf, hp->hp_rsize,
	    USBD_SHORT_XFER_OK, USBD_NO_TIMEOUT, hp->hp_read_cb);

	status = usbd_transfer(hp->hp_rxfer);
	if (status != USBD_IN_PROGRESS) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		return EIO;
	}

	sc->sc_refcnt++;

	return 0;
}

/*
 * Push DTR/RTS from hp_status to the device with a CDC
 * SET_CONTROL_LINE_STATE request.  Only meaningful on the modem port
 * (hp_isize != 0); a no-op elsewhere.
 */
Static int
uhso_bulk_control(struct uhso_port *hp)
{
	struct uhso_softc *sc = hp->hp_sc;
	usb_device_request_t req;
	usbd_status status;
	int val;

	DPRINTF(1, "hp=%p\n", hp);

	if (hp->hp_isize == 0)
		return 0;

	val = 0;
	if (ISSET(hp->hp_status, TIOCM_DTR))
		SET(val, UCDC_LINE_DTR);
	if (ISSET(hp->hp_status, TIOCM_RTS))
		SET(val, UCDC_LINE_RTS);

	req.bmRequestType = UT_WRITE_CLASS_INTERFACE;
	req.bRequest = UCDC_SET_CONTROL_LINE_STATE;
	USETW(req.wValue, val);
	USETW(req.wIndex, hp->hp_index);
	USETW(req.wLength, 0);

	/* synchronous request: hold a reference across it */
	sc->sc_refcnt++;

	status = usbd_do_request(sc->sc_udev, &req, NULL);

	if (--sc->sc_refcnt < 0)
		usb_detach_wakeupold(sc->sc_dev);

	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		return EIO;
	}

	return 0;
}

/*
 * Interrupt handler for CDC SERIAL_STATE notifications on the modem
 * port: mirror RI/DSR/DCD into hp_status and inform the line
 * discipline of carrier transitions.
 */
Static void
uhso_bulk_intr(struct usbd_xfer *xfer, void * p, usbd_status status)
{
	struct uhso_port *hp = p;
	struct tty *tp = hp->hp_tp;
	usb_cdc_notification_t *msg;
	uint32_t cc;
	int s, old;

	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));
		return;
	}

	usbd_get_xfer_status(xfer, NULL, (void **)&msg, &cc,
NULL); if (cc < UCDC_NOTIFICATION_LENGTH || msg->bmRequestType != UCDC_NOTIFICATION || msg->bNotification != UCDC_N_SERIAL_STATE || UGETW(msg->wValue) != 0 || UGETW(msg->wIndex) != hp->hp_index || UGETW(msg->wLength) < 1) return; DPRINTF(5, "state=%02x\n", msg->data[0]); old = hp->hp_status; CLR(hp->hp_status, TIOCM_RNG | TIOCM_DSR | TIOCM_CAR); if (ISSET(msg->data[0], UCDC_N_SERIAL_RI)) SET(hp->hp_status, TIOCM_RNG); if (ISSET(msg->data[0], UCDC_N_SERIAL_DSR)) SET(hp->hp_status, TIOCM_DSR); if (ISSET(msg->data[0], UCDC_N_SERIAL_DCD)) SET(hp->hp_status, TIOCM_CAR); if (ISSET(hp->hp_status ^ old, TIOCM_CAR)) { s = spltty(); tp->t_linesw->l_modem(tp, ISSET(hp->hp_status, TIOCM_CAR)); splx(s); } if (ISSET((hp->hp_status ^ old), TIOCM_RNG | TIOCM_DSR | TIOCM_CAR)) DPRINTF(1, "RNG %s, DSR %s, DCD %s\n", (ISSET(hp->hp_status, TIOCM_RNG) ? "on" : "off"), (ISSET(hp->hp_status, TIOCM_DSR) ? "on" : "off"), (ISSET(hp->hp_status, TIOCM_CAR) ? "on" : "off")); } /****************************************************************************** * * TTY management * */ Static void uhso_tty_attach(struct uhso_port *hp) { struct tty *tp; tp = tty_alloc(); tp->t_oproc = uhso_tty_start; tp->t_param = uhso_tty_param; hp->hp_tp = tp; tty_attach(tp); DPRINTF(1, "hp=%p, tp=%p\n", hp, tp); } Static void uhso_tty_detach(struct uhso_port *hp) { DPRINTF(1, "hp=%p\n", hp); uhso_tty_clean(hp); tty_detach(hp->hp_tp); tty_free(hp->hp_tp); hp->hp_tp = NULL; } Static void uhso_tty_write_cb(struct usbd_xfer *xfer, void * p, usbd_status status) { struct uhso_port *hp = p; struct uhso_softc *sc = hp->hp_sc; struct tty *tp = hp->hp_tp; uint32_t cc; int s; if (--sc->sc_refcnt < 0) usb_detach_wakeupold(sc->sc_dev); if (status != USBD_NORMAL_COMPLETION) { DPRINTF(0, "non-normal status %s\n", usbd_errstr(status)); if (status == USBD_STALLED && hp->hp_wpipe != NULL) usbd_clear_endpoint_stall_async(hp->hp_wpipe); else return; } else { usbd_get_xfer_status(xfer, NULL, NULL, &cc, NULL); DPRINTF(5, "wrote %d bytes 
(of %zd)\n", cc, hp->hp_wlen); if (cc != hp->hp_wlen) DPRINTF(0, "cc=%u, wlen=%zd\n", cc, hp->hp_wlen); } s = spltty(); CLR(tp->t_state, TS_BUSY); tp->t_linesw->l_start(tp); splx(s); } Static void uhso_tty_read_cb(struct usbd_xfer *xfer, void * p, usbd_status status) { struct uhso_port *hp = p; struct uhso_softc *sc = hp->hp_sc; struct tty *tp = hp->hp_tp; uint8_t *cp; uint32_t cc; int s; if (--sc->sc_refcnt < 0) usb_detach_wakeupold(sc->sc_dev); if (status != USBD_NORMAL_COMPLETION) { DPRINTF(0, "non-normal status: %s\n", usbd_errstr(status)); if (status == USBD_STALLED && hp->hp_rpipe != NULL) usbd_clear_endpoint_stall_async(hp->hp_rpipe); else return; hp->hp_rlen = 0; } else { usbd_get_xfer_status(xfer, NULL, (void **)&cp, &cc, NULL); hp->hp_rlen = cc; DPRINTF(5, "read %d bytes\n", cc); s = spltty(); while (cc > 0) { if (tp->t_linesw->l_rint(*cp++, tp) == -1) { DPRINTF(0, "lost %d bytes\n", cc); break; } cc--; } splx(s); } (*hp->hp_read)(hp); } /****************************************************************************** * * TTY subsystem * */ static int uhso_tty_open(dev_t dev, int flag, int mode, struct lwp *l) { struct uhso_softc *sc; struct uhso_port *hp; struct tty *tp; int error, s; DPRINTF(1, "unit %d port %d\n", UHSOUNIT(dev), UHSOPORT(dev)); sc = device_lookup_private(&uhso_cd, UHSOUNIT(dev)); if (sc == NULL || !device_is_active(sc->sc_dev) || UHSOPORT(dev) >= UHSO_PORT_MAX) return ENXIO; hp = sc->sc_port[UHSOPORT(dev)]; if (hp == NULL || hp->hp_tp == NULL) return ENXIO; tp = hp->hp_tp; if (kauth_authorize_device_tty(l->l_cred, KAUTH_DEVICE_TTY_OPEN, tp)) return EBUSY; error = 0; s = spltty(); if (!ISSET(tp->t_state, TS_ISOPEN) && tp->t_wopen == 0) { tp->t_dev = dev; error = uhso_tty_init(hp); } splx(s); if (error == 0) { error = ttyopen(tp, UHSODIALOUT(dev), ISSET(flag, O_NONBLOCK)); if (error == 0) { error = tp->t_linesw->l_open(dev, tp); } } if (!ISSET(tp->t_state, TS_ISOPEN) && tp->t_wopen == 0) uhso_tty_clean(hp); DPRINTF(1, "sc=%p, hp=%p, tp=%p, 
error=%d\n", sc, hp, tp, error); return error; } Static int uhso_tty_init(struct uhso_port *hp) { struct tty *tp = hp->hp_tp; struct termios t; int error; DPRINTF(1, "sc=%p, hp=%p, tp=%p\n", sc, hp, tp); /* * Initialize the termios status to the defaults. Add in the * sticky bits from TIOCSFLAGS. */ t.c_ispeed = 0; t.c_ospeed = TTYDEF_SPEED; t.c_cflag = TTYDEF_CFLAG; if (ISSET(hp->hp_swflags, TIOCFLAG_CLOCAL)) SET(t.c_cflag, CLOCAL); if (ISSET(hp->hp_swflags, TIOCFLAG_CRTSCTS)) SET(t.c_cflag, CRTSCTS); if (ISSET(hp->hp_swflags, TIOCFLAG_MDMBUF)) SET(t.c_cflag, MDMBUF); /* Ensure uhso_tty_param() will do something. */ tp->t_ospeed = 0; (void)uhso_tty_param(tp, &t); tp->t_iflag = TTYDEF_IFLAG; tp->t_oflag = TTYDEF_OFLAG; tp->t_lflag = TTYDEF_LFLAG; ttychars(tp); ttsetwater(tp); hp->hp_status = 0; error = (*hp->hp_init)(hp); if (error != 0) return error; /* * Turn on DTR. We must always do this, even if carrier is not * present, because otherwise we'd have to use TIOCSDTR * immediately after setting CLOCAL, which applications do not * expect. We always assert DTR while the port is open * unless explicitly requested to deassert it. Ditto RTS. 
*/ uhso_tty_control(hp, TIOCMBIS, TIOCM_DTR | TIOCM_RTS); /* and start reading */ error = (*hp->hp_read)(hp); if (error != 0) return error; return 0; } static int uhso_tty_close(dev_t dev, int flag, int mode, struct lwp *l) { struct uhso_softc *sc = device_lookup_private(&uhso_cd, UHSOUNIT(dev)); struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)]; struct tty *tp = hp->hp_tp; if (!ISSET(tp->t_state, TS_ISOPEN)) return 0; DPRINTF(1, "sc=%p, hp=%p, tp=%p\n", sc, hp, tp); sc->sc_refcnt++; tp->t_linesw->l_close(tp, flag); ttyclose(tp); if (!ISSET(tp->t_state, TS_ISOPEN) && tp->t_wopen == 0) uhso_tty_clean(hp); if (--sc->sc_refcnt < 0) usb_detach_wakeupold(sc->sc_dev); return 0; } Static void uhso_tty_clean(struct uhso_port *hp) { DPRINTF(1, "hp=%p\n", hp); if (ISSET(hp->hp_status, TIOCM_DTR) && ISSET(hp->hp_tp->t_cflag, HUPCL)) uhso_tty_control(hp, TIOCMBIC, TIOCM_DTR); (*hp->hp_clean)(hp); if (hp->hp_rxfer != NULL) { usbd_destroy_xfer(hp->hp_rxfer); hp->hp_rxfer = NULL; hp->hp_rbuf = NULL; } if (hp->hp_wxfer != NULL) { usbd_destroy_xfer(hp->hp_wxfer); hp->hp_wxfer = NULL; hp->hp_wbuf = NULL; } } static int uhso_tty_read(dev_t dev, struct uio *uio, int flag) { struct uhso_softc *sc = device_lookup_private(&uhso_cd, UHSOUNIT(dev)); struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)]; struct tty *tp = hp->hp_tp; int error; if (!device_is_active(sc->sc_dev)) return EIO; DPRINTF(5, "sc=%p, hp=%p, tp=%p\n", sc, hp, tp); sc->sc_refcnt++; error = tp->t_linesw->l_read(tp, uio, flag); if (--sc->sc_refcnt < 0) usb_detach_wakeupold(sc->sc_dev); return error; } static int uhso_tty_write(dev_t dev, struct uio *uio, int flag) { struct uhso_softc *sc = device_lookup_private(&uhso_cd, UHSOUNIT(dev)); struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)]; struct tty *tp = hp->hp_tp; int error; if (!device_is_active(sc->sc_dev)) return EIO; DPRINTF(5, "sc=%p, hp=%p, tp=%p\n", sc, hp, tp); sc->sc_refcnt++; error = tp->t_linesw->l_write(tp, uio, flag); if (--sc->sc_refcnt < 0) 
usb_detach_wakeupold(sc->sc_dev);

	return error;
}

static int
uhso_tty_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)];
	int error;

	if (!device_is_active(sc->sc_dev))
		return EIO;

	DPRINTF(1, "sc=%p, hp=%p\n", sc, hp);

	sc->sc_refcnt++;

	error = uhso_tty_do_ioctl(hp, cmd, data, flag, l);

	if (--sc->sc_refcnt < 0)
		usb_detach_wakeupold(sc->sc_dev);

	return error;
}

/*
 * ioctl worker: try the line discipline, then the generic tty layer,
 * then handle the modem-control and swflags requests ourselves.
 */
Static int
uhso_tty_do_ioctl(struct uhso_port *hp, u_long cmd, void *data, int flag,
    struct lwp *l)
{
	struct tty *tp = hp->hp_tp;
	int error, s;

	error = tp->t_linesw->l_ioctl(tp, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = ttioctl(tp, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = 0;

	s = spltty();
	switch (cmd) {
	case TIOCSDTR:
		error = uhso_tty_control(hp, TIOCMBIS, TIOCM_DTR);
		break;

	case TIOCCDTR:
		error = uhso_tty_control(hp, TIOCMBIC, TIOCM_DTR);
		break;

	case TIOCGFLAGS:
		*(int *)data = hp->hp_swflags;
		break;

	case TIOCSFLAGS:
		/* setting sticky flags is a privileged operation */
		error = kauth_authorize_device_tty(l->l_cred,
		    KAUTH_DEVICE_TTY_PRIVSET, tp);
		if (error)
			break;

		hp->hp_swflags = *(int *)data;
		break;

	case TIOCMSET:
	case TIOCMBIS:
	case TIOCMBIC:
		error = uhso_tty_control(hp, cmd, *(int *)data);
		break;

	case TIOCMGET:
		*(int *)data = hp->hp_status;
		break;

	default:
		error = EPASSTHROUGH;
		break;
	}
	splx(s);

	return error;
}

/* this is called with tty_lock held */
static void
uhso_tty_stop(struct tty *tp, int flag)
{
#if 0
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(tp->t_dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(tp->t_dev)];
#endif
}

static struct tty *
uhso_tty_tty(dev_t dev)
{
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)];

	return hp->hp_tp;
}

static int
uhso_tty_poll(dev_t dev, int events, struct lwp *l)
{
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(dev)];
	struct tty *tp = hp->hp_tp;
	int revents;

	if (!device_is_active(sc->sc_dev))
		return POLLHUP;

	sc->sc_refcnt++;

	revents = tp->t_linesw->l_poll(tp, events, l);

	if (--sc->sc_refcnt < 0)
		usb_detach_wakeupold(sc->sc_dev);

	return revents;
}

/*
 * t_param hook: validate requested termios (no split speeds), apply
 * softcar overrides, record speed/cflag and resync the carrier bit.
 */
Static int
uhso_tty_param(struct tty *tp, struct termios *t)
{
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(tp->t_dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(tp->t_dev)];

	if (!device_is_active(sc->sc_dev))
		return EIO;

	DPRINTF(1, "hp=%p, tp=%p, termios iflag=%x, oflag=%x, cflag=%x\n",
	    hp, tp, t->c_iflag, t->c_oflag, t->c_cflag);

	/* Check requested parameters. */
	if (t->c_ispeed != 0 && t->c_ispeed != t->c_ospeed)
		return EINVAL;

	/* force CLOCAL and !HUPCL for console */
	if (ISSET(hp->hp_swflags, TIOCFLAG_SOFTCAR)) {
		SET(t->c_cflag, CLOCAL);
		CLR(t->c_cflag, HUPCL);
	}

	/* If there were no changes, don't do anything. */
	if (tp->t_ospeed == t->c_ospeed
	    && tp->t_cflag == t->c_cflag)
		return 0;

	tp->t_ispeed = 0;
	tp->t_ospeed = t->c_ospeed;
	tp->t_cflag = t->c_cflag;

	/* update tty layers idea of carrier bit */
	tp->t_linesw->l_modem(tp, ISSET(hp->hp_status, TIOCM_CAR));

	return 0;
}

/* this is called with tty_lock held */
Static void
uhso_tty_start(struct tty *tp)
{
	struct uhso_softc *sc =
	    device_lookup_private(&uhso_cd, UHSOUNIT(tp->t_dev));
	struct uhso_port *hp = sc->sc_port[UHSOPORT(tp->t_dev)];
	int s;

	if (!device_is_active(sc->sc_dev))
		return;

	s = spltty();

	/* drain the output queue into the write buffer and submit it */
	if (!ISSET(tp->t_state, TS_BUSY | TS_TIMEOUT | TS_TTSTOP)
	    && ttypull(tp) != 0) {
		hp->hp_wlen = q_to_b(&tp->t_outq, hp->hp_wbuf, hp->hp_wsize);
		if (hp->hp_wlen > 0) {
			SET(tp->t_state, TS_BUSY);
			(*hp->hp_write)(hp);
		}
	}

	splx(s);
}

/*
 * Apply TIOCMSET/TIOCMBIS/TIOCMBIC to the DTR/RTS bits of hp_status
 * and push the result to the hardware via the port's hp_control hook.
 */
Static int
uhso_tty_control(struct uhso_port *hp, u_long cmd, int bits)
{

	bits &= (TIOCM_DTR | TIOCM_RTS);
	DPRINTF(1, "cmd %s, DTR=%d, RTS=%d\n",
	    (cmd == TIOCMBIC ? "BIC" : (cmd == TIOCMBIS ? "BIS" : "SET")),
	    (bits & TIOCM_DTR) ? 1 : 0,
	    (bits & TIOCM_RTS) ?
1 : 0);

	switch (cmd) {
	case TIOCMBIC:
		CLR(hp->hp_status, bits);
		break;

	case TIOCMBIS:
		SET(hp->hp_status, bits);
		break;

	case TIOCMSET:
		CLR(hp->hp_status, TIOCM_DTR | TIOCM_RTS);
		SET(hp->hp_status, bits);
		break;
	}

	return (*hp->hp_control)(hp);
}

/******************************************************************************
 *
 *	Network Interface
 *
 */

/*
 * Attach a raw-IP network interface backed by the interface's bulk
 * endpoints; reuses the bulk init/clean/read/write hooks with
 * ifnet-specific completion callbacks.
 */
Static void
uhso_ifnet_attach(struct uhso_softc *sc, struct usbd_interface *ifh, int index)
{
	usb_endpoint_descriptor_t *ed;
	struct uhso_port *hp;
	struct ifnet *ifp;
	int in, out;

	ed = uhso_get_endpoint(ifh, UE_BULK, UE_DIR_IN);
	if (ed == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "could not find bulk-in endpoint\n");
		return;
	}
	in = ed->bEndpointAddress;

	ed = uhso_get_endpoint(ifh, UE_BULK, UE_DIR_OUT);
	if (ed == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "could not find bulk-out endpoint\n");
		return;
	}
	out = ed->bEndpointAddress;

	DPRINTF(1, "in=%d, out=%d\n", in, out);

	if (sc->sc_port[index] != NULL) {
		aprint_error_dev(sc->sc_dev,
		    "ifnet port %d is duplicate!\n", index);
		return;
	}

	hp = kmem_zalloc(sizeof(struct uhso_port), KM_SLEEP);
	sc->sc_port[index] = hp;

	ifp = if_alloc(IFT_IP);
	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_softc = hp;
	ifp->if_mtu = UHSO_IFNET_MTU;
	ifp->if_dlt = DLT_RAW;
	ifp->if_type = IFT_IP;
	ifp->if_flags = IFF_NOARP | IFF_SIMPLEX;
	ifp->if_ioctl = uhso_ifnet_ioctl;
	ifp->if_start = uhso_ifnet_start;
	ifp->if_output = uhso_ifnet_output;
	IFQ_SET_READY(&ifp->if_snd);

	hp->hp_sc = sc;
	hp->hp_ifp = ifp;
	hp->hp_ifh = ifh;
	hp->hp_raddr = in;
	hp->hp_waddr = out;
	hp->hp_abort = uhso_ifnet_abort;
	hp->hp_detach = uhso_ifnet_detach;
	hp->hp_init = uhso_bulk_init;
	hp->hp_clean = uhso_bulk_clean;
	hp->hp_write = uhso_bulk_write;
	hp->hp_write_cb = uhso_ifnet_write_cb;
	hp->hp_read = uhso_bulk_read;
	hp->hp_read_cb = uhso_ifnet_read_cb;
	hp->hp_wsize = MCLBYTES;
	hp->hp_rsize = MCLBYTES;

	if_attach(ifp);
	if_alloc_sadl(ifp);
	bpf_attach(ifp, DLT_RAW, 0);

	aprint_normal_dev(sc->sc_dev, "%s (port %d) attached as ifnet\n",
	    uhso_port_name[index], index);
}

Static int
uhso_ifnet_abort(struct uhso_port *hp)
{
	struct ifnet *ifp = hp->hp_ifp;

	/* All ifnet IO will abort when IFF_RUNNING is not set */
	CLR(ifp->if_flags, IFF_RUNNING);

	return (*hp->hp_clean)(hp);
}

Static int
uhso_ifnet_detach(struct uhso_port *hp)
{
	struct ifnet *ifp = hp->hp_ifp;
	int s;

	s = splnet();
	bpf_detach(ifp);
	if_detach(ifp);
	if_free(ifp);
	splx(s);

	kmem_free(hp, sizeof(struct uhso_port));

	return 0;
}

/* Write completion: update interface stats, recover from stalls and
 * restart output. */
Static void
uhso_ifnet_write_cb(struct usbd_xfer *xfer, void * p, usbd_status status)
{
	struct uhso_port *hp = p;
	struct uhso_softc *sc= hp->hp_sc;
	struct ifnet *ifp = hp->hp_ifp;
	uint32_t cc;
	int s;

	if (--sc->sc_refcnt < 0)
		usb_detach_wakeupold(sc->sc_dev);

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "non-normal status %s\n", usbd_errstr(status));

		if (status == USBD_STALLED && hp->hp_wpipe != NULL)
			usbd_clear_endpoint_stall_async(hp->hp_wpipe);
		else
			return;

		if_statinc(ifp, if_oerrors);
	} else {
		usbd_get_xfer_status(xfer, NULL, NULL, &cc, NULL);
		DPRINTF(5, "wrote %d bytes (of %zd)\n", cc, hp->hp_wlen);
		if (cc != hp->hp_wlen)
			DPRINTF(0, "cc=%u, wlen=%zd\n", cc, hp->hp_wlen);

		if_statinc(ifp, if_opackets);
	}

	s = splnet();
	CLR(ifp->if_flags, IFF_OACTIVE);
	ifp->if_start(ifp);
	splx(s);
}

/* Read completion: hand received bytes to uhso_ifnet_input() and
 * queue the next read. */
Static void
uhso_ifnet_read_cb(struct usbd_xfer *xfer, void * p, usbd_status status)
{
	struct uhso_port *hp = p;
	struct uhso_softc *sc= hp->hp_sc;
	struct ifnet *ifp = hp->hp_ifp;
	void *cp;
	uint32_t cc;

	if (--sc->sc_refcnt < 0)
		usb_detach_wakeupold(sc->sc_dev);

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	if (status != USBD_NORMAL_COMPLETION) {
		DPRINTF(0, "non-normal status: %s\n", usbd_errstr(status));

		if (status == USBD_STALLED && hp->hp_rpipe != NULL)
			usbd_clear_endpoint_stall_async(hp->hp_rpipe);
		else
			return;

		if_statinc(ifp, if_ierrors);
		hp->hp_rlen = 0;
	} else {
		usbd_get_xfer_status(xfer, NULL, (void **)&cp, &cc, NULL);

		hp->hp_rlen = cc;
		DPRINTF(5, "read %d bytes\n", cc);

		uhso_ifnet_input(ifp, &hp->hp_mbuf, cp, cc);
	}

	(*hp->hp_read)(hp);
}

/*
 * Reassemble raw IPv4 packets from the byte stream in (cp, cc) and
 * enqueue them to the IP stack.  *mb holds a partially received packet
 * carried over between calls; m_pkthdr.len tracks how much of it has
 * been gathered so far.
 */
Static void
uhso_ifnet_input(struct ifnet *ifp, struct mbuf **mb, uint8_t *cp, size_t cc)
{
	struct mbuf *m;
	size_t got, len, want;
	int s;

	/*
	 * Several IP packets might be in the same buffer, we need to
	 * separate them before handing it to the ip-stack.  We might
	 * also receive partial packets which we need to defer until
	 * we get more data.
	 */
	while (cc > 0) {
		if (*mb == NULL) {
			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				aprint_error_ifnet(ifp, "no mbufs\n");
				if_statinc(ifp, if_ierrors);
				break;
			}

			MCLGET(m, M_DONTWAIT);
			if (!ISSET(m->m_flags, M_EXT)) {
				aprint_error_ifnet(ifp,
				    "no mbuf clusters\n");
				if_statinc(ifp, if_ierrors);
				m_freem(m);
				break;
			}

			got = 0;
		} else {
			/* resume the deferred partial packet */
			m = *mb;
			*mb = NULL;
			got = m->m_pkthdr.len;
		}

		/* make sure that the incoming packet is ok */
		if (got == 0)
			mtod(m, uint8_t *)[0] = cp[0];

		want = mtod(m, struct ip *)->ip_hl << 2;
		if (mtod(m, struct ip *)->ip_v != 4
		    || want != sizeof(struct ip)) {
			aprint_error_ifnet(ifp,
			    "bad IP header (v=%d, hl=%zd)\n",
			    mtod(m, struct ip *)->ip_v, want);
			if_statinc(ifp, if_ierrors);
			m_freem(m);
			break;
		}

		/* ensure we have the IP header.. */
		if (got < want) {
			len = MIN(want - got, cc);
			memcpy(mtod(m, uint8_t *) + got, cp, len);
			got += len;
			cc -= len;
			cp += len;

			if (got < want) {
				DPRINTF(5, "waiting for IP header "
				    "(got %zd want %zd)\n", got, want);
				m->m_pkthdr.len = got;
				*mb = m;
				break;
			}
		}

		/* ..and the packet body */
		want = ntohs(mtod(m, struct ip *)->ip_len);
		if (got < want) {
			len = MIN(want - got, cc);
			memcpy(mtod(m, uint8_t *) + got, cp, len);
			got += len;
			cc -= len;
			cp += len;

			if (got < want) {
				DPRINTF(5, "waiting for IP packet "
				    "(got %zd want %zd)\n", got, want);
				m->m_pkthdr.len = got;
				*mb = m;
				break;
			}
		}

		m_set_rcvif(m, ifp);
		m->m_pkthdr.len = m->m_len = got;

		s = splnet();

		bpf_mtap(ifp, m, BPF_D_IN);

		if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) {
			m_freem(m);
		} else {
			if_statadd2(ifp, if_ipackets, 1, if_ibytes, got);
		}

		splx(s);
	}
}

Static int
uhso_ifnet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct uhso_port *hp = ifp->if_softc;
	int error, s;

	s = splnet();

	switch (cmd) {
	case SIOCINITIFADDR:
		switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			/* bring the interface up on first address set */
			if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
				SET(ifp->if_flags, IFF_UP);
				error = uhso_ifnet_init(hp);
				if (error != 0) {
					uhso_ifnet_clean(hp);
					break;
				}

				SET(ifp->if_flags, IFF_RUNNING);
				DPRINTF(1, "hp=%p, ifp=%p INITIFADDR\n",
				    hp, ifp);
				break;
			}

			error = 0;
			break;
#endif

		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFMTU:
		/* the MTU cannot exceed the write buffer size */
		if (((struct ifreq *)data)->ifr_mtu > hp->hp_wsize) {
			error = EINVAL;
			break;
		}

		error = ifioctl_common(ifp, cmd, data);
		if (error == ENETRESET)
			error = 0;
		break;

	case SIOCSIFFLAGS:
		error = ifioctl_common(ifp, cmd, data);
		if (error != 0)
			break;

		switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
		case IFF_UP:
			error = uhso_ifnet_init(hp);
			if (error != 0) {
				uhso_ifnet_clean(hp);
				break;
			}

			SET(ifp->if_flags, IFF_RUNNING);
			DPRINTF(1, "hp=%p, ifp=%p RUNNING\n", hp, ifp);
			break;

		case IFF_RUNNING:
			uhso_ifnet_clean(hp);
			CLR(ifp->if_flags, IFF_RUNNING);
			DPRINTF(1, "hp=%p, ifp=%p STOPPED\n", hp,
ifp); break; default: break; } break; default: error = ifioctl_common(ifp, cmd, data); break; } splx(s); return error; } /* is only called if IFF_RUNNING not set */ Static int uhso_ifnet_init(struct uhso_port *hp) { struct uhso_softc *sc = hp->hp_sc; int error; DPRINTF(1, "sc=%p, hp=%p\n", sc, hp); if (!device_is_active(sc->sc_dev)) return EIO; error = (*hp->hp_init)(hp); if (error != 0) return error; error = (*hp->hp_read)(hp); if (error != 0) return error; return 0; } Static void uhso_ifnet_clean(struct uhso_port *hp) { DPRINTF(1, "hp=%p\n", hp); (*hp->hp_clean)(hp); } /* called at splnet() with IFF_OACTIVE not set */ Static void uhso_ifnet_start(struct ifnet *ifp) { struct uhso_port *hp = ifp->if_softc; struct mbuf *m; KASSERT(!ISSET(ifp->if_flags, IFF_OACTIVE)); if (!ISSET(ifp->if_flags, IFF_RUNNING)) return; if (IFQ_IS_EMPTY(&ifp->if_snd)) { DPRINTF(5, "finished sending\n"); return; } SET(ifp->if_flags, IFF_OACTIVE); IFQ_DEQUEUE(&ifp->if_snd, m); hp->hp_wlen = m->m_pkthdr.len; if (hp->hp_wlen > hp->hp_wsize) { aprint_error_ifnet(ifp, "packet too long (%zd > %zd), truncating\n", hp->hp_wlen, hp->hp_wsize); hp->hp_wlen = hp->hp_wsize; } bpf_mtap(ifp, m, BPF_D_OUT); m_copydata(m, 0, hp->hp_wlen, hp->hp_wbuf); m_freem(m); if ((*hp->hp_write)(hp) != 0) { if_statinc(ifp, if_oerrors); CLR(ifp->if_flags, IFF_OACTIVE); } } Static int uhso_ifnet_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, const struct rtentry *rt0) { int error; if (!ISSET(ifp->if_flags, IFF_RUNNING)) return EIO; IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family); switch (dst->sa_family) { #ifdef INET case AF_INET: error = ifq_enqueue(ifp, m); break; #endif default: DPRINTF(0, "unsupported address family %d\n", dst->sa_family); error = EAFNOSUPPORT; m_freem(m); break; } return error; } |
| 44 44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 | /* $NetBSD: tcp_var.h,v 1.196 2021/07/31 20:29:37 andvar Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 * * NRL grants permission for redistribution and use in source and binary * forms, with or without modification, of the software and documentation * created at NRL provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgements: * This product includes software developed by the University of * California, Berkeley and its contributors. * This product includes software developed at the Information * Technology Division, US Naval Research Laboratory. * 4. Neither the name of the NRL nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies, either expressed or implied, of the US Naval * Research Laboratory (NRL). */ /*- * Copyright (c) 1997, 1998, 1999, 2001, 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. 
Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 */ #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ #if defined(_KERNEL_OPT) #include "opt_inet.h" #include "opt_mbuftrace.h" #endif /* * TCP kernel structures and variables. */ #include <sys/callout.h> #ifdef TCP_SIGNATURE /* * Defines which are needed by the xform_tcp module and tcp_[in|out]put * for SADB verification and lookup. */ #define TCP_SIGLEN 16 /* length of computed digest in bytes */ #define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ #define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ /* * Only a single SA per host may be specified at this time. An SPI is * needed in order for the KEY_LOOKUP_SA() lookup to work. */ #define TCP_SIG_SPI 0x1000 #endif /* TCP_SIGNATURE */ /* * Tcp+ip header, after ip options removed. 
 */
struct tcpiphdr {
        struct ipovly ti_i;             /* overlaid ip structure */
        struct tcphdr ti_t;             /* tcp header */
};
#ifdef CTASSERT
CTASSERT(sizeof(struct tcpiphdr) == 40);
#endif
#define ti_x1           ti_i.ih_x1
#define ti_pr           ti_i.ih_pr
#define ti_len          ti_i.ih_len
#define ti_src          ti_i.ih_src
#define ti_dst          ti_i.ih_dst
#define ti_sport        ti_t.th_sport
#define ti_dport        ti_t.th_dport
#define ti_seq          ti_t.th_seq
#define ti_ack          ti_t.th_ack
#define ti_x2           ti_t.th_x2
#define ti_off          ti_t.th_off
#define ti_flags        ti_t.th_flags
#define ti_win          ti_t.th_win
#define ti_sum          ti_t.th_sum
#define ti_urp          ti_t.th_urp

/*
 * SACK option block.
 */
struct sackblk {
        tcp_seq left;           /* Left edge of sack block. */
        tcp_seq right;          /* Right edge of sack block. */
};

TAILQ_HEAD(sackhead, sackhole);
struct sackhole {
        tcp_seq start;          /* start of hole (not yet SACKed) */
        tcp_seq end;            /* end of hole */
        tcp_seq rxmit;          /* next seq to retransmit from this hole */
        TAILQ_ENTRY(sackhole) sackhole_q;
};

/*
 * Tcp control block, one per tcp; fields:
 */
struct tcpcb {
        int     t_family;               /* address family on the wire */
        struct ipqehead segq;           /* sequencing queue */
        int     t_segqlen;              /* length of the above */
        callout_t t_timer[TCPT_NTIMERS];/* tcp timers */
        short   t_state;                /* state of this connection */
        short   t_rxtshift;             /* log(2) of rexmt exp. backoff */
        uint32_t t_rxtcur;              /* current retransmit value */
        short   t_dupacks;              /* consecutive dup acks recd */
        /*
         * t_partialacks:
         *      <0      not in fast recovery.
         *      ==0     in fast recovery.  has not received partial acks
         *      >0      in fast recovery.  has received partial acks
         */
        short   t_partialacks;          /* partial acks during fast rexmit */
        u_short t_peermss;              /* peer's maximum segment size */
        u_short t_ourmss;               /* our maximum segment size */
        u_short t_segsz;                /* current segment size in use */
        char    t_force;                /* 1 if forcing out a byte */
        u_int   t_flags;
#define TF_ACKNOW       0x0001          /* ack peer immediately */
#define TF_DELACK       0x0002          /* ack, but try to delay it */
#define TF_NODELAY      0x0004          /* don't delay packets to coalesce */
#define TF_NOOPT        0x0008          /* don't use tcp options */
#define TF_REQ_SCALE    0x0020          /* have/will request window scaling */
#define TF_RCVD_SCALE   0x0040          /* other side has requested scaling */
#define TF_REQ_TSTMP    0x0080          /* have/will request timestamps */
#define TF_RCVD_TSTMP   0x0100          /* a timestamp was received in SYN */
#define TF_SACK_PERMIT  0x0200          /* other side said I could SACK */
#define TF_SYN_REXMT    0x0400          /* rexmit timer fired on SYN */
#define TF_WILL_SACK    0x0800          /* try to use SACK */
#define TF_REASSEMBLING 0x1000          /* we're busy reassembling */
#define TF_DEAD         0x2000          /* dead and to-be-released */
#define TF_PMTUD_PEND   0x4000          /* Path MTU Discovery pending */
#define TF_ECN_PERMIT   0x10000         /* other side said is ECN-ready */
#define TF_ECN_SND_CWR  0x20000         /* ECN CWR in queue */
#define TF_ECN_SND_ECE  0x40000         /* ECN ECE in queue */
#define TF_SIGNATURE    0x400000        /* require MD5 digests (RFC2385) */

        struct  mbuf *t_template;       /* skeletal packet for transmit */
        struct  inpcb *t_inpcb;         /* back pointer to internet pcb */
        struct  in6pcb *t_in6pcb;       /* back pointer to internet pcb */
        callout_t t_delack_ch;          /* delayed ACK callout */
        /*
         * The following fields are used as in the protocol specification.
         * See RFC793, Dec. 1981, page 21.
         */
/* send sequence variables */
        tcp_seq snd_una;                /* send unacknowledged */
        tcp_seq snd_nxt;                /* send next */
        tcp_seq snd_up;                 /* send urgent pointer */
        tcp_seq snd_wl1;                /* window update seg seq number */
        tcp_seq snd_wl2;                /* window update seg ack number */
        tcp_seq iss;                    /* initial send sequence number */
        u_long  snd_wnd;                /* send window */
        /*
         * snd_recover
         *      it's basically same as the "recover" variable in RFC 2582
         *      (NewReno).  (the original comment said RFC 2852; that RFC
         *      is unrelated -- 2582 is the NewReno spec.)
         *      when entering fast retransmit, it's set to snd_max.
         *      newreno uses this to detect partial ack.
         * snd_high
         *      it's basically same as the "send_high" variable in RFC 2582
         *      (NewReno).
         *      on each RTO, it's set to snd_max.
         *      newreno uses this to avoid false fast retransmits.
         */
        tcp_seq snd_recover;
        tcp_seq snd_high;
/* receive sequence variables */
        u_long  rcv_wnd;                /* receive window */
        tcp_seq rcv_nxt;                /* receive next */
        tcp_seq rcv_up;                 /* receive urgent pointer */
        tcp_seq irs;                    /* initial receive sequence number */
/*
 * Additional variables for this implementation.
 */
/* receive variables */
        tcp_seq rcv_adv;                /* advertised window */
        /*
         * retransmit variables
         *
         * snd_max
         *      the highest sequence number we've ever sent.
         *      used to recognize retransmits.
         */
        tcp_seq snd_max;
/* congestion control (for slow start, source quench, retransmit after loss) */
        u_long  snd_cwnd;               /* congestion-controlled window */
        u_long  snd_ssthresh;           /* snd_cwnd size threshold for
                                         * slow start exponential to
                                         * linear switch */
        /* auto-sizing variables */
        u_int   rfbuf_cnt;              /* recv buffer autoscaling byte count */
        uint32_t rfbuf_ts;              /* recv buffer autoscaling timestamp */

        /*
         * transmit timing stuff.  See below for scale of srtt and rttvar.
         * "Variance" is actually smoothed difference.
         */
        uint32_t t_rcvtime;             /* time last segment received */
        uint32_t t_rtttime;             /* time we started measuring rtt */
        tcp_seq t_rtseq;                /* sequence number being timed */
        int32_t t_srtt;                 /* smoothed round-trip time */
        int32_t t_rttvar;               /* variance in round-trip time */
        uint32_t t_rttmin;              /* minimum rtt allowed */
        u_long  max_sndwnd;             /* largest window peer has offered */

/* out-of-band data */
        char    t_oobflags;             /* have some */
        char    t_iobc;                 /* input character */
#define TCPOOB_HAVEDATA 0x01
#define TCPOOB_HADDATA  0x02
        short   t_softerror;            /* possible error not yet reported */

/* RFC 1323 variables */
        u_char  snd_scale;              /* window scaling for send window */
        u_char  rcv_scale;              /* window scaling for recv window */
        u_char  request_r_scale;        /* pending window scaling */
        u_char  requested_s_scale;      /* scale requested by the peer */
        u_int32_t ts_recent;            /* timestamp echo data */
        u_int32_t ts_recent_age;        /* when last updated */
        u_int32_t ts_timebase;          /* our timebase */
        tcp_seq last_ack_sent;          /* last ACK number we sent */

/* RFC 3465 variables */
        u_long  t_bytes_acked;          /* ABC "bytes_acked" parameter */

/* SACK stuff */
#define TCP_SACK_MAX 3
#define TCPSACK_NONE 0
#define TCPSACK_HAVED 1
        u_char rcv_sack_flags;          /* SACK flags. */
        struct sackblk rcv_dsack_block; /* RX D-SACK block. */
        struct ipqehead timeq;          /* time sequenced queue. */
        struct sackhead snd_holes;      /* TX SACK holes. */
        int     snd_numholes;           /* Number of TX SACK holes. */
        tcp_seq rcv_lastsack;           /* last seq number(+1) sack'd by rcv'r*/
        tcp_seq sack_newdata;           /* New data xmitted in this recovery
                                           episode starts at this seq number*/
        tcp_seq snd_fack;               /* FACK TCP.  Forward-most data held by
                                           peer. */

/* CUBIC variables */
        ulong   snd_cubic_wmax;         /* W_max */
        ulong   snd_cubic_wmax_last;    /* Used for fast convergence */
        ulong   snd_cubic_ctime;        /* Last congestion time */

/* pointer for syn cache entries*/
        LIST_HEAD(, syn_cache) t_sc;    /* list of entries by this tcb */

/* prediction of next mbuf when using large window sizes */
        struct  mbuf *t_lastm;          /* last mbuf that data was sent from */
        int     t_inoff;                /* data offset in previous mbuf */
        int     t_lastoff;              /* last data address in mbuf chain */
        int     t_lastlen;              /* last length read from mbuf chain */

/* Path-MTU discovery blackhole detection */
        int     t_mtudisc;              /* perform mtudisc for this tcb */
/* Path-MTU Discovery Information */
        u_int   t_pmtud_mss_acked;      /* MSS acked, lower bound for MTU */
        u_int   t_pmtud_mtu_sent;       /* MTU used, upper bound for MTU */
        tcp_seq t_pmtud_th_seq;         /* TCP SEQ from ICMP payload */
        u_int   t_pmtud_nextmtu;        /* Advertised Next-Hop MTU from ICMP */
        u_short t_pmtud_ip_len;         /* IP length from ICMP payload */
        u_short t_pmtud_ip_hl;          /* IP header length from ICMP payload */

        uint8_t t_ecn_retries;          /* # of ECN setup retries */

        const struct tcp_congctl *t_congctl;    /* per TCB congctl algorithm */

        /* Keepalive per socket */
        u_int   t_keepinit;
        u_int   t_keepidle;
        u_int   t_keepintvl;
        u_int   t_keepcnt;
        u_int   t_maxidle;              /* t_keepcnt * t_keepintvl */

        u_int   t_msl;                  /* MSL to use for this connexion */

        /* maintain a few stats per connection: */
        uint32_t t_rcvoopack;           /* out-of-order packets received */
        uint32_t t_sndrexmitpack;       /* retransmit packets sent */
        uint32_t t_sndzerowin;          /* zero-window updates sent */
};

/*
 * Macros to aid ECN TCP.
 */
#define TCP_ECN_ALLOWED(tp)     (tp->t_flags & TF_ECN_PERMIT)

/*
 * Macros to aid SACK/FACK TCP.
 */
#define TCP_SACK_ENABLED(tp)    (tp->t_flags & TF_WILL_SACK)
#define TCP_FACK_FASTRECOV(tp)  \
        (TCP_SACK_ENABLED(tp) && \
        (SEQ_GT(tp->snd_fack, tp->snd_una + tcprexmtthresh * tp->t_segsz)))

#ifdef _KERNEL
/*
 * TCP reassembly queue locks.
 */
static __inline int  tcp_reass_lock_try (struct tcpcb *)
        __unused;
static __inline void tcp_reass_unlock (struct tcpcb *)
        __unused;

/*
 * Try to take the per-tcpcb reassembly "lock" (the TF_REASSEMBLING
 * flag).  Returns 1 on success, 0 if the flag is already set.
 */
static __inline int
tcp_reass_lock_try(struct tcpcb *tp)
{
        int s;

        /*
         * Use splvm() -- we're blocking things that would cause
         * mbuf allocation.
         */
        s = splvm();
        if (tp->t_flags & TF_REASSEMBLING) {
                splx(s);
                return (0);
        }
        tp->t_flags |= TF_REASSEMBLING;
        splx(s);
        return (1);
}

/*
 * Release the reassembly "lock".  Asserts the flag was actually held.
 */
static __inline void
tcp_reass_unlock(struct tcpcb *tp)
{
        int s;

        s = splvm();
        KASSERT((tp->t_flags & TF_REASSEMBLING) != 0);
        tp->t_flags &= ~TF_REASSEMBLING;
        splx(s);
}

#ifdef DIAGNOSTIC
/* Diagnostic variants panic on recursive lock / lock-not-held. */
#define TCP_REASS_LOCK(tp)      \
do { \
        if (tcp_reass_lock_try(tp) == 0) { \
                printf("%s:%d: tcpcb %p reass already locked\n", \
                    __FILE__, __LINE__, tp); \
                panic("tcp_reass_lock"); \
        } \
} while (/*CONSTCOND*/ 0)
#define TCP_REASS_LOCK_CHECK(tp) \
do { \
        if (((tp)->t_flags & TF_REASSEMBLING) == 0) { \
                printf("%s:%d: tcpcb %p reass lock not held\n", \
                    __FILE__, __LINE__, tp); \
                panic("tcp reass lock check"); \
        } \
} while (/*CONSTCOND*/ 0)
#else
#define TCP_REASS_LOCK(tp)      (void) tcp_reass_lock_try((tp))
#define TCP_REASS_LOCK_CHECK(tp) /* nothing */
#endif
#define TCP_REASS_UNLOCK(tp)    tcp_reass_unlock((tp))
#endif /* _KERNEL */

/*
 * Queue for delayed ACK processing.
 */
#ifdef _KERNEL
extern int tcp_delack_ticks;
void    tcp_delack(void *);

#define TCP_RESTART_DELACK(tp) \
        callout_reset(&(tp)->t_delack_ch, tcp_delack_ticks, \
            tcp_delack, tp)

#define TCP_SET_DELACK(tp) \
do { \
        if (((tp)->t_flags & TF_DELACK) == 0) { \
                (tp)->t_flags |= TF_DELACK; \
                TCP_RESTART_DELACK(tp); \
        } \
} while (/*CONSTCOND*/0)

#define TCP_CLEAR_DELACK(tp) \
do { \
        if ((tp)->t_flags & TF_DELACK) { \
                (tp)->t_flags &= ~TF_DELACK; \
                callout_stop(&(tp)->t_delack_ch); \
        } \
} while (/*CONSTCOND*/0)
#endif /* _KERNEL */

/*
 * Compute the current timestamp for a connection.
 */
#define TCP_TIMESTAMP(tp)       (tcp_now - (tp)->ts_timebase)

/*
 * Handy way of passing around TCP option info.
 */
struct tcp_opt_info {
        int             ts_present;     /* timestamp option seen */
        u_int32_t       ts_val;
        u_int32_t       ts_ecr;
        u_int16_t       maxseg;
};

#define TOF_SIGNATURE   0x0040          /* signature option present */
#define TOF_SIGLEN      0x0080          /* signature length valid (RFC2385) */

/*
 * Data for the TCP compressed state engine.
 */

union syn_cache_sa {
        struct sockaddr sa;
        struct sockaddr_in sin;
#if 1 /*def INET6*/
        struct sockaddr_in6 sin6;
#endif
};

struct syn_cache {
        TAILQ_ENTRY(syn_cache) sc_bucketq;      /* link on bucket list */
        callout_t sc_timer;                     /* rexmt timer */
        struct route sc_route;
        long sc_win;                            /* advertised window */
        int sc_bucketidx;                       /* our bucket index */
        u_int32_t sc_hash;
        u_int32_t sc_timestamp;                 /* timestamp from SYN */
        u_int32_t sc_timebase;                  /* our local timebase */
        union syn_cache_sa sc_src;
        union syn_cache_sa sc_dst;
        tcp_seq sc_irs;
        tcp_seq sc_iss;
        u_int sc_rxtcur;                        /* current rxt timeout */
        u_int sc_rxttot;                        /* total time spent on queues */
        u_short sc_rxtshift;                    /* for computing backoff */
        u_short sc_flags;

#define SCF_UNREACH             0x0001          /* we've had an unreach error */
#define SCF_TIMESTAMP           0x0002          /* peer will do timestamps */
#define SCF_DEAD                0x0004          /* this entry to be released */
#define SCF_SACK_PERMIT         0x0008          /* peer will do SACK */
#define SCF_ECN_PERMIT          0x0010          /* peer will do ECN */
#define SCF_SIGNATURE           0x40            /* send MD5 digests */

        struct mbuf *sc_ipopts;                 /* IP options */
        u_int16_t sc_peermaxseg;
        u_int16_t sc_ourmaxseg;
        u_int8_t sc_request_r_scale     : 4,
                 sc_requested_s_scale   : 4;

        struct tcpcb *sc_tp;                    /* tcb for listening socket */
        LIST_ENTRY(syn_cache) sc_tpq;           /* list of entries by same tp */
};

struct syn_cache_head {
        TAILQ_HEAD(, syn_cache) sch_bucket;     /* bucket entries */
        u_short sch_length;                     /* # entries in bucket */
};

#define intotcpcb(ip)   ((struct tcpcb *)(ip)->inp_ppcb)
#ifdef INET6
#define in6totcpcb(ip)  ((struct tcpcb *)(ip)->in6p_ppcb)
#endif
#ifndef INET6
#define sototcpcb(so)   (intotcpcb(sotoinpcb(so)))
#else
#define sototcpcb(so)   (((so)->so_proto->pr_domain->dom_family == AF_INET) \
                                ? intotcpcb(sotoinpcb(so)) \
                                : in6totcpcb(sotoin6pcb(so)))
#endif

/*
 * See RFC2988 for a discussion of RTO calculation; comments assume
 * familiarity with that document.
 *
 * The smoothed round-trip time and estimated variance are stored as
 * fixed point numbers.  Historically, srtt was scaled by
 * TCP_RTT_SHIFT bits, and rttvar by TCP_RTTVAR_SHIFT bits.  Because
 * the values coincide with the alpha and beta parameters suggested
 * for RTO calculation (1/8 for srtt, 1/4 for rttvar), the combination
 * of computing 1/8 of the new value and transforming it to the
 * fixed-point representation required zero instructions.  However,
 * the storage representations no longer coincide with the alpha/beta
 * shifts; instead, more fractional bits are present.
 *
 * The storage representation of srtt is 1/32 slow ticks, or 1/64 s.
 * (The assumption that a slow tick is 500 ms should not be present in
 * the code.)
 *
 * The storage representation of rttvar is 1/16 slow ticks, or 1/32 s.
 * There may be some confusion about this in the code.
 *
 * For historical reasons, these scales are also used in smoothing the
 * average (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
 * This results in alpha of 0.125 and beta of 0.25, following RFC2988
 * section 2.3
 *
 * XXX Change SHIFT values to LGWEIGHT and REP_SHIFT, and adjust
 * the code to use the correct ones.
 */
#define TCP_RTT_SHIFT           3       /* shift for srtt; 3 bits frac. */
#define TCP_RTTVAR_SHIFT        2       /* multiplier for rttvar; 2 bits */

/*
 * Compute TCP retransmission timer, following RFC2988.
 * This macro returns a value in slow timeout ticks.
 *
 * Section 2.2 requires that the RTO value be
 *  srtt + max(G, 4*RTTVAR)
 * where G is the clock granularity.
 *
 * This comment has not necessarily been updated for the new storage
 * representation:
 *
 * Because of the way we do the smoothing, srtt and rttvar
 * will each average +1/2 tick of bias.  When we compute
 * the retransmit timer, we want 1/2 tick of rounding and
 * 1 extra tick because of +-1/2 tick uncertainty in the
 * firing of the timer.  The bias will give us exactly the
 * 1.5 tick we need.  But, because the bias is
 * statistical, we have to test that we don't drop below
 * the minimum feasible timer (which is 2 ticks).
 * This macro assumes that the value of 1<<TCP_RTTVAR_SHIFT
 * is the same as the multiplier for rttvar.
 *
 * This macro appears to be wrong; it should be checking rttvar*4 in
 * ticks and making sure we use 1 instead if rttvar*4 rounds to 0.  It
 * appears to be treating srtt as being in the old storage
 * representation, resulting in a factor of 4 extra.
 */
#define TCP_REXMTVAL(tp) \
        ((((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) >> 2)

/*
 * Compute the initial window for slow start.
 */
#define TCP_INITIAL_WINDOW(iw, segsz) \
        uimin((iw) * (segsz), uimax(2 * (segsz), tcp_init_win_max[(iw)]))

/*
 * TCP statistics.
 * Each counter is an unsigned 64-bit value.
 *
 * Many of these should be kept per connection, but that's inconvenient
 * at the moment.
 */
#define TCP_STAT_CONNATTEMPT    0       /* connections initiated */
#define TCP_STAT_ACCEPTS        1       /* connections accepted */
#define TCP_STAT_CONNECTS       2       /* connections established */
#define TCP_STAT_DROPS          3       /* connections dropped */
#define TCP_STAT_CONNDROPS      4       /* embryonic connections dropped */
#define TCP_STAT_CLOSED         5       /* conn. closed (includes drops) */
#define TCP_STAT_SEGSTIMED      6       /* segs where we tried to get rtt */
#define TCP_STAT_RTTUPDATED     7       /* times we succeeded */
#define TCP_STAT_DELACK         8       /* delayed ACKs sent */
#define TCP_STAT_TIMEOUTDROP    9       /* conn. dropped in rxmt timeout */
#define TCP_STAT_REXMTTIMEO     10      /* retransmit timeouts */
#define TCP_STAT_PERSISTTIMEO   11      /* persist timeouts */
#define TCP_STAT_KEEPTIMEO      12      /* keepalive timeouts */
#define TCP_STAT_KEEPPROBE      13      /* keepalive probes sent */
#define TCP_STAT_KEEPDROPS      14      /* connections dropped in keepalive */
#define TCP_STAT_PERSISTDROPS   15      /* connections dropped in persist */
#define TCP_STAT_CONNSDRAINED   16      /* connections drained due to memory
                                           shortage */
#define TCP_STAT_PMTUBLACKHOLE  17      /* PMTUD blackhole detected */
#define TCP_STAT_SNDTOTAL       18      /* total packets sent */
#define TCP_STAT_SNDPACK        19      /* data packets sent */
#define TCP_STAT_SNDBYTE        20      /* data bytes sent */
#define TCP_STAT_SNDREXMITPACK  21      /* data packets retransmitted */
#define TCP_STAT_SNDREXMITBYTE  22      /* data bytes retransmitted */
#define TCP_STAT_SNDACKS        23      /* ACK-only packets sent */
#define TCP_STAT_SNDPROBE       24      /* window probes sent */
#define TCP_STAT_SNDURG         25      /* packets sent with URG only */
#define TCP_STAT_SNDWINUP       26      /* window update-only packets sent */
#define TCP_STAT_SNDCTRL        27      /* control (SYN|FIN|RST) packets sent */
#define TCP_STAT_RCVTOTAL       28      /* total packets received */
#define TCP_STAT_RCVPACK        29      /* packets received in sequence */
#define TCP_STAT_RCVBYTE        30      /* bytes received in sequence */
#define TCP_STAT_RCVBADSUM      31      /* packets received with cksum errs */
#define TCP_STAT_RCVBADOFF      32      /* packets received with bad offset */
#define TCP_STAT_RCVMEMDROP     33      /* packets dropped for lack of memory */
#define TCP_STAT_RCVSHORT       34      /* packets received too short */
#define TCP_STAT_RCVDUPPACK     35      /* duplicate-only packets received */
#define TCP_STAT_RCVDUPBYTE     36      /* duplicate-only bytes received */
#define TCP_STAT_RCVPARTDUPPACK 37      /* packets with some duplicate data */
#define TCP_STAT_RCVPARTDUPBYTE 38      /* dup. bytes in part-dup. packets */
#define TCP_STAT_RCVOOPACK      39      /* out-of-order packets received */
#define TCP_STAT_RCVOOBYTE      40      /* out-of-order bytes received */
#define TCP_STAT_RCVPACKAFTERWIN 41     /* packets with data after window */
#define TCP_STAT_RCVBYTEAFTERWIN 42     /* bytes received after window */
#define TCP_STAT_RCVAFTERCLOSE  43      /* packets received after "close" */
#define TCP_STAT_RCVWINPROBE    44      /* rcvd window probe packets */
#define TCP_STAT_RCVDUPACK      45      /* rcvd duplicate ACKs */
#define TCP_STAT_RCVACKTOOMUCH  46      /* rcvd ACKs for unsent data */
#define TCP_STAT_RCVACKPACK     47      /* rcvd ACK packets */
#define TCP_STAT_RCVACKBYTE     48      /* bytes ACKed by rcvd ACKs */
#define TCP_STAT_RCVWINUPD      49      /* rcvd window update packets */
#define TCP_STAT_PAWSDROP       50      /* segments dropped due to PAWS */
#define TCP_STAT_PREDACK        51      /* times hdr predict OK for ACKs */
#define TCP_STAT_PREDDAT        52      /* times hdr predict OK for data pkts */
#define TCP_STAT_PCBHASHMISS    53      /* input packets missing PCB hash */
#define TCP_STAT_NOPORT         54      /* no socket on port */
#define TCP_STAT_BADSYN         55      /* received ACK for which we have
                                           no SYN in compressed state */
#define TCP_STAT_DELAYED_FREE   56      /* delayed pool_put() of tcpcb */
#define TCP_STAT_SC_ADDED       57      /* # of sc entries added */
#define TCP_STAT_SC_COMPLETED   58      /* # of sc connections completed */
#define TCP_STAT_SC_TIMED_OUT   59      /* # of sc entries timed out */
#define TCP_STAT_SC_OVERFLOWED  60      /* # of sc drops due to overflow */
#define TCP_STAT_SC_RESET       61      /* # of sc drops due to RST */
#define TCP_STAT_SC_UNREACH     62      /* # of sc drops due to ICMP unreach */
#define TCP_STAT_SC_BUCKETOVERFLOW 63   /* # of sc drops due to bucket ovflow */
#define TCP_STAT_SC_ABORTED     64      /* # of sc entries aborted (no mem) */
#define TCP_STAT_SC_DUPESYN     65      /* # of duplicate SYNs received */
#define TCP_STAT_SC_DROPPED     66      /* # of SYNs dropped (no route/mem) */
#define TCP_STAT_SC_COLLISIONS  67      /* # of sc hash collisions */
#define TCP_STAT_SC_RETRANSMITTED 68    /* # of sc retransmissions
*/ #define TCP_STAT_SC_DELAYED_FREE 69 /* # of delayed pool_put()s */ #define TCP_STAT_SELFQUENCH 70 /* # of ENOBUFS we get on output */ #define TCP_STAT_BADSIG 71 /* # of drops due to bad signature */ #define TCP_STAT_GOODSIG 72 /* # of packets with good signature */ #define TCP_STAT_ECN_SHS 73 /* # of successful ECN handshakes */ #define TCP_STAT_ECN_CE 74 /* # of packets with CE bit */ #define TCP_STAT_ECN_ECT 75 /* # of packets with ECT(0) bit */ #define TCP_NSTATS 76 /* * Names for TCP sysctl objects. */ #define TCPCTL_RFC1323 1 /* RFC1323 timestamps/scaling */ #define TCPCTL_SENDSPACE 2 /* default send buffer */ #define TCPCTL_RECVSPACE 3 /* default recv buffer */ #define TCPCTL_MSSDFLT 4 /* default seg size */ #define TCPCTL_SYN_CACHE_LIMIT 5 /* max size of comp. state engine */ #define TCPCTL_SYN_BUCKET_LIMIT 6 /* max size of hash bucket */ #if 0 /*obsoleted*/ #define TCPCTL_SYN_CACHE_INTER 7 /* interval of comp. state timer */ #endif #define TCPCTL_INIT_WIN 8 /* initial window */ #define TCPCTL_MSS_IFMTU 9 /* mss from interface, not in_maxmtu */ #define TCPCTL_SACK 10 /* RFC2018 selective acknowledgement */ #define TCPCTL_WSCALE 11 /* RFC1323 window scaling */ #define TCPCTL_TSTAMP 12 /* RFC1323 timestamps */ #if 0 /*obsoleted*/ #define TCPCTL_COMPAT_42 13 /* 4.2BSD TCP bug work-arounds */ #endif #define TCPCTL_CWM 14 /* Congestion Window Monitoring */ #define TCPCTL_CWM_BURSTSIZE 15 /* burst size allowed by CWM */ #define TCPCTL_ACK_ON_PUSH 16 /* ACK immediately on PUSH */ #define TCPCTL_KEEPIDLE 17 /* keepalive idle time */ #define TCPCTL_KEEPINTVL 18 /* keepalive probe interval */ #define TCPCTL_KEEPCNT 19 /* keepalive count */ #define TCPCTL_SLOWHZ 20 /* PR_SLOWHZ (read-only) */ #define TCPCTL_NEWRENO 21 /* NewReno Congestion Control */ #define TCPCTL_LOG_REFUSED 22 /* Log refused connections */ #if 0 /*obsoleted*/ #define TCPCTL_RSTRATELIMIT 23 /* RST rate limit */ #endif #define TCPCTL_RSTPPSLIMIT 24 /* RST pps limit */ #define TCPCTL_DELACK_TICKS 25 
/* # ticks to delay ACK */ #define TCPCTL_INIT_WIN_LOCAL 26 /* initial window for local nets */ #define TCPCTL_IDENT 27 /* rfc 931 identd */ #define TCPCTL_ACKDROPRATELIMIT 28 /* SYN/RST -> ACK rate limit */ #define TCPCTL_LOOPBACKCKSUM 29 /* do TCP checksum on loopback */ #define TCPCTL_STATS 30 /* TCP statistics */ #define TCPCTL_DEBUG 31 /* TCP debug sockets */ #define TCPCTL_DEBX 32 /* # of tcp debug sockets */ #define TCPCTL_DROP 33 /* drop tcp connection */ #define TCPCTL_MSL 34 /* Max Segment Life */ #ifdef _KERNEL extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ extern const struct pr_usrreqs tcp_usrreqs; extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern int tcp_do_rfc1323; /* enabled/disabled? */ extern int tcp_do_sack; /* SACK enabled/disabled? */ extern int tcp_do_win_scale; /* RFC1323 window scaling enabled/disabled? */ extern int tcp_do_timestamps; /* RFC1323 timestamps enabled/disabled? */ extern int tcp_mssdflt; /* default seg size */ extern int tcp_minmss; /* minimal seg size */ extern int tcp_msl; /* max segment life */ extern int tcp_init_win; /* initial window */ extern int tcp_init_win_local; /* initial window for local nets */ extern int tcp_init_win_max[11];/* max sizes for values of tcp_init_win_* */ extern int tcp_mss_ifmtu; /* take MSS from interface, not in_maxmtu */ extern int tcp_cwm; /* enable Congestion Window Monitoring */ extern int tcp_cwm_burstsize; /* burst size allowed by CWM */ extern int tcp_ack_on_push; /* ACK immediately on PUSH */ extern int tcp_syn_cache_limit; /* max entries for compressed state engine */ extern int tcp_syn_bucket_limit;/* max entries per hash bucket */ extern int tcp_log_refused; /* log refused connections */ extern int tcp_do_ecn; /* TCP ECN enabled/disabled? */ extern int tcp_ecn_maxretries; /* Max ECN setup retries */ extern int tcp_do_rfc1948; /* ISS by cryptographic hash */ extern int tcp_sack_tp_maxholes; /* Max holes per connection. 
*/ extern int tcp_sack_globalmaxholes; /* Max holes per system. */ extern int tcp_sack_globalholes; /* Number of holes present. */ extern int tcp_do_abc; /* RFC3465 ABC enabled/disabled? */ extern int tcp_abc_aggressive; /* 1: L=2*SMSS 0: L=1*SMSS */ extern int tcp_msl_enable; /* enable TIME_WAIT truncation */ extern int tcp_msl_loop; /* MSL for loopback */ extern int tcp_msl_local; /* MSL for 'local' */ extern int tcp_msl_remote; /* MSL otherwise */ extern int tcp_msl_remote_threshold; /* RTT threshold */ extern int tcp_rttlocal; /* Use RTT to decide who's 'local' */ extern int tcp4_vtw_enable; extern int tcp6_vtw_enable; extern int tcp_vtw_was_enabled; extern int tcp_vtw_entries; extern int tcp_rst_ppslim; extern int tcp_ackdrop_ppslim; extern int tcp_syn_cache_size; extern struct syn_cache_head tcp_syn_cache[]; extern u_long syn_cache_count; #ifdef MBUFTRACE extern struct mowner tcp_rx_mowner; extern struct mowner tcp_tx_mowner; extern struct mowner tcp_reass_mowner; extern struct mowner tcp_sock_mowner; extern struct mowner tcp_sock_rx_mowner; extern struct mowner tcp_sock_tx_mowner; extern struct mowner tcp_mowner; #endif extern int tcp_do_autorcvbuf; extern int tcp_autorcvbuf_inc; extern int tcp_autorcvbuf_max; extern int tcp_do_autosndbuf; extern int tcp_autosndbuf_inc; extern int tcp_autosndbuf_max; struct secasvar; void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); int tcp_isdead(struct tcpcb *); #ifdef INET6 void *tcp6_ctlinput(int, const struct sockaddr *, void *); #endif void *tcp_ctlinput(int, const struct sockaddr *, void *); int tcp_ctloutput(int, struct socket *, struct sockopt *); struct tcpcb * tcp_disconnect1(struct tcpcb *); struct tcpcb * tcp_drop(struct tcpcb *, int); #ifdef TCP_SIGNATURE int tcp_signature_apply(void *, void *, u_int); struct secasvar *tcp_signature_getsav(struct mbuf *); int tcp_signature(struct mbuf *, struct tcphdr *, int, struct secasvar *, char *); #endif void tcp_drain(void); void 
tcp_drainstub(void); void tcp_established(struct tcpcb *); void tcp_init(void); void tcp_init_common(unsigned); #ifdef INET6 int tcp6_input(struct mbuf **, int *, int); #endif void tcp_input(struct mbuf *, int, int); u_int tcp_hdrsz(struct tcpcb *); u_long tcp_mss_to_advertise(const struct ifnet *, int); void tcp_mss_from_peer(struct tcpcb *, int); void tcp_tcpcb_template(void); struct tcpcb * tcp_newtcpcb(int, void *); void tcp_notify(struct inpcb *, int); #ifdef INET6 void tcp6_notify(struct in6pcb *, int); #endif u_int tcp_optlen(struct tcpcb *); int tcp_output(struct tcpcb *); void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); void tcp_quench(struct inpcb *); #ifdef INET6 void tcp6_quench(struct in6pcb *); #endif void tcp_mtudisc(struct inpcb *, int); #ifdef INET6 void tcp6_mtudisc_callback(struct in6_addr *); #endif void tcpipqent_init(void); struct ipqent *tcpipqent_alloc(void); void tcpipqent_free(struct ipqent *); int tcp_respond(struct tcpcb *, struct mbuf *, struct mbuf *, struct tcphdr *, tcp_seq, tcp_seq, int); void tcp_rmx_rtt(struct tcpcb *); void tcp_setpersist(struct tcpcb *); #ifdef TCP_SIGNATURE int tcp_signature_compute(struct mbuf *, struct tcphdr *, int, int, int, u_char *, u_int); #endif void tcp_fasttimo(void); struct mbuf * tcp_template(struct tcpcb *); void tcp_trace(short, short, struct tcpcb *, struct mbuf *, int); struct tcpcb * tcp_usrclosed(struct tcpcb *); void tcp_usrreq_init(void); void tcp_xmit_timer(struct tcpcb *, uint32_t); tcp_seq tcp_new_iss(struct tcpcb *); tcp_seq tcp_new_iss1(void *, void *, u_int16_t, u_int16_t, size_t); void tcp_sack_init(void); void tcp_new_dsack(struct tcpcb *, tcp_seq, u_int32_t); void tcp_sack_option(struct tcpcb *, const struct tcphdr *, const u_char *, int); void tcp_del_sackholes(struct tcpcb *, const struct tcphdr *); void tcp_free_sackholes(struct tcpcb *); void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); 
int tcp_sack_numblks(const struct tcpcb *); #define TCP_SACK_OPTLEN(nblks) ((nblks) * 8 + 2 + 2) void tcp_statinc(u_int); void tcp_statadd(u_int, uint64_t); int syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *, unsigned int, struct socket *, struct mbuf *, u_char *, int, struct tcp_opt_info *); void syn_cache_unreach(const struct sockaddr *, const struct sockaddr *, struct tcphdr *); struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *, struct tcphdr *, struct socket *so, struct mbuf *); void syn_cache_init(void); void syn_cache_insert(struct syn_cache *, struct tcpcb *); struct syn_cache *syn_cache_lookup(const struct sockaddr *, const struct sockaddr *, struct syn_cache_head **); void syn_cache_reset(struct sockaddr *, struct sockaddr *, struct tcphdr *); int syn_cache_respond(struct syn_cache *); void syn_cache_cleanup(struct tcpcb *); int tcp_input_checksum(int, struct mbuf *, const struct tcphdr *, int, int, int); #endif #endif /* !_NETINET_TCP_VAR_H_ */ |
| 30 30 30 30 30 29 30 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 | /* $NetBSD: subr_ipi.c,v 1.10 2022/04/09 23:51:22 riastradh Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation * by Mindaugas Rasiukevicius. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Inter-processor interrupt (IPI) interface: asynchronous IPIs to * invoke functions with a constant argument and synchronous IPIs * with the cross-call support. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: subr_ipi.c,v 1.10 2022/04/09 23:51:22 riastradh Exp $"); #include <sys/param.h> #include <sys/types.h> #include <sys/atomic.h> #include <sys/evcnt.h> #include <sys/cpu.h> #include <sys/ipi.h> #include <sys/intr.h> #include <sys/kcpuset.h> #include <sys/kmem.h> #include <sys/lock.h> #include <sys/mutex.h> /* * An array of the IPI handlers used for asynchronous invocation. 
* The lock protects the slot allocation.
*/
typedef struct {
	ipi_func_t	func;	/* handler invoked on the target CPU */
	void *		arg;	/* constant argument passed to func */
} ipi_intr_t;

/* Serialises slot allocation in ipi_intrs[]. */
static kmutex_t		ipi_mngmt_lock;
static ipi_intr_t	ipi_intrs[IPI_MAXREG]	__cacheline_aligned;

/*
 * Per-CPU mailbox for IPI messages: it is a single cache line storing
 * up to IPI_MSG_MAX messages.  This interface is built on top of the
 * synchronous IPIs.
 */
#define	IPI_MSG_SLOTS	(CACHE_LINE_SIZE / sizeof(ipi_msg_t *))
#define	IPI_MSG_MAX	IPI_MSG_SLOTS

typedef struct {
	/* One pointer slot per message; slots are claimed with CAS. */
	ipi_msg_t *	msg[IPI_MSG_SLOTS];
} ipi_mbox_t;

/* Mailboxes for the synchronous IPIs. */
static ipi_mbox_t *	ipi_mboxes	__read_mostly;
/* Counts how often all mailbox slots were full and a sender had to spin. */
static struct evcnt	ipi_mboxfull_ev	__cacheline_aligned;
static void		ipi_msg_cpu_handler(void *);

/* Handler for the synchronous IPIs - it must be zero. */
#define	IPI_SYNCH_ID	0

#ifndef MULTIPROCESSOR
/* Uniprocessor build: there is no remote CPU to interrupt. */
#define	cpu_ipi(ci)	KASSERT(ci == NULL)
#endif

/*
 * ipi_sysinit: initialise IPI subsystem state: the management lock,
 * the handler table and the mailbox-full event counter.
 */
void
ipi_sysinit(void)
{

	mutex_init(&ipi_mngmt_lock, MUTEX_DEFAULT, IPL_NONE);
	memset(ipi_intrs, 0, sizeof(ipi_intrs));

	/*
	 * Register the handler for synchronous IPIs.  This mechanism
	 * is built on top of the asynchronous interface.  Slot zero is
	 * reserved permanently; it is also handy to use zero as a failure
	 * for other registers (as it is potentially less error-prone).
	 */
	ipi_intrs[IPI_SYNCH_ID].func = ipi_msg_cpu_handler;

	evcnt_attach_dynamic(&ipi_mboxfull_ev, EVCNT_TYPE_MISC, NULL,
	   "ipi", "full");
}

/*
 * ipi_percpu_init: clear each CPU's pending-IPI bit field and allocate
 * the array of per-CPU mailboxes (one ipi_mbox_t per CPU).
 */
void
ipi_percpu_init(void)
{
	const size_t len = ncpu * sizeof(ipi_mbox_t);

	/* Initialise the per-CPU bit fields. */
	for (u_int i = 0; i < ncpu; i++) {
		struct cpu_info *ci = cpu_lookup(i);
		memset(&ci->ci_ipipend, 0, sizeof(ci->ci_ipipend));
	}

	/* Allocate per-CPU IPI mailboxes. */
	/* KM_SLEEP blocks until memory is available, so this cannot fail;
	 * the KASSERT below is a belt-and-braces sanity check. */
	ipi_mboxes = kmem_zalloc(len, KM_SLEEP);
	KASSERT(ipi_mboxes != NULL);
}

/*
 * ipi_register: register an asynchronous IPI handler.
 *
 * => Returns IPI ID which is greater than zero; on failure - zero.
*/
u_int
ipi_register(ipi_func_t func, void *arg)
{
	/* Linear scan for a free slot; the lock serialises allocators. */
	mutex_enter(&ipi_mngmt_lock);
	for (u_int i = 0; i < IPI_MAXREG; i++) {
		if (ipi_intrs[i].func == NULL) {
			/* Register the function. */
			ipi_intrs[i].func = func;
			ipi_intrs[i].arg = arg;
			mutex_exit(&ipi_mngmt_lock);
			/* Slot 0 is permanently taken by the synchronous
			 * IPI handler, so a valid ID is never zero. */
			KASSERT(i != IPI_SYNCH_ID);
			return i;
		}
	}
	mutex_exit(&ipi_mngmt_lock);
	printf("WARNING: ipi_register: table full, increase IPI_MAXREG\n");
	return 0;
}

/*
 * ipi_unregister: release the IPI handler given the ID.
 *
 * Broadcasts a do-nothing message (nullop) and waits for it, so that
 * every CPU has drained any in-flight use of the slot before it can
 * be handed out again by ipi_register().
 */
void
ipi_unregister(u_int ipi_id)
{
	ipi_msg_t ipimsg = { .func = __FPTRCAST(ipi_func_t, nullop) };

	KASSERT(ipi_id != IPI_SYNCH_ID);
	KASSERT(ipi_id < IPI_MAXREG);

	/* Release the slot. */
	mutex_enter(&ipi_mngmt_lock);
	KASSERT(ipi_intrs[ipi_id].func != NULL);
	ipi_intrs[ipi_id].func = NULL;

	/* Ensure that there are no IPIs in flight. */
	kpreempt_disable();
	ipi_broadcast(&ipimsg, false);
	ipi_wait(&ipimsg);
	kpreempt_enable();
	mutex_exit(&ipi_mngmt_lock);
}

/*
 * ipi_mark_pending: internal routine to mark an IPI pending on the
 * specified CPU (which might be curcpu()).
 *
 * => Returns true if the bit was newly set (caller should interrupt
 *    the CPU), false if the IPI was already pending there.
 */
static bool
ipi_mark_pending(u_int ipi_id, struct cpu_info *ci)
{
	/* Locate the 32-bit word and bit for this IPI ID. */
	const u_int i = ipi_id >> IPI_BITW_SHIFT;
	const uint32_t bitm = 1U << (ipi_id & IPI_BITW_MASK);

	KASSERT(ipi_id < IPI_MAXREG);
	KASSERT(kpreempt_disabled());

	/* Mark as pending and return true if not previously marked. */
	if ((atomic_load_acquire(&ci->ci_ipipend[i]) & bitm) == 0) {
		/* Publish our prior stores before the bit becomes
		 * visible to the target CPU's handler. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_release();
#endif
		atomic_or_32(&ci->ci_ipipend[i], bitm);
		return true;
	}
	return false;
}

/*
 * ipi_trigger: asynchronously send an IPI to the specified CPU.
 *
 * => The target must be a remote CPU (see ipi_trigger_multi_internal()
 *    for the self-delivery path).
 */
void
ipi_trigger(u_int ipi_id, struct cpu_info *ci)
{
	KASSERT(curcpu() != ci);

	/* Only interrupt the CPU if the IPI was not already pending. */
	if (ipi_mark_pending(ipi_id, ci)) {
		cpu_ipi(ci);
	}
}

/*
 * ipi_trigger_multi_internal: the guts of ipi_trigger_multi() and
 * ipi_trigger_broadcast().
*/
static void
ipi_trigger_multi_internal(u_int ipi_id, const kcpuset_t *target,
    bool skip_self)
{
	const cpuid_t selfid = cpu_index(curcpu());
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(kpreempt_disabled());
	KASSERT(target != NULL);

	/* First, interrupt every remote CPU that is in the target set. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		const cpuid_t cpuid = cpu_index(ci);

		if (!kcpuset_isset(target, cpuid) || cpuid == selfid) {
			continue;
		}
		ipi_trigger(ipi_id, ci);
	}
	/*
	 * Self-delivery: mark the IPI pending locally and run the
	 * handler directly at splhigh instead of interrupting ourselves.
	 */
	if (!skip_self && kcpuset_isset(target, selfid)) {
		ipi_mark_pending(ipi_id, curcpu());
		int s = splhigh();
		ipi_cpu_handler();
		splx(s);
	}
}

/*
 * ipi_trigger_multi: same as ipi_trigger() but sends to the multiple
 * CPUs given the target CPU set.
 */
void
ipi_trigger_multi(u_int ipi_id, const kcpuset_t *target)
{
	ipi_trigger_multi_internal(ipi_id, target, false);
}

/*
 * ipi_trigger_broadcast: same as ipi_trigger_multi() to kcpuset_attached,
 * optionally skipping the sending CPU.
 */
void
ipi_trigger_broadcast(u_int ipi_id, bool skip_self)
{
	ipi_trigger_multi_internal(ipi_id, kcpuset_attached, skip_self);
}

/*
 * put_msg: insert message into the mailbox.
 *
 * Caller is responsible for issuing membar_release first.
 */
static inline void
put_msg(ipi_mbox_t *mbox, ipi_msg_t *msg)
{
	int count = SPINLOCK_BACKOFF_MIN;
again:
	/* Claim any empty slot with a compare-and-swap against NULL. */
	for (u_int i = 0; i < IPI_MSG_MAX; i++) {
		if (atomic_cas_ptr(&mbox->msg[i], NULL, msg) == NULL) {
			return;
		}
	}

	/* All slots are full: we have to spin-wait. */
	ipi_mboxfull_ev.ev_count++;
	SPINLOCK_BACKOFF(count);
	goto again;
}

/*
 * ipi_cpu_handler: the IPI handler.
 */
void
ipi_cpu_handler(void)
{
	struct cpu_info * const ci = curcpu();

	/*
	 * Handle asynchronous IPIs: inspect per-CPU bit field, extract
	 * IPI ID numbers and execute functions in those slots.
	 */
	for (u_int i = 0; i < IPI_BITWORDS; i++) {
		uint32_t pending, bit;

		/* Cheap relaxed peek before the atomic swap. */
		if (atomic_load_relaxed(&ci->ci_ipipend[i]) == 0) {
			continue;
		}
		/* Atomically take the whole word; pair with the sender's
		 * release in ipi_mark_pending(). */
		pending = atomic_swap_32(&ci->ci_ipipend[i], 0);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_acquire();
#endif
		/* ffs() is 1-based, hence the --bit before use. */
		while ((bit = ffs(pending)) != 0) {
			const u_int ipi_id = (i << IPI_BITW_SHIFT) | --bit;
			ipi_intr_t *ipi_hdl = &ipi_intrs[ipi_id];

			pending &= ~(1U << bit);
			KASSERT(ipi_hdl->func != NULL);
			ipi_hdl->func(ipi_hdl->arg);
		}
	}
}

/*
 * ipi_msg_cpu_handler: handle synchronous IPIs - iterate mailbox,
 * execute the passed functions and acknowledge the messages.
 */
static void
ipi_msg_cpu_handler(void *arg __unused)
{
	const struct cpu_info * const ci = curcpu();
	ipi_mbox_t *mbox = &ipi_mboxes[cpu_index(ci)];

	for (u_int i = 0; i < IPI_MSG_MAX; i++) {
		ipi_msg_t *msg;

		/* Get the message. */
		if ((msg = atomic_load_acquire(&mbox->msg[i])) == NULL) {
			continue;
		}
		/* Free the slot; only this CPU consumes its mailbox. */
		atomic_store_relaxed(&mbox->msg[i], NULL);

		/* Execute the handler. */
		KASSERT(msg->func);
		msg->func(msg->arg);

		/* Ack the request. */
		/* Release so the waiter in ipi_wait() observes the
		 * handler's effects once _pending drops. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_release();
#endif
		atomic_dec_uint(&msg->_pending);
	}
}

/*
 * ipi_unicast: send an IPI to a single CPU.
 *
 * => The CPU must be remote; must not be local.
 * => The caller must ipi_wait() on the message for completion.
 */
void
ipi_unicast(ipi_msg_t *msg, struct cpu_info *ci)
{
	const cpuid_t id = cpu_index(ci);

	KASSERT(msg->func != NULL);
	KASSERT(kpreempt_disabled());
	KASSERT(curcpu() != ci);

	/* One remote handler will decrement _pending to zero. */
	msg->_pending = 1;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	put_msg(&ipi_mboxes[id], msg);
	ipi_trigger(IPI_SYNCH_ID, ci);
}

/*
 * ipi_multicast: send an IPI to each CPU in the specified set.
 *
 * => The caller must ipi_wait() on the message for completion.
*/
void
ipi_multicast(ipi_msg_t *msg, const kcpuset_t *target)
{
	const struct cpu_info * const self = curcpu();
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	u_int local;

	KASSERT(msg->func != NULL);
	KASSERT(kpreempt_disabled());

	/*
	 * _pending counts only the remote targets: if this CPU is in
	 * the set (local == 1) its handler runs synchronously below and
	 * is excluded from the count.
	 */
	local = !!kcpuset_isset(target, cpu_index(self));
	msg->_pending = kcpuset_countset(target) - local;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif

	/* Post the message to each remote CPU in the target set. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		cpuid_t id;

		if (__predict_false(ci == self)) {
			continue;
		}
		id = cpu_index(ci);
		if (!kcpuset_isset(target, id)) {
			continue;
		}
		put_msg(&ipi_mboxes[id], msg);
		ipi_trigger(IPI_SYNCH_ID, ci);
	}

	/* Execute locally, if this CPU is a target. */
	if (local) {
		msg->func(msg->arg);
	}
}

/*
 * ipi_broadcast: send an IPI to all CPUs.
 *
 * => The caller must ipi_wait() on the message for completion.
 */
void
ipi_broadcast(ipi_msg_t *msg, bool skip_self)
{
	const struct cpu_info * const self = curcpu();
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(msg->func != NULL);
	KASSERT(kpreempt_disabled());

	/* All CPUs except this one are remote targets. */
	msg->_pending = ncpu - 1;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif

	/* Broadcast IPIs for remote CPUs. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		cpuid_t id;

		if (__predict_false(ci == self)) {
			continue;
		}
		id = cpu_index(ci);
		put_msg(&ipi_mboxes[id], msg);
		ipi_trigger(IPI_SYNCH_ID, ci);
	}

	if (!skip_self) {
		/* Finally, execute locally. */
		msg->func(msg->arg);
	}
}

/*
 * ipi_wait: spin-wait until the message is processed.
 *
 * The acquire load pairs with the release performed by each handler
 * just before it decrements _pending in ipi_msg_cpu_handler().
 */
void
ipi_wait(ipi_msg_t *msg)
{
	int count = SPINLOCK_BACKOFF_MIN;

	while (atomic_load_acquire(&msg->_pending)) {
		/* Sanity: there can never be ncpu or more outstanding. */
		KASSERT(atomic_load_relaxed(&msg->_pending) < ncpu);
		SPINLOCK_BACKOFF(count);
	}
}
| 126 126 126 54 54 54 54 38 47 67 67 67 40 62 62 593 592 193 80 80 80 80 9 575 575 574 534 569 569 27 27 19 19 19 27 27 27 27 18 25 27 26 26 7 26 27 542 542 33 33 529 33 542 13 13 13 9 9 5 9 5 2 2 2 3 17 17 13 3 11 3 12 4 27 23 23 23 23 23 23 3 3 1 2 2 3 2 2 3 23 71 23 2 2 129 1 128 1 127 1 126 126 45 45 45 45 45 45 45 45 45 43 45 45 45 45 45 45 21 21 21 21 5 5 17 19 14 19 5 4 5 9 9 9 9 9 9 9 14 16 9 9 9 8 8 8 202 203 198 197 14 6 5 4 1 10 9 9 54 87 87 72 41 72 72 87 80 87 87 87 66 87 87 87 86 87 87 20 45 9 9 9 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 | /* $NetBSD: vfs_mount.c,v 1.95 2022/08/22 09:14:24 hannken Exp $ */ /*- * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. 
Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.95 2022/08/22 09:14:24 hannken Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_swap.h>

/*
 * The global mount list holds two kinds of entries: real mounts and
 * per-iterator markers that record an iterator's current position.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
/* Public iterator handle; simply wraps a marker entry. */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list.
*/
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

kmutex_t vfs_list_lock __cacheline_aligned;

static specificdata_domain_t mount_specificdata_domain;
static kmutex_t mntid_lock;

/* Protects mountgen, the source of unique mount generation numbers. */
static kmutex_t mountgen_lock __cacheline_aligned;
static uint64_t mountgen;

/*
 * One-time initialization of the mount subsystem: the mount list, its
 * locks and the mount-specificdata domain.
 */
void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}

/*
 * Allocate and minimally initialize a mount structure for the given
 * file system type, covering vnode vp (may be NULL).  The caller owns
 * the initial reference.  Each mount is stamped with a unique,
 * monotonically increasing generation number.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	error = fstrans_mount(mp);
	KASSERT(error == 0);

	/* Hand out the next generation number under mountgen_lock. */
	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;
	int error __diagused;

	/* Look the type up on the global list of loaded file systems. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	error = vfs_busy(mp);
	KASSERT(error == 0);
	mp->mnt_flag = MNT_RDONLY;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	/* Ensure f_mntfromname stays NUL terminated after the copy. */
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return 0;
}

/*
 * vfs_getnewfsid: get a new unique fsid.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	/* Probe candidate ids until one is not already in use. */
	while (vfs_getvfs(&tfsid)) {
		tfsid.__fsid_val[0]++;
		xxxfs_mntid++;
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
*/
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	mount_iterator_t *iter;
	struct mount *mp;

	/* Linear scan of the mount list for a matching fsid pair. */
	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			mountlist_iterator_destroy(iter);
			return mp;
		}
	}
	mountlist_iterator_destroy(iter);
	return NULL;
}

/*
 * Take a reference to a mount structure.
 */
void
vfs_ref(struct mount *mp)
{

	/*
	 * A zero refcnt is only tolerated while the mount list lock is
	 * held (new mounts being inserted).
	 */
	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

	/* Release ordering before the decrement, acquire after; the
	 * membars pair across threads dropping references. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_obj_free(mp->mnt_updating);
	mutex_obj_free(mp->mnt_renamelock);
	mutex_obj_free(mp->mnt_vnodelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	fstrans_unmount(mp);
	/*
	 * Final free of mp gets done from fstrans_mount_dtor().
	 *
	 * Prevents this memory to be reused as a mount before
	 * fstrans releases all references to it.
	 */
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * vfs_busy can be called multiple times and by multiple threads
 * and must be accompanied by the same number of vfs_unbusy calls.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
*/
static inline int
_vfs_busy(struct mount *mp, bool wait)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* Enter the file system transaction; optionally non-blocking. */
	if (wait) {
		fstrans_start(mp);
	} else {
		if (fstrans_start_nowait(mp))
			return EBUSY;
	}
	/* Mount may have been unmounted while we waited. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		fstrans_done(mp);
		return ENOENT;
	}
	vfs_ref(mp);
	return 0;
}

/* Busy a mount, waiting for suspension to pass. */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}

/* Busy a mount without blocking; EBUSY if it cannot be done now. */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}

/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}

/* Iterator over a mount's vnode list; implemented as a marker vnode. */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};

/*
 * Start iterating over the vnodes of mount mp.  Inserts a marker vnode
 * at the head of the mount's vnode list to record the position.
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	/* v_usecount != 0 means "marker is on the list" here. */
	vp->v_usecount = 1;
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}

/*
 * Finish iterating: unlink the marker (if still linked) and free it.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	if (vrefcnt(mvp) != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}

/*
 * Return the next vnode after the iterator's marker that passes the
 * optional filter f(cl, vp) and can be referenced, re-inserting the
 * marker after it; NULL at end of list.  Dead and marker vnodes are
 * skipped.  The whole step is retried if vcache_vget() loses a race
 * (ENOENT).
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			/* Skip this one and examine its successor. */
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Park the marker after the candidate before unlocking. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip,
		    vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}

/* Public variant: never wait for dying vnodes. */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;
	kmutex_t *lock;

	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL) {
		lock = omp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL) {
		lock = mp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
*/
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * Fetch the next vnode from the iterator, yielding the CPU roughly
 * every hz/10 ticks so a long flush does not hog the processor.
 */
static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{

	if (getticks() > *when) {
		yield();
		*when = getticks() + hz / 10;
	}
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}

/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			/* Only a dying vnode can fail the lock here. */
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Not open for write and still linked: nothing to do. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}

/*
 * Flush all vnodes on mount mp except skipvp, honouring the flag bits
 * described above.  Makes up to three passes; returns EBUSY if busy
 * vnodes remain, or the first hard error from vflush_one().
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}

/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted. If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			/*
			 * Found a match: pin the cwdinfo, drop proc_lock
			 * to take the cwdi lock, swap in newdp, then
			 * restart the scan from the top.
			 */
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}

/*
 * Start extended attributes
 */
static int
start_extattr(struct mount *mp)
{
	int error;

	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
	if (error)
		printf("%s: failed to start extattr: error = %d\n",
		    mp->mnt_stat.f_mntonname, error);

	return error;
}

/*
 * Perform a new mount of file system type vfsops on the directory *vpp.
 * On success *vpp is consumed (set to NULL); on failure all temporary
 * state is rolled back.  Implements the kernel side of mount(2).
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, error2;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0) {
		vfs_rele(mp);
		return error;
	}

	/* Suspend new file system before taking mnt_updating. */
	do {
		error2 = vfs_suspend(mp, 0);
	} while (error2 == EINTR || error2 == ERESTART);
	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
	mutex_enter(mp->mnt_updating);

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the same directory vnode. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* Roll back the successful VFS_MOUNT() above. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);
	vfs_rele(mp);

	return error;
}

/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
*/
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	/* Suspend the file system unless the caller already owns it. */
	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	/* Remember state we must restore if the unmount fails. */
	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* Failed: undo everything done above. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * mark filesystem as gone to prevent further umounts
	 * after mnt_umounting lock is gone, this also prevents
	 * vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		coveredvp->v_mountedhere = NULL;
	}
	if (!was_suspended)
		vfs_resume(mp);

	mountlist_remove(mp);

	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}

/*
 * Unmount all file systems.
* We traverse the list in reverse order under the assumption that doing so * will avoid needing to worry about dependencies. */ bool vfs_unmountall(struct lwp *l) { printf("unmounting file systems...\n"); return vfs_unmountall1(l, true, true); } static void vfs_unmount_print(struct mount *mp, const char *pfx) { aprint_verbose("%sunmounted %s on %s type %s\n", pfx, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); } /* * Return the mount with the highest generation less than "gen". */ static struct mount * vfs_unmount_next(uint64_t gen) { mount_iterator_t *iter; struct mount *mp, *nmp; nmp = NULL; mountlist_iterator_init(&iter); while ((mp = mountlist_iterator_next(iter)) != NULL) { if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && mp->mnt_gen < gen) { if (nmp != NULL) vfs_rele(nmp); nmp = mp; vfs_ref(nmp); } } mountlist_iterator_destroy(iter); return nmp; } bool vfs_unmount_forceone(struct lwp *l) { struct mount *mp; int error; mp = vfs_unmount_next(mountgen); if (mp == NULL) { return false; } #ifdef DEBUG printf("forcefully unmounting %s (%s)...\n", mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); #endif if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { vfs_unmount_print(mp, "forcefully "); return true; } else { vfs_rele(mp); } #ifdef DEBUG printf("forceful unmount of %s failed with error %d\n", mp->mnt_stat.f_mntonname, error); #endif return false; } bool vfs_unmountall1(struct lwp *l, bool force, bool verbose) { struct mount *mp; mount_iterator_t *iter; bool any_error = false, progress = false; uint64_t gen; int error; gen = mountgen; for (;;) { mp = vfs_unmount_next(gen); if (mp == NULL) break; gen = mp->mnt_gen; #ifdef DEBUG printf("unmounting %p %s (%s)...\n", (void *)mp, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); #endif if ((error = dounmount(mp, force ? 
MNT_FORCE : 0, l)) == 0) { vfs_unmount_print(mp, ""); progress = true; } else { vfs_rele(mp); if (verbose) { printf("unmount of %s failed with error %d\n", mp->mnt_stat.f_mntonname, error); } any_error = true; } } if (verbose) { printf("unmounting done\n"); } if (any_error && verbose) { printf("WARNING: some file systems would not unmount\n"); } /* If the mountlist is empty it is time to remove swap. */ mountlist_iterator_init(&iter); if (mountlist_iterator_next(iter) == NULL) { uvm_swap_shutdown(l); } mountlist_iterator_destroy(iter); return progress; } void vfs_sync_all(struct lwp *l) { printf("syncing disks... "); /* remove user processes from run queue */ suspendsched(); (void)spl0(); /* avoid coming back this way again if we panic. */ doing_shutdown = 1; do_sys_sync(l); /* Wait for sync to finish. */ if (vfs_syncwait() != 0) { #if defined(DDB) && defined(DEBUG_HALT_BUSY) Debugger(); #endif printf("giving up\n"); return; } else printf("done\n"); } /* * Sync and unmount file systems before shutting down. */ void vfs_shutdown(void) { lwp_t *l = curlwp; vfs_sync_all(l); /* * If we have panicked - do not make the situation potentially * worse by unmounting the file systems. */ if (panicstr != NULL) { return; } /* Unmount file systems. */ vfs_unmountall(l); } /* * Print a list of supported file system types (used by vfs_mountroot) */ static void vfs_print_fstypes(void) { struct vfsops *v; int cnt = 0; mutex_enter(&vfs_list_lock); LIST_FOREACH(v, &vfs_list, vfs_list) ++cnt; mutex_exit(&vfs_list_lock); if (cnt == 0) { printf("WARNING: No file system modules have been loaded.\n"); return; } printf("Supported file systems:"); mutex_enter(&vfs_list_lock); LIST_FOREACH(v, &vfs_list, vfs_list) { printf(" %s", v->vfs_name); } mutex_exit(&vfs_list_lock); printf("\n"); } /* * Mount the root file system. If the operator didn't specify a * file system to use, try all possible file systems until one * succeeds. 
*/
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check/prepare the root device per device class. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it.  Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/* Hold a type reference while dropping the list lock. */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, close the disk root device opened above. */
	if (error && device_class(root_device) == DV_DISK) {
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		vrele(rootvp);
	}
	if (error == 0) {
		mount_iterator_t *iter;
		struct mount *mp;
		extern struct cwdinfo cwdi0;

		/* The freshly mounted root is the first mount on the list. */
		mountlist_iterator_init(&iter);
		mp = mountlist_iterator_next(iter);
		KASSERT(mp != NULL);
		mountlist_iterator_destroy(iter);

		mp->mnt_flag |= MNT_ROOTFS;
		mp->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
		 * reference it, and donate it the reference grabbed
		 * with VFS_ROOT().
		 */
		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info.  All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		vref(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;

		/*
		 * Enable loading of modules from the filesystem
		 */
		module_load_vfs_init();
	}
	return (error);
}

/*
 * mount_specific_key_create --
 *	Create a key for subsystem mount-specific data.
*/
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
}

/*
 * mount_specific_key_delete --
 *	Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *	Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
	int error __diagused;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *	Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *	Return mount-specific data corresponding to the specified key.
 */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

	return specificdata_getspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key);
}

/*
 * mount_setspecific --
 *	Set mount-specific data corresponding to the specified key.
 */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

	specificdata_setspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key, data);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vnode_t *vp)
{
	vnode_t *vq;
	int error = 0;

	if (vp->v_type != VBLK)
		return ENOTBLK;
	/* Is a file system mounted on this very device node? */
	if (spec_node_getmountedfs(vp) != NULL)
		return EBUSY;
	/* Also check any other vnode aliasing the same dev_t. */
	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
	    == 0) {
		if (spec_node_getmountedfs(vq) != NULL)
			error = EBUSY;
		vrele(vq);
	}

	return error;
}

/*
 * Check if a device pointed to by vp is mounted.
*
 * Returns:
 *	EINVAL	if it's not a disk
 *	EBUSY	if it's a disk and mounted
 *	0	if it's a disk and not mounted
 */
int
rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
{
	vnode_t *bvp;
	dev_t dev;
	int d_type;

	bvp = NULL;
	d_type = D_OTHER;

	if (iskmemvp(vp))
		return EINVAL;

	switch (vp->v_type) {
	case VCHR: {
		/*
		 * Character device: map it to its block twin and take
		 * the device type from the character devsw entry.
		 */
		const struct cdevsw *cdev;

		dev = vp->v_rdev;
		cdev = cdevsw_lookup(dev);
		if (cdev != NULL) {
			dev_t blkdev;

			blkdev = devsw_chr2blk(dev);
			if (blkdev != NODEV) {
				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
					d_type = (cdev->d_flag & D_TYPEMASK);
					/* XXX: what if bvp disappears? */
					vrele(bvp);
				}
			}
		}

		break;
		}

	case VBLK: {
		/* Block device: use its own devsw entry directly. */
		const struct bdevsw *bdev;

		dev = vp->v_rdev;
		bdev = bdevsw_lookup(dev);
		if (bdev != NULL)
			d_type = (bdev->d_flag & D_TYPEMASK);

		bvp = vp;

		break;
		}

	default:
		break;
	}

	if (d_type != D_DISK)
		return EINVAL;

	if (bvpp != NULL)
		*bvpp = bvp;

	/*
	 * XXX: This is bogus.  We should be failing the request
	 * XXX: not only if this specific slice is mounted, but
	 * XXX: if it's on a disk with any other mounted slice.
	 */
	if (vfs_mountedon(bvp))
		return EBUSY;

	return 0;
}

/*
 * Make a 'unique' number from a mount type name.
*/
long
makefstype(const char *type)
{
	long rv;

	/* Simple shift/xor fold of the name; not guaranteed collision-free. */
	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/* Allocate a mount-list entry of the given kind (mount or marker). */
static struct mountlist_entry *
mountlist_alloc(enum mountlist_type type, struct mount *mp)
{
	struct mountlist_entry *me;

	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
	me->me_mount = mp;
	me->me_type = type;

	return me;
}

/* Free a mount-list entry allocated with mountlist_alloc(). */
static void
mountlist_free(struct mountlist_entry *me)
{

	kmem_free(me, sizeof(*me));
}

/*
 * Begin iterating the mount list: insert a marker entry at the head.
 * The marker records this iterator's position across lock drops.
 */
void
mountlist_iterator_init(mount_iterator_t **mip)
{
	struct mountlist_entry *me;

	me = mountlist_alloc(ME_MARKER, NULL);
	mutex_enter(&mountlist_lock);
	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
	*mip = (mount_iterator_t *)me;
}

/*
 * Finish iterating: unbusy the current mount (if any), unlink and free
 * the marker.
 */
void
mountlist_iterator_destroy(mount_iterator_t *mi)
{
	struct mountlist_entry *marker = &mi->mi_entry;

	if (marker->me_mount != NULL)
		vfs_unbusy(marker->me_mount);

	mutex_enter(&mountlist_lock);
	TAILQ_REMOVE(&mountlist, marker, me_list);
	mutex_exit(&mountlist_lock);

	mountlist_free(marker);
}

/*
 * Return the next mount or NULL for this iterator.
 * Mark it busy on success.
 */
static inline struct mount *
_mountlist_iterator_next(mount_iterator_t *mi, bool wait)
{
	struct mountlist_entry *me, *marker = &mi->mi_entry;
	struct mount *mp;
	int error;

	/* Release the mount returned by the previous call. */
	if (marker->me_mount != NULL) {
		vfs_unbusy(marker->me_mount);
		marker->me_mount = NULL;
	}

	mutex_enter(&mountlist_lock);
	for (;;) {
		KASSERT(marker->me_type == ME_MARKER);

		me = TAILQ_NEXT(marker, me_list);
		if (me == NULL) {
			/* End of list: keep marker and return. */
			mutex_exit(&mountlist_lock);
			return NULL;
		}
		/* Advance the marker past the entry being examined. */
		TAILQ_REMOVE(&mountlist, marker, me_list);
		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);

		/* Skip other markers. */
		if (me->me_type != ME_MOUNT)
			continue;

		/* Take an initial reference for vfs_busy() below. */
		mp = me->me_mount;
		KASSERT(mp != NULL);
		vfs_ref(mp);
		mutex_exit(&mountlist_lock);

		/* Try to mark this mount busy and return on success. */
		if (wait)
			error = vfs_busy(mp);
		else
			error = vfs_trybusy(mp);
		if (error == 0) {
			/* vfs_busy took its own reference; drop ours. */
			vfs_rele(mp);
			marker->me_mount = mp;
			return mp;
		}
		vfs_rele(mp);
		mutex_enter(&mountlist_lock);
	}
}

/* Blocking variant: wait until the next mount can be busied. */
struct mount *
mountlist_iterator_next(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, true);
}

/* Non-blocking variant: skip mounts that cannot be busied right now. */
struct mount *
mountlist_iterator_trynext(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, false);
}

/*
 * Attach new mount to the end of the mount list.
 */
void
mountlist_append(struct mount *mp)
{
	struct mountlist_entry *me;

	me = mountlist_alloc(ME_MOUNT, mp);
	mutex_enter(&mountlist_lock);
	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
}

/*
 * Remove mount from mount list.
 */
void
mountlist_remove(struct mount *mp)
{
	struct mountlist_entry *me;

	mutex_enter(&mountlist_lock);
	TAILQ_FOREACH(me, &mountlist, me_list)
		if (me->me_type == ME_MOUNT && me->me_mount == mp)
			break;
	KASSERT(me != NULL);
	TAILQ_REMOVE(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
	mountlist_free(me);
}

/*
 * Unlocked variant to traverse the mountlist.
 * To be used from DDB only.
 */
struct mount *
_mountlist_next(struct mount *mp)
{
	struct mountlist_entry *me;

	if (mp == NULL) {
		me = TAILQ_FIRST(&mountlist);
	} else {
		/* Find mp's entry, then step past it. */
		TAILQ_FOREACH(me, &mountlist, me_list)
			if (me->me_type == ME_MOUNT && me->me_mount == mp)
				break;
		if (me != NULL)
			me = TAILQ_NEXT(me, me_list);
	}

	/* Skip any iterator markers. */
	while (me != NULL && me->me_type != ME_MOUNT)
		me = TAILQ_NEXT(me, me_list);

	return (me ? me->me_mount : NULL);
}
| 4665 422 4819 7 4822 4332 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | /* $NetBSD: syscallvar.h,v 1.12 2018/04/19 21:19:07 christos Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software developed for The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#ifndef	_SYS_SYSCALLVAR_H_
#define	_SYS_SYSCALLVAR_H_

#ifndef	_KERNEL
#error nothing of interest to userspace here
#endif

#if defined(_KERNEL) && defined(_KERNEL_OPT)
#include "opt_dtrace.h"
#endif

#include <sys/systm.h>
#include <sys/proc.h>

extern struct emul emul_netbsd;

/* Descriptor for a dynamically installed system call. */
struct syscall_package {
	u_short		sp_code;	/* syscall number */
	u_short		sp_flags;	/* flags for the sysent entry */
	sy_call_t	*sp_call;	/* handler function */
};

void	syscall_init(void);
int	syscall_establish(const struct emul *, const struct syscall_package *);
int	syscall_disestablish(const struct emul *, const struct syscall_package *);

/*
 * Invoke a system call handler, recording the sysent in the LWP for
 * the duration of the call.
 */
static __inline int
sy_call(const struct sysent *sy, struct lwp *l, const void *uap,
	register_t *rval)
{
	int error;

	l->l_sysent = sy;
	error = (*sy->sy_call)(l, uap, rval);
	l->l_sysent = NULL;

	return error;
}

/*
 * Full syscall entry path: wraps sy_call() with optional syscall
 * tracing and DTrace entry/return probes.  Note that if trace_enter()
 * fails the handler is not invoked at all.
 */
static __inline int
sy_invoke(const struct sysent *sy, struct lwp *l, const void *uap,
	register_t *rval, int code)
{
	const bool do_trace = l->l_proc->p_trace_enabled &&
	    (sy->sy_flags & SYCALL_INDIRECT) == 0;
	int error;

#ifdef KDTRACE_HOOKS
#define KDTRACE_ENTRY(a)	(a)
#else
#define KDTRACE_ENTRY(a)	(0)
#endif
	if (__predict_true(!(do_trace || KDTRACE_ENTRY(sy->sy_entry)))
	    || (error = trace_enter(code, sy, uap)) == 0) {
		rval[0] = 0;
#if !defined(__mips__) && !defined(__m68k__)
		/*
		 * Due to the mips userland code for SYS_break needing v1 to be
		 * preserved, we can't clear this on mips.
		 */
		rval[1] = 0;
#endif
		error = sy_call(sy, l, uap, rval);
	}

	if (__predict_false(do_trace || KDTRACE_ENTRY(sy->sy_return))) {
		trace_exit(code, sy, uap, rval, error);
	}

	return error;
}

/* inclusion in the kernel currently depends on SYSCALL_DEBUG */
extern const char * const syscallnames[];
extern const char * const altsyscallnames[];

#endif	/* _SYS_SYSCALLVAR_H_ */
| 20 20 20 27 27 27 20 27 16 16 16 16 11 11 5 16 2 2 2 2 13 13 13 13 3 10 13 13 11 11 11 17 17 84 79 70 70 8 8 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 17 19 19 19 19 14 3 14 14 7 7 3 3 3 14 18 17 17 17 5 5 17 17 17 17 17 17 17 17 17 17 16 17 18 16 16 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 
1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 | /* $NetBSD: uvm_aobj.c,v 1.156 2022/05/31 08:43:16 andvar Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and * Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp */ /* * uvm_aobj.c: anonymous memory uvm_object pager * * author: Chuck Silvers <chuq@chuq.com> * started: Jan-1998 * * - design mostly from Chuck Cranor */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.156 2022/05/31 08:43:16 andvar Exp $"); #ifdef _KERNEL_OPT #include "opt_uvmhist.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/pool.h> #include <sys/atomic.h> #include <uvm/uvm.h> #include <uvm/uvm_page_array.h> /* * An anonymous UVM object (aobj) manages anonymous-memory. In addition to * keeping the list of resident pages, it may also keep a list of allocated * swap blocks. Depending on the size of the object, this list is either * stored in an array (small objects) or in a hash table (large objects). * * Lock order * * uao_list_lock -> * uvm_object::vmobjlock */ /* * Note: for hash tables, we break the address space of the aobj into blocks * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two. */ #define UAO_SWHASH_CLUSTER_SHIFT 4 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT) /* Get the "tag" for this page index. */ #define UAO_SWHASH_ELT_TAG(idx) ((idx) >> UAO_SWHASH_CLUSTER_SHIFT) #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \ ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1)) /* Given an ELT and a page index, find the swap slot. */ #define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \ ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)]) /* Given an ELT, return its pageidx base. */ #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \ ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT) /* The hash function. */ #define UAO_SWHASH_HASH(aobj, idx) \ (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \ & (aobj)->u_swhashmask)]) /* * The threshold which determines whether we will use an array or a * hash table to store the list of allocated swap blocks. 
 */
#define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define	UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define	UAO_SWHASH_MAXBUCKETS		256
#define	UAO_SWHASH_BUCKETS(aobj) \
    (MIN((aobj)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))

/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */

struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};

/*
 * uao_swhash: the swap hash table structure
 */

LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
 * Note: pages for this pool must not come from a pageable kernel map.
 */
static struct pool uao_swhash_elt_pool __cacheline_aligned;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *   (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */

struct uvm_aobj {
	struct uvm_object u_obj; /* has: lock, pgops, #pages, #refs */
	pgoff_t u_pages;	 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	int *u_swslots;		 /* array of offset->swapslot mappings */
				 /*
				  * hashtable of offset->swapslot mappings
				  * (u_swhash is an array of bucket heads)
				  */
	struct uao_swhash *u_swhash;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
	int u_freelist;		 /* freelist to allocate pages from */
};

static void	uao_free(struct uvm_aobj *);
static int	uao_get(struct uvm_object *, voff_t, struct vm_page **,
		    int *, int, vm_prot_t, int, int);
static int	uao_put(struct uvm_object *, voff_t, voff_t, int);

#if defined(VMSWAP)
static struct uao_swhash_elt *uao_find_swhash_elt
    (struct uvm_aobj *, int, bool);
static bool uao_pagein(struct uvm_aobj *, int, int);
static bool uao_pagein_page(struct uvm_aobj *, int);
#endif /* defined(VMSWAP) */

static struct vm_page	*uao_pagealloc(struct uvm_object *, voff_t, int);

/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */

const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_get = uao_get,
	.pgo_put = uao_put,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 */

static LIST_HEAD(aobjlist, uvm_aobj) uao_list __cacheline_aligned;
static kmutex_t uao_list_lock __cacheline_aligned;

/*
 * hash table/array related functions
 */

#if defined(VMSWAP)

/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 *
 * => the object should be locked by the caller
 */

static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx);
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);

	/*
	 * now search the bucket for the requested tag
	 */

	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag) {
			return elt;
		}
	}
	if (!create) {
		return NULL;
	}

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 */

	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
	if (elt == NULL) {
		return NULL;
	}
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;
	elt->count = 0;
	memset(elt->slots, 0, sizeof(elt->slots));
	return elt;
}

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 *
 * => object must be locked by caller
 */

int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */

	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */

	if (UAO_USES_SWHASH(aobj)) {
		elt = uao_find_swhash_elt(aobj, pageidx, false);
		return elt ? UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) : 0;
	}

	/*
	 * otherwise, look in the array
	 */

	return aobj->u_swslots[pageidx];
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */

int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int oldslot;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pdhist, "aobj %#jx pageidx %jd slot %jd",
	    (uintptr_t)aobj, pageidx, slot, 0);

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a non-zero slot.
	 */

	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		KASSERTMSG(slot == 0, "uao_set_swslot: no swap object");
		return 0;
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */

	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had not swap slot in the first place, and
		 * we are freeing.
		 */
		elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
		if (elt == NULL) {
			return slot ? -1 : 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */

		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;
			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}

#endif /* defined(VMSWAP) */

/*
 * end of hash/array functions
 */

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */

static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

#if defined(VMSWAP)
	if (UAO_USES_SWHASH(aobj)) {

		/*
		 * free the hash table itself.
		 */

		hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
	} else {

		/*
		 * free the array itself.
		 */

		kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
	}
#endif /* defined(VMSWAP) */

	/*
	 * finally free the aobj itself
	 */

	uvm_obj_destroy(uobj, true);
	kmem_free(aobj, sizeof(struct uvm_aobj));
}

/*
 * pager functions
 */

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are always zero
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 */

struct uvm_object *
uao_create(voff_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static krwlock_t bootstrap_kernel_object_lock;
	static int kobj_alloced __diagused = 0;
	pgoff_t pages = round_page((uint64_t)size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */

	if (flags & UAO_FLAG_KERNOBJ) {
		/* The kernel object may be created only once. */
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		/* Second phase: enabling swap on the kernel object. */
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
		refs = 0xdeadbeaf; /* XXX: gcc */
	} else {
		aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * no freelist by default
	 */

	aobj->u_freelist = VM_NFREELIST;

	/*
	 * allocate hash/array if necessary
	 *
	 * note: in the KERNSWAP case no need to worry about locking since
	 * we are still booting we should be the only thread around.
	 */

	const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
	if (flags == 0 || kernswap) {
#if defined(VMSWAP)

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
			    HASH_LIST, true, &aobj->u_swhashmask);
		} else {
			aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
			    KM_SLEEP);
		}
#endif /* defined(VMSWAP) */

		/*
		 * Replace kernel_object's temporary static lock with
		 * a regular rw_obj.  We cannot use uvm_obj_setlock()
		 * because that would try to free the old lock.
		 */

		if (kernswap) {
			aobj->u_obj.vmobjlock = rw_obj_alloc();
			rw_destroy(&bootstrap_kernel_object_lock);
		}
		if (flags) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
		}
	}

	/*
	 * Initialise UVM object.
	 */

	const bool kernobj = (flags & UAO_FLAG_KERNOBJ) != 0;
	uvm_obj_init(&aobj->u_obj, &aobj_pager, !kernobj, refs);
	if (__predict_false(kernobj)) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock);
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */

	mutex_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mutex_exit(&uao_list_lock);
	return(&aobj->u_obj);
}

/*
 * uao_set_pgfl: allocate pages only from the specified freelist.
 *
 * => must be called before any pages are allocated for the object.
 * => reset by setting it to VM_NFREELIST, meaning any freelist.
 */

void
uao_set_pgfl(struct uvm_object *uobj, int freelist)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERTMSG((0 <= freelist), "invalid freelist %d", freelist);
	KASSERTMSG((freelist <= VM_NFREELIST), "invalid freelist %d",
	    freelist);

	aobj->u_freelist = freelist;
}

/*
 * uao_pagealloc: allocate a page for aobj.
 */

static inline struct vm_page *
uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	if (__predict_true(aobj->u_freelist == VM_NFREELIST))
		return uvm_pagealloc(uobj, offset, NULL, flags);
	else
		return uvm_pagealloc_strat(uobj, offset, NULL, flags,
		    UVM_PGA_STRAT_ONLY, aobj->u_freelist);
}

/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */

void
uao_init(void)
{
	static int uao_initialized;

	if (uao_initialized)
		return;
	uao_initialized = true;
	LIST_INIT(&uao_list);
	mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
	    0, 0, 0, "uaoeltpl", NULL, IPL_VM);
}

/*
 * uao_reference: hold a reference to an anonymous UVM object.
 */
void
uao_reference(struct uvm_object *uobj)
{
	/* Kernel object is persistent. */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
		return;
	}
	atomic_inc_uint(&uobj->uo_refs);
}

/*
 * uao_detach: drop a reference to an anonymous UVM object.
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/*
	 * Detaching from kernel object is a NOP.
	 */

	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */

	KASSERT(uobj->uo_refs > 0);
	UVMHIST_LOG(maphist," (uobj=%#jx) ref=%jd",
	    (uintptr_t)uobj, uobj->uo_refs, 0, 0);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) {
		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
		return;
	}
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif

	/*
	 * Remove the aobj from the global list.
	 */

	mutex_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mutex_exit(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */

	uvm_page_array_init(&a, uobj, 0);
	rw_enter(uobj->vmobjlock, RW_WRITER);
	while ((pg = uvm_page_array_fill_and_peek(&a, 0, 0)) != NULL) {
		uvm_page_array_advance(&a);
		pmap_page_protect(pg, VM_PROT_NONE);
		if (pg->flags & PG_BUSY) {
			/* uvm_pagewait() drops the object lock; retake it. */
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_pagefree(pg);
	}
	uvm_page_array_fini(&a);

	/*
	 * Finally, free the anonymous UVM object itself.
	 */

	uao_free(aobj);
}

/*
 * uao_put: flush pages out of a uvm object
 *
 * => object should be locked by caller.  we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT).
 *	XXXJRT Currently, however, we don't.  In the case of cleaning
 *	XXXJRT a page, we simply just deactivate it.  Should probably
 *	XXXJRT handle this better, in the future (although "flushing"
 *	XXXJRT anonymous memory isn't terribly important).
 * => if PGO_CLEANIT is not set, then we will neither unlock the object
 *	or block.
 * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
 *	for flushing.
 * => we return 0 unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */

static int
uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;
	voff_t curoff;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	/* Clamp the range to the object; PGO_ALLPAGES means everything. */
	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		if (stop == 0) {
			stop = aobj->u_pages << PAGE_SHIFT;
		} else {
			stop = round_page(stop);
		}
		if (stop > (uint64_t)(aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_put: strange, got an out of range "
			    "flush %#jx > %#jx (fixed)\n",
			    (uintmax_t)stop,
			    (uintmax_t)(aobj->u_pages << PAGE_SHIFT));
			stop = aobj->u_pages << PAGE_SHIFT;
		}
	}
	UVMHIST_LOG(maphist,
	    " flush start=%#jx, stop=%#jx, flags=%#jx",
	    start, stop, flags, 0);

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */

	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/* locked: uobj */
	uvm_page_array_init(&a, uobj, 0);
	curoff = start;
	while ((pg = uvm_page_array_fill_and_peek(&a, curoff, 0)) != NULL) {
		if (pg->offset >= stop) {
			break;
		}

		/*
		 * wait and try again if the page is busy.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uao_put");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uvm_page_array_advance(&a);
		curoff = pg->offset + PAGE_SIZE;

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {

		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */

		case PGO_CLEANIT|PGO_FREE:
		case PGO_CLEANIT|PGO_DEACTIVATE:
		case PGO_DEACTIVATE:
 deactivate_it:
			uvm_pagelock(pg);
			uvm_pagedeactivate(pg);
			uvm_pageunlock(pg);
			break;

		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */

			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/*
			 * free the swap slot and the page.
			 */

			pmap_page_protect(pg, VM_PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */

			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_pagefree(pg);
			break;

		default:
			panic("%s: impossible", __func__);
		}
	}
	rw_exit(uobj->vmobjlock);
	uvm_page_array_fini(&a);
	return 0;
}

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
 * so, if the "center" page hits case 2/3 then we will need to return EBUSY.
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */

static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	voff_t current_offset;
	struct vm_page *ptmp;
	int lcv, gotpages, maxpages, swslot, pageidx;
	bool overwrite = ((flags & PGO_OVERWRITE) != 0);
	struct uvm_page_array a;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pdhist, "aobj=%#jx offset=%jd, flags=%#jx",
	    (uintptr_t)uobj, offset, flags,0);

	/*
	 * the object must be locked.  it can only be a read lock when
	 * processing a read fault with PGO_LOCKED.
	 */

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_lock_held(uobj->vmobjlock));
	KASSERT(rw_write_held(uobj->vmobjlock) ||
	    ((flags & PGO_LOCKED) != 0 && (access_type & VM_PROT_WRITE) == 0));

	/*
	 * get number of pages
	 */

	maxpages = *npagesp;

	/*
	 * step 1: handled the case where fault data structures are locked.
	 */

	if (flags & PGO_LOCKED) {

		/*
		 * step 1a: get pages that are already resident.   only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */

		uvm_page_array_init(&a, uobj, 0);
		gotpages = 0;	/* # of pages we got so far */
		for (lcv = 0; lcv < maxpages; lcv++) {
			ptmp = uvm_page_array_fill_and_peek(&a,
			    offset + (lcv << PAGE_SHIFT), maxpages);
			if (ptmp == NULL) {
				break;
			}
			KASSERT(ptmp->offset >= offset);
			/* Skip ahead to the page actually found. */
			lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
			if (lcv >= maxpages) {
				break;
			}
			uvm_page_array_advance(&a);

			/*
			 * to be useful must get a non-busy page
			 */

			if ((ptmp->flags & PG_BUSY) != 0) {
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */

			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);
			pps[lcv] = ptmp;
			gotpages++;
		}
		uvm_page_array_fini(&a);

		/*
		 * step 1b: now we've either done everything needed or we
		 * to unlock and do some waiting or I/O.
		 */

		UVMHIST_LOG(pdhist, "<- done (done=%jd)",
		    (pps[centeridx] != NULL), 0,0,0);
		*npagesp = gotpages;
		return pps[centeridx] != NULL ? 0 : EBUSY;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * object is locked.   data structures are unlocked.
	 */

	if ((flags & PGO_SYNCIO) == 0) {
		goto done;
	}

	uvm_page_array_init(&a, uobj, 0);
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;) {

		/*
		 * we have yet to locate the current page (pps[lcv]).   we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].   we are
		 * ready to move on to the next page.
		 */

		ptmp = uvm_page_array_fill_and_peek(&a, current_offset,
		    maxpages - lcv);

		if (ptmp != NULL && ptmp->offset == current_offset) {
			/* page is there, see if we need to wait on it */
			if ((ptmp->flags & PG_BUSY) != 0) {
				UVMHIST_LOG(pdhist,
				    "sleeping, ptmp->flags %#jx\n",
				    ptmp->flags,0,0,0);
				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
				rw_enter(uobj->vmobjlock, RW_WRITER);
				uvm_page_array_clear(&a);
				continue;
			}

			/*
			 * if we get here then the page is resident and
			 * unbusy.  we busy it now (so we own it).  if
			 * overwriting, mark the page dirty up front as
			 * it will be zapped via an unmanaged mapping.
			 */

			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);
			if (overwrite) {
				uvm_pagemarkdirty(ptmp,
				    UVM_PAGE_STATUS_DIRTY);
			}
			/* we own it, caller must un-busy */
			ptmp->flags |= PG_BUSY;
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv++] = ptmp;
			current_offset += PAGE_SIZE;
			uvm_page_array_advance(&a);
			continue;
		} else {
			KASSERT(ptmp == NULL || ptmp->offset > current_offset);
		}

		/*
		 * not resident.  allocate a new busy/fake/clean page in the
		 * object.  if it's in swap we need to do I/O to fill in the
		 * data, otherwise the page needs to be cleared: if it's not
		 * destined to be overwritten, then zero it here and now.
		 */

		pageidx = current_offset >> PAGE_SHIFT;
		swslot = uao_find_swslot(uobj, pageidx);
		ptmp = uao_pagealloc(uobj, current_offset,
		    swslot != 0 || overwrite ? 0 : UVM_PGA_ZERO);

		/* out of RAM? */
		if (ptmp == NULL) {
			rw_exit(uobj->vmobjlock);
			UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL",0,0,0,0);
			uvm_wait("uao_getpage");
			rw_enter(uobj->vmobjlock, RW_WRITER);
			uvm_page_array_clear(&a);
			continue;
		}

		/*
		 * if swslot == 0, page hasn't existed before and is zeroed.
		 * otherwise we have a "fake/busy/clean" page that we just
		 * allocated.  do the needed "i/o", reading from swap.
		 */

		if (swslot != 0) {
#if defined(VMSWAP)
			int error;

			UVMHIST_LOG(pdhist, "pagein from swslot %jd",
			    swslot, 0,0,0);

			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			uvm_page_array_clear(&a);
			rw_exit(uobj->vmobjlock);
			error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITER);

			/*
			 * I/O done.  check for errors.
			 */

			if (error != 0) {
				UVMHIST_LOG(pdhist, "<- done (error=%jd)",
				    error,0,0,0);

				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */

				swslot = uao_set_swslot(uobj, pageidx,
				    SWSLOT_BAD);
				if (swslot > 0) {
					uvm_swap_markbad(swslot, 1);
				}

				uvm_pagefree(ptmp);
				rw_exit(uobj->vmobjlock);
				UVMHIST_LOG(pdhist, "<- done (error)",
				    error,lcv,0,0);
				if (lcv != 0) {
					uvm_page_unbusy(pps, lcv);
				}
				memset(pps, 0, maxpages * sizeof(pps[0]));
				uvm_page_array_fini(&a);
				return error;
			}
#else /* defined(VMSWAP) */
			panic("%s: pagein", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * note that we will allow the page being writably-mapped
		 * (!PG_RDONLY) regardless of access_type.  if overwrite,
		 * the page can be modified through an unmanaged mapping
		 * so mark it dirty up front.
		 */

		if (overwrite) {
			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_DIRTY);
		} else {
			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_UNKNOWN);
		}

		/*
		 * we got the page!  clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.  note
		 * that page is still busy.
		 *
		 * it is the callers job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */

		KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN);
		KASSERT((ptmp->flags & PG_FAKE) != 0);
		KASSERT(ptmp->offset == current_offset);
		ptmp->flags &= ~PG_FAKE;
		pps[lcv++] = ptmp;
		current_offset += PAGE_SIZE;
	}
	uvm_page_array_fini(&a);

	/*
	 * finally, unlock object and return.
	 */

done:
	rw_exit(uobj->vmobjlock);
	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
	return 0;
}

#if defined(VMSWAP)

/*
 * uao_dropswap: release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */
void
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int slot;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/* clear the slot mapping; a nonzero old slot means swap to free */
	slot = uao_set_swslot(uobj, pageidx, 0);
	if (slot) {
		uvm_swap_free(slot, 1);
	}
}

/*
 * page in every page in every aobj that is paged-out to a range of swslots.
 *
 * => nothing should be locked.
 * => returns true if pagein was aborted due to lack of memory.
 */

bool
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj;

	/*
	 * Walk the list of all anonymous UVM objects.  Grab the first.
	 */
	mutex_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mutex_exit(&uao_list_lock);
		return false;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		bool rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mutex_exit(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITER);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			/* aborted: also drop the prefetched reference */
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mutex_enter(&uao_list_lock);

	} while (aobj);

	mutex_exit(&uao_list_lock);
	return false;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => aobj must be locked and is returned locked.
 * => returns true if pagein was aborted due to lack of memory.
 */
static bool
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	bool rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int buck;

restart:
		for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
			     elt != NULL;
			     elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */
					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed.
					 */
					rv = uao_pagein_page(aobj,
					    UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		/* flat slot array: a simple linear scan suffices */
		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */
			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */
			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return false;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns true if pagein was aborted due to lack of memory.
 * => Object must be locked and is returned locked.
 */
static bool
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	/* uao_get with PGO_SYNCIO drops the object lock for the I/O */
	rv = uao_get(uobj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages,
	    0, VM_PROT_READ | VM_PROT_WRITE, 0, PGO_SYNCIO);

	/*
	 * relock and finish up.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITER);
	switch (rv) {
	case 0:
		break;

	case EIO:
	case ERESTART:

		/*
		 * nothing more to do on errors.
		 * ERESTART can only mean that the anon was freed,
		 * so again there's nothing to do.
		 */

		return false;

	default:
		return true;
	}

	/*
	 * ok, we've got the page now.
	 * mark it as dirty, clear its swslot and un-busy it.
	 */
	uao_dropswap(&aobj->u_obj, pageidx);

	/*
	 * make sure it's on a page queue.
	 */
	uvm_pagelock(pg);
	uvm_pageenqueue(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
	UVM_PAGE_OWN(pg, NULL);

	return false;
}

/*
 * uao_dropswap_range: drop swapslots in the range.
 *
 * => aobj must be locked and is returned locked.
 * => start is inclusive.  end is exclusive.
*/ void uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int swpgonlydelta = 0; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); KASSERT(rw_write_held(uobj->vmobjlock)); if (end == 0) { end = INT64_MAX; } if (UAO_USES_SWHASH(aobj)) { int i, hashbuckets = aobj->u_swhashmask + 1; voff_t taghi; voff_t taglo; taglo = UAO_SWHASH_ELT_TAG(start); taghi = UAO_SWHASH_ELT_TAG(end); for (i = 0; i < hashbuckets; i++) { struct uao_swhash_elt *elt, *next; for (elt = LIST_FIRST(&aobj->u_swhash[i]); elt != NULL; elt = next) { int startidx, endidx; int j; next = LIST_NEXT(elt, list); if (elt->tag < taglo || taghi < elt->tag) { continue; } if (elt->tag == taglo) { startidx = UAO_SWHASH_ELT_PAGESLOT_IDX(start); } else { startidx = 0; } if (elt->tag == taghi) { endidx = UAO_SWHASH_ELT_PAGESLOT_IDX(end); } else { endidx = UAO_SWHASH_CLUSTER_SIZE; } for (j = startidx; j < endidx; j++) { int slot = elt->slots[j]; KASSERT(uvm_pagelookup(&aobj->u_obj, (UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + j) << PAGE_SHIFT) == NULL); if (slot > 0) { uvm_swap_free(slot, 1); swpgonlydelta++; KASSERT(elt->count > 0); elt->slots[j] = 0; elt->count--; } } if (elt->count == 0) { LIST_REMOVE(elt, list); pool_put(&uao_swhash_elt_pool, elt); } } } } else { int i; if (aobj->u_pages < end) { end = aobj->u_pages; } for (i = start; i < end; i++) { int slot = aobj->u_swslots[i]; if (slot > 0) { uvm_swap_free(slot, 1); swpgonlydelta++; } } } /* * adjust the counter of pages only in swap for all * the swap slots we've freed. */ if (swpgonlydelta > 0) { KASSERT(uvmexp.swpgonly >= swpgonlydelta); atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta); } } #endif /* defined(VMSWAP) */ |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 | /* $NetBSD: wsdisplay.c,v 1.165 2022/07/17 11:43:39 riastradh Exp $ */ /* * Copyright (c) 1996, 1997 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: wsdisplay.c,v 1.165 2022/07/17 11:43:39 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_wsdisplay_compat.h" #include "opt_wsmsgattrs.h" #endif #include "wskbd.h" #include "wsmux.h" #include "wsdisplay.h" #include <sys/param.h> #include <sys/conf.h> #include <sys/device.h> #include <sys/ioctl.h> #include <sys/poll.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/syslog.h> #include <sys/systm.h> #include <sys/tty.h> #include <sys/signalvar.h> #include <sys/errno.h> #include <sys/fcntl.h> #include <sys/vnode.h> #include <sys/kauth.h> #include <sys/sysctl.h> #include <dev/wscons/wsconsio.h> #include <dev/wscons/wseventvar.h> #include <dev/wscons/wsmuxvar.h> #include <dev/wscons/wsdisplayvar.h> #include <dev/wscons/wsksymvar.h> #include <dev/wscons/wsksymdef.h> #include <dev/wscons/wsemulvar.h> #include <dev/wscons/wscons_callbacks.h> #include <dev/cons.h> #include "locators.h" #include "ioconf.h" #ifdef WSDISPLAY_MULTICONS static bool wsdisplay_multicons_enable = true; static bool wsdisplay_multicons_suspended = false; 
#endif /* Console device before replaced by wsdisplay */ static struct consdev *wsdisplay_ocn; struct wsscreen_internal { const struct wsdisplay_emulops *emulops; void *emulcookie; const struct wsscreen_descr *scrdata; const struct wsemul_ops *wsemul; void *wsemulcookie; }; struct wsscreen { struct wsscreen_internal *scr_dconf; struct tty *scr_tty; int scr_hold_screen; /* hold tty output */ int scr_flags; #define SCR_OPEN 1 /* is it open? */ #define SCR_WAITACTIVE 2 /* someone waiting on activation */ #define SCR_GRAPHICS 4 /* graphics mode, no text (emulation) output */ #define SCR_DUMBFB 8 /* in use as a dumb fb (iff SCR_GRAPHICS) */ const struct wscons_syncops *scr_syncops; void *scr_synccookie; #ifdef WSDISPLAY_COMPAT_RAWKBD int scr_rawkbd; #endif #ifdef WSDISPLAY_MULTICONS callout_t scr_getc_ch; #endif struct wsdisplay_softc *sc; /* XXX this is to support a hack in emulinput, see comment below */ int scr_in_ttyoutput; }; static struct wsscreen *wsscreen_attach(struct wsdisplay_softc *, int, const char *, const struct wsscreen_descr *, void *, int, int, long); static void wsscreen_detach(struct wsscreen *); static int wsdisplay_addscreen(struct wsdisplay_softc *, int, const char *, const char *); static void wsdisplay_addscreen_print(struct wsdisplay_softc *, int, int); static void wsdisplay_closescreen(struct wsdisplay_softc *, struct wsscreen *); static int wsdisplay_delscreen(struct wsdisplay_softc *, int, int); #define WSDISPLAY_MAXSCREEN 8 struct wsdisplay_softc { device_t sc_dev; const struct wsdisplay_accessops *sc_accessops; void *sc_accesscookie; const struct wsscreen_list *sc_scrdata; #ifdef WSDISPLAY_SCROLLSUPPORT struct wsdisplay_scroll_data sc_scroll_values; #endif struct wsscreen *sc_scr[WSDISPLAY_MAXSCREEN]; int sc_focusidx; /* available only if sc_focus isn't null */ struct wsscreen *sc_focus; struct wseventvar evar; int sc_isconsole; int sc_flags; #define SC_SWITCHPENDING 1 #define SC_SWITCHERROR 2 #define SC_XATTACHED 4 /* X server active */ 
kmutex_t sc_flagsmtx; /* for flags, might also be used for focus */ kcondvar_t sc_flagscv; int sc_screenwanted, sc_oldscreen; /* valid with SC_SWITCHPENDING */ #if NWSKBD > 0 struct wsevsrc *sc_input; #ifdef WSDISPLAY_COMPAT_RAWKBD int sc_rawkbd; #endif #endif /* NWSKBD > 0 */ }; #ifdef WSDISPLAY_SCROLLSUPPORT struct wsdisplay_scroll_data wsdisplay_default_scroll_values = { WSDISPLAY_SCROLL_DOALL, 25, 2, }; #endif /* Autoconfiguration definitions. */ static int wsdisplay_emul_match(device_t , cfdata_t, void *); static void wsdisplay_emul_attach(device_t, device_t, void *); static int wsdisplay_emul_detach(device_t, int); static int wsdisplay_noemul_match(device_t, cfdata_t, void *); static void wsdisplay_noemul_attach(device_t, device_t, void *); static bool wsdisplay_suspend(device_t, const pmf_qual_t *); CFATTACH_DECL_NEW(wsdisplay_emul, sizeof (struct wsdisplay_softc), wsdisplay_emul_match, wsdisplay_emul_attach, wsdisplay_emul_detach, NULL); CFATTACH_DECL_NEW(wsdisplay_noemul, sizeof (struct wsdisplay_softc), wsdisplay_noemul_match, wsdisplay_noemul_attach, NULL, NULL); dev_type_open(wsdisplayopen); dev_type_close(wsdisplayclose); dev_type_read(wsdisplayread); dev_type_write(wsdisplaywrite); dev_type_ioctl(wsdisplayioctl); dev_type_stop(wsdisplaystop); dev_type_tty(wsdisplaytty); dev_type_poll(wsdisplaypoll); dev_type_mmap(wsdisplaymmap); dev_type_kqfilter(wsdisplaykqfilter); const struct cdevsw wsdisplay_cdevsw = { .d_open = wsdisplayopen, .d_close = wsdisplayclose, .d_read = wsdisplayread, .d_write = wsdisplaywrite, .d_ioctl = wsdisplayioctl, .d_stop = wsdisplaystop, .d_tty = wsdisplaytty, .d_poll = wsdisplaypoll, .d_mmap = wsdisplaymmap, .d_kqfilter = wsdisplaykqfilter, .d_discard = nodiscard, .d_flag = D_TTY }; static void wsdisplaystart(struct tty *); static int wsdisplayparam(struct tty *, struct termios *); #define WSDISPLAYUNIT(dev) (minor(dev) >> 8) #define WSDISPLAYSCREEN(dev) (minor(dev) & 0xff) #define ISWSDISPLAYSTAT(dev) (WSDISPLAYSCREEN(dev) == 
254) #define ISWSDISPLAYCTL(dev) (WSDISPLAYSCREEN(dev) == 255) #define WSDISPLAYMINOR(unit, screen) (((unit) << 8) | (screen)) #define WSSCREEN_HAS_EMULATOR(scr) ((scr)->scr_dconf->wsemul != NULL) #define WSSCREEN_HAS_TTY(scr) ((scr)->scr_tty != NULL) static void wsdisplay_common_attach(struct wsdisplay_softc *sc, int console, int kbdmux, const struct wsscreen_list *, const struct wsdisplay_accessops *accessops, void *accesscookie); #ifdef WSDISPLAY_COMPAT_RAWKBD int wsdisplay_update_rawkbd(struct wsdisplay_softc *, struct wsscreen *); #endif static int wsdisplay_console_initted; static int wsdisplay_console_attached; static struct wsdisplay_softc *wsdisplay_console_device; static struct wsscreen_internal wsdisplay_console_conf; static int wsdisplay_getc(dev_t); static void wsdisplay_pollc(dev_t, int); static int wsdisplay_cons_pollmode; static int (*wsdisplay_cons_kbd_getc)(dev_t); static void (*wsdisplay_cons_kbd_pollc)(dev_t, int); static struct consdev wsdisplay_cons = { .cn_getc = wsdisplay_getc, .cn_putc = wsdisplay_cnputc, .cn_pollc = wsdisplay_pollc, .cn_dev = NODEV, .cn_pri = CN_NORMAL }; #ifndef WSDISPLAY_DEFAULTSCREENS # define WSDISPLAY_DEFAULTSCREENS 0 #endif int wsdisplay_defaultscreens = WSDISPLAY_DEFAULTSCREENS; static int wsdisplay_switch1(device_t, int, int); static void wsdisplay_switch1_cb(void *, int, int); static int wsdisplay_switch2(device_t, int, int); static void wsdisplay_switch2_cb(void *, int, int); static int wsdisplay_switch3(device_t, int, int); static void wsdisplay_switch3_cb(void *, int, int); static void wsdisplay_swdone_cb(void *, int, int); static int wsdisplay_dosync(struct wsdisplay_softc *, int); int wsdisplay_clearonclose; #ifdef WSDISPLAY_MULTICONS /* * Replace cn_isconsole() so that we can enter DDB from old console. 
 */
bool
wsdisplay_cn_isconsole(dev_t dev)
{

	/*
	 * Match either the current console device, or -- when wsdisplay
	 * has taken over as console -- the old console device that was
	 * saved in wsdisplay_ocn.
	 */
	return (cn_tab != NULL && cn_tab->cn_dev == dev) ||
	    (cn_tab == &wsdisplay_cons &&
	     !wsdisplay_multicons_suspended &&
	     wsdisplay_multicons_enable &&
	     wsdisplay_ocn != NULL &&
	     wsdisplay_ocn->cn_dev == dev);
}

/*
 * Periodic callout: drain characters from the old console's getc and
 * feed them into the screen's tty line discipline.
 */
static void
wsscreen_getc_poll(void *priv)
{
	struct wsscreen *scr = priv;
	int c;

	if (wsdisplay_multicons_enable &&
	    wsdisplay_ocn && wsdisplay_ocn->cn_getc &&
	    WSSCREEN_HAS_EMULATOR(scr) && WSSCREEN_HAS_TTY(scr)) {
		struct tty *tp = scr->scr_tty;

		/* cn_getc returns < 0 when no character is pending */
		do {
			c = wsdisplay_ocn->cn_getc(wsdisplay_ocn->cn_dev);
			if (c >= 0)
				(*tp->t_linesw->l_rint)((unsigned char)c, tp);
		} while (c >= 0);
	}

	/* re-arm ourselves; polling continues every 10ms */
	callout_schedule(&scr->scr_getc_ch, mstohz(10));
}
#endif

/*
 * Allocate and initialize a wsscreen, wiring up the emulation (if any)
 * and a tty.  Returns NULL if the requested emulation cannot be found.
 */
static struct wsscreen *
wsscreen_attach(struct wsdisplay_softc *sc, int console, const char *emul,
	const struct wsscreen_descr *type, void *cookie, int ccol,
	int crow, long defattr)
{
	struct wsscreen_internal *dconf;
	struct wsscreen *scr;

	scr = malloc(sizeof(struct wsscreen), M_DEVBUF, M_WAITOK);
	if (console) {
		/* console config was set up early; just reuse it */
		dconf = &wsdisplay_console_conf;
		/*
		 * If there's an emulation, tell it about the callback argument.
		 * The other stuff is already there.
		 */
		if (dconf->wsemul != NULL)
			(*dconf->wsemul->attach)(1, 0, 0, 0, 0, scr, 0);
	} else { /* not console */
		dconf = malloc(sizeof(struct wsscreen_internal),
		    M_DEVBUF, M_WAITOK);
		dconf->emulops = type->textops;
		dconf->emulcookie = cookie;
		if (dconf->emulops) {
			dconf->wsemul = wsemul_pick(emul);
			if (dconf->wsemul == NULL) {
				/* unknown emulation name: undo allocations */
				free(dconf, M_DEVBUF);
				free(scr, M_DEVBUF);
				return NULL;
			}
			dconf->wsemulcookie = (*dconf->wsemul->attach)(0, type,
			    cookie, ccol, crow, scr, defattr);
		} else
			dconf->wsemul = NULL;
		dconf->scrdata = type;
	}

	scr->scr_dconf = dconf;

	scr->scr_tty = tty_alloc();
	tty_attach(scr->scr_tty);
	scr->scr_hold_screen = 0;
	/* no emulation => the screen is in raw graphics mode */
	if (WSSCREEN_HAS_EMULATOR(scr))
		scr->scr_flags = 0;
	else
		scr->scr_flags = SCR_GRAPHICS;

	scr->scr_syncops = 0;
	scr->sc = sc;
#ifdef WSDISPLAY_COMPAT_RAWKBD
	scr->scr_rawkbd = 0;
#endif
#ifdef WSDISPLAY_MULTICONS
	callout_init(&scr->scr_getc_ch, 0);
	callout_setfunc(&scr->scr_getc_ch, wsscreen_getc_poll, scr);
	/* only the console screen polls the old console for input */
	if (console)
		callout_schedule(&scr->scr_getc_ch, mstohz(10));
#endif
	return scr;
}

/*
 * Tear down a wsscreen: detach tty and emulation, stop the poll
 * callout, and free all memory owned by the screen.
 */
static void
wsscreen_detach(struct wsscreen *scr)
{
	u_int ccol, crow; /* XXX */

	if (WSSCREEN_HAS_TTY(scr)) {
		tty_detach(scr->scr_tty);
		tty_free(scr->scr_tty);
	}
	if (WSSCREEN_HAS_EMULATOR(scr)) {
		(*scr->scr_dconf->wsemul->detach)(scr->scr_dconf->wsemulcookie,
		    &ccol, &crow);
		wsemul_drop(scr->scr_dconf->wsemul);
	}
	/* per-screen descr copies are tagged WSSCREEN_FREE at creation */
	if (scr->scr_dconf->scrdata->capabilities & WSSCREEN_FREE)
		free(__UNCONST(scr->scr_dconf->scrdata), M_DEVBUF);
#ifdef WSDISPLAY_MULTICONS
	callout_halt(&scr->scr_getc_ch, NULL);
	callout_destroy(&scr->scr_getc_ch);
#endif
	free(scr->scr_dconf, M_DEVBUF);
	free(scr, M_DEVBUF);
}

/*
 * Look up a screen type by name; NULL name selects the first entry.
 * Returns 0 (NULL) if no screen type matches.
 */
const struct wsscreen_descr *
wsdisplay_screentype_pick(const struct wsscreen_list *scrdata, const char *name)
{
	int i;
	const struct wsscreen_descr *scr;

	KASSERT(scrdata->nscreens > 0);

	if (name == NULL)
		return scrdata->screens[0];

	for (i = 0; i < scrdata->nscreens; i++) {
		scr = scrdata->screens[i];
		if (!strcmp(name, scr->name))
			return scr;
	}

	return 0;
}

/*
 * print info about attached
screen */ static void wsdisplay_addscreen_print(struct wsdisplay_softc *sc, int idx, int count) { aprint_verbose_dev(sc->sc_dev, "screen %d", idx); if (count > 1) aprint_verbose("-%d", idx + (count-1)); aprint_verbose(" added (%s", sc->sc_scr[idx]->scr_dconf->scrdata->name); if (WSSCREEN_HAS_EMULATOR(sc->sc_scr[idx])) { aprint_verbose(", %s emulation", sc->sc_scr[idx]->scr_dconf->wsemul->name); } aprint_verbose(")\n"); } static int wsdisplay_addscreen(struct wsdisplay_softc *sc, int idx, const char *screentype, const char *emul) { const struct wsscreen_descr *scrdesc; struct wsscreen_descr *scrdescr2; int error; void *cookie; int ccol, crow; long defattr; struct wsscreen *scr; int s; if (idx < 0 || idx >= WSDISPLAY_MAXSCREEN) return EINVAL; if (sc->sc_scr[idx] != NULL) return EBUSY; scrdesc = wsdisplay_screentype_pick(sc->sc_scrdata, screentype); if (!scrdesc) return ENXIO; /* * if this screen can resize we need to copy the descr so each screen * gets its own */ if (scrdesc->capabilities & WSSCREEN_RESIZE) { /* we want per screen wsscreen_descr */ scrdescr2 = malloc(sizeof(struct wsscreen_descr), M_DEVBUF, M_WAITOK); memcpy(scrdescr2, scrdesc, sizeof(struct wsscreen_descr)); scrdescr2->capabilities |= WSSCREEN_FREE; scrdesc = scrdescr2; } error = (*sc->sc_accessops->alloc_screen)(sc->sc_accesscookie, scrdesc, &cookie, &ccol, &crow, &defattr); if (error) return error; scr = wsscreen_attach(sc, 0, emul, scrdesc, cookie, ccol, crow, defattr); if (scr == NULL) { (*sc->sc_accessops->free_screen)(sc->sc_accesscookie, cookie); return ENXIO; } sc->sc_scr[idx] = scr; /* if no screen has focus yet, activate the first we get */ s = spltty(); if (!sc->sc_focus) { (*sc->sc_accessops->show_screen)(sc->sc_accesscookie, scr->scr_dconf->emulcookie, 0, 0, 0); sc->sc_focusidx = idx; sc->sc_focus = scr; } splx(s); return 0; } static void wsdisplay_closescreen(struct wsdisplay_softc *sc, struct wsscreen *scr) { int maj, mn, idx; /* hangup */ if (WSSCREEN_HAS_TTY(scr)) { struct tty *tp 
= scr->scr_tty; (*tp->t_linesw->l_modem)(tp, 0); } /* locate the major number */ maj = cdevsw_lookup_major(&wsdisplay_cdevsw); /* locate the screen index */ for (idx = 0; idx < WSDISPLAY_MAXSCREEN; idx++) if (scr == sc->sc_scr[idx]) break; #ifdef DIAGNOSTIC if (idx == WSDISPLAY_MAXSCREEN) panic("wsdisplay_forceclose: bad screen"); #endif /* nuke the vnodes */ mn = WSDISPLAYMINOR(device_unit(sc->sc_dev), idx); vdevgone(maj, mn, mn, VCHR); } #ifdef WSDISPLAY_SCROLLSUPPORT void wsdisplay_scroll(void *arg, int op) { device_t dv = arg; struct wsdisplay_softc *sc = device_private(dv); struct wsscreen *scr; int lines; scr = sc->sc_focus; if (!scr) return; if (op == WSDISPLAY_SCROLL_RESET) lines = 0; else { lines = (op & WSDISPLAY_SCROLL_LOW) ? sc->sc_scroll_values.slowlines : sc->sc_scroll_values.fastlines; if (op & WSDISPLAY_SCROLL_BACKWARD) lines = -(lines); } if (sc->sc_accessops->scroll) { (*sc->sc_accessops->scroll)(sc->sc_accesscookie, sc->sc_focus->scr_dconf->emulcookie, lines); } } #endif static int wsdisplay_delscreen(struct wsdisplay_softc *sc, int idx, int flags) { struct wsscreen *scr; int s; void *cookie; if (idx < 0 || idx >= WSDISPLAY_MAXSCREEN) return EINVAL; if ((scr = sc->sc_scr[idx]) == NULL) return ENXIO; if (scr->scr_dconf == &wsdisplay_console_conf || scr->scr_syncops || ((scr->scr_flags & SCR_OPEN) && !(flags & WSDISPLAY_DELSCR_FORCE))) return EBUSY; wsdisplay_closescreen(sc, scr); /* * delete pointers, so neither device entries * nor keyboard input can reference it anymore */ s = spltty(); if (sc->sc_focus == scr) { sc->sc_focus = 0; #ifdef WSDISPLAY_COMPAT_RAWKBD wsdisplay_update_rawkbd(sc, 0); #endif } sc->sc_scr[idx] = 0; splx(s); /* * Wake up processes waiting for the screen to * be activated. Sleepers must check whether * the screen still exists. 
*/ if (scr->scr_flags & SCR_WAITACTIVE) wakeup(scr); /* save a reference to the graphics screen */ cookie = scr->scr_dconf->emulcookie; wsscreen_detach(scr); (*sc->sc_accessops->free_screen)(sc->sc_accesscookie, cookie); aprint_verbose_dev(sc->sc_dev, "screen %d deleted\n", idx); return 0; } /* * Autoconfiguration functions. */ int wsdisplay_emul_match(device_t parent, cfdata_t match, void *aux) { struct wsemuldisplaydev_attach_args *ap = aux; if (match->cf_loc[WSEMULDISPLAYDEVCF_CONSOLE] != WSEMULDISPLAYDEVCF_CONSOLE_DEFAULT) { /* * If console-ness of device specified, either match * exactly (at high priority), or fail. */ if (match->cf_loc[WSEMULDISPLAYDEVCF_CONSOLE] != 0 && ap->console != 0) return 10; else return 0; } /* If console-ness unspecified, it wins. */ return 1; } void wsdisplay_emul_attach(device_t parent, device_t self, void *aux) { struct wsdisplay_softc *sc = device_private(self); struct wsemuldisplaydev_attach_args *ap = aux; sc->sc_dev = self; /* Don't allow more than one console to attach */ if (wsdisplay_console_attached && ap->console) ap->console = 0; wsdisplay_common_attach(sc, ap->console, device_cfdata(self)->cf_loc[WSEMULDISPLAYDEVCF_KBDMUX], ap->scrdata, ap->accessops, ap->accesscookie); if (ap->console) { int maj; /* locate the major number */ maj = cdevsw_lookup_major(&wsdisplay_cdevsw); cn_tab->cn_dev = makedev(maj, WSDISPLAYMINOR(device_unit(self), 0)); } } /* Print function (for parent devices). */ int wsemuldisplaydevprint(void *aux, const char *pnp) { #if 0 /* -Wunused */ struct wsemuldisplaydev_attach_args *ap = aux; #endif if (pnp) aprint_normal("wsdisplay at %s", pnp); #if 0 /* don't bother; it's ugly */ aprint_normal(" console %d", ap->console); #endif return UNCONF; } int wsdisplay_emul_detach(device_t dev, int how) { struct wsdisplay_softc *sc = device_private(dev); int flag, i, res; flag = (how & DETACH_FORCE ? 
WSDISPLAY_DELSCR_FORCE : 0); for (i = 0; i < WSDISPLAY_MAXSCREEN; i++) if (sc->sc_scr[i]) { res = wsdisplay_delscreen(sc, i, flag); if (res) return res; } cv_destroy(&sc->sc_flagscv); mutex_destroy(&sc->sc_flagsmtx); return 0; } int wsdisplay_noemul_match(device_t parent, cfdata_t match, void *aux) { #if 0 /* -Wunused */ struct wsdisplaydev_attach_args *ap = aux; #endif /* Always match. */ return 1; } void wsdisplay_noemul_attach(device_t parent, device_t self, void *aux) { struct wsdisplay_softc *sc = device_private(self); struct wsdisplaydev_attach_args *ap = aux; sc->sc_dev = self; wsdisplay_common_attach(sc, 0, device_cfdata(self)->cf_loc[WSDISPLAYDEVCF_KBDMUX], NULL, ap->accessops, ap->accesscookie); } static void wsdisplay_swdone_cb(void *arg, int error, int waitok) { struct wsdisplay_softc *sc = arg; mutex_enter(&sc->sc_flagsmtx); KASSERT(sc->sc_flags & SC_SWITCHPENDING); if (error) sc->sc_flags |= SC_SWITCHERROR; sc->sc_flags &= ~SC_SWITCHPENDING; cv_signal(&sc->sc_flagscv); mutex_exit(&sc->sc_flagsmtx); } static int wsdisplay_dosync(struct wsdisplay_softc *sc, int attach) { struct wsscreen *scr; int (*op)(void *, int, void (*)(void *, int, int), void *); int res; scr = sc->sc_focus; if (!scr || !scr->scr_syncops) return 0; /* XXX check SCR_GRAPHICS? 
*/ sc->sc_flags |= SC_SWITCHPENDING; sc->sc_flags &= ~SC_SWITCHERROR; if (attach) op = scr->scr_syncops->attach; else op = scr->scr_syncops->detach; res = (*op)(scr->scr_synccookie, 1, wsdisplay_swdone_cb, sc); if (res == EAGAIN) { /* wait for callback */ mutex_enter(&sc->sc_flagsmtx); while (sc->sc_flags & SC_SWITCHPENDING) cv_wait_sig(&sc->sc_flagscv, &sc->sc_flagsmtx); mutex_exit(&sc->sc_flagsmtx); if (sc->sc_flags & SC_SWITCHERROR) return EIO; /* XXX pass real error */ } else { sc->sc_flags &= ~SC_SWITCHPENDING; if (res) return res; } if (attach) sc->sc_flags |= SC_XATTACHED; else sc->sc_flags &= ~SC_XATTACHED; return 0; } int wsdisplay_handlex(int resume) { int i, res; device_t dv; for (i = 0; i < wsdisplay_cd.cd_ndevs; i++) { dv = device_lookup(&wsdisplay_cd, i); if (!dv) continue; res = wsdisplay_dosync(device_private(dv), resume); if (res) return res; } return 0; } static bool wsdisplay_suspend(device_t dv, const pmf_qual_t *qual) { struct wsdisplay_softc *sc = device_private(dv); struct wsscreen *scr = sc->sc_focus; if (sc->sc_flags & SC_XATTACHED) { KASSERT(scr); KASSERT(scr->scr_syncops); } #if 1 /* * XXX X servers should have been detached earlier. * pmf currently ignores our return value and suspends the system * after device suspend failures. We try to avoid bigger damage * and try to detach the X server here. This is not safe because * other parts of the system which the X server deals with * might already be suspended. */ if (sc->sc_flags & SC_XATTACHED) { printf("%s: emergency X server detach\n", device_xname(dv)); wsdisplay_dosync(sc, 0); } #endif return !(sc->sc_flags & SC_XATTACHED); } /* Print function (for parent devices). 
*/ int wsdisplaydevprint(void *aux, const char *pnp) { #if 0 /* -Wunused */ struct wsdisplaydev_attach_args *ap = aux; #endif if (pnp) aprint_normal("wsdisplay at %s", pnp); return UNCONF; } static void wsdisplay_common_attach(struct wsdisplay_softc *sc, int console, int kbdmux, const struct wsscreen_list *scrdata, const struct wsdisplay_accessops *accessops, void *accesscookie) { int i, start=0; #if NWSKBD > 0 struct wsevsrc *kme; #if NWSMUX > 0 struct wsmux_softc *mux; if (kbdmux >= 0) mux = wsmux_getmux(kbdmux); else mux = wsmux_create("dmux", device_unit(sc->sc_dev)); sc->sc_input = &mux->sc_base; mux->sc_base.me_dispdv = sc->sc_dev; aprint_normal(" kbdmux %d", kbdmux); #else if (kbdmux >= 0) aprint_normal(" (kbdmux ignored)"); #endif #endif sc->sc_isconsole = console; if (console) { KASSERT(wsdisplay_console_initted); KASSERT(wsdisplay_console_device == NULL); sc->sc_scr[0] = wsscreen_attach(sc, 1, 0, 0, 0, 0, 0, 0); wsdisplay_console_device = sc; aprint_normal(": console (%s, %s emulation)", wsdisplay_console_conf.scrdata->name, wsdisplay_console_conf.wsemul->name); #if NWSKBD > 0 kme = wskbd_set_console_display(sc->sc_dev, sc->sc_input); if (kme != NULL) aprint_normal(", using %s", device_xname(kme->me_dv)); #if NWSMUX == 0 sc->sc_input = kme; #endif #endif sc->sc_focusidx = 0; sc->sc_focus = sc->sc_scr[0]; start = 1; wsdisplay_console_attached = 1; } aprint_normal("\n"); aprint_naive("\n"); #if NWSKBD > 0 && NWSMUX > 0 wsmux_set_display(mux, sc->sc_dev); #endif mutex_init(&sc->sc_flagsmtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&sc->sc_flagscv, "wssw"); sc->sc_accessops = accessops; sc->sc_accesscookie = accesscookie; sc->sc_scrdata = scrdata; #ifdef WSDISPLAY_SCROLLSUPPORT sc->sc_scroll_values = wsdisplay_default_scroll_values; #endif /* * Set up a number of virtual screens if wanted. The * WSDISPLAYIO_ADDSCREEN ioctl is more flexible, so this code * is for special cases like installation kernels. 
*/ for (i = start; i < wsdisplay_defaultscreens; i++) { if (wsdisplay_addscreen(sc, i, 0, 0)) break; } if (i > start) wsdisplay_addscreen_print(sc, start, i-start); if (!pmf_device_register(sc->sc_dev, wsdisplay_suspend, NULL)) aprint_error_dev(sc->sc_dev, "couldn't establish power handler\n"); } void wsdisplay_cnattach(const struct wsscreen_descr *type, void *cookie, int ccol, int crow, long defattr) { const struct wsemul_ops *wsemul; KASSERT(wsdisplay_console_initted < 2); KASSERT(type->nrows > 0); KASSERT(type->ncols > 0); KASSERT(crow < type->nrows); KASSERT(ccol < type->ncols); wsdisplay_console_conf.emulops = type->textops; wsdisplay_console_conf.emulcookie = cookie; wsdisplay_console_conf.scrdata = type; wsemul = wsemul_pick(0); /* default */ wsdisplay_console_conf.wsemul = wsemul; wsdisplay_console_conf.wsemulcookie = (*wsemul->cnattach)(type, cookie, ccol, crow, defattr); if (cn_tab != &wsdisplay_cons) wsdisplay_ocn = cn_tab; if (wsdisplay_ocn != NULL && wsdisplay_ocn->cn_halt != NULL) wsdisplay_ocn->cn_halt(wsdisplay_ocn->cn_dev); cn_tab = &wsdisplay_cons; wsdisplay_console_initted = 2; } void wsdisplay_preattach(const struct wsscreen_descr *type, void *cookie, int ccol, int crow, long defattr) { const struct wsemul_ops *wsemul; KASSERT(!wsdisplay_console_initted); KASSERT(type->nrows > 0); KASSERT(type->ncols > 0); KASSERT(crow < type->nrows); KASSERT(ccol < type->ncols); wsdisplay_console_conf.emulops = type->textops; wsdisplay_console_conf.emulcookie = cookie; wsdisplay_console_conf.scrdata = type; wsemul = wsemul_pick(0); /* default */ wsdisplay_console_conf.wsemul = wsemul; wsdisplay_console_conf.wsemulcookie = (*wsemul->cnattach)(type, cookie, ccol, crow, defattr); if (cn_tab != &wsdisplay_cons) wsdisplay_ocn = cn_tab; if (wsdisplay_ocn != NULL && wsdisplay_ocn->cn_halt != NULL) wsdisplay_ocn->cn_halt(wsdisplay_ocn->cn_dev); cn_tab = &wsdisplay_cons; wsdisplay_console_initted = 1; } void wsdisplay_cndetach(void) { KASSERT(wsdisplay_console_initted 
== 2); cn_tab = wsdisplay_ocn; wsdisplay_console_initted = 0; } /* * Tty and cdevsw functions. */ int wsdisplayopen(dev_t dev, int flag, int mode, struct lwp *l) { struct wsdisplay_softc *sc; struct tty *tp; int newopen, error; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (sc == NULL) /* make sure it was attached */ return ENXIO; if (ISWSDISPLAYSTAT(dev)) { wsevent_init(&sc->evar, l->l_proc); return 0; } if (ISWSDISPLAYCTL(dev)) return 0; if (WSDISPLAYSCREEN(dev) >= WSDISPLAY_MAXSCREEN) return ENXIO; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return ENXIO; if (WSSCREEN_HAS_TTY(scr)) { tp = scr->scr_tty; tp->t_oproc = wsdisplaystart; tp->t_param = wsdisplayparam; tp->t_dev = dev; newopen = (tp->t_state & TS_ISOPEN) == 0; if (kauth_authorize_device_tty(l->l_cred, KAUTH_DEVICE_TTY_OPEN, tp)) return EBUSY; if (newopen) { ttychars(tp); tp->t_iflag = TTYDEF_IFLAG; tp->t_oflag = TTYDEF_OFLAG; tp->t_cflag = TTYDEF_CFLAG; tp->t_lflag = TTYDEF_LFLAG; tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; wsdisplayparam(tp, &tp->t_termios); ttsetwater(tp); } tp->t_state |= TS_CARR_ON; error = ((*tp->t_linesw->l_open)(dev, tp)); if (error) return error; if (newopen && WSSCREEN_HAS_EMULATOR(scr)) { /* set window sizes as appropriate, and reset the emulation */ tp->t_winsize.ws_row = scr->scr_dconf->scrdata->nrows; tp->t_winsize.ws_col = scr->scr_dconf->scrdata->ncols; /* wsdisplay_set_emulation() */ } } scr->scr_flags |= SCR_OPEN; return 0; } int wsdisplayclose(dev_t dev, int flag, int mode, struct lwp *l) { device_t dv; struct wsdisplay_softc *sc; struct tty *tp; struct wsscreen *scr; dv = device_lookup(&wsdisplay_cd, WSDISPLAYUNIT(dev)); sc = device_private(dv); if (ISWSDISPLAYSTAT(dev)) { wsevent_fini(&sc->evar); return 0; } if (ISWSDISPLAYCTL(dev)) return 0; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return 0; if (WSSCREEN_HAS_TTY(scr)) { if (scr->scr_hold_screen) { int s; /* XXX RESET KEYBOARD LEDS, etc. 
*/ s = spltty(); /* avoid conflict with keyboard */ wsdisplay_kbdholdscreen(dv, 0); splx(s); } tp = scr->scr_tty; (*tp->t_linesw->l_close)(tp, flag); ttyclose(tp); } if (scr->scr_syncops) (*scr->scr_syncops->destroy)(scr->scr_synccookie); if (WSSCREEN_HAS_EMULATOR(scr)) { scr->scr_flags &= ~SCR_GRAPHICS; (*scr->scr_dconf->wsemul->reset)(scr->scr_dconf->wsemulcookie, WSEMUL_RESET); if (wsdisplay_clearonclose) (*scr->scr_dconf->wsemul->reset) (scr->scr_dconf->wsemulcookie, WSEMUL_CLEARSCREEN); } #ifdef WSDISPLAY_COMPAT_RAWKBD if (scr->scr_rawkbd) { int kbmode = WSKBD_TRANSLATED; (void)wsdisplay_internal_ioctl(sc, scr, WSKBDIO_SETMODE, (void *)&kbmode, 0, l); } #endif scr->scr_flags &= ~SCR_OPEN; return 0; } int wsdisplayread(dev_t dev, struct uio *uio, int flag) { struct wsdisplay_softc *sc; struct tty *tp; struct wsscreen *scr; int error; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYSTAT(dev)) { error = wsevent_read(&sc->evar, uio, flag); return error; } if (ISWSDISPLAYCTL(dev)) return 0; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return ENXIO; if (!WSSCREEN_HAS_TTY(scr)) return ENODEV; tp = scr->scr_tty; return (*tp->t_linesw->l_read)(tp, uio, flag); } int wsdisplaywrite(dev_t dev, struct uio *uio, int flag) { struct wsdisplay_softc *sc; struct tty *tp; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYSTAT(dev)) { return 0; } if (ISWSDISPLAYCTL(dev)) return 0; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return ENXIO; if (!WSSCREEN_HAS_TTY(scr)) return ENODEV; tp = scr->scr_tty; return (*tp->t_linesw->l_write)(tp, uio, flag); } int wsdisplaypoll(dev_t dev, int events, struct lwp *l) { struct wsdisplay_softc *sc; struct tty *tp; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYSTAT(dev)) return wsevent_poll(&sc->evar, events, l); if (ISWSDISPLAYCTL(dev)) return 0; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == 
NULL) return POLLHUP; if (!WSSCREEN_HAS_TTY(scr)) return POLLERR; tp = scr->scr_tty; return (*tp->t_linesw->l_poll)(tp, events, l); } int wsdisplaykqfilter(dev_t dev, struct knote *kn) { struct wsdisplay_softc *sc; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYCTL(dev)) return 1; if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return 1; if (WSSCREEN_HAS_TTY(scr)) return ttykqfilter(dev, kn); else return 1; } struct tty * wsdisplaytty(dev_t dev) { struct wsdisplay_softc *sc; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYSTAT(dev)) panic("wsdisplaytty() on status device"); if (ISWSDISPLAYCTL(dev)) panic("wsdisplaytty() on ctl device"); if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return NULL; return scr->scr_tty; } int wsdisplayioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { device_t dv; struct wsdisplay_softc *sc; struct tty *tp; int error; struct wsscreen *scr; dv = device_lookup(&wsdisplay_cd, WSDISPLAYUNIT(dev)); sc = device_private(dv); #ifdef WSDISPLAY_COMPAT_USL error = wsdisplay_usl_ioctl1(dv, cmd, data, flag, l); if (error != EPASSTHROUGH) return error; #endif if (ISWSDISPLAYSTAT(dev)) return wsdisplay_stat_ioctl(sc, cmd, data, flag, l); if (ISWSDISPLAYCTL(dev)) return wsdisplay_cfg_ioctl(sc, cmd, data, flag, l); if ((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return ENXIO; if (WSSCREEN_HAS_TTY(scr)) { tp = scr->scr_tty; /* do the line discipline ioctls first */ error = (*tp->t_linesw->l_ioctl)(tp, cmd, data, flag, l); if (error != EPASSTHROUGH) return error; /* then the tty ioctls */ error = ttioctl(tp, cmd, data, flag, l); if (error != EPASSTHROUGH) return error; } #ifdef WSDISPLAY_COMPAT_USL error = wsdisplay_usl_ioctl2(sc, scr, cmd, data, flag, l); if (error != EPASSTHROUGH) return error; #endif return wsdisplay_internal_ioctl(sc, scr, cmd, data, flag, l); } int wsdisplay_param(device_t dv, u_long cmd, 
struct wsdisplay_param *dp) { struct wsdisplay_softc *sc = device_private(dv); return (*sc->sc_accessops->ioctl)(sc->sc_accesscookie, sc->sc_focus->scr_dconf->emulcookie, cmd, (void *)dp, 0, NULL); } int wsdisplay_internal_ioctl(struct wsdisplay_softc *sc, struct wsscreen *scr, u_long cmd, void *data, int flag, struct lwp *l) { int error; char namebuf[32]; struct wsdisplay_font fd; #ifdef WSDISPLAY_SCROLLSUPPORT struct wsdisplay_scroll_data *ksdp, *usdp; #endif #if NWSKBD > 0 struct wsevsrc *inp; #ifdef WSDISPLAY_COMPAT_RAWKBD switch (cmd) { case WSKBDIO_SETMODE: scr->scr_rawkbd = (*(int *)data == WSKBD_RAW); return wsdisplay_update_rawkbd(sc, scr); case WSKBDIO_GETMODE: *(int *)data = (scr->scr_rawkbd ? WSKBD_RAW : WSKBD_TRANSLATED); return 0; } #endif inp = sc->sc_input; if (inp == NULL) return ENXIO; error = wsevsrc_display_ioctl(inp, cmd, data, flag, l); if (error != EPASSTHROUGH) return error; #endif /* NWSKBD > 0 */ switch (cmd) { case WSDISPLAYIO_GMODE: if (scr->scr_flags & SCR_GRAPHICS) { if (scr->scr_flags & SCR_DUMBFB) *(u_int *)data = WSDISPLAYIO_MODE_DUMBFB; else *(u_int *)data = WSDISPLAYIO_MODE_MAPPED; } else *(u_int *)data = WSDISPLAYIO_MODE_EMUL; return 0; case WSDISPLAYIO_SMODE: #define d (*(int *)data) if (d != WSDISPLAYIO_MODE_EMUL && d != WSDISPLAYIO_MODE_MAPPED && d != WSDISPLAYIO_MODE_DUMBFB) return EINVAL; if (WSSCREEN_HAS_EMULATOR(scr)) { scr->scr_flags &= ~SCR_GRAPHICS; if (d == WSDISPLAYIO_MODE_MAPPED || d == WSDISPLAYIO_MODE_DUMBFB) scr->scr_flags |= SCR_GRAPHICS | ((d == WSDISPLAYIO_MODE_DUMBFB) ? SCR_DUMBFB : 0); } else if (d == WSDISPLAYIO_MODE_EMUL) return EINVAL; (void)(*sc->sc_accessops->ioctl)(sc->sc_accesscookie, scr->scr_dconf->emulcookie, cmd, data, flag, l); return 0; #undef d #ifdef WSDISPLAY_SCROLLSUPPORT #define SETSCROLLLINES(dstp, srcp, dfltp) \ do { \ (dstp)->fastlines = ((srcp)->which & \ WSDISPLAY_SCROLL_DOFASTLINES) ? 
\ (srcp)->fastlines : (dfltp)->fastlines; \ (dstp)->slowlines = ((srcp)->which & \ WSDISPLAY_SCROLL_DOSLOWLINES) ? \ (srcp)->slowlines : (dfltp)->slowlines; \ (dstp)->which = WSDISPLAY_SCROLL_DOALL; \ } while (0) case WSDISPLAYIO_DSSCROLL: usdp = (struct wsdisplay_scroll_data *)data; ksdp = &sc->sc_scroll_values; SETSCROLLLINES(ksdp, usdp, ksdp); return 0; case WSDISPLAYIO_DGSCROLL: usdp = (struct wsdisplay_scroll_data *)data; ksdp = &sc->sc_scroll_values; SETSCROLLLINES(usdp, ksdp, ksdp); return 0; #else case WSDISPLAYIO_DSSCROLL: case WSDISPLAYIO_DGSCROLL: return ENODEV; #endif case WSDISPLAYIO_SFONT: #define d ((struct wsdisplay_usefontdata *)data) if (!sc->sc_accessops->load_font) return EINVAL; if (d->name) { error = copyinstr(d->name, namebuf, sizeof(namebuf), 0); if (error) return error; fd.name = namebuf; } else fd.name = 0; fd.data = 0; error = (*sc->sc_accessops->load_font)(sc->sc_accesscookie, scr->scr_dconf->emulcookie, &fd); if (!error && WSSCREEN_HAS_EMULATOR(scr)) { (*scr->scr_dconf->wsemul->reset) (scr->scr_dconf->wsemulcookie, WSEMUL_SYNCFONT); if (scr->scr_dconf->wsemul->resize) { (*scr->scr_dconf->wsemul->resize) (scr->scr_dconf->wsemulcookie, scr->scr_dconf->scrdata); /* update the tty's size */ scr->scr_tty->t_winsize.ws_row = scr->scr_dconf->scrdata->nrows; scr->scr_tty->t_winsize.ws_col = scr->scr_dconf->scrdata->ncols; /* send SIGWINCH to the process group on our tty */ kpreempt_disable(); ttysig(scr->scr_tty, TTYSIG_PG1, SIGWINCH); kpreempt_enable(); } } return error; #undef d #ifdef WSDISPLAY_CUSTOM_OUTPUT case WSDISPLAYIO_GMSGATTRS: #define d ((struct wsdisplay_msgattrs *)data) (*scr->scr_dconf->wsemul->getmsgattrs) (scr->scr_dconf->wsemulcookie, d); return 0; #undef d case WSDISPLAYIO_SMSGATTRS: { #define d ((struct wsdisplay_msgattrs *)data) int i; for (i = 0; i < WSDISPLAY_MAXSCREEN; i++) if (sc->sc_scr[i] != NULL) (*sc->sc_scr[i]->scr_dconf->wsemul->setmsgattrs) (sc->sc_scr[i]->scr_dconf->wsemulcookie, 
sc->sc_scr[i]->scr_dconf->scrdata, d); } return 0; #undef d #else case WSDISPLAYIO_GMSGATTRS: case WSDISPLAYIO_SMSGATTRS: return ENODEV; #endif case WSDISPLAYIO_SETVERSION: return wsevent_setversion(&sc->evar, *(int *)data); } /* check ioctls for display */ return (*sc->sc_accessops->ioctl)(sc->sc_accesscookie, scr->scr_dconf->emulcookie, cmd, data, flag, l); } int wsdisplay_stat_ioctl(struct wsdisplay_softc *sc, u_long cmd, void *data, int flag, struct lwp *l) { switch (cmd) { case WSDISPLAYIO_GETACTIVESCREEN: *(int*)data = wsdisplay_getactivescreen(sc); return 0; } return EPASSTHROUGH; } int wsdisplay_cfg_ioctl(struct wsdisplay_softc *sc, u_long cmd, void *data, int flag, struct lwp *l) { int error; char *type, typebuf[16], *emul, emulbuf[16]; void *tbuf; u_int fontsz; #if defined(COMPAT_14) && NWSKBD > 0 struct wsmux_device wsmuxdata; #endif #if NWSKBD > 0 struct wsevsrc *inp; #endif switch (cmd) { case WSDISPLAYIO_ADDSCREEN: #define d ((struct wsdisplay_addscreendata *)data) if (d->screentype) { error = copyinstr(d->screentype, typebuf, sizeof(typebuf), 0); if (error) return error; type = typebuf; } else type = 0; if (d->emul) { error = copyinstr(d->emul, emulbuf, sizeof(emulbuf),0); if (error) return error; emul = emulbuf; } else emul = 0; if ((error = wsdisplay_addscreen(sc, d->idx, type, emul)) == 0) wsdisplay_addscreen_print(sc, d->idx, 0); return error; #undef d case WSDISPLAYIO_DELSCREEN: #define d ((struct wsdisplay_delscreendata *)data) return wsdisplay_delscreen(sc, d->idx, d->flags); #undef d case WSDISPLAYIO_LDFONT: #define d ((struct wsdisplay_font *)data) if (!sc->sc_accessops->load_font) return EINVAL; if (d->name) { error = copyinstr(d->name, typebuf, sizeof(typebuf), 0); if (error) return error; d->name = typebuf; } else d->name = "loaded"; /* ??? 
*/ fontsz = d->fontheight * d->stride * d->numchars; if (fontsz > WSDISPLAY_MAXFONTSZ) return EINVAL; tbuf = malloc(fontsz, M_DEVBUF, M_WAITOK); error = copyin(d->data, tbuf, fontsz); if (error) { free(tbuf, M_DEVBUF); return error; } d->data = tbuf; error = (*sc->sc_accessops->load_font)(sc->sc_accesscookie, 0, d); free(tbuf, M_DEVBUF); #undef d return error; #if NWSKBD > 0 #ifdef COMPAT_14 case _O_WSDISPLAYIO_SETKEYBOARD: #define d ((struct wsdisplay_kbddata *)data) inp = sc->sc_input; if (inp == NULL) return ENXIO; switch (d->op) { case _O_WSDISPLAY_KBD_ADD: if (d->idx == -1) { d->idx = wskbd_pickfree(); if (d->idx == -1) return ENXIO; } wsmuxdata.type = WSMUX_KBD; wsmuxdata.idx = d->idx; return wsevsrc_ioctl(inp, WSMUX_ADD_DEVICE, &wsmuxdata, flag, l); case _O_WSDISPLAY_KBD_DEL: wsmuxdata.type = WSMUX_KBD; wsmuxdata.idx = d->idx; return wsevsrc_ioctl(inp, WSMUX_REMOVE_DEVICE, &wsmuxdata, flag, l); default: return EINVAL; } #undef d #endif case WSMUXIO_ADD_DEVICE: #define d ((struct wsmux_device *)data) if (d->idx == -1 && d->type == WSMUX_KBD) d->idx = wskbd_pickfree(); #undef d /* FALLTHROUGH */ case WSMUXIO_INJECTEVENT: case WSMUXIO_REMOVE_DEVICE: case WSMUXIO_LIST_DEVICES: inp = sc->sc_input; if (inp == NULL) return ENXIO; return wsevsrc_ioctl(inp, cmd, data, flag, l); #endif /* NWSKBD > 0 */ } return EPASSTHROUGH; } int wsdisplay_stat_inject(device_t dv, u_int type, int value) { struct wsdisplay_softc *sc = device_private(dv); struct wseventvar *evar; struct wscons_event event; evar = &sc->evar; if (evar == NULL) return 0; if (evar->q == NULL) return 1; event.type = type; event.value = value; if (wsevent_inject(evar, &event, 1) != 0) { log(LOG_WARNING, "wsdisplay: event queue overflow\n"); return 1; } return 0; } paddr_t wsdisplaymmap(dev_t dev, off_t offset, int prot) { struct wsdisplay_softc *sc; struct wsscreen *scr; sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(dev)); if (ISWSDISPLAYSTAT(dev)) return -1; if (ISWSDISPLAYCTL(dev)) return -1; if 
((scr = sc->sc_scr[WSDISPLAYSCREEN(dev)]) == NULL) return -1; if (!(scr->scr_flags & SCR_GRAPHICS)) return -1; /* pass mmap to display */ return (*sc->sc_accessops->mmap)(sc->sc_accesscookie, scr->scr_dconf->emulcookie, offset, prot); } void wsdisplaystart(struct tty *tp) { struct wsdisplay_softc *sc; struct wsscreen *scr; int s, n; u_char *tbuf; s = spltty(); if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) { splx(s); return; } sc = device_lookup_private(&wsdisplay_cd, WSDISPLAYUNIT(tp->t_dev)); if ((scr = sc->sc_scr[WSDISPLAYSCREEN(tp->t_dev)]) == NULL) { splx(s); return; } if (scr->scr_hold_screen) { tp->t_state |= TS_TIMEOUT; splx(s); return; } tp->t_state |= TS_BUSY; splx(s); #ifdef DIAGNOSTIC scr->scr_in_ttyoutput = 1; #endif /* * Drain output from ring buffer. * The output will normally be in one contiguous chunk, but when the * ring wraps, it will be in two pieces.. one at the end of the ring, * the other at the start. For performance, rather than loop here, * we output one chunk, see if there's another one, and if so, output * it too. 
*/ n = ndqb(&tp->t_outq, 0); tbuf = tp->t_outq.c_cf; if (!(scr->scr_flags & SCR_GRAPHICS)) { KASSERT(WSSCREEN_HAS_EMULATOR(scr)); (*scr->scr_dconf->wsemul->output)(scr->scr_dconf->wsemulcookie, tbuf, n, 0); #ifdef WSDISPLAY_MULTICONS if (wsdisplay_multicons_enable && scr->scr_dconf == &wsdisplay_console_conf && wsdisplay_ocn && wsdisplay_ocn->cn_putc) { for (int i = 0; i < n; i++) wsdisplay_ocn->cn_putc( wsdisplay_ocn->cn_dev, tbuf[i]); } #endif } ndflush(&tp->t_outq, n); if ((n = ndqb(&tp->t_outq, 0)) > 0) { tbuf = tp->t_outq.c_cf; if (!(scr->scr_flags & SCR_GRAPHICS)) { KASSERT(WSSCREEN_HAS_EMULATOR(scr)); (*scr->scr_dconf->wsemul->output) (scr->scr_dconf->wsemulcookie, tbuf, n, 0); #ifdef WSDISPLAY_MULTICONS if (wsdisplay_multicons_enable && scr->scr_dconf == &wsdisplay_console_conf && wsdisplay_ocn && wsdisplay_ocn->cn_putc) { for (int i = 0; i < n; i++) wsdisplay_ocn->cn_putc( wsdisplay_ocn->cn_dev, tbuf[i]); } #endif } ndflush(&tp->t_outq, n); } #ifdef DIAGNOSTIC scr->scr_in_ttyoutput = 0; #endif s = spltty(); tp->t_state &= ~TS_BUSY; /* Come back if there's more to do */ if (ttypull(tp)) { tp->t_state |= TS_TIMEOUT; callout_schedule(&tp->t_rstrt_ch, (hz > 128) ? (hz / 128) : 1); } splx(s); } void wsdisplaystop(struct tty *tp, int flag) { int s; s = spltty(); if (ISSET(tp->t_state, TS_BUSY)) if (!ISSET(tp->t_state, TS_TTSTOP)) SET(tp->t_state, TS_FLUSH); splx(s); } /* Set line parameters. */ int wsdisplayparam(struct tty *tp, struct termios *t) { tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; tp->t_cflag = t->c_cflag; return 0; } /* * Callbacks for the emulation code. */ void wsdisplay_emulbell(void *v) { struct wsscreen *scr = v; if (scr == NULL) /* console, before real attach */ return; if (scr->scr_flags & SCR_GRAPHICS) /* can this happen? 
*/ return; (void) wsdisplay_internal_ioctl(scr->sc, scr, WSKBDIO_BELL, NULL, FWRITE, NULL); } void wsdisplay_emulinput(void *v, const u_char *data, u_int count) { struct wsscreen *scr = v; struct tty *tp; int (*ifcn)(int, struct tty *); if (v == NULL) /* console, before real attach */ return; if (scr->scr_flags & SCR_GRAPHICS) /* XXX can't happen */ return; if (!WSSCREEN_HAS_TTY(scr)) return; tp = scr->scr_tty; /* * XXX bad hack to work around locking problems in tty.c: * ttyinput() will try to lock again, causing deadlock. * We assume that wsdisplay_emulinput() can only be called * from within wsdisplaystart(), and thus the tty lock * is already held. Use an entry point which doesn't lock. */ KASSERT(scr->scr_in_ttyoutput); ifcn = tp->t_linesw->l_rint; if (ifcn == ttyinput) ifcn = ttyinput_wlock; while (count-- > 0) (*ifcn)(*data++, tp); } /* * Calls from the keyboard interface. */ void wsdisplay_kbdinput(device_t dv, keysym_t ks) { struct wsdisplay_softc *sc = device_private(dv); struct wsscreen *scr; const char *dp; int count; struct tty *tp; KASSERT(sc != NULL); scr = sc->sc_focus; if (!scr || !WSSCREEN_HAS_TTY(scr)) return; tp = scr->scr_tty; if (KS_GROUP(ks) == KS_GROUP_Plain && KS_VALUE(ks) <= 0x7f) (*tp->t_linesw->l_rint)(KS_VALUE(ks), tp); else if (WSSCREEN_HAS_EMULATOR(scr)) { count = (*scr->scr_dconf->wsemul->translate) (scr->scr_dconf->wsemulcookie, ks, &dp); while (count-- > 0) (*tp->t_linesw->l_rint)((unsigned char)(*dp++), tp); } } #if defined(WSDISPLAY_COMPAT_RAWKBD) int wsdisplay_update_rawkbd(struct wsdisplay_softc *sc, struct wsscreen *scr) { #if NWSKBD > 0 int s, raw, data, error; struct wsevsrc *inp; s = spltty(); raw = (scr ? scr->scr_rawkbd : 0); if (scr != sc->sc_focus || sc->sc_rawkbd == raw) { splx(s); return 0; } data = raw ? 
WSKBD_RAW : WSKBD_TRANSLATED; inp = sc->sc_input; if (inp == NULL) { splx(s); return ENXIO; } error = wsevsrc_display_ioctl(inp, WSKBDIO_SETMODE, &data, 0, 0); if (!error) sc->sc_rawkbd = raw; splx(s); return error; #else return 0; #endif } #endif static void wsdisplay_switch3_cb(void *arg, int error, int waitok) { device_t dv = arg; wsdisplay_switch3(dv, error, waitok); } static int wsdisplay_switch3(device_t dv, int error, int waitok) { struct wsdisplay_softc *sc = device_private(dv); int no; struct wsscreen *scr; if (!(sc->sc_flags & SC_SWITCHPENDING)) { aprint_error_dev(dv, "wsdisplay_switch3: not switching\n"); return EINVAL; } no = sc->sc_screenwanted; if (no < 0 || no >= WSDISPLAY_MAXSCREEN) panic("wsdisplay_switch3: invalid screen %d", no); scr = sc->sc_scr[no]; if (!scr) { aprint_error_dev(dv, "wsdisplay_switch3: screen %d disappeared\n", no); error = ENXIO; } if (error) { /* try to recover, avoid recursion */ if (sc->sc_oldscreen == WSDISPLAY_NULLSCREEN) { aprint_error_dev(dv, "wsdisplay_switch3: giving up\n"); sc->sc_focus = 0; #ifdef WSDISPLAY_COMPAT_RAWKBD wsdisplay_update_rawkbd(sc, 0); #endif sc->sc_flags &= ~SC_SWITCHPENDING; return error; } sc->sc_screenwanted = sc->sc_oldscreen; sc->sc_oldscreen = WSDISPLAY_NULLSCREEN; return wsdisplay_switch1(dv, 0, waitok); } if (scr->scr_syncops && !error) sc->sc_flags |= SC_XATTACHED; sc->sc_flags &= ~SC_SWITCHPENDING; if (!error && (scr->scr_flags & SCR_WAITACTIVE)) wakeup(scr); return error; } static void wsdisplay_switch2_cb(void *arg, int error, int waitok) { device_t dv = arg; wsdisplay_switch2(dv, error, waitok); } static int wsdisplay_switch2(device_t dv, int error, int waitok) { struct wsdisplay_softc *sc = device_private(dv); int no; struct wsscreen *scr; if (!(sc->sc_flags & SC_SWITCHPENDING)) { aprint_error_dev(dv, "wsdisplay_switch2: not switching\n"); return EINVAL; } no = sc->sc_screenwanted; if (no < 0 || no >= WSDISPLAY_MAXSCREEN) panic("wsdisplay_switch2: invalid screen %d", no); scr = 
sc->sc_scr[no];
	if (!scr) {
		aprint_error_dev(dv,
		    "wsdisplay_switch2: screen %d disappeared\n", no);
		error = ENXIO;
	}

	if (error) {
		/* try to recover, avoid recursion */
		if (sc->sc_oldscreen == WSDISPLAY_NULLSCREEN) {
			aprint_error_dev(dv,
			    "wsdisplay_switch2: giving up\n");
			sc->sc_focus = 0;
			sc->sc_flags &= ~SC_SWITCHPENDING;
			return error;
		}

		/* Restart the switch, aimed back at the old screen. */
		sc->sc_screenwanted = sc->sc_oldscreen;
		sc->sc_oldscreen = WSDISPLAY_NULLSCREEN;
		return wsdisplay_switch1(dv, 0, waitok);
	}

	sc->sc_focusidx = no;
	sc->sc_focus = scr;

#ifdef WSDISPLAY_COMPAT_RAWKBD
	(void) wsdisplay_update_rawkbd(sc, scr);
#endif
	/* keyboard map??? */

	if (scr->scr_syncops &&
	    !(sc->sc_isconsole && wsdisplay_cons_pollmode)) {
		error = (*scr->scr_syncops->attach)(scr->scr_synccookie,
		    waitok, wsdisplay_switch3_cb, dv);
		if (error == EAGAIN) {
			/* switch will be done asynchronously */
			return 0;
		}
	}

	return wsdisplay_switch3(dv, error, waitok);
}

/* Completion callback for the asynchronous first switch stage. */
static void
wsdisplay_switch1_cb(void *arg, int error, int waitok)
{
	device_t dv = arg;

	wsdisplay_switch1(dv, error, waitok);
}

/*
 * First stage of the screen-switch state machine: ask the driver to
 * display the wanted screen, then continue with wsdisplay_switch2().
 */
static int
wsdisplay_switch1(device_t dv, int error, int waitok)
{
	struct wsdisplay_softc *sc = device_private(dv);
	int no;
	struct wsscreen *scr;

	if (!(sc->sc_flags & SC_SWITCHPENDING)) {
		aprint_error_dev(dv, "wsdisplay_switch1: not switching\n");
		return EINVAL;
	}

	no = sc->sc_screenwanted;
	if (no == WSDISPLAY_NULLSCREEN) {
		/* Switching to "no screen": just drop focus. */
		sc->sc_flags &= ~SC_SWITCHPENDING;
		if (!error) {
			sc->sc_flags &= ~SC_XATTACHED;
			sc->sc_focus = 0;
		}
		wakeup(sc);
		return error;
	}
	if (no < 0 || no >= WSDISPLAY_MAXSCREEN)
		panic("wsdisplay_switch1: invalid screen %d", no);
	scr = sc->sc_scr[no];
	if (!scr) {
		aprint_error_dev(dv,
		    "wsdisplay_switch1: screen %d disappeared\n", no);
		error = ENXIO;
	}

	if (error) {
		sc->sc_flags &= ~SC_SWITCHPENDING;
		return error;
	}

	sc->sc_flags &= ~SC_XATTACHED;

	/* In console poll mode the switch must complete synchronously. */
	error = (*sc->sc_accessops->show_screen)(sc->sc_accesscookie,
	    scr->scr_dconf->emulcookie, waitok,
	    sc->sc_isconsole && wsdisplay_cons_pollmode ?
	      0 : wsdisplay_switch2_cb, dv);
	if (error == EAGAIN) {
		/* switch will be done asynchronously */
		return 0;
	}

	return wsdisplay_switch2(dv, error, waitok);
}

/*
 * Initiate a switch of the display focus to screen `no'.  May complete
 * asynchronously through the _cb continuations above.
 */
int
wsdisplay_switch(device_t dv, int no, int waitok)
{
	struct wsdisplay_softc *sc = device_private(dv);
	int s, res = 0;
	struct wsscreen *scr;

	if (no != WSDISPLAY_NULLSCREEN) {
		if ((no < 0 || no >= WSDISPLAY_MAXSCREEN))
			return EINVAL;
		if (sc->sc_scr[no] == NULL)
			return ENXIO;
	}

	wsdisplay_stat_inject(dv, WSCONS_EVENT_SCREEN_SWITCH, no);

	s = spltty();

	/* Already there? */
	if ((sc->sc_focus && no == sc->sc_focusidx) ||
	    (sc->sc_focus == NULL && no == WSDISPLAY_NULLSCREEN)) {
		splx(s);
		return 0;
	}

	/* Only one switch may be in flight at a time. */
	if (sc->sc_flags & SC_SWITCHPENDING) {
		splx(s);
		return EBUSY;
	}

	sc->sc_flags |= SC_SWITCHPENDING;
	sc->sc_screenwanted = no;

	splx(s);

	scr = sc->sc_focus;
	if (!scr) {
		sc->sc_oldscreen = WSDISPLAY_NULLSCREEN;
		return wsdisplay_switch1(dv, 0, waitok);
	} else
		sc->sc_oldscreen = sc->sc_focusidx;

	if (scr->scr_syncops) {
		/* Detach the external synchronizer (VT switch code). */
		if (!(sc->sc_flags & SC_XATTACHED) ||
		    (sc->sc_isconsole && wsdisplay_cons_pollmode)) {
			/* nothing to do here */
			return wsdisplay_switch1(dv, 0, waitok);
		}
		res = (*scr->scr_syncops->detach)(scr->scr_synccookie,
		    waitok, wsdisplay_switch1_cb, dv);
		if (res == EAGAIN) {
			/* switch will be done asynchronously */
			return 0;
		}
	} else if (scr->scr_flags & SCR_GRAPHICS) {
		/* no way to save state */
		res = EBUSY;
	}

	return wsdisplay_switch1(dv, res, waitok);
}

/*
 * Reset the focused screen's emulation state or close it, depending
 * on `op'.
 */
void
wsdisplay_reset(device_t dv, enum wsdisplay_resetops op)
{
	struct wsdisplay_softc *sc = device_private(dv);
	struct wsscreen *scr;

	KASSERT(sc != NULL);
	scr = sc->sc_focus;

	if (!scr)
		return;

	switch (op) {
	case WSDISPLAY_RESETEMUL:
		if (!WSSCREEN_HAS_EMULATOR(scr))
			break;
		(*scr->scr_dconf->wsemul->reset)
		    (scr->scr_dconf->wsemulcookie, WSEMUL_RESET);
		break;
	case WSDISPLAY_RESETCLOSE:
		wsdisplay_closescreen(sc, scr);
		break;
	}
}

/* True if this display is the system console. */
bool
wsdisplay_isconsole(struct wsdisplay_softc *sc)
{

	return sc->sc_isconsole;
}

/*
 * Interface for (external) VT switch / process synchronization code
 */
int
wsscreen_attach_sync(struct wsscreen *scr, const struct wscons_syncops *ops,
    void *cookie)
{
	if (scr->scr_syncops) {
		/*
		 * The screen is already claimed.
		 * Check if the owner is still alive.
		 */
		if ((*scr->scr_syncops->check)(scr->scr_synccookie))
			return EBUSY;
	}
	scr->scr_syncops = ops;
	scr->scr_synccookie = cookie;
	if (scr == scr->sc->sc_focus)
		scr->sc->sc_flags |= SC_XATTACHED;
	return 0;
}

/* Release an external synchronizer previously attached to `scr'. */
int
wsscreen_detach_sync(struct wsscreen *scr)
{
	if (!scr->scr_syncops)
		return EINVAL;
	scr->scr_syncops = 0;
	if (scr == scr->sc->sc_focus)
		scr->sc->sc_flags &= ~SC_XATTACHED;
	return 0;
}

/*
 * Return the sync cookie for `scr' if it is owned by `ops'; the ops
 * pointer serves as the owner's identity.
 */
int
wsscreen_lookup_sync(struct wsscreen *scr,
    const struct wscons_syncops *ops, /* used as ID */
    void **cookiep)
{
	if (!scr->scr_syncops || ops != scr->scr_syncops)
		return EINVAL;
	*cookiep = scr->scr_synccookie;
	return 0;
}

/*
 * Interface to virtual screen stuff
 */
int
wsdisplay_maxscreenidx(struct wsdisplay_softc *sc)
{

	return (WSDISPLAY_MAXSCREEN - 1);
}

/* 0 = free, EBUSY = open, ENXIO = nonexistent, EINVAL = bad index. */
int
wsdisplay_screenstate(struct wsdisplay_softc *sc, int idx)
{
	if (idx < 0 || idx >= WSDISPLAY_MAXSCREEN)
		return EINVAL;
	if (!sc->sc_scr[idx])
		return ENXIO;
	return ((sc->sc_scr[idx]->scr_flags & SCR_OPEN) ? EBUSY : 0);
}

/* Index of the focused screen, or WSDISPLAY_NULLSCREEN if none. */
int
wsdisplay_getactivescreen(struct wsdisplay_softc *sc)
{

	return (sc->sc_focus ? sc->sc_focusidx : WSDISPLAY_NULLSCREEN);
}

/*
 * Sleep until screen `no' becomes the focused screen (or, for
 * WSDISPLAY_NULLSCREEN, until no screen has focus).  Interruptible.
 */
int
wsscreen_switchwait(struct wsdisplay_softc *sc, int no)
{
	struct wsscreen *scr;
	int s, res = 0;

	if (no == WSDISPLAY_NULLSCREEN) {
		s = spltty();
		while (sc->sc_focus && res == 0) {
			res = tsleep(sc, PCATCH, "wswait", 0);
		}
		splx(s);
		return res;
	}

	if (no < 0 || no >= WSDISPLAY_MAXSCREEN)
		return ENXIO;
	scr = sc->sc_scr[no];
	if (!scr)
		return ENXIO;

	s = spltty();
	if (scr != sc->sc_focus) {
		scr->scr_flags |= SCR_WAITACTIVE;
		res = tsleep(scr, PCATCH, "wswait", 0);
		if (scr != sc->sc_scr[no])
			res = ENXIO; /* disappeared in the meantime */
		else
			scr->scr_flags &= ~SCR_WAITACTIVE;
	}
	splx(s);
	return res;
}

/*
 * Hold or release output on the focused screen (scroll lock); on
 * release, kick the tty restart callout to resume output.
 */
void
wsdisplay_kbdholdscreen(device_t dv, int hold)
{
	struct wsdisplay_softc *sc = device_private(dv);
	struct wsscreen *scr;

	scr = sc->sc_focus;
	if (!scr)
		return;

	if (hold)
		scr->scr_hold_screen = 1;
	else {
		scr->scr_hold_screen = 0;
		callout_schedule(&scr->scr_tty->t_rstrt_ch, 0);
	}
}

#if NWSKBD > 0
/*
 * Attach the given keyboard event source to the console display
 * (directly, or via the console display's mux when NWSMUX > 0).
 */
void
wsdisplay_set_console_kbd(struct wsevsrc *src)
{
	if (wsdisplay_console_device == NULL) {
		src->me_dispdv = NULL;
		return;
	}
#if NWSMUX > 0
	if (wsmux_attach_sc((struct wsmux_softc *)
	    wsdisplay_console_device->sc_input, src)) {
		src->me_dispdv = NULL;
		return;
	}
#else
	wsdisplay_console_device->sc_input = src;
#endif
	src->me_dispdv = wsdisplay_console_device->sc_dev;
}
#endif /* NWSKBD > 0 */

/*
 * Console interface.
 */

/* Console output: emit one character through the console emulation. */
void
wsdisplay_cnputc(dev_t dev, int i)
{
	struct wsscreen_internal *dc;
	u_char c = i;

	if (!wsdisplay_console_initted)
		return;

	/* Don't scribble over a screen that is in graphics mode. */
	if ((wsdisplay_console_device != NULL) &&
	    (wsdisplay_console_device->sc_scr[0] != NULL) &&
	    (wsdisplay_console_device->sc_scr[0]->scr_flags & SCR_GRAPHICS))
		return;

	dc = &wsdisplay_console_conf;
	(*dc->wsemul->output)(dc->wsemulcookie, &c, 1, 1);

#ifdef WSDISPLAY_MULTICONS
	/* Mirror console output to the old console driver as well. */
	if (!wsdisplay_multicons_suspended &&
	    wsdisplay_multicons_enable &&
	    wsdisplay_ocn && wsdisplay_ocn->cn_putc)
		wsdisplay_ocn->cn_putc(wsdisplay_ocn->cn_dev, i);
#endif
}

/* Console input: poll the attached keyboard (and old console). -1 if none. */
static int
wsdisplay_getc(dev_t dev)
{
	int c;

	if (wsdisplay_cons_kbd_getc) {
		c = wsdisplay_cons_kbd_getc(wsdisplay_cons.cn_dev);
		if (c >= 0)
			return c;
	}

#ifdef WSDISPLAY_MULTICONS
	if (!wsdisplay_multicons_suspended &&
	    wsdisplay_multicons_enable &&
	    wsdisplay_ocn && wsdisplay_ocn->cn_getc) {
		c = wsdisplay_ocn->cn_getc(wsdisplay_ocn->cn_dev);
		if (c >= 0)
			return c;
	}
#endif

	return -1;
}

/* Enter/leave console polling mode and tell all interested drivers. */
static void
wsdisplay_pollc(dev_t dev, int on)
{

	wsdisplay_cons_pollmode = on;

	/* notify to fb drivers */
	if (wsdisplay_console_device != NULL &&
	    wsdisplay_console_device->sc_accessops->pollc != NULL)
		(*wsdisplay_console_device->sc_accessops->pollc)
		    (wsdisplay_console_device->sc_accesscookie, on);

	/* notify to kbd drivers */
	if (wsdisplay_cons_kbd_pollc)
		(*wsdisplay_cons_kbd_pollc)(NODEV, on);

#ifdef WSDISPLAY_MULTICONS
	/* notify to old console driver */
	if (!wsdisplay_multicons_suspended &&
	    wsdisplay_multicons_enable &&
	    wsdisplay_ocn && wsdisplay_ocn->cn_pollc)
		wsdisplay_ocn->cn_pollc(wsdisplay_ocn->cn_dev, on);
#endif
}

/* Install the console keyboard's getc/pollc/bell hooks. */
void
wsdisplay_set_cons_kbd(int (*get)(dev_t), void (*poll)(dev_t, int),
    void (*bell)(dev_t, u_int, u_int, u_int))
{
	wsdisplay_cons.cn_bell = bell;
	wsdisplay_cons_kbd_getc = get;
	wsdisplay_cons_kbd_pollc = poll;
}

/* Remove the console keyboard hooks installed above. */
void
wsdisplay_unset_cons_kbd(void)
{
	wsdisplay_cons.cn_bell = NULL;
	wsdisplay_cons_kbd_getc = NULL;
	wsdisplay_cons_kbd_pollc = NULL;
}

#ifdef WSDISPLAY_MULTICONS
/* Temporarily suppress mirroring to the old console driver. */
void
wsdisplay_multicons_suspend(bool suspend)
{

	wsdisplay_multicons_suspended = suspend;
}
#endif

#ifdef WSDISPLAY_MULTICONS
/* Create the hw.wsdisplay.multicons sysctl knob. */
SYSCTL_SETUP(sysctl_hw_wsdisplay_setup, "sysctl hw.wsdisplay subtree setup")
{
	const struct sysctlnode *wsdisplay_node;

	if (sysctl_createv(clog, 0, NULL, &wsdisplay_node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "wsdisplay", NULL,
	    NULL, 0, NULL, 0,
	    CTL_HW, CTL_CREATE, CTL_EOL) != 0)
		return;

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "multicons",
	    SYSCTL_DESCR("Enable wsdisplay multicons"),
	    NULL, 0, &wsdisplay_multicons_enable, 0,
	    CTL_HW, wsdisplay_node->sysctl_num, CTL_CREATE, CTL_EOL);
}
#endif
| 26 3 3 29 26 26 3 3 28 2 28 28 28 26 2 2 26 26 26 29 3 3 3 29 29 29 29 29 2 1 2 28 8 1 11 11 12 11 11 7 7 10 7 11 7 7 6 11 11 9 6 11 29 26 26 3 3 3 27 28 26 26 26 3 3 3 2 27 29 29 11 9 5 12 3 12 19 29 29 29 29 19 29 8 8 8 19 29 6 28 29 5 27 29 29 19 1 1 1 29 1 1 29 2 12 12 1 6 7 2 2 2 24 25 14 25 13 3 17 13 8 6 8 1 1 8 28 28 12 9 3 3 12 12 9 12 3 12 12 12 12 12 28 28 26 16 26 16 25 26 18 17 28 28 27 28 12 11 25 25 21 5 5 1 25 25 25 24 3 27 27 27 12 17 27 27 27 7 24 7 27 25 27 27 27 8 23 27 24 24 3 3 25 27 27 18 27 24 22 27 24 22 1 22 3 27 24 24 3 3 3 24 24 24 24 3 3 3 26 4 4 1 4 4 22 15 21 4 22 22 8 22 22 3 22 22 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 
1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 
1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 | /* $NetBSD: tcp_output.c,v 1.214 2021/12/30 23:03:44 andvar Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 * * NRL grants permission for redistribution and use in source and binary * forms, with or without modification, of the software and documentation * created at NRL provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgements: * This product includes software developed by the University of * California, Berkeley and its contributors. * This product includes software developed at the Information * Technology Division, US Naval Research Laboratory. * 4. Neither the name of the NRL nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies, either expressed or implied, of the US Naval * Research Laboratory (NRL). */ /*- * Copyright (c) 1997, 1998, 2001, 2005, 2006 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation * Facility, NASA Ames Research Center. * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * This code is derived from software contributed to The NetBSD Foundation * by Rui Paulo. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.214 2021/12/30 23:03:44 andvar Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_tcp_debug.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/errno.h> #include <sys/domain.h> #include <sys/kernel.h> #ifdef TCP_SIGNATURE #include <sys/md5.h> #endif #include <net/if.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> #include <netinet/ip_var.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/in6_var.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_pcb.h> #include <netinet6/nd6.h> #endif #ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/key.h> #ifdef INET6 #include <netipsec/ipsec6.h> #endif #endif #include <netinet/tcp.h> #define TCPOUTFLAGS #include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_private.h> #include <netinet/tcp_congctl.h> #include <netinet/tcp_debug.h> #include <netinet/in_offload.h> #include <netinet6/in6_offload.h> /* * Knob to enable Congestion Window Monitoring, and control * the burst size it allows. Default burst is 4 packets, per * the Internet draft. 
 */
int	tcp_cwm = 0;
int	tcp_cwm_burstsize = 4;

/* Automatic send-buffer sizing knobs. */
int	tcp_do_autosndbuf = 1;
int	tcp_autosndbuf_inc = 8 * 1024;
int	tcp_autosndbuf_max = 256 * 1024;

#ifdef TCP_OUTPUT_COUNTERS
#include <sys/device.h>

extern struct evcnt tcp_output_bigheader;
extern struct evcnt tcp_output_predict_hit;
extern struct evcnt tcp_output_predict_miss;
extern struct evcnt tcp_output_copysmall;
extern struct evcnt tcp_output_copybig;
extern struct evcnt tcp_output_refbig;

#define	TCP_OUTPUT_COUNTER_INCR(ev)	(ev)->ev_count++
#else

#define	TCP_OUTPUT_COUNTER_INCR(ev)	/* nothing */

#endif /* TCP_OUTPUT_COUNTERS */

/*
 * Compute the transmit (*txsegsizep) and estimated receive
 * (*rxsegsizep) segment sizes for `tp', based on the route MTU,
 * interface MTU and TCP/IP option overhead.  Sets *alwaysfragp when
 * IPv6 must attach a fragment header (path MTU below IPV6_MMTU).
 * Returns 0 on success or EMSGSIZE if no room for data remains.
 */
static int
tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep,
    bool *alwaysfragp)
{
	struct inpcb *inp = tp->t_inpcb;
#ifdef INET6
	struct in6pcb *in6p = tp->t_in6pcb;
#endif
	struct socket *so = NULL;
	struct rtentry *rt;
	struct ifnet *ifp;
	int size;
	int hdrlen;
	int optlen;

	*alwaysfragp = false;
	size = tcp_mssdflt;

	KASSERT(!(tp->t_inpcb && tp->t_in6pcb));

	switch (tp->t_family) {
	case AF_INET:
		hdrlen = sizeof(struct ip) + sizeof(struct tcphdr);
		break;
#ifdef INET6
	case AF_INET6:
		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
		break;
#endif
	default:
		hdrlen = 1;	/* prevent zero sized segments */
		goto out;
	}

	rt = NULL;
	if (inp) {
		rt = in_pcbrtentry(inp);
		so = inp->inp_socket;
	}
#ifdef INET6
	if (in6p) {
		rt = in6_pcbrtentry(in6p);
		so = in6p->in6p_socket;
	}
#endif
	if (rt == NULL) {
		goto out;
	}

	ifp = rt->rt_ifp;

	if (tp->t_mtudisc && rt->rt_rmx.rmx_mtu != 0) {
#ifdef INET6
		if (in6p && rt->rt_rmx.rmx_mtu < IPV6_MMTU) {
			/*
			 * RFC2460 section 5, last paragraph: if path MTU is
			 * smaller than 1280, use 1280 as packet size and
			 * attach fragment header.
			 */
			size = IPV6_MMTU - hdrlen - sizeof(struct ip6_frag);
			*alwaysfragp = true;
		} else
			size = rt->rt_rmx.rmx_mtu - hdrlen;
#else
		size = rt->rt_rmx.rmx_mtu - hdrlen;
#endif
	} else if (ifp->if_flags & IFF_LOOPBACK)
		size = ifp->if_mtu - hdrlen;
	else if (inp && tp->t_mtudisc)
		size = ifp->if_mtu - hdrlen;
	else if (inp && in_localaddr(inp->inp_faddr))
		size = ifp->if_mtu - hdrlen;
#ifdef INET6
	else if (in6p) {
		if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) {
			/* mapped addr case */
			struct in_addr d;
			memcpy(&d, &in6p->in6p_faddr.s6_addr32[3],
			    sizeof(d));
			if (tp->t_mtudisc || in_localaddr(d))
				size = ifp->if_mtu - hdrlen;
		} else {
			/*
			 * for IPv6, path MTU discovery is always turned on,
			 * or the node must use packet size <= 1280.
			 */
			size = tp->t_mtudisc ? ifp->if_mtu : IPV6_MMTU;
			size -= hdrlen;
		}
	}
#endif
	if (inp)
		in_pcbrtentry_unref(rt, inp);
#ifdef INET6
	if (in6p)
		in6_pcbrtentry_unref(rt, in6p);
#endif
 out:
	/*
	 * Now we must make room for whatever extra TCP/IP options are in
	 * the packet.
	 */
	optlen = tcp_optlen(tp);

	/*
	 * XXX tp->t_ourmss should have the right size, but without this code
	 * fragmentation will occur... need more investigation
	 */
	if (inp) {
#if defined(IPSEC)
		if (ipsec_used &&
		    !ipsec_pcb_skip_ipsec(inp->inp_sp, IPSEC_DIR_OUTBOUND))
			optlen += ipsec4_hdrsiz_tcp(tp);
#endif
		optlen += ip_optlen(inp);
	}
#ifdef INET6
	if (in6p && tp->t_family == AF_INET) {
		/* mapped-address connection: IPv4 on the wire */
#if defined(IPSEC)
		if (ipsec_used &&
		    !ipsec_pcb_skip_ipsec(in6p->in6p_sp,
		    IPSEC_DIR_OUTBOUND))
			optlen += ipsec4_hdrsiz_tcp(tp);
#endif
		/* XXX size -= ip_optlen(in6p); */
	} else if (in6p && tp->t_family == AF_INET6) {
#if defined(IPSEC)
		if (ipsec_used &&
		    !ipsec_pcb_skip_ipsec(in6p->in6p_sp,
		    IPSEC_DIR_OUTBOUND))
			optlen += ipsec6_hdrsiz_tcp(tp);
#endif
		optlen += ip6_optlen(in6p);
	}
#endif
	size -= optlen;

	/*
	 * There may not be any room for data if mtu is too small. This
	 * includes zero-sized.
	 */
	if (size <= 0) {
		return EMSGSIZE;
	}

	/*
	 * *rxsegsizep holds *estimated* inbound segment size (estimation
	 * assumes that path MTU is the same for both ways).  this is only
	 * for silly window avoidance, do not use the value for other
	 * purposes.
	 *
	 * ipseclen is subtracted from both sides, this may not be right.
	 * I'm not quite sure about this (could someone comment).
	 */
	*txsegsizep = uimin(tp->t_peermss - optlen, size);
	*rxsegsizep = uimin(tp->t_ourmss - optlen, size);

	/*
	 * Never send more than half a buffer full.  This insures that we can
	 * always keep 2 packets on the wire, no matter what SO_SNDBUF is, and
	 * therefore acks will never be delayed unless we run out of data to
	 * transmit.
	 */
	if (so) {
		*txsegsizep = uimin(so->so_snd.sb_hiwat >> 1, *txsegsizep);
	}

	/*
	 * A segment must at least store header + options
	 */
	if (*txsegsizep < hdrlen + optlen) {
		return EMSGSIZE;
	}

	if (*txsegsizep != tp->t_segsz) {
		/*
		 * If the new segment size is larger, we don't want to
		 * mess up the congestion window, but if it is smaller
		 * we'll have to reduce the congestion window to ensure
		 * that we don't get into trouble with initial windows
		 * and the rest.  In any case, if the segment size
		 * has changed, chances are the path has, too, and
		 * our congestion window will be different.
		 */
		if (*txsegsizep < tp->t_segsz) {
			/* Rescale cwnd/ssthresh to the new segment size. */
			tp->snd_cwnd = uimax((tp->snd_cwnd / tp->t_segsz) *
			    *txsegsizep, *txsegsizep);
			tp->snd_ssthresh = uimax((tp->snd_ssthresh /
			    tp->t_segsz) * *txsegsizep, *txsegsizep);
		}
		tp->t_segsz = *txsegsizep;
	}

	return 0;
}

/*
 * Build an outgoing data packet: allocate a header mbuf, account the
 * send in the TCP statistics, and append `len' bytes of socket send
 * buffer data starting at offset `off'.  On success *mp holds the
 * chain and 0 is returned; ENOBUFS on allocation failure.
 */
static int
tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off,
    long len, int hdrlen, struct mbuf **mp)
{
	struct mbuf *m, *m0;
	uint64_t *tcps;

	/* Classify the send: window probe, retransmit, or normal. */
	tcps = TCP_STAT_GETREF();
	if (tp->t_force && len == 1)
		tcps[TCP_STAT_SNDPROBE]++;
	else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
		tp->t_sndrexmitpack++;
		tcps[TCP_STAT_SNDREXMITPACK]++;
		tcps[TCP_STAT_SNDREXMITBYTE] += len;
	} else {
		tcps[TCP_STAT_SNDPACK]++;
		tcps[TCP_STAT_SNDBYTE] += len;
	}
	TCP_STAT_PUTREF();

	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (__predict_false(m == NULL))
		return ENOBUFS;
	MCLAIM(m, &tcp_tx_mowner);

	/*
	 * XXX Because other code assumes headers will fit in
	 * XXX one header mbuf.
	 *
	 * (This code should almost *never* be run.)
	 */
	if (__predict_false((max_linkhdr + hdrlen) > MHLEN)) {
		TCP_OUTPUT_COUNTER_INCR(&tcp_output_bigheader);
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_freem(m);
			return ENOBUFS;
		}
	}

	m->m_data += max_linkhdr;
	m->m_len = hdrlen;

	/*
	 * To avoid traversing the whole sb_mb chain for correct
	 * data to send, remember last sent mbuf, its offset and
	 * the sent size.  When called the next time, see if the
	 * data to send is directly following the previous transfer.
	 * This is important for large TCP windows.
	 */
	if (off == 0 || tp->t_lastm == NULL ||
	    (tp->t_lastoff + tp->t_lastlen) != off) {
		TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_miss);
		/*
		 * Either a new packet or a retransmit.
		 * Start from the beginning.
*/ tp->t_lastm = so->so_snd.sb_mb; tp->t_inoff = off; } else { TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_hit); tp->t_inoff += tp->t_lastlen; } /* Traverse forward to next packet */ while (tp->t_inoff > 0) { if (tp->t_lastm == NULL) panic("tp->t_lastm == NULL"); if (tp->t_inoff < tp->t_lastm->m_len) break; tp->t_inoff -= tp->t_lastm->m_len; tp->t_lastm = tp->t_lastm->m_next; } tp->t_lastoff = off; tp->t_lastlen = len; m0 = tp->t_lastm; off = tp->t_inoff; if (len <= M_TRAILINGSPACE(m)) { m_copydata(m0, off, (int)len, mtod(m, char *) + hdrlen); m->m_len += len; TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall); } else { m->m_next = m_copym(m0, off, (int)len, M_DONTWAIT); if (m->m_next == NULL) { m_freem(m); return ENOBUFS; } #ifdef TCP_OUTPUT_COUNTERS if (m->m_next->m_flags & M_EXT) TCP_OUTPUT_COUNTER_INCR(&tcp_output_refbig); else TCP_OUTPUT_COUNTER_INCR(&tcp_output_copybig); #endif } *mp = m; return 0; } /* * Tcp output routine: figure out what should be sent and send it. */ int tcp_output(struct tcpcb *tp) { struct rtentry *rt = NULL; struct socket *so; struct route *ro; long len, win; int off, flags, error; struct mbuf *m; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct tcphdr *th; u_char opt[MAX_TCPOPTLEN], *optp; #define OPT_FITS(more) ((optlen + (more)) <= sizeof(opt)) unsigned optlen, hdrlen, packetlen; unsigned int sack_numblks; int idle, sendalot, txsegsize, rxsegsize; int txsegsize_nosack; int maxburst = TCP_MAXBURST; int af; /* address family on the wire */ int iphdrlen; int has_tso4, has_tso6; int has_tso, use_tso; bool alwaysfrag; int sack_rxmit; int sack_bytes_rxmt; int ecn_tos; struct sackhole *p; #ifdef TCP_SIGNATURE int sigoff = 0; #endif uint64_t *tcps; KASSERT(!(tp->t_inpcb && tp->t_in6pcb)); so = NULL; ro = NULL; if (tp->t_inpcb) { so = tp->t_inpcb->inp_socket; ro = &tp->t_inpcb->inp_route; } #ifdef INET6 else if (tp->t_in6pcb) { so = tp->t_in6pcb->in6p_socket; ro = &tp->t_in6pcb->in6p_route; } #endif switch (af = tp->t_family) { 
	case AF_INET:
		if (tp->t_inpcb)
			break;
#ifdef INET6
		/* mapped addr case */
		if (tp->t_in6pcb)
			break;
#endif
		return EINVAL;
#ifdef INET6
	case AF_INET6:
		if (tp->t_in6pcb)
			break;
		return EINVAL;
#endif
	default:
		return EAFNOSUPPORT;
	}

	if (tcp_segsize(tp, &txsegsize, &rxsegsize, &alwaysfrag))
		return EMSGSIZE;

	idle = (tp->snd_max == tp->snd_una);

	/*
	 * Determine if we can use TCP segmentation offload:
	 * - If we're using IPv4
	 * - If there is not an IPsec policy that prevents it
	 * - If the interface can do it
	 */
	has_tso4 = has_tso6 = false;

	has_tso4 = tp->t_inpcb != NULL &&
#if defined(IPSEC)
	    (!ipsec_used || ipsec_pcb_skip_ipsec(tp->t_inpcb->inp_sp,
	    IPSEC_DIR_OUTBOUND)) &&
#endif
	    (rt = rtcache_validate(&tp->t_inpcb->inp_route)) != NULL &&
	    (rt->rt_ifp->if_capenable & IFCAP_TSOv4) != 0;
	if (rt != NULL) {
		rtcache_unref(rt, &tp->t_inpcb->inp_route);
		rt = NULL;
	}

#if defined(INET6)
	has_tso6 = tp->t_in6pcb != NULL &&
#if defined(IPSEC)
	    (!ipsec_used || ipsec_pcb_skip_ipsec(tp->t_in6pcb->in6p_sp,
	    IPSEC_DIR_OUTBOUND)) &&
#endif
	    (rt = rtcache_validate(&tp->t_in6pcb->in6p_route)) != NULL &&
	    (rt->rt_ifp->if_capenable & IFCAP_TSOv6) != 0;
	if (rt != NULL)
		rtcache_unref(rt, &tp->t_in6pcb->in6p_route);
#endif /* defined(INET6) */
	has_tso = (has_tso4 || has_tso6) && !alwaysfrag;

	/*
	 * Restart Window computation.  From draft-floyd-incr-init-win-03:
	 *
	 *	Optionally, a TCP MAY set the restart window to the
	 *	minimum of the value used for the initial window and
	 *	the current value of cwnd (in other words, using a
	 *	larger value for the restart window should never increase
	 *	the size of cwnd).
	 */
	if (tcp_cwm) {
		/*
		 * Hughes/Touch/Heidemann Congestion Window Monitoring.
		 * Count the number of packets currently pending
		 * acknowledgement, and limit our congestion window
		 * to a pre-determined allowed burst size plus that count.
		 * This prevents bursting once all pending packets have
		 * been acknowledged (i.e. transmission is idle).
		 *
		 * XXX Link this to Initial Window?
		 */
		tp->snd_cwnd = uimin(tp->snd_cwnd,
		    (tcp_cwm_burstsize * txsegsize) +
		    (tp->snd_nxt - tp->snd_una));
	} else {
		if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) {
			/*
			 * We have been idle for "a while" and no acks are
			 * expected to clock out any data we send --
			 * slow start to get ack "clock" running again.
			 */
			int ss = tcp_init_win;
			if (tp->t_inpcb &&
			    in_localaddr(tp->t_inpcb->inp_faddr))
				ss = tcp_init_win_local;
#ifdef INET6
			if (tp->t_in6pcb &&
			    in6_localaddr(&tp->t_in6pcb->in6p_faddr))
				ss = tcp_init_win_local;
#endif
			tp->snd_cwnd = uimin(tp->snd_cwnd,
			    TCP_INITIAL_WINDOW(ss, txsegsize));
		}
	}

	txsegsize_nosack = txsegsize;
again:
	ecn_tos = 0;
	use_tso = has_tso;
	if ((tp->t_flags & (TF_ECN_SND_CWR|TF_ECN_SND_ECE)) != 0) {
		/* don't duplicate CWR/ECE. */
		use_tso = 0;
	}
	TCP_REASS_LOCK(tp);
	sack_numblks = tcp_sack_numblks(tp);
	if (sack_numblks) {
		int sackoptlen;

		/* Reserve room in the segment for the SACK option, or drop
		 * SACK entirely if the blocks would not even fit. */
		sackoptlen = TCP_SACK_OPTLEN(sack_numblks);
		if (sackoptlen > txsegsize_nosack) {
			sack_numblks = 0; /* give up SACK */
			txsegsize = txsegsize_nosack;
		} else {
			if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) {
				/* don't duplicate D-SACK. */
				use_tso = 0;
			}
			txsegsize = txsegsize_nosack - sackoptlen;
		}
	} else {
		txsegsize = txsegsize_nosack;
	}

	/*
	 * Determine length of data that should be transmitted, and
	 * flags that should be used.  If there is some data or critical
	 * controls (SYN, RST) to send, then transmit; otherwise,
	 * investigate further.
	 *
	 * Readjust SACK information to avoid resending duplicate data.
	 */
	if (TCP_SACK_ENABLED(tp) && SEQ_LT(tp->snd_nxt, tp->snd_max))
		tcp_sack_adjust(tp);
	sendalot = 0;
	off = tp->snd_nxt - tp->snd_una;
	win = uimin(tp->snd_wnd, tp->snd_cwnd);

	flags = tcp_outflags[tp->t_state];

	/*
	 * Send any SACK-generated retransmissions.  If we're explicitly trying
	 * to send out new data (when sendalot is 1), bypass this function.
	 * If we retransmit in fast recovery mode, decrement snd_cwnd, since
	 * we're replacing a (future) new transmission with a retransmission
	 * now, and we previously incremented snd_cwnd in tcp_input().
	 */
	/*
	 * Still in sack recovery, reset rxmit flag to zero.
	 */
	sack_rxmit = 0;
	sack_bytes_rxmt = 0;
	len = 0;
	p = NULL;
	do {
		long cwin;
		if (!TCP_SACK_ENABLED(tp))
			break;
		if (tp->t_partialacks < 0)
			break;
		p = tcp_sack_output(tp, &sack_bytes_rxmt);
		if (p == NULL)
			break;

		cwin = uimin(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
		if (cwin < 0)
			cwin = 0;
		/* Do not retransmit SACK segments beyond snd_recover */
		if (SEQ_GT(p->end, tp->snd_recover)) {
			/*
			 * (At least) part of sack hole extends beyond
			 * snd_recover. Check to see if we can rexmit data
			 * for this hole.
			 */
			if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
				/*
				 * Can't rexmit any more data for this hole.
				 * That data will be rexmitted in the next
				 * sack recovery episode, when snd_recover
				 * moves past p->rxmit.
				 */
				p = NULL;
				break;
			}
			/* Can rexmit part of the current hole */
			len = ((long)ulmin(cwin, tp->snd_recover - p->rxmit));
		} else
			len = ((long)ulmin(cwin, p->end - p->rxmit));
		off = p->rxmit - tp->snd_una;
		if (off + len > so->so_snd.sb_cc) {
			/* 1 for TH_FIN */
			KASSERT(off + len == so->so_snd.sb_cc + 1);
			KASSERT(p->rxmit + len == tp->snd_max);
			len = so->so_snd.sb_cc - off;
		}
		if (len > 0) {
			sack_rxmit = 1;
			sendalot = 1;
		}
	} while (/*CONSTCOND*/0);

	/*
	 * If in persist timeout with window of 0, send 1 byte.
	 * Otherwise, if window is small but nonzero
	 * and timer expired, we will send what we can
	 * and go to transmit state.
	 */
	if (tp->t_force) {
		if (win == 0) {
			/*
			 * If we still have some data to send, then
			 * clear the FIN bit.  Usually this would
			 * happen below when it realizes that we
			 * aren't sending all the data.  However,
			 * if we have exactly 1 byte of unsent data,
			 * then it won't clear the FIN bit below,
			 * and if we are in persist state, we wind
			 * up sending the packet without recording
			 * that we sent the FIN bit.
			 *
			 * We can't just blindly clear the FIN bit,
			 * because if we don't have any more data
			 * to send then the probe will be the FIN
			 * itself.
			 */
			if (off < so->so_snd.sb_cc)
				flags &= ~TH_FIN;
			win = 1;
		} else {
			TCP_TIMER_DISARM(tp, TCPT_PERSIST);
			tp->t_rxtshift = 0;
		}
	}

	if (sack_rxmit == 0) {
		if (TCP_SACK_ENABLED(tp) && tp->t_partialacks >= 0) {
			long cwin;

			/*
			 * We are inside of a SACK recovery episode and are
			 * sending new data, having retransmitted all the
			 * data possible in the scoreboard.
			 */
			if (tp->snd_wnd < so->so_snd.sb_cc) {
				len = tp->snd_wnd - off;
				flags &= ~TH_FIN;
			} else {
				len = so->so_snd.sb_cc - off;
			}

			/*
			 * From FreeBSD:
			 *  Don't remove this (len > 0) check !
			 * We explicitly check for len > 0 here (although it
			 * isn't really necessary), to work around a gcc
			 * optimization issue - to force gcc to compute
			 * len above. Without this check, the computation
			 * of len is bungled by the optimizer.
			 */
			if (len > 0) {
				cwin = tp->snd_cwnd -
				    (tp->snd_nxt - tp->sack_newdata) -
				    sack_bytes_rxmt;
				if (cwin < 0)
					cwin = 0;
				if (cwin < len) {
					len = cwin;
					flags &= ~TH_FIN;
				}
			}
		} else if (win < so->so_snd.sb_cc) {
			len = win - off;
			flags &= ~TH_FIN;
		} else {
			len = so->so_snd.sb_cc - off;
		}
	}

	if (len < 0) {
		/*
		 * If FIN has been sent but not acked,
		 * but we haven't been called to retransmit,
		 * len will be -1.  Otherwise, window shrank
		 * after we sent into it.  If window shrank to 0,
		 * cancel pending retransmit, pull snd_nxt back
		 * to (closed) window, and set the persist timer
		 * if it isn't already going.  If the window didn't
		 * close completely, just wait for an ACK.
		 *
		 * If we have a pending FIN, either it has already been
		 * transmitted or it is outside the window, so drop it.
		 * If the FIN has been transmitted, but this is not a
		 * retransmission, then len must be -1.  Therefore we also
		 * prevent here the sending of `gratuitous FINs'.  This
		 * eliminates the need to check for that case below (e.g.
		 * to back up snd_nxt before the FIN so that the sequence
		 * number is correct).
		 */
		len = 0;
		flags &= ~TH_FIN;
		if (win == 0) {
			TCP_TIMER_DISARM(tp, TCPT_REXMT);
			tp->t_rxtshift = 0;
			tp->snd_nxt = tp->snd_una;
			if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0)
				tcp_setpersist(tp);
		}
	}

	/*
	 * Automatic sizing enables the performance of large buffers
	 * and most of the efficiency of small ones by only allocating
	 * space when it is needed.
	 *
	 * The criteria to step up the send buffer one notch are:
	 *  1. receive window of remote host is larger than send buffer
	 *     (with a fudge factor of 5/4th);
	 *  2. send buffer is filled to 7/8th with data (so we actually
	 *     have data to make use of it);
	 *  3. send buffer fill has not hit maximal automatic size;
	 *  4. our send window (slow start and congestion controlled) is
	 *     larger than sent but unacknowledged data in send buffer.
	 *
	 * The remote host receive window scaling factor may limit the
	 * growing of the send buffer before it reaches its allowed
	 * maximum.
	 *
	 * It scales directly with slow start or congestion window
	 * and does at most one step per received ACK.  This fast
	 * scaling has the drawback of growing the send buffer beyond
	 * what is strictly necessary to make full use of a given
	 * delay*bandwidth product.  However testing has shown this not
	 * to be much of a problem.  At worst we are trading wasting
	 * of available bandwidth (the non-use of it) for wasting some
	 * socket buffer memory.
	 *
	 * TODO: Shrink send buffer during idle periods together
	 * with congestion window.  Requires another timer.
	 */
	if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
		    so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
		    so->so_snd.sb_cc < tcp_autosndbuf_max &&
		    win >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
			if (!sbreserve(&so->so_snd,
			    uimin(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
			    tcp_autosndbuf_max), so))
				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
		}
	}

	if (len > txsegsize) {
		if (use_tso) {
			/*
			 * Truncate TSO transfers to IP_MAXPACKET, and make
			 * sure that we send equal size transfers down the
			 * stack (rather than big-small-big-small-...).
			 */
#ifdef INET6
			CTASSERT(IPV6_MAXPACKET == IP_MAXPACKET);
#endif
			len = (uimin(len, IP_MAXPACKET) / txsegsize) *
			    txsegsize;
			if (len <= txsegsize) {
				use_tso = 0;
			}
		} else
			len = txsegsize;
		flags &= ~TH_FIN;
		sendalot = 1;
	} else
		use_tso = 0;

	if (sack_rxmit) {
		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
			flags &= ~TH_FIN;
	}

	win = sbspace(&so->so_rcv);

	/*
	 * Sender silly window avoidance.  If connection is idle
	 * and can send all data, a maximum segment,
	 * at least a maximum default-size segment do it,
	 * or are forced, do it; otherwise don't bother.
	 * If peer's buffer is tiny, then send
	 * when window is at least half open.
	 * If retransmitting (possibly after persist timer forced us
	 * to send into a small window), then must resend.
	 */
	if (len) {
		if (len >= txsegsize)
			goto send;
		if ((so->so_state & SS_MORETOCOME) == 0 &&
		    ((idle || tp->t_flags & TF_NODELAY) &&
		     len + off >= so->so_snd.sb_cc))
			goto send;
		if (tp->t_force)
			goto send;
		if (len >= tp->max_sndwnd / 2)
			goto send;
		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
			goto send;
		if (sack_rxmit)
			goto send;
	}

	/*
	 * Compare available window to amount of window known to peer
	 * (as advertised window less next expected input).  If the
	 * difference is at least twice the size of the largest segment
	 * we expect to receive (i.e. two segments) or at least 50% of
	 * the maximum possible window, then want to send a window update
	 * to peer.
	 */
	if (win > 0) {
		/*
		 * "adv" is the amount we can increase the window,
		 * taking into account that we are limited by
		 * TCP_MAXWIN << tp->rcv_scale.
		 */
		long recwin = uimin(win, (long)TCP_MAXWIN << tp->rcv_scale);
		long oldwin, adv;

		/*
		 * rcv_nxt may overtake rcv_adv when we accept a
		 * zero-window probe.
		 */
		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
			oldwin = tp->rcv_adv - tp->rcv_nxt;
		else
			oldwin = 0;

		/*
		 * If the new window size ends up being the same as or
		 * less than the old size when it is scaled, then
		 * don't force a window update.
		 */
		if (recwin >> tp->rcv_scale <= oldwin >> tp->rcv_scale)
			goto dontupdate;

		adv = recwin - oldwin;
		if (adv >= (long) (2 * rxsegsize))
			goto send;
		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
			goto send;
	}
 dontupdate:

	/*
	 * Send if we owe peer an ACK.
	 */
	if (tp->t_flags & TF_ACKNOW)
		goto send;
	if (flags & (TH_SYN|TH_FIN|TH_RST))
		goto send;
	if (SEQ_GT(tp->snd_up, tp->snd_una))
		goto send;
	/*
	 * In SACK, it is possible for tcp_output to fail to send a segment
	 * after the retransmission timer has been turned off.  Make sure
	 * that the retransmission timer is set.
	 */
	if (TCP_SACK_ENABLED(tp) && SEQ_GT(tp->snd_max, tp->snd_una) &&
	    !TCP_TIMER_ISARMED(tp, TCPT_REXMT) &&
	    !TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) {
		TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
		goto just_return;
	}

	/*
	 * TCP window updates are not reliable, rather a polling protocol
	 * using ``persist'' packets is used to ensure receipt of window
	 * updates.  The three ``states'' for the output side are:
	 *	idle			not doing retransmits or persists
	 *	persisting		to move a small or zero window
	 *	(re)transmitting	and thereby not persisting
	 *
	 * tp->t_timer[TCPT_PERSIST]
	 *	is set when we are in persist state.
	 * tp->t_force
	 *	is set when we are called to send a persist packet.
	 * tp->t_timer[TCPT_REXMT]
	 *	is set when we are retransmitting
	 * The output side is idle when both timers are zero.
	 *
	 * If send window is too small, there is data to transmit, and no
	 * retransmit or persist is pending, then go to persist state.
	 * If nothing happens soon, send when timer expires:
	 * if window is nonzero, transmit what we can,
	 * otherwise force out a byte.
	 */
	if (so->so_snd.sb_cc && TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0 &&
	    TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) {
		tp->t_rxtshift = 0;
		tcp_setpersist(tp);
	}

	/*
	 * No reason to send a segment, just return.
	 */
just_return:
	TCP_REASS_UNLOCK(tp);
	return 0;

send:
	/*
	 * Before ESTABLISHED, force sending of initial options unless TCP set
	 * not to do any options.
	 *
	 * Note: we assume that the IP/TCP header plus TCP options always fit
	 * in a single mbuf, leaving room for a maximum link header, i.e.:
	 *	max_linkhdr + IP_header + TCP_header + optlen <= MCLBYTES
	 */
	optlen = 0;
	optp = opt;
	switch (af) {
	case AF_INET:
		iphdrlen = sizeof(struct ip) + sizeof(struct tcphdr);
		break;
#ifdef INET6
	case AF_INET6:
		iphdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
		break;
#endif
	default:	/*pacify gcc*/
		iphdrlen = 0;
		break;
	}
	hdrlen = iphdrlen;

	if (flags & TH_SYN) {
		struct rtentry *synrt;

		synrt = NULL;
		if (tp->t_inpcb)
			synrt = in_pcbrtentry(tp->t_inpcb);
#ifdef INET6
		if (tp->t_in6pcb)
			synrt = in6_pcbrtentry(tp->t_in6pcb);
#endif

		tp->snd_nxt = tp->iss;
		tp->t_ourmss = tcp_mss_to_advertise(synrt != NULL ?
		    synrt->rt_ifp : NULL, af);
		if (tp->t_inpcb)
			in_pcbrtentry_unref(synrt, tp->t_inpcb);
#ifdef INET6
		if (tp->t_in6pcb)
			in6_pcbrtentry_unref(synrt, tp->t_in6pcb);
#endif
		if ((tp->t_flags & TF_NOOPT) == 0 && OPT_FITS(TCPOLEN_MAXSEG)) {
			/* MSS option, big-endian. */
			*optp++ = TCPOPT_MAXSEG;
			*optp++ = TCPOLEN_MAXSEG;
			*optp++ = (tp->t_ourmss >> 8) & 0xff;
			*optp++ = tp->t_ourmss & 0xff;
			optlen += TCPOLEN_MAXSEG;

			if ((tp->t_flags & TF_REQ_SCALE) &&
			    ((flags & TH_ACK) == 0 ||
			    (tp->t_flags & TF_RCVD_SCALE)) &&
			    OPT_FITS(TCPOLEN_WINDOW + TCPOLEN_NOP)) {
				*((uint32_t *)optp) = htonl(
				    TCPOPT_NOP << 24 |
				    TCPOPT_WINDOW << 16 |
				    TCPOLEN_WINDOW << 8 |
				    tp->request_r_scale);
				optp += TCPOLEN_WINDOW + TCPOLEN_NOP;
				optlen += TCPOLEN_WINDOW + TCPOLEN_NOP;
			}
			if (tcp_do_sack && OPT_FITS(TCPOLEN_SACK_PERMITTED)) {
				*optp++ = TCPOPT_SACK_PERMITTED;
				*optp++ = TCPOLEN_SACK_PERMITTED;
				optlen += TCPOLEN_SACK_PERMITTED;
			}
		}
	}

	/*
	 * Send a timestamp and echo-reply if this is a SYN and our side
	 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
	 * and our peer have sent timestamps in our SYN's.
	 */
	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
	    (flags & TH_RST) == 0 &&
	    ((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
	     (tp->t_flags & TF_RCVD_TSTMP))) {
		int alen = 0;
		/* NOPs so the timestamp's two 32-bit words are aligned. */
		while (optlen % 4 != 2) {
			optlen += TCPOLEN_NOP;
			*optp++ = TCPOPT_NOP;
			alen++;
		}
		if (OPT_FITS(TCPOLEN_TIMESTAMP)) {
			*optp++ = TCPOPT_TIMESTAMP;
			*optp++ = TCPOLEN_TIMESTAMP;
			uint32_t *lp = (uint32_t *)optp;

			/* Form timestamp option (appendix A of RFC 1323) */
			*lp++ = htonl(TCP_TIMESTAMP(tp));
			*lp   = htonl(tp->ts_recent);
			optp += TCPOLEN_TIMESTAMP - 2;
			optlen += TCPOLEN_TIMESTAMP;

			/* Set receive buffer autosizing timestamp. */
			if (tp->rfbuf_ts == 0 &&
			    (so->so_rcv.sb_flags & SB_AUTOSIZE))
				tp->rfbuf_ts = TCP_TIMESTAMP(tp);
		} else {
			/* Option didn't fit: undo the alignment NOPs. */
			optp -= alen;
			optlen -= alen;
		}
	}

#ifdef TCP_SIGNATURE
	if (tp->t_flags & TF_SIGNATURE) {
		/*
		 * Initialize TCP-MD5 option (RFC2385)
		 */
		if (!OPT_FITS(TCPOLEN_SIGNATURE))
			goto reset;
		*optp++ = TCPOPT_SIGNATURE;
		*optp++ = TCPOLEN_SIGNATURE;
		sigoff = optlen + 2;
		memset(optp, 0, TCP_SIGLEN);
		optlen += TCPOLEN_SIGNATURE;
		optp += TCP_SIGLEN;
	}
#endif

	/*
	 * Tack on the SACK block if it is necessary.
	 */
	if (sack_numblks) {
		int alen = 0;
		int sack_len = sack_numblks * 8;
		/* NOPs so the SACK block list is 32-bit aligned. */
		while (optlen % 4 != 2) {
			optlen += TCPOLEN_NOP;
			*optp++ = TCPOPT_NOP;
			alen++;
		}
		if (OPT_FITS(sack_len + 2)) {
			struct ipqent *tiqe;
			*optp++ = TCPOPT_SACK;
			*optp++ = sack_len + 2;
			uint32_t *lp = (uint32_t *)optp;
			/* A pending D-SACK block always goes first. */
			if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) {
				sack_numblks--;
				*lp++ = htonl(tp->rcv_dsack_block.left);
				*lp++ = htonl(tp->rcv_dsack_block.right);
				tp->rcv_sack_flags &= ~TCPSACK_HAVED;
			}
			for (tiqe = TAILQ_FIRST(&tp->timeq);
			    sack_numblks > 0;
			    tiqe = TAILQ_NEXT(tiqe, ipqe_timeq)) {
				KASSERT(tiqe != NULL);
				sack_numblks--;
				*lp++ = htonl(tiqe->ipqe_seq);
				*lp++ = htonl(tiqe->ipqe_seq + tiqe->ipqe_len +
				    ((tiqe->ipqe_flags & TH_FIN) != 0 ? 1 : 0));
			}
			optlen += sack_len + 2;
			optp += sack_len;
		} else {
			/* Option didn't fit: undo the alignment NOPs. */
			optp -= alen;
			optlen -= alen;
		}
	}

	/* Terminate and pad TCP options to a 4 byte boundary. */
	if (optlen % 4) {
		if (!OPT_FITS(TCPOLEN_EOL)) {
reset:
			TCP_REASS_UNLOCK(tp);
			error = ECONNABORTED;
			goto out;
		}
		optlen += TCPOLEN_EOL;
		*optp++ = TCPOPT_EOL;
	}
	/*
	 * According to RFC 793 (STD0007):
	 *   "The content of the header beyond the End-of-Option option
	 *    must be header padding (i.e., zero)."
	 * and later: "The padding is composed of zeros."
	 */
	while (optlen % 4) {
		if (!OPT_FITS(TCPOLEN_PAD))
			goto reset;
		optlen += TCPOLEN_PAD;
		*optp++ = TCPOPT_PAD;
	}

	TCP_REASS_UNLOCK(tp);

	hdrlen += optlen;

#ifdef DIAGNOSTIC
	if (!use_tso && len > txsegsize)
		panic("tcp data to be sent is larger than segment");
	else if (use_tso && len > IP_MAXPACKET)
		panic("tcp data to be sent is larger than max TSO size");
	if (max_linkhdr + hdrlen > MCLBYTES)
		panic("tcphdr too big");
#endif

	/*
	 * Grab a header mbuf, attaching a copy of data to
	 * be transmitted, and initialize the header from
	 * the template for sends on this connection.
	 */
	if (len) {
		error = tcp_build_datapkt(tp, so, off, len, hdrlen, &m);
		if (error)
			goto out;
		/*
		 * If we're sending everything we've got, set PUSH.
		 * (This will keep happy those implementations which only
		 * give data to the user when a buffer fills or
		 * a PUSH comes in.)
		 */
		if (off + len == so->so_snd.sb_cc)
			flags |= TH_PUSH;
	} else {
		/* Data-less segment: classify for statistics, then build
		 * a header-only mbuf ourselves. */
		tcps = TCP_STAT_GETREF();
		if (tp->t_flags & TF_ACKNOW)
			tcps[TCP_STAT_SNDACKS]++;
		else if (flags & (TH_SYN|TH_FIN|TH_RST))
			tcps[TCP_STAT_SNDCTRL]++;
		else if (SEQ_GT(tp->snd_up, tp->snd_una))
			tcps[TCP_STAT_SNDURG]++;
		else
			tcps[TCP_STAT_SNDWINUP]++;
		TCP_STAT_PUTREF();

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m != NULL && max_linkhdr + hdrlen > MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(m);
				m = NULL;
			}
		}
		if (m == NULL) {
			error = ENOBUFS;
			goto out;
		}
		MCLAIM(m, &tcp_tx_mowner);
		m->m_data += max_linkhdr;
		m->m_len = hdrlen;
	}
	m_reset_rcvif(m);
	switch (af) {
	case AF_INET:
		ip = mtod(m, struct ip *);
#ifdef INET6
		ip6 = NULL;
#endif
		th = (struct tcphdr *)(ip + 1);
		break;
#ifdef INET6
	case AF_INET6:
		ip = NULL;
		ip6 = mtod(m, struct ip6_hdr *);
		th = (struct tcphdr *)(ip6 + 1);
		break;
#endif
	default:	/*pacify gcc*/
		ip = NULL;
#ifdef INET6
		ip6 = NULL;
#endif
		th = NULL;
		break;
	}
	if (tp->t_template == NULL)
		panic("%s: no template", __func__);
	if (tp->t_template->m_len < iphdrlen)
		panic("%s: %d < %d", __func__, tp->t_template->m_len, iphdrlen);
	bcopy(mtod(tp->t_template, void *), mtod(m, void *), iphdrlen);

	/*
	 * If we are starting a connection, send ECN setup
	 * SYN packet.  If we are on a retransmit, we may
	 * resend those bits a number of times as per
	 * RFC 3168.
	 */
	if (tp->t_state == TCPS_SYN_SENT && tcp_do_ecn) {
		if (tp->t_flags & TF_SYN_REXMT) {
			if (tp->t_ecn_retries--)
				flags |= TH_ECE|TH_CWR;
		} else {
			flags |= TH_ECE|TH_CWR;
			tp->t_ecn_retries = tcp_ecn_maxretries;
		}
	}
	if (TCP_ECN_ALLOWED(tp)) {
		/*
		 * If the peer has ECN, mark data packets
		 * ECN capable. Ignore pure ack packets, retransmissions
		 * and window probes.
		 */
		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
		    !(tp->t_force && len == 1)) {
			ecn_tos = IPTOS_ECN_ECT0;
			TCP_STATINC(TCP_STAT_ECN_ECT);
		}

		/*
		 * Reply with proper ECN notifications.
		 */
		if (tp->t_flags & TF_ECN_SND_CWR) {
			flags |= TH_CWR;
			tp->t_flags &= ~TF_ECN_SND_CWR;
		}
		if (tp->t_flags & TF_ECN_SND_ECE) {
			flags |= TH_ECE;
		}
	}

	/*
	 * If we are doing retransmissions, then snd_nxt will
	 * not reflect the first unsent octet.  For ACK only
	 * packets, we do not want the sequence number of the
	 * retransmitted packet, we want the sequence number
	 * of the next unsent octet.  So, if there is no data
	 * (and no SYN or FIN), use snd_max instead of snd_nxt
	 * when filling in ti_seq.  But if we are in persist
	 * state, snd_max might reflect one byte beyond the
	 * right edge of the window, so use snd_nxt in that
	 * case, since we know we aren't doing a retransmission.
	 * (retransmit and persist are mutually exclusive...)
	 */
	if (TCP_SACK_ENABLED(tp) && sack_rxmit) {
		th->th_seq = htonl(p->rxmit);
		p->rxmit += len;
	} else {
		if (len || (flags & (TH_SYN|TH_FIN)) ||
		    TCP_TIMER_ISARMED(tp, TCPT_PERSIST))
			th->th_seq = htonl(tp->snd_nxt);
		else
			th->th_seq = htonl(tp->snd_max);
	}
	th->th_ack = htonl(tp->rcv_nxt);
	if (optlen) {
		memcpy(th + 1, opt, optlen);
		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
	}
	th->th_flags = flags;

	/*
	 * Calculate receive window.  Don't shrink window,
	 * but avoid silly window syndrome.
	 */
	if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)rxsegsize)
		win = 0;
	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
		win = (long)TCP_MAXWIN << tp->rcv_scale;
	if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
		win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
	th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
	if (th->th_win == 0) {
		tp->t_sndzerowin++;
	}
	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
		u_int32_t urp = tp->snd_up - tp->snd_nxt;
		if (urp > IP_MAXPACKET)
			urp = IP_MAXPACKET;
		th->th_urp = htons((u_int16_t)urp);
		th->th_flags |= TH_URG;
	} else
		/*
		 * If no urgent pointer to send, then we pull
		 * the urgent pointer to the left edge of the send window
		 * so that it doesn't drift into the send window on sequence
		 * number wraparound.
		 */
		tp->snd_up = tp->snd_una;	/* drag it along */

#ifdef TCP_SIGNATURE
	if (sigoff && (tp->t_flags & TF_SIGNATURE)) {
		struct secasvar *sav;
		u_int8_t *sigp;

		sav = tcp_signature_getsav(m);
		if (sav == NULL) {
			if (m)
				m_freem(m);
			return EPERM;
		}

		m->m_pkthdr.len = hdrlen + len;
		sigp = (char *)th + sizeof(*th) + sigoff;
		tcp_signature(m, th, (char *)th - mtod(m, char *), sav, sigp);

		key_sa_recordxfer(sav, m);
		KEY_SA_UNREF(&sav);
	}
#endif

	/*
	 * Set ourselves up to be checksummed just before the packet
	 * hits the wire.
	 */
	switch (af) {
	case AF_INET:
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (use_tso) {
			m->m_pkthdr.segsz = txsegsize;
			m->m_pkthdr.csum_flags = M_CSUM_TSOv4;
		} else {
			m->m_pkthdr.csum_flags = M_CSUM_TCPv4;
			if (len + optlen) {
				/* Fixup the pseudo-header checksum. */
				/* XXXJRT Not IP Jumbogram safe. */
				th->th_sum = in_cksum_addword(th->th_sum,
				    htons((u_int16_t) (len + optlen)));
			}
		}
		break;
#ifdef INET6
	case AF_INET6:
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (use_tso) {
			m->m_pkthdr.segsz = txsegsize;
			m->m_pkthdr.csum_flags = M_CSUM_TSOv6;
		} else {
			m->m_pkthdr.csum_flags = M_CSUM_TCPv6;
			if (len + optlen) {
				/* Fixup the pseudo-header checksum. */
				/* XXXJRT: Not IPv6 Jumbogram safe. */
				th->th_sum = in_cksum_addword(th->th_sum,
				    htons((u_int16_t) (len + optlen)));
			}
		}
		break;
#endif
	}

	/*
	 * In transmit state, time the transmission and arrange for
	 * the retransmit.  In persist state, just set snd_max.
	 */
	if (tp->t_force == 0 || TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) {
		tcp_seq startseq = tp->snd_nxt;

		/*
		 * Advance snd_nxt over sequence space of this segment.
		 * There are no states in which we send both a SYN and a FIN,
		 * so we collapse the tests for these flags.
		 */
		if (flags & (TH_SYN|TH_FIN))
			tp->snd_nxt++;
		if (sack_rxmit)
			goto timer;
		tp->snd_nxt += len;
		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
			tp->snd_max = tp->snd_nxt;
			/*
			 * Time this transmission if not a retransmission and
			 * not currently timing anything.
			 */
			if (tp->t_rtttime == 0) {
				tp->t_rtttime = tcp_now;
				tp->t_rtseq = startseq;
				TCP_STATINC(TCP_STAT_SEGSTIMED);
			}
		}

		/*
		 * Set retransmit timer if not currently set,
		 * and not doing an ack or a keep-alive probe.
		 * Initial value for retransmit timer is smoothed
		 * round-trip time + 2 * round-trip time variance.
		 * Initialize shift counter which is used for backoff
		 * of retransmit time.
		 */
timer:
		if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) {
			if ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
			    tp->snd_nxt != tp->snd_una) {
				if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) {
					TCP_TIMER_DISARM(tp, TCPT_PERSIST);
					tp->t_rxtshift = 0;
				}
				TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
			} else if (len == 0 && so->so_snd.sb_cc > 0 &&
			    TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) {
				/*
				 * If we are sending a window probe and there's
				 * unacked data in the socket, make sure at
				 * least the persist timer is running.
				 */
				tp->t_rxtshift = 0;
				tcp_setpersist(tp);
			}
		}
	} else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
		tp->snd_max = tp->snd_nxt + len;

#ifdef TCP_DEBUG
	/*
	 * Trace.
	 */
	if (so->so_options & SO_DEBUG)
		tcp_trace(TA_OUTPUT, tp->t_state, tp, m, 0);
#endif

	/*
	 * Fill in IP length and desired time to live and
	 * send to IP level.  There should be a better way
	 * to handle ttl and tos; we could keep them in
	 * the template, but need a way to checksum without them.
	 */
	m->m_pkthdr.len = hdrlen + len;
	switch (af) {
	case AF_INET:
		ip->ip_len = htons(m->m_pkthdr.len);
		packetlen = m->m_pkthdr.len;
		if (tp->t_inpcb) {
			ip->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl;
			ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos | ecn_tos;
		}
#ifdef INET6
		else if (tp->t_in6pcb) {
			ip->ip_ttl = in6_selecthlim(tp->t_in6pcb, NULL); /*XXX*/
			ip->ip_tos = ecn_tos;	/*XXX*/
		}
#endif
		break;
#ifdef INET6
	case AF_INET6:
		packetlen = m->m_pkthdr.len;
		ip6->ip6_nxt = IPPROTO_TCP;
		if (tp->t_in6pcb) {
			/*
			 * we separately set hoplimit for every segment, since
			 * the user might want to change the value via
			 * setsockopt.  Also, desired default hop limit might
			 * be changed via Neighbor Discovery.
			 */
			ip6->ip6_hlim = in6_selecthlim_rt(tp->t_in6pcb);
		}
		ip6->ip6_flow |= htonl(ecn_tos << 20);
		/* ip6->ip6_flow = ??? (from template) */
		/* ip6_plen will be filled in ip6_output(). */
		break;
#endif
	default:	/*pacify gcc*/
		packetlen = 0;
		break;
	}

	switch (af) {
	case AF_INET:
	    {
		struct mbuf *opts;

		if (tp->t_inpcb)
			opts = tp->t_inpcb->inp_options;
		else
			opts = NULL;
		error = ip_output(m, opts, ro,
		    (tp->t_mtudisc ? IP_MTUDISC : 0) |
		    (so->so_options & SO_DONTROUTE), NULL, tp->t_inpcb);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		struct ip6_pktopts *opts;

		if (tp->t_in6pcb)
			opts = tp->t_in6pcb->in6p_outputopts;
		else
			opts = NULL;
		error = ip6_output(m, opts, ro, so->so_options & SO_DONTROUTE,
		    NULL, tp->t_in6pcb, NULL);
		break;
	    }
#endif
	default:
		error = EAFNOSUPPORT;
		break;
	}
	if (error) {
out:
		if (error == ENOBUFS) {
			/* Treat local buffer exhaustion like congestion. */
			TCP_STATINC(TCP_STAT_SELFQUENCH);
			if (tp->t_inpcb)
				tcp_quench(tp->t_inpcb);
#ifdef INET6
			if (tp->t_in6pcb)
				tcp6_quench(tp->t_in6pcb);
#endif
			error = 0;
		} else if ((error == EHOSTUNREACH || error == ENETDOWN) &&
		    TCPS_HAVERCVDSYN(tp->t_state)) {
			tp->t_softerror = error;
			error = 0;
		}

		/* Back out the sequence number advance.
*/
		if (sack_rxmit)
			p->rxmit -= len;

		/* Restart the delayed ACK timer, if necessary. */
		if (tp->t_flags & TF_DELACK)
			TCP_RESTART_DELACK(tp);

		return error;
	}

	if (packetlen > tp->t_pmtud_mtu_sent)
		tp->t_pmtud_mtu_sent = packetlen;

	tcps = TCP_STAT_GETREF();
	tcps[TCP_STAT_SNDTOTAL]++;
	if (tp->t_flags & TF_DELACK)
		tcps[TCP_STAT_DELACK]++;
	TCP_STAT_PUTREF();

	/*
	 * Data sent (as far as we can tell).
	 * If this advertises a larger window than any other segment,
	 * then remember the size of the advertised window.
	 * Any pending ACK has now been sent.
	 */
	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
		tp->rcv_adv = tp->rcv_nxt + win;
	tp->last_ack_sent = tp->rcv_nxt;
	tp->t_flags &= ~TF_ACKNOW;
	TCP_CLEAR_DELACK(tp);
#ifdef DIAGNOSTIC
	if (maxburst < 0)
		printf("tcp_output: maxburst exceeded by %d\n", -maxburst);
#endif
	/* Loop back for more if we have it (and are not burst-limited). */
	if (sendalot && (tp->t_congctl == &tcp_reno_ctl || --maxburst))
		goto again;

	return 0;
}

/*
 * tcp_setpersist: arm the persist timer for this connection.
 *
 * The base interval is derived from the smoothed RTT estimate
 * (srtt/4 + rttvar, scaled down by 2^3), floored at t_rttmin, then
 * backed off by tcp_backoff[t_rxtshift] and clamped to the
 * [TCPTV_PERSMIN, TCPTV_PERSMAX] range by TCPT_RANGESET().
 * Also bumps t_rxtshift (up to TCP_MAXRXTSHIFT) so subsequent probes
 * back off further.  It is a bug to call this while the retransmit
 * timer is armed -- persist and retransmit are mutually exclusive.
 */
void
tcp_setpersist(struct tcpcb *tp)
{
	int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2);
	int nticks;

	if (TCP_TIMER_ISARMED(tp, TCPT_REXMT))
		panic("tcp_output REXMT");
	/*
	 * Start/restart persistence timer.
	 */
	if (t < tp->t_rttmin)
		t = tp->t_rttmin;
	TCPT_RANGESET(nticks, t * tcp_backoff[tp->t_rxtshift],
	    TCPTV_PERSMIN, TCPTV_PERSMAX);
	TCP_TIMER_ARM(tp, TCPT_PERSIST, nticks);
	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
		tp->t_rxtshift++;
}
| 6 5 5 4 4 4 4 4 4 4 4 3 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | /* $NetBSD: kern_mod_80.c,v 1.6 2019/12/12 02:15:42 pgoyette Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
 IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * System calls relating to loadable modules.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mod_80.c,v 1.6 2019/12/12 02:15:42 pgoyette Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_modular.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kobj.h>
#include <sys/module.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/compat_stub.h>

#include <compat/sys/module.h>
#include <compat/common/compat_mod.h>

/*
 * compat_80_modstat:
 *
 *	Implement the old MODCTL_OSTAT modctl(2) command: copy an array of
 *	omodstat_t records (one per loaded and per built-in module) out to
 *	the user's iovec.  Kernel addresses are exposed only to callers
 *	that pass the KAUTH_SYSTEM_MODULE check.  Returns 0 or an errno.
 */
static int
compat_80_modstat(int cmd, struct iovec *iov, void *arg)
{
	omodstat_t *oms, *omso;
	modinfo_t *mi;
	module_t *mod;
	vaddr_t addr;
	size_t size;
	size_t omslen;
	size_t used;
	int error;
	int omscnt;
	bool stataddr;
	const char *suffix = "...";	/* marker for truncated "required" lists */

	if (cmd != MODCTL_OSTAT)
		return EINVAL;

	error = copyin(arg, iov, sizeof(*iov));
	if (error != 0) {
		return error;
	}

	/* If not privileged, don't expose kernel addresses. */
	error = kauth_authorize_system(kauth_cred_get(), KAUTH_SYSTEM_MODULE,
	    0, (void *)(uintptr_t)MODCTL_STAT, NULL, NULL);
	stataddr = (error == 0);

	/*
	 * Hold kernconfig_lock across both the counting pass and the fill
	 * pass so the module lists cannot change in between.
	 */
	kernconfig_lock();
	omscnt = 0;
	TAILQ_FOREACH(mod, &module_list, mod_chain) {
		omscnt++;
		mi = mod->mod_info;
	}
	TAILQ_FOREACH(mod, &module_builtins, mod_chain) {
		omscnt++;
		mi = mod->mod_info;
	}
	omslen = omscnt * sizeof(omodstat_t);
	omso = kmem_zalloc(omslen, KM_SLEEP);
	oms = omso;
	/* Fill one record per dynamically loaded module. */
	TAILQ_FOREACH(mod, &module_list, mod_chain) {
		mi = mod->mod_info;
		strlcpy(oms->oms_name, mi->mi_name, sizeof(oms->oms_name));
		if (mi->mi_required != NULL) {
			used = strlcpy(oms->oms_required, mi->mi_required,
			    sizeof(oms->oms_required));
			if (used >= sizeof(oms->oms_required)) {
				/* Truncated: terminate early and append "..." */
				oms->oms_required[sizeof(oms->oms_required) -
				    strlen(suffix) - 1] = '\0';
				strlcat(oms->oms_required, suffix,
				    sizeof(oms->oms_required));
			}
		}
		if (mod->mod_kobj != NULL && stataddr) {
			kobj_stat(mod->mod_kobj, &addr, &size);
			oms->oms_addr = addr;
			oms->oms_size = size;
		}
		oms->oms_class = mi->mi_class;
		oms->oms_refcnt = mod->mod_refcnt;
		oms->oms_source = mod->mod_source;
		oms->oms_flags = mod->mod_flags;
		oms++;
	}
	/* Fill one record per built-in module; refcnt is reported as -1. */
	TAILQ_FOREACH(mod, &module_builtins, mod_chain) {
		mi = mod->mod_info;
		strlcpy(oms->oms_name, mi->mi_name, sizeof(oms->oms_name));
		if (mi->mi_required != NULL) {
			used = strlcpy(oms->oms_required, mi->mi_required,
			    sizeof(oms->oms_required));
			if (used >= sizeof(oms->oms_required)) {
				oms->oms_required[sizeof(oms->oms_required) -
				    strlen(suffix) - 1] = '\0';
				strlcat(oms->oms_required, suffix,
				    sizeof(oms->oms_required));
			}
		}
		if (mod->mod_kobj != NULL && stataddr) {
			kobj_stat(mod->mod_kobj, &addr, &size);
			oms->oms_addr = addr;
			oms->oms_size = size;
		}
		oms->oms_class = mi->mi_class;
		oms->oms_refcnt = -1;
		KASSERT(mod->mod_source == MODULE_SOURCE_KERNEL);
		oms->oms_source = mod->mod_source;
		oms++;
	}
	kernconfig_unlock();
	/* Copy out as much as fits in the caller's buffer ... */
	error = copyout(omso, iov->iov_base, uimin(omslen, iov->iov_len));
	kmem_free(omso, omslen);
	if (error == 0) {
		/* ... and report the full required length back via the iovec. */
		iov->iov_len = omslen;
		error = copyout(iov, arg, sizeof(*iov));
	}
	return error;
}

/* Register the MODCTL_OSTAT compatibility hook. */
void
kern_mod_80_init(void)
{

	MODULE_HOOK_SET(compat_modstat_80_hook, compat_80_modstat);
}

/* Unregister the MODCTL_OSTAT compatibility hook. */
void
kern_mod_80_fini(void)
{

	MODULE_HOOK_UNSET(compat_modstat_80_hook);
}
| 3 2 2 2 3 3 1 1 1 1 11 11 10 1 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | /* $NetBSD: sysmon.c,v 1.32 2022/03/28 12:33:21 riastradh Exp $ */ /*- * Copyright (c) 2000 Zembu Labs, Inc. * All rights reserved. * * Author: Jason R. Thorpe <thorpej@zembu.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Zembu Labs, Inc. * 4. Neither the name of Zembu Labs nor the names of its employees may * be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY ZEMBU LABS, INC. ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WAR- * RANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DIS- * CLAIMED. IN NO EVENT SHALL ZEMBU LABS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Clearing house for system monitoring hardware. We currently * handle environmental sensors, watchdog timers, and power management. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysmon.c,v 1.32 2022/03/28 12:33:21 riastradh Exp $");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/device.h>
#include <sys/once.h>

#include <dev/sysmon/sysmonvar.h>

dev_type_open(sysmonopen);
dev_type_close(sysmonclose);
dev_type_ioctl(sysmonioctl);
dev_type_read(sysmonread);
dev_type_poll(sysmonpoll);
dev_type_kqfilter(sysmonkqfilter);

/* Character device switch for /dev/sysmon minors; MP-safe. */
const struct cdevsw sysmon_cdevsw = {
	.d_open = sysmonopen,
	.d_close = sysmonclose,
	.d_read = sysmonread,
	.d_write = nowrite,
	.d_ioctl = sysmonioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = sysmonpoll,
	.d_mmap = nommap,
	.d_kqfilter = sysmonkqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int sysmon_modcmd(modcmd_t, void *);
static int sm_init_once(void);

/*
 * Info about our minor "devices": per-minor operation vectors, open
 * reference counts, and the module to autoload for each minor.  All
 * three arrays are indexed by SYSMON_MINOR_* (envsys, wdog, power)
 * and protected by sysmon_minor_mtx.
 */
static struct sysmon_opvec *sysmon_opvec_table[] = { NULL, NULL, NULL };
static int sysmon_refcnt[] = { 0, 0, 0 };
static const char *sysmon_mod[] = { "sysmon_envsys",
				    "sysmon_wdog",
				    "sysmon_power" };
static kmutex_t sysmon_minor_mtx;

#ifdef _MODULE
/* True while our cdevsw is attached (module case only). */
static bool sm_is_attached;
#endif

ONCE_DECL(once_sm);

/*
 * sysmon_attach_minor
 *
 *	Attach a minor device for wdog, power, or envsys.  Manage a
 *	reference count so we can prevent the device from being
 *	detached if there are still users with the minor device opened.
 *
 *	If the opvec argument is NULL, this is a request to detach the
 *	minor device - make sure the refcnt is zero!
 */
int
sysmon_attach_minor(int minor, struct sysmon_opvec *opvec)
{
	int ret;

	mutex_enter(&sysmon_minor_mtx);
	if (opvec) {
		/* Attach: only if no opvec is already registered. */
		if (sysmon_opvec_table[minor] == NULL) {
			sysmon_refcnt[minor] = 0;
			sysmon_opvec_table[minor] = opvec;
			ret = 0;
		} else
			ret = EEXIST;
	} else {
		/* Detach: refused while anyone still has the minor open. */
		if (sysmon_refcnt[minor] == 0) {
			sysmon_opvec_table[minor] = NULL;
			ret = 0;
		} else
			ret = EBUSY;
	}
	mutex_exit(&sysmon_minor_mtx);

	return ret;
}

/*
 * sysmonopen:
 *
 *	Open the system monitor device.  If the backing module for the
 *	requested minor is not yet loaded, try to autoload it first.
 */
int
sysmonopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	int error;

	mutex_enter(&sysmon_minor_mtx);
	switch (minor(dev)) {
	case SYSMON_MINOR_ENVSYS:
	case SYSMON_MINOR_WDOG:
	case SYSMON_MINOR_POWER:
		if (sysmon_opvec_table[minor(dev)] == NULL) {
			/*
			 * Drop the mutex across module_autoload(), which
			 * may sleep; re-check the table after reacquiring
			 * since another thread may have raced us here.
			 */
			mutex_exit(&sysmon_minor_mtx);
			error = module_autoload(sysmon_mod[minor(dev)],
						MODULE_CLASS_DRIVER);
			if (error)
				return error;
			mutex_enter(&sysmon_minor_mtx);
			if (sysmon_opvec_table[minor(dev)] == NULL) {
				error = ENODEV;
				break;
			}
		}
		error = (sysmon_opvec_table[minor(dev)]->so_open)(dev, flag,
		    mode, l);
		if (error == 0)
			sysmon_refcnt[minor(dev)]++;
		break;
	default:
		error = ENODEV;
	}
	mutex_exit(&sysmon_minor_mtx);

	return error;
}

/*
 * sysmonclose:
 *
 *	Close the system monitor device.
 */
int
sysmonclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	int error;

	switch (minor(dev)) {
	case SYSMON_MINOR_ENVSYS:
	case SYSMON_MINOR_WDOG:
	case SYSMON_MINOR_POWER:
		if (sysmon_opvec_table[minor(dev)] == NULL)
			error = ENODEV;
		else {
			error = (sysmon_opvec_table[minor(dev)]->so_close)(dev,
			    flag, mode, l);
			if (error == 0) {
				sysmon_refcnt[minor(dev)]--;
				KASSERT(sysmon_refcnt[minor(dev)] >= 0);
			}
		}
		break;
	default:
		error = ENODEV;
	}

	return (error);
}

/*
 * sysmonioctl:
 *
 *	Perform a control request.
*/ int sysmonioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { int error; switch (minor(dev)) { case SYSMON_MINOR_ENVSYS: case SYSMON_MINOR_WDOG: case SYSMON_MINOR_POWER: if (sysmon_opvec_table[minor(dev)] == NULL) error = ENODEV; else error = (sysmon_opvec_table[minor(dev)]->so_ioctl)(dev, cmd, data, flag, l); break; default: error = ENODEV; } return (error); } /* * sysmonread: * * Perform a read request. */ int sysmonread(dev_t dev, struct uio *uio, int flags) { int error; switch (minor(dev)) { case SYSMON_MINOR_POWER: if (sysmon_opvec_table[minor(dev)] == NULL) error = ENODEV; else error = (sysmon_opvec_table[minor(dev)]->so_read)(dev, uio, flags); break; default: error = ENODEV; } return (error); } /* * sysmonpoll: * * Poll the system monitor device. */ int sysmonpoll(dev_t dev, int events, struct lwp *l) { int rv; switch (minor(dev)) { case SYSMON_MINOR_POWER: if (sysmon_opvec_table[minor(dev)] == NULL) rv = events; else rv = (sysmon_opvec_table[minor(dev)]->so_poll)(dev, events, l); break; default: rv = events; } return (rv); } /* * sysmonkqfilter: * * Kqueue filter for the system monitor device. 
*/ int sysmonkqfilter(dev_t dev, struct knote *kn) { int error; switch (minor(dev)) { case SYSMON_MINOR_POWER: if (sysmon_opvec_table[minor(dev)] == NULL) error = ENODEV; else error = (sysmon_opvec_table[minor(dev)]->so_filter)(dev, kn); break; default: error = 1; } return (error); } MODULE(MODULE_CLASS_DRIVER, sysmon, NULL); static int sm_init_once(void) { mutex_init(&sysmon_minor_mtx, MUTEX_DEFAULT, IPL_NONE); return 0; } int sysmon_init(void) { int error; #ifdef _MODULE devmajor_t bmajor, cmajor; #endif error = RUN_ONCE(&once_sm, sm_init_once); #ifdef _MODULE mutex_enter(&sysmon_minor_mtx); if (!sm_is_attached) { bmajor = cmajor = -1; error = devsw_attach("sysmon", NULL, &bmajor, &sysmon_cdevsw, &cmajor); sm_is_attached = (error != 0); } mutex_exit(&sysmon_minor_mtx); #endif return error; } int sysmon_fini(void) { int error = 0; if ((sysmon_opvec_table[SYSMON_MINOR_ENVSYS] != NULL) || (sysmon_opvec_table[SYSMON_MINOR_WDOG] != NULL) || (sysmon_opvec_table[SYSMON_MINOR_POWER] != NULL)) error = EBUSY; #ifdef _MODULE if (error == 0) { mutex_enter(&sysmon_minor_mtx); sm_is_attached = false; devsw_detach(NULL, &sysmon_cdevsw); mutex_exit(&sysmon_minor_mtx); } #endif return error; } static int sysmon_modcmd(modcmd_t cmd, void *arg) { int ret; switch (cmd) { case MODULE_CMD_INIT: ret = sysmon_init(); break; case MODULE_CMD_FINI: ret = sysmon_fini(); break; case MODULE_CMD_STAT: default: ret = ENOTTY; } return ret; } |
| 466 466 435 465 66 426 465 432 428 260 428 432 433 433 433 433 433 430 14 433 433 433 433 248 459 50 49 50 42 50 49 43 50 50 50 50 50 5 4 4 4 4 46 4 42 40 41 41 10 9 7 1 1 10 9 9 30 38 15 18 19 19 18 16 16 36 36 35 33 33 33 12 12 33 33 24 16 33 26 31 31 31 31 31 30 31 31 30 31 30 15 11 15 15 13 9 9 9 9 31 24 22 22 15 15 15 15 15 22 12 12 12 12 12 12 12 12 8 8 8 30 29 30 30 30 30 14 30 30 29 30 30 30 23 15 15 12 15 7 15 15 13 15 15 7 7 7 7 15 15 15 14 14 11 10 14 14 14 13 5 5 4 13 13 7 11 11 542 541 492 391 391 391 490 447 446 447 447 447 490 476 476 476 491 391 490 476 490 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 | /* $NetBSD: ffs_inode.c,v 1.131 2020/07/31 04:07:30 chs Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation * by Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.131 2020/07/31 04:07:30 chs Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" #include "opt_quota.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/buf.h> #include <sys/file.h> #include <sys/fstrans.h> #include <sys/kauth.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/mount.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/trace.h> #include <sys/vnode.h> #include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_wapbl.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> static int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, int64_t *); /* * Update the access, modified, and inode change times as specified * by the IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. 
 * The IN_MODIFIED flag is used to specify that the inode needs to be
 * updated but that the times have already been set. The access
 * and modified times are taken from the second and third parameters;
 * the inode change time is always taken from the current time. If
 * UPDATE_WAIT flag is set, or UPDATE_DIROP is set then wait for the
 * disk write of the inode to complete.
 */

int
ffs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, int updflags)
{
	struct fs *fs;
	struct buf *bp;
	struct inode *ip;
	int error;
	void *cp;
	int waitfor, flags;

	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);
	FFS_ITIMES(ip, acc, mod, NULL);
	if (updflags & UPDATE_CLOSE)
		flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED);
	else
		flags = ip->i_flag & IN_MODIFIED;
	if (flags == 0)
		return (0);
	fs = ip->i_fs;

	/* Decide whether this update must be written synchronously. */
	if ((flags & IN_MODIFIED) != 0 &&
	    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
		waitfor = updflags & UPDATE_WAIT;
		if ((updflags & UPDATE_DIROP) != 0)
			waitfor |= UPDATE_WAIT;
	} else
		waitfor = 0;

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_ffs1_ouid = ip->i_uid;		/* XXX */
		ip->i_ffs1_ogid = ip->i_gid;		/* XXX */
	}						/* XXX */
	/* Read the cylinder-group block holding this on-disk inode. */
	error = bread(ip->i_devvp,
		      FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)),
		      (int)fs->fs_bsize, B_MODIFY, &bp);
	if (error) {
		return (error);
	}
	ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED);
	/* Keep unlinked inode list up to date */
	KDASSERTMSG(DIP(ip, nlink) == ip->i_nlink,
	    "DIP(ip, nlink) [%d] == ip->i_nlink [%d]",
	    DIP(ip, nlink), ip->i_nlink);
	if (ip->i_mode) {
		if (ip->i_nlink > 0) {
			UFS_WAPBL_UNREGISTER_INODE(ip->i_ump->um_mountp,
			    ip->i_number, ip->i_mode);
		} else {
			UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp,
			    ip->i_number, ip->i_mode);
		}
	}
	/* Copy the in-core dinode into the buffer, byte-swapping if needed. */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		cp = (char *)bp->b_data +
		    (ino_to_fsbo(fs, ip->i_number) * DINODE1_SIZE);
#ifdef FFS_EI
		if (UFS_FSNEEDSWAP(fs))
			ffs_dinode1_swap(ip->i_din.ffs1_din,
			    (struct ufs1_dinode *)cp);
		else
#endif
			memcpy(cp, ip->i_din.ffs1_din, DINODE1_SIZE);
	} else {
		cp = (char *)bp->b_data +
		    (ino_to_fsbo(fs, ip->i_number) * DINODE2_SIZE);
#ifdef FFS_EI
		if (UFS_FSNEEDSWAP(fs))
			ffs_dinode2_swap(ip->i_din.ffs2_din,
			    (struct ufs2_dinode *)cp);
		else
#endif
			memcpy(cp, ip->i_din.ffs2_din, DINODE2_SIZE);
	}
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */

/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	struct mount *omp = ovp->v_mount;
	daddr_t bn, lastiblock[UFS_NIADDR], indir_lbn[UFS_NIADDR];
	daddr_t blks[UFS_NDADDR + UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
	struct fs *fs;
	int extblocks;
	int offset, pgoffset, level;
	int64_t blocksreleased = 0, datablocks;
	int i, aflag, nblocks;
	int error, allerror = 0;
	off_t osize;
	int sync;
	struct ufsmount *ump = oip->i_ump;
	void *dcookie;
	long bsize;
	bool wapbl = omp->mnt_wapbl != NULL;

	UFS_WAPBL_JLOCK_ASSERT(ump->um_mountp);

	/* Device/fifo/socket nodes hold no file data; nothing to truncate. */
	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}
	if (length < 0)
		return (EINVAL);

	/*
	 * Historically clients did not have to specify which data
	 * they were truncating. So, if not specified, we assume
	 * traditional behavior, e.g., just the normal data.
	 */
	if ((ioflag & (IO_EXT | IO_NORMAL)) == 0)
		ioflag |= IO_NORMAL;

	fs = oip->i_fs;
#define i_din2 i_din.ffs2_din
	extblocks = 0;
	datablocks = DIP(oip, blocks);
	/* UFS2 extended-attribute area is accounted separately. */
	if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_din2->di_extsize > 0) {
		extblocks = btodb(ffs_fragroundup(fs, oip->i_din2->di_extsize));
		datablocks -= extblocks;
	}
	if ((ioflag & IO_EXT) && extblocks > 0) {
		/* Only complete removal of the ext area is supported. */
		if (length != 0)
			panic("ffs_truncate: partial trunc of extdata");
		{
#ifdef QUOTA
			(void) chkdq(oip, -extblocks, NOCRED, FORCE);
#endif
			osize = oip->i_din2->di_extsize;
			oip->i_din2->di_blocks -= extblocks;
			oip->i_din2->di_extsize = 0;
			for (i = 0; i < UFS_NXADDR; i++) {
				binvalbuf(ovp, -1 - i);
				oldblks[i] = oip->i_din2->di_extb[i];
				oip->i_din2->di_extb[i] = 0;
			}
			oip->i_flag |= IN_CHANGE;
			/* Write the cleared pointers before freeing blocks. */
			if ((error = ffs_update(ovp, NULL, NULL, 0)))
				return (error);
			for (i = 0; i < UFS_NXADDR; i++) {
				if (oldblks[i] == 0)
					continue;
				bsize = ffs_sblksize(fs, osize, i);
				if (wapbl) {
					error = UFS_WAPBL_REGISTER_DEALLOCATION(omp,
					    FFS_FSBTODB(fs, oldblks[i]), bsize,
					    NULL);
					if (error)
						return error;
				} else
					ffs_blkfree(fs, oip->i_devvp, oldblks[i],
					    bsize, oip->i_number);
			}
			extblocks = 0;
		}
	}
	if ((ioflag & IO_NORMAL) == 0)
		return (0);
	/* Short symlinks store their target inline in the inode. */
	if (ovp->v_type == VLNK &&
	    (oip->i_size < ump->um_maxsymlinklen ||
	     (ump->um_maxsymlinklen == 0 && datablocks == 0))) {
		KDASSERT(length == 0);
		memset(SHORTLINK(oip), 0, (size_t)oip->i_size);
		oip->i_size = 0;
		DIP_ASSIGN(oip, size, 0);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(ovp, NULL, NULL, 0));
	}
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(ovp, NULL, NULL, 0));
	}
	if (length > ump->um_maxfilesize)
		return (EFBIG);

	if ((oip->i_flags & SF_SNAPSHOT) != 0)
		ffs_snapremove(ovp);

	osize = oip->i_size;
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (ffs_lblkno(fs, osize) < UFS_NDADDR &&
		    ffs_lblkno(fs, osize) != ffs_lblkno(fs, length) &&
		    ffs_blkroundup(fs, osize) != osize) {
			off_t eob;

			eob = ffs_blkroundup(fs, osize);
			uvm_vnp_setwritesize(ovp, eob);
			error = ufs_balloc_range(ovp, osize, eob - osize,
			    cred, aflag);
			if (error) {
				/* Undo partial growth before failing. */
				(void) ffs_truncate(ovp, osize,
				    ioflag & IO_SYNC, cred);
				return error;
			}
			if (ioflag & IO_SYNC) {
				rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER);
				VOP_PUTPAGES(ovp,
				    trunc_page(osize & fs->fs_bmask),
				    round_page(eob),
				    PGO_CLEANIT | PGO_SYNCIO |
				    PGO_JOURNALLOCKED);
			}
		}
		uvm_vnp_setwritesize(ovp, length);
		error = ufs_balloc_range(ovp, length - 1, 1, cred, aflag);
		if (error) {
			(void) ffs_truncate(ovp, osize, ioflag & IO_SYNC, cred);
			return (error);
		}
		uvm_vnp_setsize(ovp, length);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		KASSERT(ovp->v_size == oip->i_size);
		return (ffs_update(ovp, NULL, NULL, 0));
	}

	/*
	 * When truncating a regular file down to a non-block-aligned size,
	 * we must zero the part of last block which is past the new EOF.
	 * We must synchronously flush the zeroed pages to disk
	 * since the new pages will be invalidated as soon as we
	 * inform the VM system of the new, smaller size.
	 * We must do this before acquiring the GLOCK, since fetching
	 * the pages will acquire the GLOCK internally.
	 * So there is a window where another thread could see a whole
	 * zeroed page past EOF, but that's life.
	 */
	offset = ffs_blkoff(fs, length);
	pgoffset = length & PAGE_MASK;
	if (ovp->v_type == VREG && (pgoffset != 0 || offset != 0) &&
	    osize > length) {
		daddr_t lbn;
		voff_t eoz;
		int size;

		if (offset != 0) {
			error = ufs_balloc_range(ovp, length - 1, 1, cred,
			    aflag);
			if (error)
				return error;
		}
		lbn = ffs_lblkno(fs, length);
		size = ffs_blksize(fs, oip, lbn);
		eoz = MIN(MAX(ffs_lblktosize(fs, lbn) + size,
		    round_page(pgoffset)), osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_VNODE_FLAGS(ovp));
		if (round_page(eoz) > round_page(length)) {
			rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE | PGO_JOURNALLOCKED |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error)
				return error;
		}
	}

	genfs_node_wrlock(ovp);
	oip->i_size = length;
	DIP_ASSIGN(oip, size, length);
	uvm_vnp_setsize(ovp, length);
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep. Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = ffs_lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - UFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - FFS_NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] -
	    FFS_NINDIR(fs) * FFS_NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks. If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list. lastiblock values are also
	 * normalized to -1 for calls to ffs_indirtrunc below.
	 */
	sync = 0;
	for (level = TRIPLE; level >= SINGLE; level--) {
		blks[UFS_NDADDR + level] = DIP(oip, ib[level]);
		if (lastiblock[level] < 0 && blks[UFS_NDADDR + level] != 0) {
			sync = 1;
			DIP_ASSIGN(oip, ib[level], 0);
			lastiblock[level] = -1;
		}
	}
	for (i = 0; i < UFS_NDADDR; i++) {
		blks[i] = DIP(oip, db[i]);
		if (i > lastblock && blks[i] != 0) {
			sync = 1;
			DIP_ASSIGN(oip, db[i], 0);
		}
	}
	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (sync) {
		error = ffs_update(ovp, NULL, NULL, UPDATE_WAIT);
		if (error && !allerror)
			allerror = error;
	}

	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	for (i = 0; i < UFS_NDADDR; i++) {
		bn = DIP(oip, db[i]);
		DIP_ASSIGN(oip, db[i], blks[i]);
		blks[i] = bn;
	}
	for (i = 0; i < UFS_NIADDR; i++) {
		bn = DIP(oip, ib[i]);
		DIP_ASSIGN(oip, ib[i], blks[UFS_NDADDR + i]);
		blks[UFS_NDADDR + i] = bn;
	}
	oip->i_size = osize;
	DIP_ASSIGN(oip, size, osize);
	error = vtruncbuf(ovp, lastblock + 1, 0, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -UFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - FFS_NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] -
	    FFS_NINDIR(fs) * FFS_NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ffs_getib(fs, oip, level);
		if (bn != 0) {
			if (lastiblock[level] < 0 &&
			    oip->i_ump->um_mountp->mnt_wapbl) {
				/* Defer the free to the journal. */
				error = UFS_WAPBL_REGISTER_DEALLOCATION(
				    oip->i_ump->um_mountp,
				    FFS_FSBTODB(fs, bn), fs->fs_bsize,
				    &dcookie);
				if (error)
					goto out;
			} else {
				dcookie = NULL;
			}

			error = ffs_indirtrunc(oip, indir_lbn[level],
			    FFS_FSBTODB(fs, bn), lastiblock[level], level,
			    &blocksreleased);
			if (error) {
				if (dcookie) {
					UFS_WAPBL_UNREGISTER_DEALLOCATION(
					    oip->i_ump->um_mountp, dcookie);
				}
				goto out;
			}

			if (lastiblock[level] < 0) {
				if (!dcookie)
					ffs_blkfree(fs, oip->i_devvp, bn,
					    fs->fs_bsize, oip->i_number);
				DIP_ASSIGN(oip, ib[level], 0);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = UFS_NDADDR - 1; i > lastblock; i--) {
		bn = ffs_getdb(fs, oip, i);
		if (bn == 0)
			continue;
		bsize = ffs_blksize(fs, oip, i);
		if ((oip->i_ump->um_mountp->mnt_wapbl) &&
		    (ovp->v_type != VREG)) {
			error = UFS_WAPBL_REGISTER_DEALLOCATION(
			    oip->i_ump->um_mountp, FFS_FSBTODB(fs, bn),
			    bsize, NULL);
			if (error)
				goto out;
		} else
			ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
		DIP_ASSIGN(oip, db[i], 0);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ffs_getdb(fs, oip, lastblock);
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = ffs_blksize(fs, oip, lastblock);
		oip->i_size = length;
		DIP_ASSIGN(oip, size, length);
		newspace = ffs_blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += ffs_numfrags(fs, newspace);
			if ((oip->i_ump->um_mountp->mnt_wapbl) &&
			    (ovp->v_type != VREG)) {
				error = UFS_WAPBL_REGISTER_DEALLOCATION(
				    oip->i_ump->um_mountp,
				    FFS_FSBTODB(fs, bn),
				    oldspace - newspace, NULL);
				if (error)
					goto out;
			} else
				ffs_blkfree(fs, oip->i_devvp, bn,
				    oldspace - newspace, oip->i_number);
			blocksreleased += btodb(oldspace - newspace);
		}
	}

done:
	/* Verify the saved block configuration was not disturbed. */
	for (level = SINGLE; level <= TRIPLE; level++)
		KASSERTMSG((blks[UFS_NDADDR + level] == DIP(oip, ib[level])),
		    "itrunc1 blk mismatch: %jx != %jx",
		    (uintmax_t)blks[UFS_NDADDR + level],
		    (uintmax_t)DIP(oip, ib[level]));
	for (i = 0; i < UFS_NDADDR; i++)
		KASSERTMSG((blks[i] == DIP(oip, db[i])),
		    "itrunc2 blk mismatch: %jx != %jx",
		    (uintmax_t)blks[i], (uintmax_t)DIP(oip, db[i]));
	KASSERTMSG((length != 0 || extblocks ||
	    LIST_EMPTY(&ovp->v_cleanblkhd)),
	    "itrunc3: zero length and nonempty cleanblkhd");
	KASSERTMSG((length != 0 || extblocks ||
	    LIST_EMPTY(&ovp->v_dirtyblkhd)),
	    "itrunc3: zero length and nonempty dirtyblkhd");

out:
	/*
	 * Set length back to old size if deallocation failed. Some indirect
	 * blocks were deallocated creating a hole, but that is okay.
	 */
	if (error == EAGAIN) {
		if (!allerror)
			allerror = error;
		length = osize;
		uvm_vnp_setsize(ovp, length);
	}

	/*
	 * Put back the real size.
	 */
	oip->i_size = length;
	DIP_ASSIGN(oip, size, length);
	DIP_ADD(oip, blocks, -blocksreleased);
	genfs_node_unlock(ovp);
	oip->i_flag |= IN_CHANGE;
	UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0);
#if defined(QUOTA) || defined(QUOTA2)
	(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	KASSERT(ovp->v_type != VREG || ovp->v_size == oip->i_size);
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn. Blocks are free'd in LIFO order up to (but not including)
 * lastbn. If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
/* NOTE(review): this function continues past the end of this view. */
static int
ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn,
    int level, int64_t *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	int32_t *bap1 = NULL;
	int64_t *bap2 = NULL;
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	char *copy = NULL;
	int64_t factor;
	int64_t nblocks;
	int error = 0, allerror = 0;
	const int needswap = UFS_FSNEEDSWAP(fs);
	const int wapbl = (ip->i_ump->um_mountp->mnt_wapbl != NULL);
	void *dcookie;

/* Read/write a block-pointer entry for either UFS1 or UFS2 layouts. */
#define RBAP(ip, i)	(((ip)->i_ump->um_fstype == UFS1) ? \
	ufs_rw32(bap1[i], needswap) : ufs_rw64(bap2[i], needswap))
#define BAP_ASSIGN(ip, i, value)					\
	do {								\
		if ((ip)->i_ump->um_fstype == UFS1)			\
			bap1[i] = (value);				\
		else							\
			bap2[i] = (value);				\
	} while(0)

	/*
	 * Calculate index in current block of last
	 * block to be kept. -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= FFS_NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first. Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.
However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); error = ffs_getblk(vp, lbn, FFS_NOBLK, fs->fs_bsize, false, &bp); if (error) return error; if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); curlwp->l_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; bp->b_flags &= ~B_COWDONE; /* we change blkno below */ if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); VOP_STRATEGY(vp, bp); error = biowait(bp); if (error == 0) error = fscow_run(bp, true); } if (error) { brelse(bp, 0); return error; } /* * Clear reference to blocks to be removed on disk, before actually * reclaiming them, so that fsck is more likely to be able to recover * the filesystem if system goes down during the truncate process. * This assumes the truncate process would not fail, contrary * to the wapbl case. */ if (ip->i_ump->um_fstype == UFS1) bap1 = (int32_t *)bp->b_data; else bap2 = (int64_t *)bp->b_data; if (lastbn >= 0 && !wapbl) { copy = kmem_alloc(fs->fs_bsize, KM_SLEEP); memcpy((void *)copy, bp->b_data, (u_int)fs->fs_bsize); for (i = last + 1; i < FFS_NINDIR(fs); i++) BAP_ASSIGN(ip, i, 0); error = bwrite(bp); if (error) allerror = error; if (ip->i_ump->um_fstype == UFS1) bap1 = (int32_t *)copy; else bap2 = (int64_t *)copy; } /* * Recursively free totally unused blocks. 
*/ for (i = FFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = RBAP(ip, i); if (nb == 0) continue; if ((ip->i_ump->um_mountp->mnt_wapbl) && ((level > SINGLE) || (ITOV(ip)->v_type != VREG))) { error = UFS_WAPBL_REGISTER_DEALLOCATION( ip->i_ump->um_mountp, FFS_FSBTODB(fs, nb), fs->fs_bsize, &dcookie); if (error) goto out; } else { dcookie = NULL; } if (level > SINGLE) { error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), (daddr_t)-1, level - 1, countp); if (error) { if (dcookie) { UFS_WAPBL_UNREGISTER_DEALLOCATION( ip->i_ump->um_mountp, dcookie); } goto out; } } if (!dcookie) ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, ip->i_number); BAP_ASSIGN(ip, i, 0); *countp += nblocks; } /* * Recursively free blocks on the now last partial indirect block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = RBAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), last, level - 1, countp); if (error) goto out; } } out: if (error && !allerror) allerror = error; if (copy != NULL) { kmem_free(copy, fs->fs_bsize); } else if (lastbn < 0 && error == 0) { /* all freed, release without writing back */ brelse(bp, BC_INVAL); } else if (wapbl) { /* only partially freed, write the updated block */ error = bwrite(bp); if (!allerror) allerror = error; } return (allerror); } void ffs_itimes(struct inode *ip, const struct timespec *acc, const struct timespec *mod, const struct timespec *cre) { struct timespec now; if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY))) { return; } vfs_timestamp(&now); if (ip->i_flag & IN_ACCESS) { if (acc == NULL) acc = &now; DIP_ASSIGN(ip, atime, acc->tv_sec); DIP_ASSIGN(ip, atimensec, acc->tv_nsec); } if (ip->i_flag & (IN_UPDATE | IN_MODIFY)) { if ((ip->i_flags & SF_SNAPSHOT) == 0) { if (mod == NULL) mod = &now; DIP_ASSIGN(ip, mtime, mod->tv_sec); DIP_ASSIGN(ip, mtimensec, mod->tv_nsec); } ip->i_modrev++; } if (ip->i_flag & (IN_CHANGE | IN_MODIFY)) { if (cre == 
	    NULL)
			cre = &now;
		DIP_ASSIGN(ip, ctime, cre->tv_sec);
		DIP_ASSIGN(ip, ctimensec, cre->tv_nsec);
	}
	/*
	 * Fold the fine-grained timestamp-request flags into the coarser
	 * IN_ACCESSED/IN_MODIFIED bits, then clear the request flags.
	 */
	if (ip->i_flag & (IN_ACCESS | IN_MODIFY))
		ip->i_flag |= IN_ACCESSED;
	if (ip->i_flag & (IN_UPDATE | IN_CHANGE))
		ip->i_flag |= IN_MODIFIED;
	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY);
}
 |
| 12 29 29 29 12 12 12 12 569 567 34 34 580 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 | /* $NetBSD: subr_percpu.c,v 1.25 2020/05/11 21:37:31 riastradh Exp $ */ /*- * Copyright (c)2007,2008 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * per-cpu storage. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.25 2020/05/11 21:37:31 riastradh Exp $"); #include <sys/param.h> #include <sys/cpu.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/mutex.h> #include <sys/percpu.h> #include <sys/rwlock.h> #include <sys/vmem.h> #include <sys/xcall.h> #define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1) #define PERCPU_QCACHE_MAX 0 #define PERCPU_IMPORT_SIZE 2048 struct percpu { unsigned pc_offset; size_t pc_size; percpu_callback_t pc_ctor; percpu_callback_t pc_dtor; void *pc_cookie; LIST_ENTRY(percpu) pc_list; }; static krwlock_t percpu_swap_lock __cacheline_aligned; static vmem_t * percpu_offset_arena __read_mostly; static struct { kmutex_t lock; unsigned int nextoff; LIST_HEAD(, percpu) ctor_list; struct lwp *busy; kcondvar_t cv; } percpu_allocation __cacheline_aligned; static percpu_cpu_t * cpu_percpu(struct cpu_info *ci) { return &ci->ci_data.cpu_percpu; } static unsigned int percpu_offset(percpu_t *pc) { const unsigned int off = pc->pc_offset; KASSERT(off < percpu_allocation.nextoff); return off; } /* * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge */ __noubsan static void percpu_cpu_swap(void *p1, void *p2) { struct cpu_info * const ci = p1; percpu_cpu_t * const newpcc = p2; percpu_cpu_t * const pcc = cpu_percpu(ci); KASSERT(ci == curcpu() || !mp_online); /* * swap *pcc and *newpcc unless anyone has beaten us. */ rw_enter(&percpu_swap_lock, RW_WRITER); if (newpcc->pcc_size > pcc->pcc_size) { percpu_cpu_t tmp; int s; tmp = *pcc; /* * block interrupts so that we don't lose their modifications. */ s = splhigh(); /* * copy data to new storage. */ memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size); /* * this assignment needs to be atomic for percpu_getptr_remote. 
*/ pcc->pcc_data = newpcc->pcc_data; splx(s); pcc->pcc_size = newpcc->pcc_size; *newpcc = tmp; } rw_exit(&percpu_swap_lock); } /* * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpus have enough space */ static void percpu_cpu_enlarge(size_t size) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; for (CPU_INFO_FOREACH(cii, ci)) { percpu_cpu_t pcc; pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ pcc.pcc_size = size; if (!mp_online) { percpu_cpu_swap(ci, &pcc); } else { uint64_t where; where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); xc_wait(where); } KASSERT(pcc.pcc_size <= size); if (pcc.pcc_data != NULL) { kmem_free(pcc.pcc_data, pcc.pcc_size); } } } /* * percpu_backend_alloc: vmem import callback for percpu_offset_arena */ static int percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize, vm_flag_t vmflags, vmem_addr_t *addrp) { unsigned int offset; unsigned int nextoff; ASSERT_SLEEPABLE(); KASSERT(dummy == NULL); if ((vmflags & VM_NOSLEEP) != 0) return ENOMEM; size = roundup(size, PERCPU_IMPORT_SIZE); mutex_enter(&percpu_allocation.lock); offset = percpu_allocation.nextoff; percpu_allocation.nextoff = nextoff = percpu_allocation.nextoff + size; mutex_exit(&percpu_allocation.lock); percpu_cpu_enlarge(nextoff); *resultsize = size; *addrp = (vmem_addr_t)offset; return 0; } static void percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci) { size_t sz = (uintptr_t)vp2; memset(vp, 0, sz); } /* * percpu_zero: initialize percpu storage with zero. 
 */
static void
percpu_zero(percpu_t *pc, size_t sz)
{

	/* The size is smuggled through the callback's void * argument. */
	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */
void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation.lock, MUTEX_DEFAULT, IPL_NONE);
	percpu_allocation.nextoff = PERCPU_QUANTUM_SIZE;
	LIST_INIT(&percpu_allocation.ctor_list);
	percpu_allocation.busy = NULL;
	cv_init(&percpu_allocation.cv, "percpu");

	/*
	 * Per-CPU offsets are carved out of a vmem arena whose backing
	 * import callback (percpu_backend_alloc) grows every CPU's storage.
	 */
	percpu_offset_arena = vmem_xcreate("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH.
 * => may be called for static CPUs afterward (typically just primary CPU)
 */
void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	struct percpu *pc;
	size_t size = percpu_allocation.nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();

	/*
	 * For the primary CPU, prior percpu_create may have already
	 * triggered allocation, so there's nothing more for us to do
	 * here.
	 */
	if (pcc->pcc_size)
		return;
	KASSERT(pcc->pcc_data == NULL);

	/*
	 * Otherwise, allocate storage and, while the constructor list
	 * is locked, run constructors for all percpus on this CPU.
*/ pcc->pcc_size = size; if (size) { pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP); mutex_enter(&percpu_allocation.lock); while (percpu_allocation.busy) cv_wait(&percpu_allocation.cv, &percpu_allocation.lock); percpu_allocation.busy = curlwp; LIST_FOREACH(pc, &percpu_allocation.ctor_list, pc_list) { KASSERT(pc->pc_ctor); mutex_exit(&percpu_allocation.lock); (*pc->pc_ctor)((char *)pcc->pcc_data + pc->pc_offset, pc->pc_cookie, ci); mutex_enter(&percpu_allocation.lock); } KASSERT(percpu_allocation.busy == curlwp); percpu_allocation.busy = NULL; cv_broadcast(&percpu_allocation.cv); mutex_exit(&percpu_allocation.lock); } } /* * percpu_alloc: allocate percpu storage * * => called in thread context. * => considered as an expensive and rare operation. * => allocated storage is initialized with zeros. */ percpu_t * percpu_alloc(size_t size) { return percpu_create(size, NULL, NULL, NULL); } /* * percpu_create: allocate percpu storage and associate ctor/dtor with it * * => called in thread context. * => considered as an expensive and rare operation. * => allocated storage is initialized by ctor, or zeros if ctor is null * => percpu_free will call dtor first, if dtor is nonnull * => ctor or dtor may sleep, even on allocation */ percpu_t * percpu_create(size_t size, percpu_callback_t ctor, percpu_callback_t dtor, void *cookie) { vmem_addr_t offset; percpu_t *pc; ASSERT_SLEEPABLE(); (void)vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT, &offset); pc = kmem_alloc(sizeof(*pc), KM_SLEEP); pc->pc_offset = offset; pc->pc_size = size; pc->pc_ctor = ctor; pc->pc_dtor = dtor; pc->pc_cookie = cookie; if (ctor) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; void *buf; /* * Wait until nobody is using the list of percpus with * constructors. 
*/ mutex_enter(&percpu_allocation.lock); while (percpu_allocation.busy) cv_wait(&percpu_allocation.cv, &percpu_allocation.lock); percpu_allocation.busy = curlwp; mutex_exit(&percpu_allocation.lock); /* * Run the constructor for all CPUs. We use a * temporary buffer wo that we need not hold the * percpu_swap_lock while running the constructor. */ buf = kmem_alloc(size, KM_SLEEP); for (CPU_INFO_FOREACH(cii, ci)) { memset(buf, 0, size); (*ctor)(buf, cookie, ci); percpu_traverse_enter(); memcpy(percpu_getptr_remote(pc, ci), buf, size); percpu_traverse_exit(); } explicit_memset(buf, 0, size); kmem_free(buf, size); /* * Insert the percpu into the list of percpus with * constructors. We are now done using the list, so it * is safe for concurrent percpu_create or concurrent * percpu_init_cpu to run. */ mutex_enter(&percpu_allocation.lock); KASSERT(percpu_allocation.busy == curlwp); percpu_allocation.busy = NULL; cv_broadcast(&percpu_allocation.cv); LIST_INSERT_HEAD(&percpu_allocation.ctor_list, pc, pc_list); mutex_exit(&percpu_allocation.lock); } else { percpu_zero(pc, size); } return pc; } /* * percpu_free: free percpu storage * * => called in thread context. * => considered as an expensive and rare operation. */ void percpu_free(percpu_t *pc, size_t size) { ASSERT_SLEEPABLE(); KASSERT(size == pc->pc_size); /* * If there's a constructor, take the percpu off the list of * percpus with constructors, but first wait until nobody is * using the list. */ if (pc->pc_ctor) { mutex_enter(&percpu_allocation.lock); while (percpu_allocation.busy) cv_wait(&percpu_allocation.cv, &percpu_allocation.lock); LIST_REMOVE(pc, pc_list); mutex_exit(&percpu_allocation.lock); } /* If there's a destructor, run it now for all CPUs. 
 */
	if (pc->pc_dtor) {
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		void *buf;

		/*
		 * Snapshot each CPU's data and zero the per-CPU copy under
		 * the traverse (swap) lock, then run the destructor on the
		 * snapshot without holding that lock.
		 */
		buf = kmem_alloc(size, KM_SLEEP);
		for (CPU_INFO_FOREACH(cii, ci)) {
			percpu_traverse_enter();
			memcpy(buf, percpu_getptr_remote(pc, ci), size);
			explicit_memset(percpu_getptr_remote(pc, ci), 0, size);
			percpu_traverse_exit();
			(*pc->pc_dtor)(buf, pc->pc_cookie, ci);
		}
		/* explicit_memset: the wipe must not be optimized away. */
		explicit_memset(buf, 0, size);
		kmem_free(buf, size);
	}

	/* Return the offset range to the arena, then free the descriptor. */
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
	kmem_free(pc, sizeof(*pc));
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */
void *
percpu_getref(percpu_t *pc)
{

	kpreempt_disable();
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after caller is done with per-cpu
 *    data
 */
void
percpu_putref(percpu_t *pc)
{

	kpreempt_enable();
}

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */
void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	/* Index that CPU's storage block by this percpu's fixed offset. */
	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpus.
 *
 * => must be called from thread context.
 * => callback executes on **current** CPU (or, really, arbitrary CPU,
 *    in case of preemption)
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimum because it is executed with
 *    holding a global lock, which can block low-priority xcalls.
 *    eg. it's illegal for a callback function to sleep for memory allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}

/* Bundle of callback + argument threaded through the xcall machinery. */
struct percpu_xcall_ctx {
	percpu_callback_t ctx_cb;
	void *ctx_arg;
};

/* xcall trampoline: run the user callback on the executing CPU's data. */
static void
percpu_xcfunc(void * const v1, void * const v2)
{
	percpu_t * const pc = v1;
	struct percpu_xcall_ctx * const ctx = v2;

	(*ctx->ctx_cb)(percpu_getref(pc), ctx->ctx_arg, curcpu());
	percpu_putref(pc);
}

/*
 * percpu_foreach_xcall: call the specified callback function for each
 * cpu.  This version uses an xcall to run the callback on each cpu.
 *
 * => must be called from thread context.
 * => callback executes on **remote** CPU in soft-interrupt context
 *    (at the specified soft interrupt priority).
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimum because it may be
 *    executed in soft-interrupt context.  eg. it's illegal for
 *    a callback function to sleep for memory allocation.
 */
void
percpu_foreach_xcall(percpu_t *pc, u_int xcflags, percpu_callback_t cb,
    void *arg)
{
	struct percpu_xcall_ctx ctx = {
		.ctx_cb = cb,
		.ctx_arg = arg,
	};
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/* One unicast xcall per CPU, waiting for each to complete in turn. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		xc_wait(xc_unicast(xcflags, percpu_xcfunc, pc, &ctx, ci));
	}
}
 |
| 14 6 3 5 6 4 5 3 3 9 7 7 18 17 16 15 15 10 10 10 10 10 10 10 10 10 8 7 9 9 14 10 14 16 17 8 7 7 6 3 3 2 12 3 2 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 | /* $NetBSD: vfs_syscalls_30.c,v 1.45 2022/03/12 20:46:03 riastradh Exp $ */ /*- * Copyright (c) 2005, 2008 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls_30.c,v 1.45 2022/03/12 20:46:03 riastradh Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/filedesc.h> #include <sys/kernel.h> #include <sys/file.h> #include <sys/stat.h> #include <sys/socketvar.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/proc.h> #include <sys/uio.h> #include <sys/dirent.h> #include <sys/malloc.h> #include <sys/kauth.h> #include <sys/vfs_syscalls.h> #include <sys/syscall.h> #include <sys/syscallvar.h> #include <sys/syscallargs.h> #include <compat/common/compat_mod.h> #include <compat/common/compat_util.h> #include <compat/sys/stat.h> #include <compat/sys/dirent.h> #include <compat/sys/mount.h> #include <compat/sys/statvfs.h> static const struct syscall_package vfs_syscalls_30_syscalls[] = { { SYS_compat_30___fhstat30, 0, (sy_call_t *)compat_30_sys___fhstat30 }, { SYS_compat_30___fstat13, 0, (sy_call_t *)compat_30_sys___fstat13 }, { SYS_compat_30___lstat13, 0, (sy_call_t *)compat_30_sys___lstat13 }, { SYS_compat_30___stat13, 0, (sy_call_t *)compat_30_sys___stat13 }, { SYS_compat_30_fhopen, 0, (sy_call_t *)compat_30_sys_fhopen }, { SYS_compat_30_fhstat, 0, (sy_call_t *)compat_30_sys_fhstat }, { SYS_compat_30_fhstatvfs1, 0, (sy_call_t *)compat_30_sys_fhstatvfs1 }, { SYS_compat_30_getdents, 0, (sy_call_t *)compat_30_sys_getdents }, { SYS_compat_30_getfh, 0, (sy_call_t *)compat_30_sys_getfh }, { 0,0, NULL } }; /* * Convert from a new to an old stat structure. */ static void cvtstat(struct stat13 *ost, const struct stat *st) { /* Handle any padding. 
 */
	memset(ost, 0, sizeof(*ost));
	ost->st_dev = st->st_dev;
	/* The old stat13 inode number is 32-bit; the cast truncates. */
	ost->st_ino = (uint32_t)st->st_ino;
	ost->st_mode = st->st_mode;
	ost->st_nlink = st->st_nlink;
	ost->st_uid = st->st_uid;
	ost->st_gid = st->st_gid;
	ost->st_rdev = st->st_rdev;
	/* Convert timestamps to the compat timespec50 representation. */
	timespec_to_timespec50(&st->st_atimespec, &ost->st_atimespec);
	timespec_to_timespec50(&st->st_mtimespec, &ost->st_mtimespec);
	timespec_to_timespec50(&st->st_ctimespec, &ost->st_ctimespec);
	timespec_to_timespec50(&st->st_birthtimespec, &ost->st_birthtimespec);
	ost->st_size = st->st_size;
	ost->st_blocks = st->st_blocks;
	ost->st_blksize = st->st_blksize;
	ost->st_flags = st->st_flags;
	ost->st_gen = st->st_gen;
}

/*
 * Get file status; this version follows links.
 */
/* ARGSUSED */
int
compat_30_sys___stat13(struct lwp *l,
    const struct compat_30_sys___stat13_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(struct stat13 *) ub;
	} */
	struct stat sb;
	struct stat13 osb;
	int error;

	/* FOLLOW: resolve symlinks before stat'ing. */
	error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
	if (error)
		return error;
	cvtstat(&osb, &sb);
	return copyout(&osb, SCARG(uap, ub), sizeof(osb));
}

/*
 * Get file status; this version does not follow links.
 */
/* ARGSUSED */
int
compat_30_sys___lstat13(struct lwp *l,
    const struct compat_30_sys___lstat13_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(struct stat13 *) ub;
	} */
	struct stat sb;
	struct stat13 osb;
	int error;

	/* NOFOLLOW: stat the link itself rather than its target. */
	error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
	if (error)
		return error;
	cvtstat(&osb, &sb);
	return copyout(&osb, SCARG(uap, ub), sizeof(osb));
}

/* ARGSUSED */
int
compat_30_sys_fhstat(struct lwp *l,
    const struct compat_30_sys_fhstat_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct compat_30_fhandle *) fhp;
		syscallarg(struct stat13 *) sb;
	} */
	struct stat sb;
	struct stat13 osb;
	int error;

	/* The compat_30 file handle has a fixed size; pass it explicitly. */
	error = do_fhstat(l, SCARG(uap, fhp), sizeof(*SCARG(uap, fhp)), &sb);
	if (error)
		return error;
	cvtstat(&osb, &sb);
	return copyout(&osb, SCARG(uap, sb), sizeof(osb));
}

/*
 * Return status information about a file descriptor.
 */
/* ARGSUSED */
int
compat_30_sys___fstat13(struct lwp *l,
    const struct compat_30_sys___fstat13_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(struct stat13 *) sb;
	} */
	struct stat sb;
	struct stat13 osb;
	int error;

	error = do_sys_fstat(SCARG(uap, fd), &sb);
	if (error)
		return error;
	cvtstat(&osb, &sb);
	return copyout(&osb, SCARG(uap, sb), sizeof(osb));
}

/*
 * Read a block of directory entries in a file system independent format.
*/
/*
 * NetBSD-3.0-compatible getdents(2): read directory entries from fd and
 * convert each modern struct dirent into the old struct dirent12 layout
 * before copying out to userspace.  Returns 0 and sets *retval to the
 * number of bytes written, or an errno.
 */
int
compat_30_sys_getdents(struct lwp *l,
    const struct compat_30_sys_getdents_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(size_t) count;
	} */
	struct dirent *bdp;
	struct vnode *vp;
	char *inp, *tbuf;	/* BSD-format */
	int len, reclen;	/* BSD-format */
	char *outp;		/* NetBSD-3.0-format */
	int resid;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	struct dirent12 idb;
	off_t off;		/* true file offset */
	int buflen, error, eofflag;
	off_t *cookiebuf = NULL, *cookie;
	int ncookies;
	bool any = false;

	/* fd_getvnode() will use the descriptor for us */
	if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
		return error;

	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out1;
	}

	vp = fp->f_vnode;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto out1;
	}

	/* scratch buffer for the modern-format entries, capped at MAXBSIZE */
	buflen = uimin(MAXBSIZE, SCARG(uap, count));
	tbuf = malloc(buflen, M_TEMP, M_WAITOK);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	off = fp->f_offset;
again:
	aiov.iov_base = tbuf;
	aiov.iov_len = buflen;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = buflen;
	auio.uio_offset = off;
	UIO_SETUP_SYSSPACE(&auio);
	/*
	 * First we read into the malloc'ed buffer, then
	 * we massage it into user space, one record at a time.
	 */
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &cookiebuf,
	    &ncookies);
	if (error)
		goto out;

	inp = tbuf;
	outp = SCARG(uap, buf);
	resid = SCARG(uap, count);
	if ((len = buflen - auio.uio_resid) == 0)
		goto eof;

	for (cookie = cookiebuf; len > 0; len -= reclen) {
		bdp = (struct dirent *)inp;
		reclen = bdp->d_reclen;
		if (reclen & _DIRENT_ALIGN(bdp))
			panic("%s: bad reclen %d", __func__, reclen);
		if (cookie)
			off = *cookie++; /* each entry points to the next */
		else
			off += reclen;
		/* the old ABI only has 32-bit directory offsets */
		if ((off >> 32) != 0) {
			compat_offseterr(vp, "netbsd30_getdents");
			error = EINVAL;
			goto out;
		}
		memset(&idb, 0, sizeof(idb));
		/* truncate names too long for the dirent12 name field */
		if (bdp->d_namlen >= sizeof(idb.d_name))
			idb.d_namlen = sizeof(idb.d_name) - 1;
		else
			idb.d_namlen = bdp->d_namlen;
		idb.d_reclen = _DIRENT_SIZE(&idb);
		if (reclen > len || resid < idb.d_reclen) {
			/* entry too big for buffer, so just stop */
			any = true;
			break;
		}
		/*
		 * Massage in place to make a NetBSD-3.0-shaped dirent
		 * (otherwise we have to worry about touching user memory
		 * outside of the copyout() call).
		 */
		idb.d_fileno = (u_int32_t)bdp->d_fileno;
		idb.d_type = bdp->d_type;
		(void)memcpy(idb.d_name, bdp->d_name, idb.d_namlen);
		/* zero the padding between the name and the record end */
		memset(idb.d_name + idb.d_namlen, 0,
		    idb.d_reclen - _DIRENT_NAMEOFF(&idb) - idb.d_namlen);
		if ((error = copyout(&idb, outp, idb.d_reclen)) != 0)
			goto out;
		/* advance past this real entry */
		inp += reclen;
		/* advance output past NetBSD-3.0-shaped entry */
		outp += idb.d_reclen;
		resid -= idb.d_reclen;
		any = true;
	}

	/* if we squished out the whole block, try again */
	if (!any) {
		if (cookiebuf)
			free(cookiebuf, M_TEMP);
		cookiebuf = NULL;
		goto again;
	}
	fp->f_offset = off;	/* update the vnode offset */

eof:
	*retval = SCARG(uap, count) - resid;
out:
	VOP_UNLOCK(vp);
	if (cookiebuf)
		free(cookiebuf, M_TEMP);
	free(tbuf, M_TEMP);
out1:
	fd_putfile(SCARG(uap, fd));
	return error;
}

/*
 * Get file handle system call
 *
 * Translate a pathname into an old-format (compat_30) file handle.
 * Restricted to the superuser via kauth.
 */
int
compat_30_sys_getfh(struct lwp *l,
    const struct compat_30_sys_getfh_args *uap, register_t *retval)
{
	/* {
		syscallarg(char *) fname;
		syscallarg(struct compat_30_fhandle *) fhp;
	} */
	struct vnode *vp;
	struct compat_30_fhandle fh;
	int error;
	struct pathbuf *pb;
	struct nameidata nd;
	size_t sz;

	/*
	 * Must be super user
	 */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
	    0, NULL, NULL, NULL);
	if (error)
		return (error);

	error = pathbuf_copyin(SCARG(uap, fname), &pb);
	if (error) {
		return error;
	}

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error)
		return error;
	vp = nd.ni_vp;

	sz = sizeof(struct compat_30_fhandle);
	error = vfs_composefh(vp, (void *)&fh, &sz);
	vput(vp);
	/* the compat handle must be exactly the old fixed size */
	CTASSERT(FHANDLE_SIZE_COMPAT == sizeof(struct compat_30_fhandle));
	if (sz != FHANDLE_SIZE_COMPAT) {
		error = EINVAL;
	}
	if (error)
		return error;
	return copyout(&fh, SCARG(uap, fhp), sizeof(fh));
}

/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
compat_30_sys_fhopen(struct lwp *l,
    const struct compat_30_sys_fhopen_args *uap, register_t *retval)
{
	/* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(int) flags;
	} */

	/* common code handles the old fixed-size handle via dofhopen() */
	return dofhopen(l, SCARG(uap, fhp), FHANDLE_SIZE_COMPAT,
	    SCARG(uap, flags), retval);
}

/*
 * fhstat(2) with an old-format handle, returning an old struct stat13.
 */
/* ARGSUSED */
int
compat_30_sys___fhstat30(struct lwp *l,
    const struct compat_30_sys___fhstat30_args *uap_30, register_t *retval)
{
	/* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(struct stat30 *) sb;
	} */
	struct stat sb;
	struct stat13 osb;
	int error;

	error = do_fhstat(l, SCARG(uap_30, fhp), FHANDLE_SIZE_COMPAT, &sb);
	if (error)
		return error;
	/* convert the native stat to the old layout before copyout */
	cvtstat(&osb, &sb);
	return copyout(&osb, SCARG(uap_30, sb), sizeof(osb));
}

/*
 * fhstatvfs1(2) with an old-format handle, returning a statvfs90.
 */
/* ARGSUSED */
int
compat_30_sys_fhstatvfs1(struct lwp *l,
    const struct compat_30_sys_fhstatvfs1_args *uap, register_t *retval)
{
	/* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(struct statvfs90 *) buf;
		syscallarg(int) flags;
	} */
	struct statvfs *sb = STATVFSBUF_GET();
	int error = do_fhstatvfs(l, SCARG(uap, fhp), FHANDLE_SIZE_COMPAT, sb,
	    SCARG(uap, flags));

	if (!error) {
		error = statvfs_to_statvfs90_copy(sb, SCARG(uap, buf),
		    sizeof(struct statvfs90));
	}
	STATVFSBUF_PUT(sb);
	return error;
}

/* Hook the compat_30 syscalls into the syscall table at module load. */
int
vfs_syscalls_30_init(void)
{
	return syscall_establish(NULL, vfs_syscalls_30_syscalls);
}

/* Remove the compat_30 syscalls from the syscall table at module unload. */
int
vfs_syscalls_30_fini(void)
{
	return syscall_disestablish(NULL, vfs_syscalls_30_syscalls);
}
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 | /* $NetBSD: drm_drv.c,v 1.23 2022/07/17 14:11:18 riastradh Exp $ */ /* * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org * * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. 
(Rik) Faith <faith@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: drm_drv.c,v 1.23 2022/07/17 14:11:18 riastradh Exp $"); #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/slab.h> #include <linux/srcu.h> #include <drm/drm_client.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_mode_object.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" #include "drm_legacy.h" #include <linux/nbsd-namespace.h> MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl"); MODULE_DESCRIPTION("DRM shared core routines"); MODULE_LICENSE("GPL and additional rights"); #ifdef __NetBSD__ spinlock_t drm_minor_lock; struct idr drm_minors_idr; #else static DEFINE_SPINLOCK(drm_minor_lock); static struct idr drm_minors_idr; #endif /* * If the drm core fails to init for whatever reason, * we should prevent any drivers from registering with it. * It's best to check this at drm_dev_init(), as some drivers * prefer to embed struct drm_device into their own device * structure and call drm_dev_init() themselves. */ bool drm_core_init_complete = false; #ifndef __NetBSD__ static struct dentry *drm_debugfs_root; #endif #ifdef __NetBSD__ struct srcu_struct drm_unplug_srcu; #else DEFINE_STATIC_SRCU(drm_unplug_srcu); #endif /* * DRM Minors * A DRM device can provide several char-dev interfaces on the DRM-Major. Each * of them is represented by a drm_minor object. Depending on the capabilities * of the device-driver, different interfaces are registered. * * Minors can be accessed via dev->$minor_name. This pointer is either * NULL or a valid drm_minor pointer and stays valid as long as the device is * valid. This means, DRM minors have the same life-time as the underlying * device. However, this doesn't mean that the minor is active. 
Minors are
 * registered and unregistered dynamically according to device-state.
 */

/*
 * Return the address of the slot in *dev holding the minor of the given
 * type (primary or render).  Any other type is a bug.
 */
static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
					     unsigned int type)
{
	switch (type) {
	case DRM_MINOR_PRIMARY:
		return &dev->primary;
	case DRM_MINOR_RENDER:
		return &dev->render;
	default:
		BUG();
	}
}

/*
 * Allocate a minor of the given type for dev and reserve an index for it
 * in drm_minors_idr.  The idr entry is initially NULL so lookups fail
 * until drm_minor_register() publishes it.  Returns 0 or negative errno.
 */
static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
{
	struct drm_minor *minor;
	unsigned long flags;
	int r;

	minor = kzalloc(sizeof(*minor), GFP_KERNEL);
	if (!minor)
		return -ENOMEM;

	minor->type = type;
	minor->dev = dev;

	/* each minor type owns its own 64-entry index range */
	idr_preload(GFP_KERNEL);
	spin_lock_irqsave(&drm_minor_lock, flags);
	r = idr_alloc(&drm_minors_idr,
		      NULL,
		      64 * type,
		      64 * (type + 1),
		      GFP_NOWAIT);
	spin_unlock_irqrestore(&drm_minor_lock, flags);
	idr_preload_end();

	if (r < 0)
		goto err_free;

	minor->index = r;

#ifndef __NetBSD__		/* XXX drm sysfs */
	minor->kdev = drm_sysfs_minor_alloc(minor);
	if (IS_ERR(minor->kdev)) {
		r = PTR_ERR(minor->kdev);
		goto err_index;
	}
#endif

	*drm_minor_get_slot(dev, type) = minor;
	return 0;

err_index: __unused	/* label only reachable in the non-NetBSD path above */
	spin_lock_irqsave(&drm_minor_lock, flags);
	idr_remove(&drm_minors_idr, minor->index);
	spin_unlock_irqrestore(&drm_minor_lock, flags);
err_free:
	kfree(minor);
	return r;
}

/*
 * Release the minor of the given type: drop its idr index, free it, and
 * clear the device's slot.  No-op if none was allocated.
 */
static void drm_minor_free(struct drm_device *dev, unsigned int type)
{
	struct drm_minor **slot, *minor;
	unsigned long flags;

	slot = drm_minor_get_slot(dev, type);
	minor = *slot;
	if (!minor)
		return;

#ifndef __NetBSD__		/* XXX drm sysfs */
	put_device(minor->kdev);
#endif

	spin_lock_irqsave(&drm_minor_lock, flags);
	idr_remove(&drm_minors_idr, minor->index);
	spin_unlock_irqrestore(&drm_minor_lock, flags);

	kfree(minor);
	*slot = NULL;
}

/*
 * Publish a previously allocated minor: set up debugfs/sysfs (non-NetBSD)
 * and make its idr entry point at the minor so lookups succeed.
 */
static int drm_minor_register(struct drm_device *dev, unsigned int type)
{
	struct drm_minor *minor;
	unsigned long flags;
#ifndef __NetBSD__
	int ret;
#endif

	DRM_DEBUG("\n");

	minor = *drm_minor_get_slot(dev, type);
	if (!minor)
		return 0;

#ifndef __NetBSD__
	ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root);
	if (ret) {
		DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
		goto err_debugfs;
	}

	ret = device_add(minor->kdev);
	if (ret)
		goto err_debugfs;
#endif

	/* replace NULL with @minor so lookups will succeed from now on */
	spin_lock_irqsave(&drm_minor_lock, flags);
	idr_replace(&drm_minors_idr, minor, minor->index);
	spin_unlock_irqrestore(&drm_minor_lock, flags);

	DRM_DEBUG("new minor registered %d\n", minor->index);
	return 0;

#ifndef __NetBSD__
err_debugfs:
	drm_debugfs_cleanup(minor);
	return ret;
#endif
}

/*
 * Unpublish a minor: make its idr entry NULL again so lookups fail, then
 * tear down sysfs/debugfs state (non-NetBSD).  The minor itself is freed
 * later by drm_minor_free().
 */
static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
{
	struct drm_minor *minor;
	unsigned long flags;

	minor = *drm_minor_get_slot(dev, type);
#ifdef __NetBSD__
	if (!minor)
#else
	if (!minor || !device_is_registered(minor->kdev))
#endif
		return;

	/* replace @minor with NULL so lookups will fail from now on */
	spin_lock_irqsave(&drm_minor_lock, flags);
	idr_replace(&drm_minors_idr, NULL, minor->index);
	spin_unlock_irqrestore(&drm_minor_lock, flags);

#ifndef __NetBSD__
	device_del(minor->kdev);
	dev_set_drvdata(minor->kdev, NULL); /* safety belt */
	drm_debugfs_cleanup(minor);
#endif
}

/*
 * Looks up the given minor-ID and returns the respective DRM-minor object. The
 * reference-count of the underlying device is increased so you must release
 * this object with drm_minor_release().
 *
 * As long as you hold this minor, it is guaranteed that the object and the
 * minor->dev pointer will stay valid! However, the device may get unplugged and
 * unregistered while you hold the minor.
*/
struct drm_minor *drm_minor_acquire(unsigned int minor_id)
{
	struct drm_minor *minor;
	unsigned long flags;

	spin_lock_irqsave(&drm_minor_lock, flags);
	minor = idr_find(&drm_minors_idr, minor_id);
	if (minor)
		drm_dev_get(minor->dev); /* take the device ref under the lock */
	spin_unlock_irqrestore(&drm_minor_lock, flags);

	if (!minor) {
		return ERR_PTR(-ENODEV);
	} else if (drm_dev_is_unplugged(minor->dev)) {
		/* device went away after lookup; drop the ref we just took */
		drm_dev_put(minor->dev);
		return ERR_PTR(-ENODEV);
	}

	return minor;
}

/* Drop the device reference taken by drm_minor_acquire(). */
void drm_minor_release(struct drm_minor *minor)
{
	drm_dev_put(minor->dev);
}

/**
 * DOC: driver instance overview
 *
 * A device instance for a drm driver is represented by &struct drm_device. This
 * is initialized with drm_dev_init(), usually from bus-specific ->probe()
 * callbacks implemented by the driver. The driver then needs to initialize all
 * the various subsystems for the drm device like memory management, vblank
 * handling, modesetting support and initial output configuration plus obviously
 * initialize all the corresponding hardware bits. Finally when everything is up
 * and running and ready for userspace the device instance can be published
 * using drm_dev_register().
 *
 * There is also deprecated support for initializing device instances using
 * bus-specific helpers and the &drm_driver.load callback. But due to
 * backwards-compatibility needs the device instance have to be published too
 * early, which requires unpretty global locking to make safe and is therefore
 * only support for existing drivers not yet converted to the new scheme.
 *
 * When cleaning up a device instance everything needs to be done in reverse:
 * First unpublish the device instance with drm_dev_unregister(). Then clean up
 * any other resources allocated at device initialization and drop the driver's
 * reference to &drm_device using drm_dev_put().
 *
 * Note that the lifetime rules for &drm_device instance has still a lot of
 * historical baggage. Hence use the reference counting provided by
 * drm_dev_get() and drm_dev_put() only carefully.
* * Display driver example * ~~~~~~~~~~~~~~~~~~~~~~ * * The following example shows a typical structure of a DRM display driver. * The example focus on the probe() function and the other functions that is * almost always present and serves as a demonstration of devm_drm_dev_init() * usage with its accompanying drm_driver->release callback. * * .. code-block:: c * * struct driver_device { * struct drm_device drm; * void *userspace_facing; * struct clk *pclk; * }; * * static void driver_drm_release(struct drm_device *drm) * { * struct driver_device *priv = container_of(...); * * drm_mode_config_cleanup(drm); * drm_dev_fini(drm); * kfree(priv->userspace_facing); * kfree(priv); * } * * static struct drm_driver driver_drm_driver = { * [...] * .release = driver_drm_release, * }; * * static int driver_probe(struct platform_device *pdev) * { * struct driver_device *priv; * struct drm_device *drm; * int ret; * * // devm_kzalloc() can't be used here because the drm_device ' * // lifetime can exceed the device lifetime if driver unbind * // happens when userspace still has open file descriptors. 
* priv = kzalloc(sizeof(*priv), GFP_KERNEL); * if (!priv) * return -ENOMEM; * * drm = &priv->drm; * * ret = devm_drm_dev_init(&pdev->dev, drm, &driver_drm_driver); * if (ret) { * kfree(drm); * return ret; * } * * drm_mode_config_init(drm); * * priv->userspace_facing = kzalloc(..., GFP_KERNEL); * if (!priv->userspace_facing) * return -ENOMEM; * * priv->pclk = devm_clk_get(dev, "PCLK"); * if (IS_ERR(priv->pclk)) * return PTR_ERR(priv->pclk); * * // Further setup, display pipeline etc * * platform_set_drvdata(pdev, drm); * * drm_mode_config_reset(drm); * * ret = drm_dev_register(drm); * if (ret) * return ret; * * drm_fbdev_generic_setup(drm, 32); * * return 0; * } * * // This function is called before the devm_ resources are released * static int driver_remove(struct platform_device *pdev) * { * struct drm_device *drm = platform_get_drvdata(pdev); * * drm_dev_unregister(drm); * drm_atomic_helper_shutdown(drm) * * return 0; * } * * // This function is called on kernel restart and shutdown * static void driver_shutdown(struct platform_device *pdev) * { * drm_atomic_helper_shutdown(platform_get_drvdata(pdev)); * } * * static int __maybe_unused driver_pm_suspend(struct device *dev) * { * return drm_mode_config_helper_suspend(dev_get_drvdata(dev)); * } * * static int __maybe_unused driver_pm_resume(struct device *dev) * { * drm_mode_config_helper_resume(dev_get_drvdata(dev)); * * return 0; * } * * static const struct dev_pm_ops driver_pm_ops = { * SET_SYSTEM_SLEEP_PM_OPS(driver_pm_suspend, driver_pm_resume) * }; * * static struct platform_driver driver_driver = { * .driver = { * [...] * .pm = &driver_pm_ops, * }, * .probe = driver_probe, * .remove = driver_remove, * .shutdown = driver_shutdown, * }; * module_platform_driver(driver_driver); * * Drivers that want to support device unplugging (USB, DT overlay unload) should * use drm_dev_unplug() instead of drm_dev_unregister(). 
The driver must protect
 * regions that is accessing device resources to prevent use after they're
 * released. This is done using drm_dev_enter() and drm_dev_exit(). There is one
 * shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before
 * drm_atomic_helper_shutdown() is called. This means that if the disable code
 * paths are protected, they will not run on regular driver module unload,
 * possibly leaving the hardware enabled.
 */

/**
 * drm_put_dev - Unregister and release a DRM device
 * @dev: DRM device
 *
 * Called at module unload time or when a PCI device is unplugged.
 *
 * Cleans up all DRM device, calling drm_lastclose().
 *
 * Note: Use of this function is deprecated. It will eventually go away
 * completely. Please use drm_dev_unregister() and drm_dev_put() explicitly
 * instead to make sure that the device isn't userspace accessible any more
 * while teardown is in progress, ensuring that userspace can't access an
 * inconsistent state.
 */
void drm_put_dev(struct drm_device *dev)
{
	DRM_DEBUG("\n");

	if (!dev) {
		DRM_ERROR("cleanup called no dev\n");
		return;
	}

	drm_dev_unregister(dev);
	drm_dev_put(dev);
}
EXPORT_SYMBOL(drm_put_dev);

/**
 * drm_dev_enter - Enter device critical section
 * @dev: DRM device
 * @idx: Pointer to index that will be passed to the matching drm_dev_exit()
 *
 * This function marks and protects the beginning of a section that should not
 * be entered after the device has been unplugged. The section end is marked
 * with drm_dev_exit(). Calls to this function can be nested.
 *
 * Returns:
 * True if it is OK to enter the section, false otherwise.
 */
bool drm_dev_enter(struct drm_device *dev, int *idx)
{
	*idx = srcu_read_lock(&drm_unplug_srcu);

	if (dev->unplugged) {
		srcu_read_unlock(&drm_unplug_srcu, *idx);
		return false;
	}

	return true;
}
EXPORT_SYMBOL(drm_dev_enter);

/**
 * drm_dev_exit - Exit device critical section
 * @idx: index returned from drm_dev_enter()
 *
 * This function marks the end of a section that should not be entered after
 * the device has been unplugged.
 */
void drm_dev_exit(int idx)
{
	srcu_read_unlock(&drm_unplug_srcu, idx);
}
EXPORT_SYMBOL(drm_dev_exit);

/**
 * drm_dev_unplug - unplug a DRM device
 * @dev: DRM device
 *
 * This unplugs a hotpluggable DRM device, which makes it inaccessible to
 * userspace operations. Entry-points can use drm_dev_enter() and
 * drm_dev_exit() to protect device resources in a race free manner. This
 * essentially unregisters the device like drm_dev_unregister(), but can be
 * called while there are still open users of @dev.
 */
void drm_dev_unplug(struct drm_device *dev)
{
	/*
	 * After synchronizing any critical read section is guaranteed to see
	 * the new value of ->unplugged, and any critical section which might
	 * still have seen the old value of ->unplugged is guaranteed to have
	 * finished.
	 */
	dev->unplugged = true;
	synchronize_srcu(&drm_unplug_srcu);

	drm_dev_unregister(dev);
}
EXPORT_SYMBOL(drm_dev_unplug);

#ifdef __NetBSD__
/* NetBSD has no internal VFS mount; the anonymous inode is simply NULL. */
static void *
drm_fs_inode_new(void)
{
	return NULL;
}
static void
drm_fs_inode_free(void *inode)
{
	KASSERT(inode == NULL);
}
#else
/*
 * DRM internal mount
 * We want to be able to allocate our own "struct address_space" to control
 * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not allow
 * stand-alone address_space objects, so we need an underlying inode. As there
 * is no way to allocate an independent inode easily, we need a fake internal
 * VFS mount-point.
 *
 * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free()
 * frees it again. You are allowed to use iget() and iput() to get references to
 * the inode. But each drm_fs_inode_new() call must be paired with exactly one
 * drm_fs_inode_free() call (which does not have to be the last iput()).
 * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it
 * between multiple inode-users. You could, technically, call
 * iget() + drm_fs_inode_free() directly after alloc and sometime later do an
 * iput(), but this way you'd end up with a new vfsmount for each inode.
 */
static int drm_fs_cnt;
static struct vfsmount *drm_fs_mnt;

static int drm_fs_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, 0x010203ff) ? 0 : -ENOMEM;
}

static struct file_system_type drm_fs_type = {
	.name = "drm",
	.owner = THIS_MODULE,
	.init_fs_context = drm_fs_init_fs_context,
	.kill_sb = kill_anon_super,
};

static struct inode *drm_fs_inode_new(void)
{
	struct inode *inode;
	int r;

	r = simple_pin_fs(&drm_fs_type, &drm_fs_mnt, &drm_fs_cnt);
	if (r < 0) {
		DRM_ERROR("Cannot mount pseudo fs: %d\n", r);
		return ERR_PTR(r);
	}

	inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);
	if (IS_ERR(inode))
		simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);

	return inode;
}

static void drm_fs_inode_free(struct inode *inode)
{
	if (inode) {
		iput(inode);
		simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);
	}
}
#endif

/**
 * DOC: component helper usage recommendations
 *
 * DRM drivers that drive hardware where a logical device consists of a pile of
 * independent hardware blocks are recommended to use the :ref:`component helper
 * library<component>`. For consistency and better options for code reuse the
 * following guidelines apply:
 *
 * - The entire device initialization procedure should be run from the
 *   &component_master_ops.master_bind callback, starting with drm_dev_init(),
 *   then binding all components with component_bind_all() and finishing with
 *   drm_dev_register().
 *
 * - The opaque pointer passed to all components through component_bind_all()
 *   should point at &struct drm_device of the device instance, not some driver
 *   specific private structure.
* * - The component helper fills the niche where further standardization of * interfaces is not practical. When there already is, or will be, a * standardized interface like &drm_bridge or &drm_panel, providing its own * functions to find such components at driver load time, like * drm_of_find_panel_or_bridge(), then the component helper should not be * used. */ /** * drm_dev_init - Initialise new DRM device * @dev: DRM device * @driver: DRM driver * @parent: Parent device object * * Initialize a new DRM device. No device registration is done. * Call drm_dev_register() to advertice the device to user space and register it * with other core subsystems. This should be done last in the device * initialization sequence to make sure userspace can't access an inconsistent * state. * * The initial ref-count of the object is 1. Use drm_dev_get() and * drm_dev_put() to take and drop further ref-counts. * * It is recommended that drivers embed &struct drm_device into their own device * structure. * * Drivers that do not want to allocate their own device struct * embedding &struct drm_device can call drm_dev_alloc() instead. For drivers * that do embed &struct drm_device it must be placed first in the overall * structure, and the overall structure must be allocated using kmalloc(): The * drm core's release function unconditionally calls kfree() on the @dev pointer * when the final reference is released. To override this behaviour, and so * allow embedding of the drm_device inside the driver's device struct at an * arbitrary offset, you must supply a &drm_driver.release callback and control * the finalization explicitly. * * RETURNS: * 0 on success, or error code on failure. 
*/
int drm_dev_init(struct drm_device *dev,
		 struct drm_driver *driver,
		 struct device *parent)
{
	int ret;

	if (!drm_core_init_complete) {
		DRM_ERROR("DRM core is not initialized\n");
		return -ENODEV;
	}

	if (WARN_ON(!parent))
		return -EINVAL;

	kref_init(&dev->ref);
	dev->dev = get_device(parent);
	dev->driver = driver;

	/* no per-device feature limits by default */
	dev->driver_features = ~0u;

	drm_legacy_init_members(dev);
	INIT_LIST_HEAD(&dev->filelist);
	INIT_LIST_HEAD(&dev->filelist_internal);
	INIT_LIST_HEAD(&dev->clientlist);
	INIT_LIST_HEAD(&dev->vblank_event_list);

	spin_lock_init(&dev->event_lock);
	mutex_init(&dev->struct_mutex);
	mutex_init(&dev->filelist_mutex);
	mutex_init(&dev->clientlist_mutex);
	mutex_init(&dev->master_mutex);

	/*
	 * NetBSD addition: register a sysmon power switch so display-cycle
	 * hotkey events can be delivered for this device.
	 */
	dev->sc_monitor_hotplug.smpsw_name = PSWITCH_HK_DISPLAY_CYCLE;
	dev->sc_monitor_hotplug.smpsw_type = PSWITCH_TYPE_HOTKEY;
	ret = sysmon_pswitch_register(&dev->sc_monitor_hotplug);
	if (ret)
		goto err_pswitch;

	dev->anon_inode = drm_fs_inode_new();
	if (IS_ERR(dev->anon_inode)) {
		ret = PTR_ERR(dev->anon_inode);
		DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret);
		goto err_free;
	}

	if (drm_core_check_feature(dev, DRIVER_RENDER)) {
		ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
		if (ret)
			goto err_minors;
	}

	ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY);
	if (ret)
		goto err_minors;

	ret = drm_legacy_create_map_hash(dev);
	if (ret)
		goto err_minors;

	drm_legacy_ctxbitmap_init(dev);

	if (drm_core_check_feature(dev, DRIVER_GEM)) {
		ret = drm_gem_init(dev);
		if (ret) {
			DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n");
			goto err_ctxbitmap;
		}
	}

	ret = drm_dev_set_unique(dev, dev_name(parent));
	if (ret)
		goto err_setunique;

	return 0;

	/* unwind in strict reverse order of the setup above */
err_setunique:
	if (drm_core_check_feature(dev, DRIVER_GEM))
		drm_gem_destroy(dev);
err_ctxbitmap:
	drm_legacy_ctxbitmap_cleanup(dev);
	drm_legacy_remove_map_hash(dev);
err_minors:
	drm_minor_free(dev, DRM_MINOR_PRIMARY);
	drm_minor_free(dev, DRM_MINOR_RENDER);
	drm_fs_inode_free(dev->anon_inode);
err_free:
#ifdef __NetBSD__
	sysmon_pswitch_unregister(&dev->sc_monitor_hotplug);
err_pswitch:
#endif
#ifndef __NetBSD__		/* XXX drm sysfs */
	put_device(dev->dev);
#endif
	mutex_destroy(&dev->master_mutex);
	mutex_destroy(&dev->clientlist_mutex);
	mutex_destroy(&dev->filelist_mutex);
	mutex_destroy(&dev->struct_mutex);
	spin_lock_destroy(&dev->event_lock);
	drm_legacy_destroy_members(dev);
	return ret;
}
EXPORT_SYMBOL(drm_dev_init);

#ifndef __NetBSD__
/* devm action callback: drop the drm_device reference on driver detach. */
static void devm_drm_dev_init_release(void *data)
{
	drm_dev_put(data);
}

/**
 * devm_drm_dev_init - Resource managed drm_dev_init()
 * @parent: Parent device object
 * @dev: DRM device
 * @driver: DRM driver
 *
 * Managed drm_dev_init(). The DRM device initialized with this function is
 * automatically put on driver detach using drm_dev_put(). You must supply a
 * &drm_driver.release callback to control the finalization explicitly.
 *
 * RETURNS:
 * 0 on success, or error code on failure.
 */
int devm_drm_dev_init(struct device *parent,
		      struct drm_device *dev,
		      struct drm_driver *driver)
{
	int ret;

	if (WARN_ON(!driver->release))
		return -EINVAL;

	ret = drm_dev_init(dev, driver, parent);
	if (ret)
		return ret;

	ret = devm_add_action(parent, devm_drm_dev_init_release, dev);
	if (ret)
		devm_drm_dev_init_release(dev);

	return ret;
}
EXPORT_SYMBOL(devm_drm_dev_init);
#endif

/**
 * drm_dev_fini - Finalize a dead DRM device
 * @dev: DRM device
 *
 * Finalize a dead DRM device. This is the converse to drm_dev_init() and
 * frees up all data allocated by it. All driver private data should be
 * finalized first. Note that this function does not free the @dev, that is
 * left to the caller.
 *
 * The ref-count of @dev must be zero, and drm_dev_fini() should only be called
 * from a &drm_driver.release callback.
*/
void drm_dev_fini(struct drm_device *dev)
{
	drm_vblank_cleanup(dev);

	if (drm_core_check_feature(dev, DRIVER_GEM))
		drm_gem_destroy(dev);

	drm_legacy_ctxbitmap_cleanup(dev);
	drm_legacy_remove_map_hash(dev);
	drm_fs_inode_free(dev->anon_inode);

	drm_minor_free(dev, DRM_MINOR_PRIMARY);
	drm_minor_free(dev, DRM_MINOR_RENDER);

#ifdef __NetBSD__
	/* undo the sysmon hotkey pswitch registered in drm_dev_init() */
	sysmon_pswitch_unregister(&dev->sc_monitor_hotplug);
#endif

#ifndef __NetBSD__		/* XXX drm sysfs */
	put_device(dev->dev);
#endif

	mutex_destroy(&dev->master_mutex);
	mutex_destroy(&dev->clientlist_mutex);
	mutex_destroy(&dev->filelist_mutex);
	mutex_destroy(&dev->struct_mutex);
	spin_lock_destroy(&dev->event_lock);
	drm_legacy_destroy_members(dev);
	kfree(dev->unique);
}
EXPORT_SYMBOL(drm_dev_fini);

/**
 * drm_dev_alloc - Allocate new DRM device
 * @driver: DRM driver to allocate device for
 * @parent: Parent device object
 *
 * Allocate and initialize a new DRM device. No device registration is done.
 * Call drm_dev_register() to advertise the device to user space and register it
 * with other core subsystems. This should be done last in the device
 * initialization sequence to make sure userspace can't access an inconsistent
 * state.
 *
 * The initial ref-count of the object is 1. Use drm_dev_get() and
 * drm_dev_put() to take and drop further ref-counts.
 *
 * Note that for purely virtual devices @parent can be NULL.
 *
 * Drivers that wish to subclass or embed &struct drm_device into their
 * own struct should look at using drm_dev_init() instead.
 *
 * RETURNS:
 * Pointer to new DRM device, or ERR_PTR on failure.
 */
struct drm_device *drm_dev_alloc(struct drm_driver *driver,
				 struct device *parent)
{
	struct drm_device *dev;
	int ret;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	ret = drm_dev_init(dev, driver, parent);
	if (ret) {
		kfree(dev);
		return ERR_PTR(ret);
	}

	return dev;
}
EXPORT_SYMBOL(drm_dev_alloc);

/*
 * kref release callback: hand finalization to the driver's release hook
 * if it has one, otherwise finalize and free the device here.
 */
static void drm_dev_release(struct kref *ref)
{
	struct drm_device *dev = container_of(ref, struct drm_device, ref);

	if (dev->driver->release) {
		dev->driver->release(dev);
	} else {
		drm_dev_fini(dev);
		kfree(dev);
	}
}

/**
 * drm_dev_get - Take reference of a DRM device
 * @dev: device to take reference of or NULL
 *
 * This increases the ref-count of @dev by one. You *must* already own a
 * reference when calling this. Use drm_dev_put() to drop this reference
 * again.
 *
 * This function never fails. However, this function does not provide *any*
 * guarantee whether the device is alive or running. It only provides a
 * reference to the object and the memory associated with it.
 */
void drm_dev_get(struct drm_device *dev)
{
	if (dev)
		kref_get(&dev->ref);
}
EXPORT_SYMBOL(drm_dev_get);

/**
 * drm_dev_put - Drop reference of a DRM device
 * @dev: device to drop reference of or NULL
 *
 * This decreases the ref-count of @dev by one. The device is destroyed if the
 * ref-count drops to zero.
 */
void drm_dev_put(struct drm_device *dev)
{
	if (dev)
		kref_put(&dev->ref, drm_dev_release);
}
EXPORT_SYMBOL(drm_dev_put);

/*
 * Create the legacy controlD%d sysfs symlink for modeset drivers
 * (a sysfs no-op on NetBSD).  Returns 0 or negative errno.
 */
static int create_compat_control_link(struct drm_device *dev)
{
	struct drm_minor *minor;
	char *name;
	int ret;

	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY);
	if (!minor)
		return 0;

	/*
	 * Some existing userspace out there uses the existing of the controlD*
	 * sysfs files to figure out whether it's a modeset driver. It only does
	 * readdir, hence a symlink is sufficient (and the least confusing
	 * option). Otherwise controlD* is entirely unused.
	 *
	 * Old controlD chardev have been allocated in the range
	 * 64-127.
	 */
	name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64);
	if (!name)
		return -ENOMEM;

#ifdef __NetBSD__		/* XXX sysfs */
	ret = 0;
#else
	ret = sysfs_create_link(minor->kdev->kobj.parent,
				&minor->kdev->kobj,
				name);
#endif

	kfree(name);

	return ret;
}

/* Remove the controlD%d symlink created by create_compat_control_link(). */
static void remove_compat_control_link(struct drm_device *dev)
{
	struct drm_minor *minor;
	char *name;

	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY);
	if (!minor)
		return;

	name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64);
	if (!name)
		return;

#ifndef __NetBSD__		/* XXX sysfs */
	sysfs_remove_link(minor->kdev->kobj.parent, name);
#endif

	kfree(name);
}

/**
 * drm_dev_register - Register DRM device
 * @dev: Device to register
 * @flags: Flags passed to the driver's .load() function
 *
 * Register the DRM device @dev with the system, advertise device to user-space
 * and start normal device operation. @dev must be initialized via drm_dev_init()
 * previously.
 *
 * Never call this twice on any device!
 *
 * NOTE: To ensure backward compatibility with existing drivers method this
 * function calls the &drm_driver.load method after registering the device
 * nodes, creating race conditions. Usage of the &drm_driver.load methods is
 * therefore deprecated, drivers must perform all initialization before calling
 * drm_dev_register().
 *
 * RETURNS:
 * 0 on success, negative error code on failure.
*/ int drm_dev_register(struct drm_device *dev, unsigned long flags) { struct drm_driver *driver = dev->driver; int ret; #ifndef __NetBSD__ mutex_lock(&drm_global_mutex); #endif ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; ret = drm_minor_register(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; ret = create_compat_control_link(dev); if (ret) goto err_minors; dev->registered = true; if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) goto err_minors; } if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_register_all(dev); ret = 0; DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, dev->dev ? dev_name(dev->dev) : "virtual device", dev->primary->index); goto out_unlock; err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); out_unlock: #ifndef __NetBSD__ mutex_unlock(&drm_global_mutex); #endif return ret; } EXPORT_SYMBOL(drm_dev_register); /** * drm_dev_unregister - Unregister DRM device * @dev: Device to unregister * * Unregister the DRM device from the system. This does the reverse of * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_put() to drop their final reference. * * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), * which can be called while there are still open users of @dev. * * This should be called first in the device teardown code to make sure * userspace can't access the device instance any more. */ void drm_dev_unregister(struct drm_device *dev) { if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_lastclose(dev); dev->registered = false; drm_client_dev_unregister(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_unregister_all(dev); if (dev->driver->unload) dev->driver->unload(dev); #ifndef __NetBSD__ /* Moved to drm_pci. 
*/ if (dev->agp) drm_pci_agp_destroy(dev); #endif drm_legacy_rmmaps(dev); remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); } EXPORT_SYMBOL(drm_dev_unregister); /** * drm_dev_set_unique - Set the unique name of a DRM device * @dev: device of which to set the unique name * @name: unique name * * Sets the unique name of a DRM device using the specified string. This is * already done by drm_dev_init(), drivers should only override the default * unique name for backwards compatibility reasons. * * Return: 0 on success or a negative error code on failure. */ int drm_dev_set_unique(struct drm_device *dev, const char *name) { kfree(dev->unique); dev->unique = kstrdup(name, GFP_KERNEL); return dev->unique ? 0 : -ENOMEM; } EXPORT_SYMBOL(drm_dev_set_unique); #ifndef __NetBSD__ /* * DRM Core * The DRM core module initializes all global DRM objects and makes them * available to drivers. Once setup, drivers can probe their respective * devices. * Currently, core management includes: * - The "DRM-Global" key/value database * - Global ID management for connectors * - DRM major number allocation * - DRM minor management * - DRM sysfs class * - DRM debugfs root * * Furthermore, the DRM core provides dynamic char-dev lookups. For each * interface registered on a DRM device, you can request minor numbers from DRM * core. DRM core takes care of major-number management and char-dev * registration. A stub ->open() callback forwards any open() requests to the * registered minor. 
*/ static int drm_stub_open(struct inode *inode, struct file *filp) { const struct file_operations *new_fops; struct drm_minor *minor; int err; DRM_DEBUG("\n"); mutex_lock(&drm_global_mutex); minor = drm_minor_acquire(iminor(inode)); if (IS_ERR(minor)) { err = PTR_ERR(minor); goto out_unlock; } new_fops = fops_get(minor->dev->driver->fops); if (!new_fops) { err = -ENODEV; goto out_release; } replace_fops(filp, new_fops); if (filp->f_op->open) err = filp->f_op->open(inode, filp); else err = 0; out_release: drm_minor_release(minor); out_unlock: mutex_unlock(&drm_global_mutex); return err; } static const struct file_operations drm_stub_fops = { .owner = THIS_MODULE, .open = drm_stub_open, .llseek = noop_llseek, }; static void drm_core_exit(void) { unregister_chrdev(DRM_MAJOR, "drm"); debugfs_remove(drm_debugfs_root); drm_sysfs_destroy(); idr_destroy(&drm_minors_idr); drm_connector_ida_destroy(); } static int __init drm_core_init(void) { int ret; drm_connector_ida_init(); idr_init(&drm_minors_idr); ret = drm_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create DRM class: %d\n", ret); goto error; } drm_debugfs_root = debugfs_create_dir("dri", NULL); ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); if (ret < 0) goto error; drm_core_init_complete = true; DRM_DEBUG("Initialized\n"); return 0; error: drm_core_exit(); return ret; } module_init(drm_core_init); module_exit(drm_core_exit); #endif |
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 | /* $NetBSD: uplcom.c,v 1.94 2022/07/06 15:24:14 hannken Exp $ */ /* * Copyright (c) 
2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Ichiro FUKUHARA (ichiro@ichiro.org). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * General information: http://www.prolific.com.tw/fr_pl2303.htm * http://www.hitachi-hitec.com/jyouhou/prolific/2303.pdf */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uplcom.c,v 1.94 2022/07/06 15:24:14 hannken Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/kmem.h> #include <sys/ioctl.h> #include <sys/conf.h> #include <sys/tty.h> #include <sys/file.h> #include <sys/select.h> #include <sys/proc.h> #include <sys/device.h> #include <sys/poll.h> #include <sys/sysctl.h> #include <dev/usb/usb.h> #include <dev/usb/usbcdc.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/usb_quirks.h> #include <dev/usb/usbhist.h> #include <dev/usb/ucomvar.h> #ifdef USB_DEBUG #ifndef UPLCOM_DEBUG #define uplcomdebug 0 #else int uplcomdebug = 0; SYSCTL_SETUP(sysctl_hw_uplcom_setup, "sysctl hw.uplcom setup") { int err; const struct sysctlnode *rnode; const struct sysctlnode *cnode; err = sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, CTLTYPE_NODE, "uplcom", SYSCTL_DESCR("uplcom global controls"), NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL); if (err) goto fail; /* control debugging printfs */ err = sysctl_createv(clog, 0, &rnode, &cnode, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", SYSCTL_DESCR("Enable debugging output"), NULL, 0, &uplcomdebug, sizeof(uplcomdebug), CTL_CREATE, CTL_EOL); if (err) goto fail; return; fail: aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, err); } #endif /* UCOM_DEBUG */ #endif /* USB_DEBUG */ #define DPRINTF(FMT,A,B,C,D) USBHIST_LOGN(uplcomdebug,1,FMT,A,B,C,D) #define DPRINTFN(N,FMT,A,B,C,D) USBHIST_LOGN(uplcomdebug,N,FMT,A,B,C,D) #define UPLCOMHIST_FUNC() USBHIST_FUNC() #define UPLCOMHIST_CALLED(name) USBHIST_CALLED(uplcomdebug) #define UPLCOM_CONFIG_INDEX 0 #define UPLCOM_IFACE_INDEX 0 #define UPLCOM_SECOND_IFACE_INDEX 1 #define UPLCOM_SET_REQUEST 0x01 
#define UPLCOM_SET_CRTSCTS_0 0x41 #define UPLCOM_SET_CRTSCTS_HX 0x61 #define UPLCOM_HX_STATUS_REG 0x8080 #define UPLCOM_N_SERIAL_CTS 0x80 #define UPLCOM_HXN_SET_REQUEST 0x80 #define UPLCOM_HXN_SET_CRTSCTS_REG 0x0A #define UPLCOM_HXN_SET_CRTSCTS 0xFA enum pl2303_type { UPLCOM_TYPE_0, /* we use this for all non-HX variants */ UPLCOM_TYPE_HX, UPLCOM_TYPE_HXN, }; struct uplcom_softc { device_t sc_dev; /* base device */ struct usbd_device * sc_udev; /* USB device */ struct usbd_interface * sc_iface; /* interface */ int sc_iface_number; /* interface number */ struct usbd_interface * sc_intr_iface; /* interrupt interface */ int sc_intr_number; /* interrupt number */ struct usbd_pipe * sc_intr_pipe; /* interrupt pipe */ u_char *sc_intr_buf; /* interrupt buffer */ int sc_isize; usb_cdc_line_state_t sc_line_state; /* current line state */ int sc_dtr; /* current DTR state */ int sc_rts; /* current RTS state */ device_t sc_subdev; /* ucom device */ bool sc_dying; /* disconnecting */ u_char sc_lsr; /* Local status register */ u_char sc_msr; /* uplcom status register */ enum pl2303_type sc_type; /* PL2303 chip type */ }; /* * These are the maximum number of bytes transferred per frame. * The output buffer size cannot be increased due to the size encoding. 
*/ #define UPLCOMIBUFSIZE 256 #define UPLCOMOBUFSIZE 256 static usbd_status uplcom_reset(struct uplcom_softc *); static usbd_status uplcom_set_line_coding(struct uplcom_softc *, usb_cdc_line_state_t *); static usbd_status uplcom_set_crtscts(struct uplcom_softc *); static void uplcom_intr(struct usbd_xfer *, void *, usbd_status); static void uplcom_set(void *, int, int, int); static void uplcom_dtr(struct uplcom_softc *, int); static void uplcom_rts(struct uplcom_softc *, int); static void uplcom_break(struct uplcom_softc *, int); static void uplcom_set_line_state(struct uplcom_softc *); static void uplcom_get_status(void *, int, u_char *, u_char *); #if TODO static int uplcom_ioctl(void *, int, u_long, void *, int, proc_t *); #endif static int uplcom_param(void *, int, struct termios *); static int uplcom_open(void *, int); static void uplcom_close(void *, int); static usbd_status uplcom_vendor_control_write(struct usbd_device *, uint16_t, uint16_t); static void uplcom_close_pipe(struct uplcom_softc *); static const struct ucom_methods uplcom_methods = { .ucom_get_status = uplcom_get_status, .ucom_set = uplcom_set, .ucom_param = uplcom_param, .ucom_ioctl = NULL, /* TODO */ .ucom_open = uplcom_open, .ucom_close = uplcom_close, }; static const struct usb_devno uplcom_devs[] = { /* I/O DATA USB-RSAQ2 */ { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_RSAQ2 }, /* I/O DATA USB-RSAQ3 */ { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_RSAQ3 }, /* I/O DATA USB-RSAQ */ { USB_VENDOR_IODATA, USB_PRODUCT_IODATA_USBRSAQ }, /* I/O DATA USB-RSAQ5 */ { USB_VENDOR_IODATA, USB_PRODUCT_IODATA_USBRSAQ5 }, /* PLANEX USB-RS232 URS-03 */ { USB_VENDOR_ATEN, USB_PRODUCT_ATEN_UC232A }, /* various */ { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303 }, /* SMART Technologies USB to serial */ { USB_VENDOR_PROLIFIC2, USB_PRODUCT_PROLIFIC2_PL2303 }, /* IOGEAR/ATENTRIPPLITE */ { USB_VENDOR_TRIPPLITE, USB_PRODUCT_TRIPPLITE_U209 }, /* ELECOM UC-SGT */ { USB_VENDOR_ELECOM, USB_PRODUCT_ELECOM_UCSGT }, /* 
ELECOM UC-SGT0 */ { USB_VENDOR_ELECOM, USB_PRODUCT_ELECOM_UCSGT0 }, /* Panasonic 50" Touch Panel */ { USB_VENDOR_PANASONIC, USB_PRODUCT_PANASONIC_TYTP50P6S }, /* RATOC REX-USB60 */ { USB_VENDOR_RATOC, USB_PRODUCT_RATOC_REXUSB60 }, /* TDK USB-PHS Adapter UHA6400 */ { USB_VENDOR_TDK, USB_PRODUCT_TDK_UHA6400 }, /* TDK USB-PDC Adapter UPA9664 */ { USB_VENDOR_TDK, USB_PRODUCT_TDK_UPA9664 }, /* Sony Ericsson USB Cable */ { USB_VENDOR_SUSTEEN, USB_PRODUCT_SUSTEEN_DCU10 }, /* SOURCENEXT KeikaiDenwa 8 */ { USB_VENDOR_SOURCENEXT, USB_PRODUCT_SOURCENEXT_KEIKAI8 }, /* SOURCENEXT KeikaiDenwa 8 with charger */ { USB_VENDOR_SOURCENEXT, USB_PRODUCT_SOURCENEXT_KEIKAI8_CHG }, /* HAL Corporation Crossam2+USB */ { USB_VENDOR_HAL, USB_PRODUCT_HAL_IMR001 }, /* Sitecom USB to serial cable */ { USB_VENDOR_SITECOM, USB_PRODUCT_SITECOM_CN104 }, /* Pharos USB GPS - Microsoft version */ { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303X }, /* Willcom WS002IN (DD) */ { USB_VENDOR_NETINDEX, USB_PRODUCT_NETINDEX_WS002IN }, /* COREGA CG-USBRS232R */ { USB_VENDOR_COREGA, USB_PRODUCT_COREGA_CGUSBRS232R }, /* Sharp CE-175TU (USB to Zaurus option port 15 adapter) */ { USB_VENDOR_SHARP, USB_PRODUCT_SHARP_CE175TU }, /* Various */ { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GB }, { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GC }, { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GE }, { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GL }, { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GS }, { USB_VENDOR_PROLIFIC, USB_PRODUCT_PROLIFIC_PL2303GT }, }; #define uplcom_lookup(v, p) usb_lookup(uplcom_devs, v, p) static int uplcom_match(device_t, cfdata_t, void *); static void uplcom_attach(device_t, device_t, void *); static void uplcom_childdet(device_t, device_t); static int uplcom_detach(device_t, int); CFATTACH_DECL2_NEW(uplcom, sizeof(struct uplcom_softc), uplcom_match, uplcom_attach, uplcom_detach, NULL, NULL, uplcom_childdet); static int uplcom_match(device_t parent, cfdata_t match, void 
*aux) { struct usb_attach_arg *uaa = aux; return uplcom_lookup(uaa->uaa_vendor, uaa->uaa_product) != NULL ? UMATCH_VENDOR_PRODUCT : UMATCH_NONE; } static void uplcom_attach(device_t parent, device_t self, void *aux) { struct uplcom_softc *sc = device_private(self); struct usb_attach_arg *uaa = aux; struct usbd_device *dev = uaa->uaa_device; usb_device_descriptor_t *ddesc; usb_config_descriptor_t *cdesc; usb_interface_descriptor_t *id; usb_endpoint_descriptor_t *ed; usb_device_request_t req; char *devinfop; const char *devname = device_xname(self); usbd_status err; uint8_t val; int i; struct ucom_attach_args ucaa; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("sc=%#jx", (uintptr_t)sc, 0, 0, 0); sc->sc_dev = self; sc->sc_dying = false; aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(dev, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); sc->sc_udev = dev; /* initialize endpoints */ ucaa.ucaa_bulkin = ucaa.ucaa_bulkout = -1; sc->sc_intr_number = -1; sc->sc_intr_pipe = NULL; /* Move the device into the configured state. 
*/ err = usbd_set_config_index(dev, UPLCOM_CONFIG_INDEX, 1); if (err) { aprint_error("\n%s: failed to set configuration, err=%s\n", devname, usbd_errstr(err)); sc->sc_dying = true; return; } /* determine chip type */ ddesc = usbd_get_device_descriptor(dev); if (ddesc->bDeviceClass != UDCLASS_COMM && ddesc->bMaxPacketSize == 0x40) sc->sc_type = UPLCOM_TYPE_HX; if (sc->sc_type == UPLCOM_TYPE_HX) { req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = UPLCOM_SET_REQUEST; USETW(req.wValue, UPLCOM_HX_STATUS_REG); USETW(req.wIndex, sc->sc_iface_number); USETW(req.wLength, 1); err = usbd_do_request(sc->sc_udev, &req, &val); if (err) sc->sc_type = UPLCOM_TYPE_HXN; } #ifdef UPLCOM_DEBUG /* print the chip type */ if (sc->sc_type == UPLCOM_TYPE_HXN) { DPRINTF("chiptype HXN", 0, 0, 0, 0); } else if (sc->sc_type == UPLCOM_TYPE_HX) { DPRINTF("chiptype HX", 0, 0, 0, 0); } else { DPRINTF("chiptype 0", 0, 0, 0, 0); } #endif /* Move the device into the configured state. */ err = usbd_set_config_index(dev, UPLCOM_CONFIG_INDEX, 1); if (err) { aprint_error_dev(self, "failed to set configuration: %s\n", usbd_errstr(err)); sc->sc_dying = true; return; } /* get the config descriptor */ cdesc = usbd_get_config_descriptor(sc->sc_udev); if (cdesc == NULL) { aprint_error_dev(self, "failed to get configuration descriptor\n"); sc->sc_dying = true; return; } /* get the (first/common) interface */ err = usbd_device2interface_handle(dev, UPLCOM_IFACE_INDEX, &sc->sc_iface); if (err) { aprint_error("\n%s: failed to get interface, err=%s\n", devname, usbd_errstr(err)); sc->sc_dying = true; return; } /* Find the interrupt endpoints */ id = usbd_get_interface_descriptor(sc->sc_iface); sc->sc_iface_number = id->bInterfaceNumber; for (i = 0; i < id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(sc->sc_iface, i); if (ed == NULL) { aprint_error_dev(self, "no endpoint descriptor for %d\n", i); sc->sc_dying = true; return; } if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN && 
UE_GET_XFERTYPE(ed->bmAttributes) == UE_INTERRUPT) { sc->sc_intr_number = ed->bEndpointAddress; sc->sc_isize = UGETW(ed->wMaxPacketSize); } } if (sc->sc_intr_number== -1) { aprint_error_dev(self, "Could not find interrupt in\n"); sc->sc_dying = true; return; } /* keep interface for interrupt */ sc->sc_intr_iface = sc->sc_iface; /* * USB-RSAQ1 has two interface * * USB-RSAQ1 | USB-RSAQ2 * -----------------+----------------- * Interface 0 |Interface 0 * Interrupt(0x81) | Interrupt(0x81) * -----------------+ BulkIN(0x02) * Interface 1 | BulkOUT(0x83) * BulkIN(0x02) | * BulkOUT(0x83) | */ if (cdesc->bNumInterface == 2) { err = usbd_device2interface_handle(dev, UPLCOM_SECOND_IFACE_INDEX, &sc->sc_iface); if (err) { aprint_error("\n%s: failed to get second interface, " "err=%s\n", devname, usbd_errstr(err)); sc->sc_dying = true; return; } } /* Find the bulk{in,out} endpoints */ id = usbd_get_interface_descriptor(sc->sc_iface); sc->sc_iface_number = id->bInterfaceNumber; for (i = 0; i < id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(sc->sc_iface, i); if (ed == NULL) { aprint_error_dev(self, "no endpoint descriptor for %d\n", i); sc->sc_dying = true; return; } if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN && UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) { ucaa.ucaa_bulkin = ed->bEndpointAddress; } else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT && UE_GET_XFERTYPE(ed->bmAttributes) == UE_BULK) { ucaa.ucaa_bulkout = ed->bEndpointAddress; } } if (ucaa.ucaa_bulkin == -1) { aprint_error_dev(self, "Could not find data bulk in\n"); sc->sc_dying = true; return; } if (ucaa.ucaa_bulkout == -1) { aprint_error_dev(self, "Could not find data bulk out\n"); sc->sc_dying = true; return; } sc->sc_dtr = sc->sc_rts = -1; ucaa.ucaa_portno = UCOM_UNK_PORTNO; /* ucaa_bulkin, ucaa_bulkout set above */ ucaa.ucaa_ibufsize = UPLCOMIBUFSIZE; ucaa.ucaa_obufsize = UPLCOMOBUFSIZE; ucaa.ucaa_ibufsizepad = UPLCOMIBUFSIZE; ucaa.ucaa_opkthdrlen = 0; ucaa.ucaa_device = dev; 
ucaa.ucaa_iface = sc->sc_iface; ucaa.ucaa_methods = &uplcom_methods; ucaa.ucaa_arg = sc; ucaa.ucaa_info = NULL; err = uplcom_reset(sc); if (err) { aprint_error_dev(self, "reset failed, %s\n", usbd_errstr(err)); sc->sc_dying = true; return; } usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); DPRINTF("in=%#jx out=%#jx intr=%#jx", ucaa.ucaa_bulkin, ucaa.ucaa_bulkout, sc->sc_intr_number, 0); sc->sc_subdev = config_found(self, &ucaa, ucomprint, CFARGS(.submatch = ucomsubmatch)); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); return; } static void uplcom_childdet(device_t self, device_t child) { struct uplcom_softc *sc = device_private(self); UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); KASSERT(sc->sc_subdev == child); sc->sc_subdev = NULL; } static void uplcom_close_pipe(struct uplcom_softc *sc) { if (sc->sc_intr_pipe != NULL) { usbd_abort_pipe(sc->sc_intr_pipe); usbd_close_pipe(sc->sc_intr_pipe); sc->sc_intr_pipe = NULL; } if (sc->sc_intr_buf != NULL) { kmem_free(sc->sc_intr_buf, sc->sc_isize); sc->sc_intr_buf = NULL; } } static int uplcom_detach(device_t self, int flags) { struct uplcom_softc *sc = device_private(self); int rv = 0; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("sc=%#jx flags=%jd", (uintptr_t)sc, flags, 0, 0); sc->sc_dying = true; uplcom_close_pipe(sc); if (sc->sc_subdev != NULL) { rv = config_detach(sc->sc_subdev, flags); sc->sc_subdev = NULL; } usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev, sc->sc_dev); pmf_device_deregister(self); return rv; } usbd_status uplcom_reset(struct uplcom_softc *sc) { usb_device_request_t req; usbd_status err; if (sc->sc_type == UPLCOM_TYPE_HXN) return 0; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UPLCOM_SET_REQUEST; USETW(req.wValue, 0); USETW(req.wIndex, sc->sc_iface_number); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, 0); if (err) return EIO; return 0; } struct pl2303x_init { uint8_t req_type; 
uint8_t request; uint16_t value; uint16_t index; }; static const struct pl2303x_init pl2303x[] = { { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8484, 0 }, { UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x0404, 0 }, { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8484, 0 }, { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8383, 0 }, { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8484, 0 }, { UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x0404, 1 }, { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8484, 0 }, { UT_READ_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0x8383, 0 }, { UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 0, 1 }, { UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 1, 0 }, { UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 2, 0x44 } }; #define N_PL2302X_INIT (sizeof(pl2303x)/sizeof(pl2303x[0])) static usbd_status uplcom_pl2303x_init(struct uplcom_softc *sc) { usb_device_request_t req; usbd_status err; int i; for (i = 0; i < N_PL2302X_INIT; i++) { char buf[1]; void *b; req.bmRequestType = pl2303x[i].req_type; req.bRequest = pl2303x[i].request; USETW(req.wValue, pl2303x[i].value); USETW(req.wIndex, pl2303x[i].index); if (UT_GET_DIR(req.bmRequestType) == UT_READ) { b = buf; USETW(req.wLength, sizeof(buf)); } else { b = NULL; USETW(req.wLength, 0); } err = usbd_do_request(sc->sc_udev, &req, b); if (err) { aprint_error_dev(sc->sc_dev, "uplcom_pl2303x_init failed: %s\n", usbd_errstr(err)); return EIO; } } return 0; } static void uplcom_set_line_state(struct uplcom_softc *sc) { usb_device_request_t req; int ls; /* make sure we have initialized state for sc_dtr and sc_rts */ if (sc->sc_dtr == -1) sc->sc_dtr = 0; if (sc->sc_rts == -1) sc->sc_rts = 0; ls = (sc->sc_dtr ? UCDC_LINE_DTR : 0) | (sc->sc_rts ? 
	    UCDC_LINE_RTS : 0);

	/* CDC SET_CONTROL_LINE_STATE carries the DTR/RTS bits in wValue. */
	req.bmRequestType = UT_WRITE_CLASS_INTERFACE;
	req.bRequest = UCDC_SET_CONTROL_LINE_STATE;
	USETW(req.wValue, ls);
	USETW(req.wIndex, sc->sc_iface_number);
	USETW(req.wLength, 0);

	/* Best effort: the result is deliberately ignored here. */
	(void)usbd_do_request(sc->sc_udev, &req, 0);
}

/*
 * ucom "set" callback: dispatch a DTR, RTS, or break-state change to the
 * matching helper.  Unknown register codes are ignored.
 */
static void
uplcom_set(void *addr, int portno, int reg, int onoff)
{
	struct uplcom_softc *sc = addr;

	if (sc->sc_dying)
		return;

	switch (reg) {
	case UCOM_SET_DTR:
		uplcom_dtr(sc, onoff);
		break;
	case UCOM_SET_RTS:
		uplcom_rts(sc, onoff);
		break;
	case UCOM_SET_BREAK:
		uplcom_break(sc, onoff);
		break;
	default:
		break;
	}
}

/*
 * Set DTR.  sc_dtr == -1 means "state unknown" (it is initialized to -1 at
 * attach time), in which case the request is always sent.
 */
static void
uplcom_dtr(struct uplcom_softc *sc, int onoff)
{
	UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED();
	DPRINTF("onoff=%jd", onoff, 0, 0, 0);

	/* !x == !onoff compares both values normalized to 0/1: no change. */
	if (sc->sc_dtr != -1 && !sc->sc_dtr == !onoff)
		return;

	sc->sc_dtr = !!onoff;

	uplcom_set_line_state(sc);
}

/*
 * Set RTS.  Same convention as uplcom_dtr(): sc_rts == -1 means unknown.
 */
static void
uplcom_rts(struct uplcom_softc *sc, int onoff)
{
	UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED();
	DPRINTF("onoff=%jd", onoff, 0, 0, 0);

	/* Skip the USB request when the cached state already matches. */
	if (sc->sc_rts != -1 && !sc->sc_rts == !onoff)
		return;

	sc->sc_rts = !!onoff;
	uplcom_set_line_state(sc);
}

/*
 * Assert or deassert the break condition via the CDC SEND_BREAK request.
 */
static void
uplcom_break(struct uplcom_softc *sc, int onoff)
{
	usb_device_request_t req;

	UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED();
	DPRINTF("onoff=%jd", onoff, 0, 0, 0);

	req.bmRequestType = UT_WRITE_CLASS_INTERFACE;
	req.bRequest = UCDC_SEND_BREAK;
	USETW(req.wValue, onoff ?
UCDC_BREAK_ON : UCDC_BREAK_OFF); USETW(req.wIndex, sc->sc_iface_number); USETW(req.wLength, 0); (void)usbd_do_request(sc->sc_udev, &req, 0); } static usbd_status uplcom_set_crtscts(struct uplcom_softc *sc) { usb_device_request_t req; usbd_status err; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); req.bmRequestType = UT_WRITE_VENDOR_DEVICE; if (sc->sc_type == UPLCOM_TYPE_HXN) { req.bRequest = UPLCOM_HXN_SET_REQUEST; USETW(req.wValue, UPLCOM_HXN_SET_CRTSCTS_REG); } else { req.bRequest = UPLCOM_SET_REQUEST; USETW(req.wValue, 0); } if (sc->sc_type == UPLCOM_TYPE_HXN) USETW(req.wIndex, UPLCOM_HXN_SET_CRTSCTS); else if (sc->sc_type == UPLCOM_TYPE_HX) USETW(req.wIndex, UPLCOM_SET_CRTSCTS_HX); else USETW(req.wIndex, UPLCOM_SET_CRTSCTS_0); USETW(req.wLength, 0); err = usbd_do_request(sc->sc_udev, &req, 0); if (err) { DPRINTF("failed, err=%jd", err, 0, 0, 0); return err; } return USBD_NORMAL_COMPLETION; } static usbd_status uplcom_set_line_coding(struct uplcom_softc *sc, usb_cdc_line_state_t *state) { usb_device_request_t req; usbd_status err; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("rate=%jd fmt=%jd parity=%jd bits=%jd", UGETDW(state->dwDTERate), state->bCharFormat, state->bParityType, state->bDataBits); if (memcmp(state, &sc->sc_line_state, UCDC_LINE_STATE_LENGTH) == 0) { DPRINTF("already set", 0, 0, 0, 0); return USBD_NORMAL_COMPLETION; } req.bmRequestType = UT_WRITE_CLASS_INTERFACE; req.bRequest = UCDC_SET_LINE_CODING; USETW(req.wValue, 0); USETW(req.wIndex, sc->sc_iface_number); USETW(req.wLength, UCDC_LINE_STATE_LENGTH); err = usbd_do_request(sc->sc_udev, &req, state); if (err) { DPRINTF("failed, err=%ju", err, 0, 0, 0); return err; } sc->sc_line_state = *state; return USBD_NORMAL_COMPLETION; } static int uplcom_param(void *addr, int portno, struct termios *t) { struct uplcom_softc *sc = addr; usbd_status err; usb_cdc_line_state_t ls; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("sc=%#jx", (uintptr_t)sc, 0, 0, 0); if (sc->sc_dying) return EIO; USETDW(ls.dwDTERate, 
t->c_ospeed); if (ISSET(t->c_cflag, CSTOPB)) ls.bCharFormat = UCDC_STOP_BIT_2; else ls.bCharFormat = UCDC_STOP_BIT_1; if (ISSET(t->c_cflag, PARENB)) { if (ISSET(t->c_cflag, PARODD)) ls.bParityType = UCDC_PARITY_ODD; else ls.bParityType = UCDC_PARITY_EVEN; } else ls.bParityType = UCDC_PARITY_NONE; switch (ISSET(t->c_cflag, CSIZE)) { case CS5: ls.bDataBits = 5; break; case CS6: ls.bDataBits = 6; break; case CS7: ls.bDataBits = 7; break; case CS8: ls.bDataBits = 8; break; } err = uplcom_set_line_coding(sc, &ls); if (err) { DPRINTF("err=%jd", err, 0, 0, 0); return EIO; } if (ISSET(t->c_cflag, CRTSCTS)) uplcom_set_crtscts(sc); if (sc->sc_rts == -1 || sc->sc_dtr == -1) uplcom_set_line_state(sc); if (err) { DPRINTF("err=%jd", err, 0, 0, 0); return EIO; } return 0; } static usbd_status uplcom_vendor_control_write(struct usbd_device *dev, uint16_t value, uint16_t index) { usb_device_request_t req; usbd_status err; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UPLCOM_SET_REQUEST; USETW(req.wValue, value); USETW(req.wIndex, index); USETW(req.wLength, 0); err = usbd_do_request(dev, &req, NULL); if (err) { DPRINTF("vendor write failed, err=%jd", err, 0, 0, 0); } return err; } static int uplcom_open(void *addr, int portno) { struct uplcom_softc *sc = addr; usbd_status err = 0; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("sc=%#jx", (uintptr_t)sc, 0, 0, 0); if (sc->sc_dying) return EIO; /* Some unknown device frobbing. 
*/ if (sc->sc_type == UPLCOM_TYPE_HX) uplcom_vendor_control_write(sc->sc_udev, 2, 0x44); else uplcom_vendor_control_write(sc->sc_udev, 2, 0x24); if (sc->sc_intr_number != -1 && sc->sc_intr_pipe == NULL) { sc->sc_intr_buf = kmem_alloc(sc->sc_isize, KM_SLEEP); err = usbd_open_pipe_intr(sc->sc_intr_iface, sc->sc_intr_number, USBD_SHORT_XFER_OK, &sc->sc_intr_pipe, sc, sc->sc_intr_buf, sc->sc_isize, uplcom_intr, USBD_DEFAULT_INTERVAL); if (err) { DPRINTF("cannot open interrupt pipe (addr %jd)", sc->sc_intr_number, 0, 0, 0); } } if (err == 0 && sc->sc_type == UPLCOM_TYPE_HX) err = uplcom_pl2303x_init(sc); return err; } static void uplcom_close(void *addr, int portno) { struct uplcom_softc *sc = addr; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); DPRINTF("sc=%#jx", (uintptr_t)sc, 0, 0, 0); if (sc->sc_dying) return; uplcom_close_pipe(sc); } static void uplcom_intr(struct usbd_xfer *xfer, void *priv, usbd_status status) { struct uplcom_softc *sc = priv; u_char *buf = sc->sc_intr_buf; u_char pstatus; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); if (sc->sc_dying) return; if (status != USBD_NORMAL_COMPLETION) { if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) return; DPRINTF("abnormal status: %ju", status, 0, 0, 0); usbd_clear_endpoint_stall_async(sc->sc_intr_pipe); return; } DPRINTF("uplcom status = %02jx", buf[8], 0, 0, 0); sc->sc_lsr = sc->sc_msr = 0; pstatus = buf[8]; if (ISSET(pstatus, UPLCOM_N_SERIAL_CTS)) sc->sc_msr |= UMSR_CTS; if (ISSET(pstatus, UCDC_N_SERIAL_RI)) sc->sc_msr |= UMSR_RI; if (ISSET(pstatus, UCDC_N_SERIAL_DSR)) sc->sc_msr |= UMSR_DSR; if (ISSET(pstatus, UCDC_N_SERIAL_DCD)) sc->sc_msr |= UMSR_DCD; ucom_status_change(device_private(sc->sc_subdev)); } static void uplcom_get_status(void *addr, int portno, u_char *lsr, u_char *msr) { struct uplcom_softc *sc = addr; UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED(); if (sc->sc_dying) return; *lsr = sc->sc_lsr; *msr = sc->sc_msr; } #if TODO static int uplcom_ioctl(void *addr, int portno, u_long cmd, void *data, int flag, 
    proc_t *p)	/* continuation of the uplcom_ioctl() signature above */
{
	struct uplcom_softc *sc = addr;
	int error = 0;

	UPLCOMHIST_FUNC(); UPLCOMHIST_CALLED();

	/* Refuse ioctls once the device has been detached. */
	if (sc->sc_dying)
		return EIO;

	/*
	 * NOTE(review): this uses "%08lx", unlike the %j... formats used
	 * everywhere else in this driver's DPRINTF calls -- confirm against
	 * the USBHIST argument promotion before enabling this TODO code.
	 */
	DPRINTF("cmd=0x%08lx", cmd, 0, 0, 0);

	switch (cmd) {
	case TIOCNOTTY:
	case TIOCMGET:
	case TIOCMSET:
	case USB_GET_CM_OVER_DATA:
	case USB_SET_CM_OVER_DATA:
		/* Recognized; no driver-specific handling required. */
		break;

	default:
		DPRINTF("unknown", 0, 0, 0, 0);
		error = ENOTTY;
		break;
	}

	return error;
}
#endif
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | /* $NetBSD: uipaq.c,v 1.30 2021/08/07 16:19:17 thorpej Exp $ */ /* $OpenBSD: uipaq.c,v 1.1 2005/06/17 23:50:33 deraadt Exp $ */ /* * Copyright (c) 2000-2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net) at * Carlstedt Research & Technology. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * iPAQ driver * * 19 July 2003: Incorporated changes suggested by Sam Lawrance from * the uppc module * * * Contact isis@cs.umd.edu if you have any questions/comments about this driver */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uipaq.c,v 1.30 2021/08/07 16:19:17 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_usb.h" #endif #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/tty.h> #include <dev/usb/usb.h> #include <dev/usb/usbcdc.h> /*UCDC_* stuff */ #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/usb/ucomvar.h> #ifdef UIPAQ_DEBUG #define DPRINTF(x) if (uipaqdebug) printf x #define DPRINTFN(n,x) if (uipaqdebug>(n)) printf x int uipaqdebug = 0; #else #define DPRINTF(x) #define DPRINTFN(n,x) #endif #define UIPAQ_CONFIG_NO 1 #define UIPAQ_IFACE_INDEX 0 #define UIPAQIBUFSIZE 1024 #define UIPAQOBUFSIZE 1024 struct uipaq_softc { device_t sc_dev; /* base device */ struct usbd_device * sc_udev; /* device */ struct usbd_interface * sc_iface; /* interface */ device_t sc_subdev; /* ucom uses that */ uint16_t sc_lcr; /* state for DTR/RTS */ uint16_t sc_flags; bool sc_dying; }; /* Callback routines */ static void uipaq_set(void *, int, int, int); static int uipaq_open(void *, int); /* Support routines. 
*/ /* based on uppc module by Sam Lawrance */ static void uipaq_dtr(struct uipaq_softc *, int); static void uipaq_rts(struct uipaq_softc *, int); static void uipaq_break(struct uipaq_softc *, int); static const struct ucom_methods uipaq_methods = { .ucom_set = uipaq_set, .ucom_open = uipaq_open, }; struct uipaq_type { struct usb_devno uv_dev; uint16_t uv_flags; }; static const struct uipaq_type uipaq_devs[] = { {{ USB_VENDOR_HP, USB_PRODUCT_HP_2215 }, 0 }, {{ USB_VENDOR_HP, USB_PRODUCT_HP_568J }, 0}, {{ USB_VENDOR_COMPAQ, USB_PRODUCT_COMPAQ_IPAQPOCKETPC} , 0}, {{ USB_VENDOR_CASIO, USB_PRODUCT_CASIO_BE300} , 0}, {{ USB_VENDOR_SHARP, USB_PRODUCT_SHARP_WS007SH} , 0}, {{ USB_VENDOR_SHARP, USB_PRODUCT_SHARP_WS011SH} , 0} }; #define uipaq_lookup(v, p) ((const struct uipaq_type *)usb_lookup(uipaq_devs, v, p)) static int uipaq_match(device_t, cfdata_t, void *); static void uipaq_attach(device_t, device_t, void *); static void uipaq_childdet(device_t, device_t); static int uipaq_detach(device_t, int); CFATTACH_DECL2_NEW(uipaq, sizeof(struct uipaq_softc), uipaq_match, uipaq_attach, uipaq_detach, NULL, NULL, uipaq_childdet); static int uipaq_match(device_t parent, cfdata_t match, void *aux) { struct usb_attach_arg *uaa = aux; DPRINTFN(20,("uipaq: vendor=%#x, product=%#x\n", uaa->uaa_vendor, uaa->uaa_product)); return uipaq_lookup(uaa->uaa_vendor, uaa->uaa_product) != NULL ? 
UMATCH_VENDOR_PRODUCT : UMATCH_NONE; } static void uipaq_attach(device_t parent, device_t self, void *aux) { struct uipaq_softc *sc = device_private(self); struct usb_attach_arg *uaa = aux; struct usbd_device *dev = uaa->uaa_device; struct usbd_interface *iface; usb_interface_descriptor_t *id; usb_endpoint_descriptor_t *ed; char *devinfop; const char *devname = device_xname(self); int i; usbd_status err; struct ucom_attach_args ucaa; DPRINTFN(10,("\nuipaq_attach: sc=%p\n", sc)); sc->sc_dev = self; sc->sc_dying = false; aprint_naive("\n"); aprint_normal("\n"); devinfop = usbd_devinfo_alloc(dev, 0); aprint_normal_dev(self, "%s\n", devinfop); usbd_devinfo_free(devinfop); /* Move the device into the configured state. */ err = usbd_set_config_no(dev, UIPAQ_CONFIG_NO, 1); if (err) { aprint_error_dev(self, "failed to set configuration, err=%s\n", usbd_errstr(err)); goto bad; } err = usbd_device2interface_handle(dev, UIPAQ_IFACE_INDEX, &iface); if (err) { aprint_error("\n%s: failed to get interface, err=%s\n", devname, usbd_errstr(err)); goto bad; } sc->sc_flags = uipaq_lookup(uaa->uaa_vendor, uaa->uaa_product)->uv_flags; id = usbd_get_interface_descriptor(iface); sc->sc_udev = dev; sc->sc_iface = iface; ucaa.ucaa_ibufsize = UIPAQIBUFSIZE; ucaa.ucaa_obufsize = UIPAQOBUFSIZE; ucaa.ucaa_ibufsizepad = UIPAQIBUFSIZE; ucaa.ucaa_opkthdrlen = 0; ucaa.ucaa_device = dev; ucaa.ucaa_iface = iface; ucaa.ucaa_methods = &uipaq_methods; ucaa.ucaa_arg = sc; ucaa.ucaa_portno = UCOM_UNK_PORTNO; ucaa.ucaa_info = "Generic"; /* err = uipaq_init(sc); if (err) { printf("%s: init failed, %s\n", device_xname(sc->sc_dev), usbd_errstr(err)); goto bad; }*/ usbd_add_drv_event(USB_EVENT_DRIVER_ATTACH, sc->sc_udev, sc->sc_dev); ucaa.ucaa_bulkin = ucaa.ucaa_bulkout = -1; for (i=0; i<id->bNumEndpoints; i++) { ed = usbd_interface2endpoint_descriptor(iface, i); if (ed == NULL) { aprint_error_dev(self, "no endpoint descriptor for %d\n", i); goto bad; } if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_IN && 
(ed->bmAttributes & UE_XFERTYPE) == UE_BULK) { ucaa.ucaa_bulkin = ed->bEndpointAddress; } else if (UE_GET_DIR(ed->bEndpointAddress) == UE_DIR_OUT && (ed->bmAttributes & UE_XFERTYPE) == UE_BULK) { ucaa.ucaa_bulkout = ed->bEndpointAddress; } } if (ucaa.ucaa_bulkin == -1 || ucaa.ucaa_bulkout == -1) { aprint_error_dev(self, "no proper endpoints found (%d,%d) \n", ucaa.ucaa_bulkin, ucaa.ucaa_bulkout); return; } sc->sc_subdev = config_found(self, &ucaa, ucomprint, CFARGS(.submatch = ucomsubmatch)); return; bad: DPRINTF(("uipaq_attach: ATTACH ERROR\n")); sc->sc_dying = true; return; } void uipaq_dtr(struct uipaq_softc* sc, int onoff) { usb_device_request_t req; usbd_status err; int retries = 3; DPRINTF(("%s: uipaq_dtr: onoff=%x\n", device_xname(sc->sc_dev), onoff)); /* Avoid sending unnecessary requests */ if (onoff && (sc->sc_lcr & UCDC_LINE_DTR)) return; if (!onoff && !(sc->sc_lcr & UCDC_LINE_DTR)) return; /* Other parameters depend on reg */ req.bmRequestType = UT_WRITE_CLASS_INTERFACE; req.bRequest = UCDC_SET_CONTROL_LINE_STATE; sc->sc_lcr = onoff ? sc->sc_lcr | UCDC_LINE_DTR : sc->sc_lcr & ~UCDC_LINE_DTR; USETW(req.wValue, sc->sc_lcr); USETW(req.wIndex, 0x0); USETW(req.wLength, 0); /* Fire off the request a few times if necessary */ while (retries) { err = usbd_do_request(sc->sc_udev, &req, NULL); if (!err) break; retries--; } } void uipaq_rts(struct uipaq_softc* sc, int onoff) { usb_device_request_t req; usbd_status err; int retries = 3; DPRINTF(("%s: uipaq_rts: onoff=%x\n", device_xname(sc->sc_dev), onoff)); /* Avoid sending unnecessary requests */ if (onoff && (sc->sc_lcr & UCDC_LINE_RTS)) return; if (!onoff && !(sc->sc_lcr & UCDC_LINE_RTS)) return; req.bmRequestType = UT_WRITE_CLASS_INTERFACE; req.bRequest = UCDC_SET_CONTROL_LINE_STATE; sc->sc_lcr = onoff ? 
	    sc->sc_lcr | UCDC_LINE_RTS : sc->sc_lcr & ~UCDC_LINE_RTS;
	USETW(req.wValue, sc->sc_lcr);
	USETW(req.wIndex, 0x0);
	USETW(req.wLength, 0);

	/* Retry the control transfer a few times; failures are silent. */
	while (retries) {
		err = usbd_do_request(sc->sc_udev, &req, NULL);
		if (!err)
			break;
		retries--;
	}
}

/*
 * Assert or deassert BREAK on the line via the standard CDC request.
 */
void
uipaq_break(struct uipaq_softc* sc, int onoff)
{
	usb_device_request_t req;
	usbd_status err;
	int retries = 3;

	DPRINTF(("%s: uipaq_break: onoff=%x\n", device_xname(sc->sc_dev),
	    onoff));

	req.bmRequestType = UT_WRITE_CLASS_INTERFACE;
	req.bRequest = UCDC_SEND_BREAK;

	USETW(req.wValue, onoff ? UCDC_BREAK_ON : UCDC_BREAK_OFF);
	USETW(req.wIndex, 0x0);
	USETW(req.wLength, 0);

	/* Same best-effort retry scheme as uipaq_dtr()/uipaq_rts(). */
	while (retries) {
		err = usbd_do_request(sc->sc_udev, &req, NULL);
		if (!err)
			break;
		retries--;
	}
}

/*
 * ucom "set" callback: dispatch DTR/RTS/BREAK changes to the helpers.
 */
void
uipaq_set(void *addr, int portno, int reg, int onoff)
{
	struct uipaq_softc* sc = addr;

	if (sc->sc_dying)
		return;

	switch (reg) {
	case UCOM_SET_DTR:
		uipaq_dtr(addr, onoff);
		break;
	case UCOM_SET_RTS:
		uipaq_rts(addr, onoff);
		break;
	case UCOM_SET_BREAK:
		uipaq_break(addr, onoff);
		break;
	default:
		aprint_error_dev(sc->sc_dev,
		    "unhandled set request: reg=%x onoff=%x\n", reg, onoff);
		return;
	}
}

/*
 * ucom "open" callback: nothing to set up; just refuse if detaching.
 */
static int
uipaq_open(void *arg, int portno)
{
	struct uipaq_softc *sc = arg;

	if (sc->sc_dying)
		return EIO;

	return 0;
}

/* Child (ucom) is going away: drop our reference to it. */
static void
uipaq_childdet(device_t self, device_t child)
{
	struct uipaq_softc *sc = device_private(self);

	KASSERT(sc->sc_subdev == child);
	sc->sc_subdev = NULL;
}

/*
 * Detach: mark dying first so callbacks bail out, then detach the ucom
 * child and report the driver-detach event.
 */
static int
uipaq_detach(device_t self, int flags)
{
	struct uipaq_softc *sc = device_private(self);
	int rv = 0;

	DPRINTF(("uipaq_detach: sc=%p flags=%d\n", sc, flags));

	sc->sc_dying = true;

	if (sc->sc_subdev != NULL) {
		rv |= config_detach(sc->sc_subdev, flags);
		sc->sc_subdev = NULL;
	}

	if (sc->sc_udev != NULL)
		usbd_add_drv_event(USB_EVENT_DRIVER_DETACH, sc->sc_udev,
		    sc->sc_dev);

	return rv;
}
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 | /* $NetBSD: ufs_vfsops.c,v 1.60 2020/05/01 08:43:37 hannken Exp $ */ /* * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95 */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: ufs_vfsops.c,v 1.60 2020/05/01 08:43:37 hannken Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" #include "opt_quota.h" #include "opt_wapbl.h" #endif #include <sys/param.h> #include <sys/mount.h> #include <sys/proc.h> #include <sys/buf.h> #include <sys/module.h> #include <sys/vnode.h> #include <sys/kmem.h> #include <sys/kauth.h> #include <miscfs/specfs/specdev.h> #include <sys/quotactl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> #ifdef UFS_DIRHASH #include <ufs/ufs/dirhash.h> #endif /* how many times ufs_init() was called */ static int ufs_initcount = 0; pool_cache_t ufs_direct_cache; /* * Make a filesystem operational. * Nothing to do at the moment. */ /* ARGSUSED */ int ufs_start(struct mount *mp, int flags) { return (0); } /* * Return the root of a filesystem. */ int ufs_root(struct mount *mp, int lktype, struct vnode **vpp) { struct vnode *nvp; int error; if ((error = VFS_VGET(mp, (ino_t)UFS_ROOTINO, lktype, &nvp)) != 0) return (error); *vpp = nvp; return (0); } /* * Look up and return a vnode/inode pair by inode number. */ int ufs_vget(struct mount *mp, ino_t ino, int lktype, struct vnode **vpp) { int error; error = vcache_get(mp, &ino, sizeof(ino), vpp); if (error) return error; error = vn_lock(*vpp, lktype); if (error) { vrele(*vpp); *vpp = NULL; return error; } return 0; } /* * Do operations associated with quotas */ int ufs_quotactl(struct mount *mp, struct quotactl_args *args) { #if !defined(QUOTA) && !defined(QUOTA2) (void) mp; (void) args; return (EOPNOTSUPP); #else struct lwp *l = curlwp; int error; /* Mark the mount busy, as we're passing it to kauth(9). 
*/ error = vfs_busy(mp); if (error) { return (error); } mutex_enter(mp->mnt_updating); error = quota_handle_cmd(mp, l, args); mutex_exit(mp->mnt_updating); vfs_unbusy(mp); return (error); #endif } #if 0 switch (cmd) { case Q_SYNC: break; case Q_GETQUOTA: /* The user can always query about his own quota. */ if (uid == kauth_cred_getuid(l->l_cred)) break; error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, KAUTH_ARG(uid), NULL); break; case Q_QUOTAON: case Q_QUOTAOFF: error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_ONOFF, mp, NULL, NULL); break; case Q_SETQUOTA: case Q_SETUSE: error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_MANAGE, mp, KAUTH_ARG(uid), NULL); break; default: error = EINVAL; break; } type = cmds & SUBCMDMASK; if (!error) { /* Only check if there was no error above. */ if ((u_int)type >= MAXQUOTAS) error = EINVAL; } if (error) { vfs_unbusy(mp); return (error); } mutex_enter(mp->mnt_updating); switch (cmd) { case Q_QUOTAON: error = quotaon(l, mp, type, arg); break; case Q_QUOTAOFF: error = quotaoff(l, mp, type); break; case Q_SETQUOTA: error = setquota(mp, uid, type, arg); break; case Q_SETUSE: error = setuse(mp, uid, type, arg); break; case Q_GETQUOTA: error = getquota(mp, uid, type, arg); break; case Q_SYNC: error = qsync(mp); break; default: error = EINVAL; } mutex_exit(mp->mnt_updating); vfs_unbusy(mp); return (error); #endif /* * This is the generic part of fhtovp called after the underlying * filesystem has validated the file handle. 
*/ int ufs_fhtovp(struct mount *mp, struct ufid *ufhp, int lktype, struct vnode **vpp) { struct vnode *nvp; struct inode *ip; int error; if ((error = VFS_VGET(mp, ufhp->ufid_ino, lktype, &nvp)) != 0) { if (error == ENOENT) error = ESTALE; *vpp = NULLVP; return (error); } ip = VTOI(nvp); KASSERT(ip != NULL); if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen || ((ip->i_mode & IFMT) == IFDIR && ip->i_size == 0)) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; return (0); } /* * Initialize UFS filesystems, done only once. */ void ufs_init(void) { if (ufs_initcount++ > 0) return; ufs_direct_cache = pool_cache_init(sizeof(struct direct), 0, 0, 0, "ufsdir", NULL, IPL_NONE, NULL, NULL, NULL); #if defined(QUOTA) || defined(QUOTA2) dqinit(); #endif #ifdef UFS_DIRHASH ufsdirhash_init(); #endif #ifdef UFS_EXTATTR ufs_extattr_init(); #endif } void ufs_reinit(void) { #if defined(QUOTA) || defined(QUOTA2) dqreinit(); #endif } /* * Free UFS filesystem resources, done only once. */ void ufs_done(void) { if (--ufs_initcount > 0) return; #if defined(QUOTA) || defined(QUOTA2) dqdone(); #endif pool_cache_destroy(ufs_direct_cache); #ifdef UFS_DIRHASH ufsdirhash_done(); #endif #ifdef UFS_EXTATTR ufs_extattr_done(); #endif } /* * module interface */ #ifdef WAPBL MODULE(MODULE_CLASS_MISC, ufs, "wapbl"); #else MODULE(MODULE_CLASS_MISC, ufs, NULL); #endif static int ufs_modcmd(modcmd_t cmd, void *arg) { int error; switch (cmd) { case MODULE_CMD_INIT: ufs_init(); error = 0; break; case MODULE_CMD_FINI: ufs_done(); error = 0; break; default: error = ENOTTY; break; } return error; } |
| 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 | /* $NetBSD: irmce.c,v 1.8 2021/08/07 16:19:16 thorpej Exp $ */ /*- * Copyright (c) 2011 Jared D. McNeill <jmcneill@invisible.ca> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * IR receiver/transceiver for Windows Media Center */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: irmce.c,v 1.8 2021/08/07 16:19:16 thorpej Exp $"); #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/bus.h> #include <sys/select.h> #include <sys/module.h> #include <dev/usb/usb.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include <dev/usb/usbdevs.h> #include <dev/ir/ir.h> #include <dev/ir/cirio.h> #include <dev/ir/cirvar.h> enum irmce_state { IRMCE_STATE_HEADER, IRMCE_STATE_IRDATA, IRMCE_STATE_CMDHEADER, IRMCE_STATE_CMDDATA, }; struct irmce_softc { device_t sc_dev; device_t sc_cirdev; struct usbd_device * sc_udev; struct usbd_interface * sc_iface; int sc_bulkin_ep; uint16_t sc_bulkin_maxpktsize; struct usbd_pipe * sc_bulkin_pipe; struct usbd_xfer * sc_bulkin_xfer; uint8_t * sc_bulkin_buffer; int sc_bulkout_ep; uint16_t sc_bulkout_maxpktsize; struct usbd_pipe * sc_bulkout_pipe; struct usbd_xfer * sc_bulkout_xfer; uint8_t * sc_bulkout_buffer; bool sc_raw; uint8_t sc_ir_buf[16]; size_t sc_ir_bufused; size_t sc_ir_resid; enum irmce_state sc_ir_state; uint8_t sc_ir_header; bool sc_rc6_hb[256]; size_t sc_rc6_nhb; }; static int irmce_match(device_t, cfdata_t, void *); static void irmce_attach(device_t, device_t, void *); static int irmce_detach(device_t, int); static void irmce_childdet(device_t, device_t); static int irmce_activate(device_t, enum devact); static int irmce_rescan(device_t, const char *, const int *); static int irmce_print(void *, const char *); static int irmce_reset(struct irmce_softc *); static int irmce_open(void *, int, int, struct proc *); static int irmce_close(void *, int, int, struct proc *); static int irmce_read(void *, struct uio *, int); static int irmce_write(void *, struct uio *, int); static int irmce_setparams(void *, struct cir_params *); static const struct cir_methods irmce_cir_methods = { .im_open = irmce_open, .im_close = 
irmce_close, .im_read = irmce_read, .im_write = irmce_write, .im_setparams = irmce_setparams, }; static const struct { uint16_t vendor; uint16_t product; } irmce_devices[] = { { USB_VENDOR_SMK, USB_PRODUCT_SMK_MCE_IR }, }; CFATTACH_DECL2_NEW(irmce, sizeof(struct irmce_softc), irmce_match, irmce_attach, irmce_detach, irmce_activate, irmce_rescan, irmce_childdet); static int irmce_match(device_t parent, cfdata_t match, void *opaque) { struct usbif_attach_arg *uiaa = opaque; unsigned int i; for (i = 0; i < __arraycount(irmce_devices); i++) { if (irmce_devices[i].vendor == uiaa->uiaa_vendor && irmce_devices[i].product == uiaa->uiaa_product) return UMATCH_VENDOR_PRODUCT; } return UMATCH_NONE; } static void irmce_attach(device_t parent, device_t self, void *opaque) { struct irmce_softc *sc = device_private(self); struct usbif_attach_arg *uiaa = opaque; usb_endpoint_descriptor_t *ed; char *devinfop; unsigned int i; uint8_t nep; if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); aprint_naive("\n"); devinfop = usbd_devinfo_alloc(uiaa->uiaa_device, 0); aprint_normal(": %s\n", devinfop); usbd_devinfo_free(devinfop); sc->sc_dev = self; sc->sc_udev = uiaa->uiaa_device; sc->sc_iface = uiaa->uiaa_iface; nep = 0; usbd_endpoint_count(sc->sc_iface, &nep); sc->sc_bulkin_ep = sc->sc_bulkout_ep = -1; for (i = 0; i < nep; i++) { int dir, type; ed = usbd_interface2endpoint_descriptor(sc->sc_iface, i); if (ed == NULL) { aprint_error_dev(self, "couldn't read endpoint descriptor %d\n", i); continue; } dir = UE_GET_DIR(ed->bEndpointAddress); type = UE_GET_XFERTYPE(ed->bmAttributes); if (type != UE_BULK) continue; if (dir == UE_DIR_IN && sc->sc_bulkin_ep == -1) { sc->sc_bulkin_ep = ed->bEndpointAddress; sc->sc_bulkin_maxpktsize = UE_GET_SIZE(UGETW(ed->wMaxPacketSize)) * (UE_GET_TRANS(UGETW(ed->wMaxPacketSize)) + 1); } if (dir == UE_DIR_OUT && sc->sc_bulkout_ep == -1) { sc->sc_bulkout_ep = ed->bEndpointAddress; sc->sc_bulkout_maxpktsize = 
UE_GET_SIZE(UGETW(ed->wMaxPacketSize)) * (UE_GET_TRANS(UGETW(ed->wMaxPacketSize)) + 1); } } aprint_debug_dev(self, "in 0x%02x/%d out 0x%02x/%d\n", sc->sc_bulkin_ep, sc->sc_bulkin_maxpktsize, sc->sc_bulkout_ep, sc->sc_bulkout_maxpktsize); if (sc->sc_bulkin_maxpktsize < 16 || sc->sc_bulkout_maxpktsize < 16) { aprint_error_dev(self, "bad maxpktsize\n"); return; } usbd_status err; err = usbd_open_pipe(sc->sc_iface, sc->sc_bulkin_ep, USBD_EXCLUSIVE_USE, &sc->sc_bulkin_pipe); if (err) { aprint_error_dev(sc->sc_dev, "couldn't open bulk-in pipe: %s\n", usbd_errstr(err)); return; } err = usbd_open_pipe(sc->sc_iface, sc->sc_bulkout_ep, USBD_EXCLUSIVE_USE, &sc->sc_bulkout_pipe); if (err) { aprint_error_dev(sc->sc_dev, "couldn't open bulk-out pipe: %s\n", usbd_errstr(err)); usbd_close_pipe(sc->sc_bulkin_pipe); sc->sc_bulkin_pipe = NULL; return; } int error; error = usbd_create_xfer(sc->sc_bulkin_pipe, sc->sc_bulkin_maxpktsize, 0, 0, &sc->sc_bulkin_xfer); if (error) { goto fail; } error = usbd_create_xfer(sc->sc_bulkout_pipe, sc->sc_bulkout_maxpktsize, USBD_FORCE_SHORT_XFER, 0, &sc->sc_bulkout_xfer); if (error) { goto fail; } sc->sc_bulkin_buffer = usbd_get_buffer(sc->sc_bulkin_xfer); sc->sc_bulkout_buffer = usbd_get_buffer(sc->sc_bulkout_xfer); irmce_rescan(self, NULL, NULL); return; fail: if (sc->sc_bulkin_xfer) usbd_destroy_xfer(sc->sc_bulkin_xfer); if (sc->sc_bulkout_xfer) usbd_destroy_xfer(sc->sc_bulkout_xfer); } static int irmce_detach(device_t self, int flags) { struct irmce_softc *sc = device_private(self); int error; if (sc->sc_cirdev) { error = config_detach(sc->sc_cirdev, flags); if (error) return error; } if (sc->sc_bulkin_pipe) { usbd_abort_pipe(sc->sc_bulkin_pipe); } if (sc->sc_bulkout_pipe) { usbd_abort_pipe(sc->sc_bulkout_pipe); } if (sc->sc_bulkin_xfer) { usbd_destroy_xfer(sc->sc_bulkin_xfer); sc->sc_bulkin_buffer = NULL; sc->sc_bulkin_xfer = NULL; } if (sc->sc_bulkout_xfer) { usbd_destroy_xfer(sc->sc_bulkout_xfer); sc->sc_bulkout_buffer = NULL; 
sc->sc_bulkout_xfer = NULL; } if (sc->sc_bulkin_pipe) { usbd_close_pipe(sc->sc_bulkin_pipe); sc->sc_bulkin_pipe = NULL; } if (sc->sc_bulkout_pipe) { usbd_close_pipe(sc->sc_bulkout_pipe); sc->sc_bulkout_pipe = NULL; } pmf_device_deregister(self); return 0; } static int irmce_activate(device_t self, enum devact act) { return 0; } static int irmce_rescan(device_t self, const char *ifattr, const int *locators) { struct irmce_softc *sc = device_private(self); struct ir_attach_args iaa; if (sc->sc_cirdev == NULL) { iaa.ia_type = IR_TYPE_CIR; iaa.ia_methods = &irmce_cir_methods; iaa.ia_handle = sc; sc->sc_cirdev = config_found(self, &iaa, irmce_print, CFARGS_NONE); } return 0; } static int irmce_print(void *priv, const char *pnp) { if (pnp) aprint_normal("cir at %s", pnp); return UNCONF; } static void irmce_childdet(device_t self, device_t child) { struct irmce_softc *sc = device_private(self); if (sc->sc_cirdev == child) sc->sc_cirdev = NULL; } static int irmce_reset(struct irmce_softc *sc) { static const uint8_t reset_cmd[] = { 0x00, 0xff, 0xaa }; uint8_t *p = sc->sc_bulkout_buffer; usbd_status err; uint32_t wlen; unsigned int n; for (n = 0; n < __arraycount(reset_cmd); n++) *p++ = reset_cmd[n]; wlen = sizeof(reset_cmd); err = usbd_bulk_transfer(sc->sc_bulkout_xfer, sc->sc_bulkout_pipe, USBD_FORCE_SHORT_XFER, USBD_DEFAULT_TIMEOUT, sc->sc_bulkout_buffer, &wlen); if (err != USBD_NORMAL_COMPLETION) { if (err == USBD_INTERRUPTED) return EINTR; else if (err == USBD_TIMEOUT) return ETIMEDOUT; else return EIO; } return 0; } static int irmce_open(void *priv, int flag, int mode, struct proc *p) { struct irmce_softc *sc = priv; int err = irmce_reset(sc); if (err) { aprint_error_dev(sc->sc_dev, "couldn't reset device: %s\n", usbd_errstr(err)); } sc->sc_ir_state = IRMCE_STATE_HEADER; sc->sc_rc6_nhb = 0; return 0; } static int irmce_close(void *priv, int flag, int mode, struct proc *p) { struct irmce_softc *sc = priv; if (sc->sc_bulkin_pipe) { usbd_abort_pipe(sc->sc_bulkin_pipe); } 
if (sc->sc_bulkout_pipe) { usbd_abort_pipe(sc->sc_bulkout_pipe); } return 0; } static int irmce_rc6_decode(struct irmce_softc *sc, uint8_t *buf, size_t buflen, struct uio *uio) { bool *hb = &sc->sc_rc6_hb[0]; unsigned int n; int state, pulse; uint32_t data; uint8_t mode; bool idle = false; for (n = 0; n < buflen; n++) { state = (buf[n] & 0x80) ? 1 : 0; pulse = (buf[n] & 0x7f) * 50; if (pulse >= 300 && pulse <= 600) { hb[sc->sc_rc6_nhb++] = state; } else if (pulse >= 680 && pulse <= 1080) { hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; } else if (pulse >= 1150 && pulse <= 1450) { hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; } else if (pulse >= 2400 && pulse <= 2800) { hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; hb[sc->sc_rc6_nhb++] = state; } else if (pulse > 3000) { if (sc->sc_rc6_nhb & 1) hb[sc->sc_rc6_nhb++] = state; idle = true; break; } else { aprint_debug_dev(sc->sc_dev, "error parsing RC6 stream (pulse=%d)\n", pulse); return EIO; } } if (!idle) return 0; if (sc->sc_rc6_nhb < 20) { aprint_debug_dev(sc->sc_dev, "not enough RC6 data\n"); return EIO; } /* RC6 leader 11111100 */ if (!hb[0] || !hb[1] || !hb[2] || !hb[3] || !hb[4] || !hb[5] || hb[6] || hb[7]) { aprint_debug_dev(sc->sc_dev, "bad RC6 leader\n"); return EIO; } /* start bit 10 */ if (!hb[8] || hb[9]) { aprint_debug_dev(sc->sc_dev, "missing RC6 start bit\n"); return EIO; } /* mode info */ mode = 0x00; for (n = 10; n < 15; n += 2) { if (hb[n] && !hb[n + 1]) mode = (mode << 1) | 1; else if (!hb[n] && hb[n + 1]) mode = (mode << 1) | 0; else { aprint_debug_dev(sc->sc_dev, "bad RC6 mode bits\n"); return EIO; } } data = 0; for (n = 20; n < sc->sc_rc6_nhb; n += 2) { if (hb[n] && !hb[n + 1]) data = (data << 1) | 1; else if (!hb[n] && hb[n + 1]) data = (data << 1) | 0; else { aprint_debug_dev(sc->sc_dev, "bad RC6 data bits\n"); return EIO; } } 
sc->sc_rc6_nhb = 0; return uiomove(&data, sizeof(data), uio); } static int irmce_process(struct irmce_softc *sc, uint8_t *buf, size_t buflen, struct uio *uio) { uint8_t *p = buf; uint8_t data, cmd; int error; while (p - buf < (ssize_t)buflen) { switch (sc->sc_ir_state) { case IRMCE_STATE_HEADER: sc->sc_ir_header = data = *p++; if ((data & 0xe0) == 0x80 && (data & 0x1f) != 0x1f) { sc->sc_ir_bufused = 0; sc->sc_ir_resid = data & 0x1f; sc->sc_ir_state = IRMCE_STATE_IRDATA; if (sc->sc_ir_resid > sizeof(sc->sc_ir_buf)) return EIO; if (sc->sc_ir_resid == 0) sc->sc_ir_state = IRMCE_STATE_HEADER; } else { sc->sc_ir_state = IRMCE_STATE_CMDHEADER; } break; case IRMCE_STATE_CMDHEADER: cmd = *p++; data = sc->sc_ir_header; if (data == 0x00 && cmd == 0x9f) sc->sc_ir_resid = 1; else if (data == 0xff && cmd == 0x0b) sc->sc_ir_resid = 2; else if (data == 0x9f) { if (cmd == 0x04 || cmd == 0x06 || cmd == 0x0c || cmd == 0x15) { sc->sc_ir_resid = 2; } else if (cmd == 0x01 || cmd == 0x08 || cmd == 0x14) { sc->sc_ir_resid = 1; } } if (sc->sc_ir_resid > 0) sc->sc_ir_state = IRMCE_STATE_CMDDATA; else sc->sc_ir_state = IRMCE_STATE_HEADER; break; case IRMCE_STATE_IRDATA: sc->sc_ir_resid--; sc->sc_ir_buf[sc->sc_ir_bufused++] = *p; p++; if (sc->sc_ir_resid == 0) { sc->sc_ir_state = IRMCE_STATE_HEADER; error = irmce_rc6_decode(sc, sc->sc_ir_buf, sc->sc_ir_bufused, uio); if (error) sc->sc_rc6_nhb = 0; } break; case IRMCE_STATE_CMDDATA: p++; sc->sc_ir_resid--; if (sc->sc_ir_resid == 0) sc->sc_ir_state = IRMCE_STATE_HEADER; break; } } return 0; } static int irmce_read(void *priv, struct uio *uio, int flag) { struct irmce_softc *sc = priv; usbd_status err; uint32_t rlen; int error = 0; while (uio->uio_resid > 0) { rlen = sc->sc_bulkin_maxpktsize; err = usbd_bulk_transfer(sc->sc_bulkin_xfer, sc->sc_bulkin_pipe, USBD_SHORT_XFER_OK, USBD_DEFAULT_TIMEOUT, sc->sc_bulkin_buffer, &rlen); if (err != USBD_NORMAL_COMPLETION) { if (err == USBD_INTERRUPTED) return EINTR; else if (err == USBD_TIMEOUT) continue; 
else return EIO; } if (sc->sc_raw) { error = uiomove(sc->sc_bulkin_buffer, rlen, uio); break; } else { error = irmce_process(sc, sc->sc_bulkin_buffer, rlen, uio); if (error) break; } } return error; } static int irmce_write(void *priv, struct uio *uio, int flag) { return EIO; } static int irmce_setparams(void *priv, struct cir_params *params) { struct irmce_softc *sc = priv; if (params->raw > 1) return EINVAL; sc->sc_raw = params->raw; return 0; } MODULE(MODULE_CLASS_DRIVER, irmce, NULL); #ifdef _MODULE #include "ioconf.c" #endif static int irmce_modcmd(modcmd_t cmd, void *opaque) { switch (cmd) { case MODULE_CMD_INIT: #ifdef _MODULE return config_init_component(cfdriver_ioconf_irmce, cfattach_ioconf_irmce, cfdata_ioconf_irmce); #else return 0; #endif case MODULE_CMD_FINI: #ifdef _MODULE return config_fini_component(cfdriver_ioconf_irmce, cfattach_ioconf_irmce, cfdata_ioconf_irmce); #else return 0; #endif default: return ENOTTY; } } |
| 7 6 3 3 3 5 3 3 3 1 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | /* $NetBSD: vfs_syscalls_20.c,v 1.46 2020/06/28 14:37:53 christos Exp $ */ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.42 (Berkeley) 7/31/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls_20.c,v 1.46 2020/06/28 14:37:53 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/dirent.h>
#include <sys/sysctl.h>
#include <sys/syscall.h>
#include <sys/syscallvar.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/vfs_syscalls.h>

#include <compat/common/compat_mod.h>
#include <compat/sys/mount.h>
#include <compat/sys/statvfs.h>

/*
 * Syscall table entries installed/removed by vfs_syscalls_20_init()
 * and vfs_syscalls_20_fini() below.
 */
static const struct syscall_package vfs_syscalls_20_syscalls[] = {
	{ SYS_compat_20_fhstatfs, 0, (sy_call_t *)compat_20_sys_fhstatfs },
	{ SYS_compat_20_fstatfs, 0, (sy_call_t *)compat_20_sys_fstatfs },
	{ SYS_compat_20_getfsstat, 0, (sy_call_t *)compat_20_sys_getfsstat },
	{ SYS_compat_20_statfs, 0, (sy_call_t *)compat_20_sys_statfs },
	{ 0, 0, NULL }
};

/*
 * Get filesystem statistics for the filesystem containing `path',
 * converted to the old struct statfs12 user layout.
 */
/* ARGSUSED */
int
compat_20_sys_statfs(struct lwp *l,
    const struct compat_20_sys_statfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(struct statfs12 *) buf;
	} */
	struct mount *mp;
	struct statvfs *sbuf;
	int error;
	struct vnode *vp;

	error = namei_simple_user(SCARG(uap, path),
	    NSM_FOLLOW_TRYEMULROOT, &vp);
	if (error != 0)
		return error;
	mp = vp->v_mount;
	sbuf = STATVFSBUF_GET();
	if ((error = dostatvfs(mp, sbuf, l, 0, 1)) != 0)
		goto done;
	/* Translate the native statvfs into the compat layout. */
	error = statvfs_to_statfs12_copy(sbuf, SCARG(uap, buf), 0);
done:
	/* Release the vnode reference before freeing the statvfs buffer. */
	vrele(vp);
	STATVFSBUF_PUT(sbuf);
	return error;
}

/*
 * Get filesystem statistics for the filesystem containing the open
 * file `fd', converted to the old struct statfs12 user layout.
 */
/* ARGSUSED */
int
compat_20_sys_fstatfs(struct lwp *l,
    const struct compat_20_sys_fstatfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(struct statfs12 *) buf;
	} */
	struct file *fp;
	struct mount *mp;
	struct statvfs *sbuf;
	int error;

	/* fd_getvnode() will use the descriptor for us */
	if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
		return (error);
	mp = fp->f_vnode->v_mount;
	sbuf = STATVFSBUF_GET();
	if ((error = dostatvfs(mp, sbuf, l, 0, 1)) != 0)
		goto out;
	error = statvfs_to_statfs12_copy(sbuf, SCARG(uap, buf), 0);
 out:
	/* Drop the descriptor hold taken by fd_getvnode(). */
	fd_putfile(SCARG(uap, fd));
	STATVFSBUF_PUT(sbuf);
	return error;
}

/*
 * Get statistics on all mounted filesystems, each converted to the old
 * struct statfs12 layout; the heavy lifting is in do_sys_getvfsstat().
 */
int
compat_20_sys_getfsstat(struct lwp *l,
    const struct compat_20_sys_getfsstat_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct statfs12 *) buf;
		syscallarg(long) bufsize;
		syscallarg(int) flags;
	} */
	return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
	    SCARG(uap, flags), statvfs_to_statfs12_copy,
	    sizeof(struct statfs12), retval);
}

/*
 * Get filesystem statistics by NFS-style file handle (privileged),
 * converted to the old struct statfs12 user layout.
 */
int
compat_20_sys_fhstatfs(struct lwp *l,
    const struct compat_20_sys_fhstatfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct compat_30_fhandle *) fhp;
		syscallarg(struct statfs12 *) buf;
	} */
	struct statvfs *sbuf;
	struct compat_30_fhandle fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
	    0, NULL, NULL, NULL)))
		return (error);

	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fh))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	error = VFS_FHTOVP(mp, (struct fid*)&fh.fh_fid, LK_EXCLUSIVE, &vp);
	if (error != 0)
		return (error);
	mp = vp->v_mount;
	/* Keep the reference but drop the lock for the statvfs call. */
	VOP_UNLOCK(vp);
	sbuf = STATVFSBUF_GET();
	if ((error = VFS_STATVFS(mp, sbuf)) != 0)
		goto out;
	error = statvfs_to_statfs12_copy(sbuf, SCARG(uap, buf), 0);
out:
	vrele(vp);
	STATVFSBUF_PUT(sbuf);
	return error;
}

/* Install the compat 2.0 VFS syscalls. */
int
vfs_syscalls_20_init(void)
{

	return syscall_establish(NULL, vfs_syscalls_20_syscalls);
}

/* Remove the compat 2.0 VFS syscalls. */
int
vfs_syscalls_20_fini(void)
{

	return syscall_disestablish(NULL, vfs_syscalls_20_syscalls);
}
| 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | /* $NetBSD: ccd_60.c,v 1.11 2019/12/12 02:15:42 pgoyette Exp $ */ /*- * Copyright (c) 2018 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ccd_60.c,v 1.11 2019/12/12 02:15:42 pgoyette Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/disk.h>
#include <sys/lwp.h>
#include <sys/compat_stub.h>

#include <dev/ccdvar.h>
#include <compat/sys/ccdvar.h>

/*
 * Compat code must not be called if on a platform where
 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will
 * be the same as CCDIOCSET_60
 */

/*
 * Translate the 6.0-era ccd(4) ioctls to the current ones and forward
 * them to `f' (the real ioctl handler).  For CCDIOCSET_60 the argument
 * struct is converted in both directions because ccio_size changed
 * type; CCDIOCCLR_60 shares the current layout and is passed through.
 */
static int
compat_60_ccdioctl(dev_t dev, u_long cmd, void *data, int flag,
    struct lwp *l, int (*f)(dev_t, u_long, void *, int, struct lwp *))
{
	switch (cmd) {
#ifdef CCDIOCSET_60
	case CCDIOCSET_60: {
		/*
		 * NOTE(review): NULL data returns 0 here but ENOSYS in
		 * the CCDIOCCLR_60 case below — presumably intentional,
		 * but worth confirming.
		 */
		if (data == NULL)
			return 0;

		struct ccd_ioctl ccio;
		struct ccd_ioctl_60 *ccio60 = data;

		/* Field-by-field copy into the current layout. */
		ccio.ccio_disks = ccio60->ccio_disks;
		ccio.ccio_ndisks = ccio60->ccio_ndisks;
		ccio.ccio_ileave = ccio60->ccio_ileave;
		ccio.ccio_flags = ccio60->ccio_flags;
		ccio.ccio_unit = ccio60->ccio_unit;
		int error = (*f)(dev, CCDIOCSET, &ccio, flag, l);
		if (!error) {
			/* Copy data back, adjust types if necessary */
			ccio60->ccio_disks = ccio.ccio_disks;
			ccio60->ccio_ndisks = ccio.ccio_ndisks;
			ccio60->ccio_ileave = ccio.ccio_ileave;
			ccio60->ccio_flags = ccio.ccio_flags;
			ccio60->ccio_unit = ccio.ccio_unit;
			ccio60->ccio_size = (size_t)ccio.ccio_size;
		}
		return error;
	}

	case CCDIOCCLR_60:
		if (data == NULL)
			return ENOSYS;
		/*
		 * ccio_size member not used, so existing struct OK
		 * drop through to existing non-compat version
		 */
		return (*f)(dev, CCDIOCCLR, data, flag, l);
#endif
	default:
		return ENOSYS;
	}
}

/* Register the compat ioctl translator with the ccd(4) hook. */
void
ccd_60_init(void)
{

	MODULE_HOOK_SET(ccd_ioctl_60_hook, compat_60_ccdioctl);
}

/* Unregister the compat ioctl translator. */
void
ccd_60_fini(void)
{

	MODULE_HOOK_UNSET(ccd_ioctl_60_hook);
}
| 874 875 375 375 375 250 251 250 249 165 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | /* $NetBSD: kern_mutex_obj.c,v 1.9 2022/04/09 23:38:33 riastradh Exp $ */ /*- * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex_obj.c,v 1.9 2022/04/09 23:38:33 riastradh Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/mutex.h>
#include <sys/pool.h>

/* Mutex cache */
#define	MUTEX_OBJ_MAGIC	0x5aa3c85d
struct kmutexobj {
	kmutex_t	mo_lock;	/* the mutex handed to callers */
	u_int		mo_magic;	/* MUTEX_OBJ_MAGIC; sanity check */
	u_int		mo_refcnt;	/* reference count */
};

static int	mutex_obj_ctor(void *, void *, int);

static pool_cache_t	mutex_obj_cache		__read_mostly;

/*
 * mutex_obj_init:
 *
 *	Initialize the mutex object store.
 */
void
mutex_obj_init(void)
{

	mutex_obj_cache = pool_cache_init(sizeof(struct kmutexobj),
	    coherency_unit, 0, 0, "mutex", NULL, IPL_NONE, mutex_obj_ctor,
	    NULL, NULL);
}

/*
 * mutex_obj_ctor:
 *
 *	Initialize a new lock for the cache.  Only the magic number is
 *	set here; the mutex itself is initialized at allocation time.
 */
static int
mutex_obj_ctor(void *arg, void *obj, int flags)
{
	struct kmutexobj * mo = obj;

	mo->mo_magic = MUTEX_OBJ_MAGIC;

	return 0;
}

/*
 * mutex_obj_alloc:
 *
 *	Allocate a single lock object, waiting for memory if needed.
 *	The returned lock starts with one reference.
 */
kmutex_t *
mutex_obj_alloc(kmutex_type_t type, int ipl)
{
	struct kmutexobj *mo;
	extern void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);

	mo = pool_cache_get(mutex_obj_cache, PR_WAITOK);
	/* Record the caller's address for lockdebug purposes. */
	_mutex_init(&mo->mo_lock, type, ipl,
	    (uintptr_t)__builtin_return_address(0));
	mo->mo_refcnt = 1;

	return (kmutex_t *)mo;
}

/*
 * mutex_obj_tryalloc:
 *
 *	Allocate a single lock object, failing (returning NULL) if no
 *	memory is available.
 */
kmutex_t *
mutex_obj_tryalloc(kmutex_type_t type, int ipl)
{
	struct kmutexobj *mo;
	extern void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);

	mo = pool_cache_get(mutex_obj_cache, PR_NOWAIT);
	if (__predict_true(mo != NULL)) {
		_mutex_init(&mo->mo_lock, type, ipl,
		    (uintptr_t)__builtin_return_address(0));
		mo->mo_refcnt = 1;
	}

	return (kmutex_t *)mo;
}

/*
 * mutex_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
mutex_obj_hold(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERTMSG(mo->mo_magic == MUTEX_OBJ_MAGIC,
	    "%s: lock %p: mo->mo_magic (%#x) != MUTEX_OBJ_MAGIC (%#x)",
	     __func__, mo, mo->mo_magic, MUTEX_OBJ_MAGIC);
	KASSERTMSG(mo->mo_refcnt > 0,
	    "%s: lock %p: mo->mo_refcnt (%#x) == 0",
	     __func__, mo, mo->mo_refcnt);

	atomic_inc_uint(&mo->mo_refcnt);
}

/*
 * mutex_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
bool
mutex_obj_free(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERTMSG(mo->mo_magic == MUTEX_OBJ_MAGIC,
	    "%s: lock %p: mo->mo_magic (%#x) != MUTEX_OBJ_MAGIC (%#x)",
	     __func__, mo, mo->mo_magic, MUTEX_OBJ_MAGIC);
	KASSERTMSG(mo->mo_refcnt > 0,
	    "%s: lock %p: mo->mo_refcnt (%#x) == 0",
	     __func__, mo, mo->mo_refcnt);

	/* Release ordering: prior accesses happen-before the decrement. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) {
		return false;
	}
	/* Acquire ordering: the destroy below sees all prior releases. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif
	mutex_destroy(&mo->mo_lock);
	pool_cache_put(mutex_obj_cache, mo);
	return true;
}

/*
 * mutex_obj_refcnt:
 *
 *	Return the reference count on a lock object.  This is an
 *	unsynchronized snapshot and may be stale by the time it is read.
 */
u_int
mutex_obj_refcnt(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	return mo->mo_refcnt;
}
| 3 3 7 7 6 4 6 7 6 5 3 8 8 18 16 4 15 18 8 4 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 | /* $NetBSD: kern_time_50.c,v 1.37 2021/09/07 11:43:02 riastradh Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_time_50.c,v 1.37 2021/09/07 11:43:02 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_aio.h"
#include "opt_ntp.h"
#include "opt_mqueue.h"
#endif

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/socketvar.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/dirent.h>
#include <sys/kauth.h>
#include <sys/time.h>
#include <sys/timex.h>
#include <sys/clockctl.h>
#include <sys/aio.h>
#include <sys/poll.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/sysctl.h>
#include <sys/resource.h>
#include <sys/compat_stub.h>

#include <compat/common/compat_util.h>
#include <compat/common/compat_mod.h>
#include <compat/sys/time.h>
#include <compat/sys/timex.h>
#include <compat/sys/resource.h>
#include <compat/sys/clockctl.h>

/* Boot time in the pre-5.0 32-bit timeval layout. */
struct timeval50 boottime50;

/*
 * Syscall table entries installed by the module init/fini code
 * (outside this chunk).
 */
static const struct syscall_package kern_time_50_syscalls[] = {
	{ SYS_compat_50_clock_gettime, 0,
	    (sy_call_t *)compat_50_sys_clock_gettime },
	{ SYS_compat_50_clock_settime, 0,
	    (sy_call_t *)compat_50_sys_clock_settime },
	{ SYS_compat_50_clock_getres, 0,
	    (sy_call_t *)compat_50_sys_clock_getres},
	{ SYS_compat_50_nanosleep, 0, (sy_call_t *)compat_50_sys_nanosleep },
	{ SYS_compat_50_gettimeofday, 0,
	    (sy_call_t *)compat_50_sys_gettimeofday },
	{ SYS_compat_50_settimeofday, 0,
	    (sy_call_t *)compat_50_sys_settimeofday },
	{ SYS_compat_50_adjtime, 0, (sy_call_t *)compat_50_sys_adjtime },
	{ SYS_compat_50_setitimer, 0, (sy_call_t *)compat_50_sys_setitimer },
	{ SYS_compat_50_getitimer, 0, (sy_call_t *)compat_50_sys_getitimer },
	{ SYS_compat_50_aio_suspend, 0,
	    (sy_call_t *)compat_50_sys_aio_suspend },
	{ SYS_compat_50_mq_timedsend, 0,
	    (sy_call_t *)compat_50_sys_mq_timedsend },
	{ SYS_compat_50_mq_timedreceive, 0,
	    (sy_call_t *)compat_50_sys_mq_timedreceive },
	{ SYS_compat_50_getrusage, 0, (sy_call_t *)compat_50_sys_getrusage },
	{ SYS_compat_50_timer_settime, 0,
	    (sy_call_t *)compat_50_sys_timer_settime },
	{ SYS_compat_50_timer_gettime, 0,
	    (sy_call_t *)compat_50_sys_timer_gettime },
	{ SYS_compat_50___ntp_gettime30, 0,
	    (sy_call_t *)compat_50_sys___ntp_gettime30 },
	{ 0, 0, NULL }
};

/*
 * clock_gettime(2) with the old 32-bit timespec: fetch the native
 * timespec, narrow it, copy out.
 */
int
compat_50_sys_clock_gettime(struct lwp *l,
    const struct compat_50_sys_clock_gettime_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec50 *) tp;
	} */
	int error;
	struct timespec ats;
	struct timespec50 ats50;

	error = clock_gettime1(SCARG(uap, clock_id), &ats);
	if (error != 0)
		return error;

	timespec_to_timespec50(&ats, &ats50);

	return copyout(&ats50, SCARG(uap, tp), sizeof(ats50));
}

/*
 * clock_settime(2) with the old 32-bit timespec: copy in, widen,
 * delegate to clock_settime1() (which performs the permission check).
 */
/* ARGSUSED */
int
compat_50_sys_clock_settime(struct lwp *l,
    const struct compat_50_sys_clock_settime_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec50 *) tp;
	} */
	int error;
	struct timespec ats;
	struct timespec50 ats50;

	error = copyin(SCARG(uap, tp), &ats50, sizeof(ats50));
	if (error)
		return error;
	timespec50_to_timespec(&ats50, &ats);

	return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true);
}

/*
 * clock_getres(2) with the old 32-bit timespec.  A NULL tp is allowed
 * and simply skips the copyout.
 */
int
compat_50_sys_clock_getres(struct lwp *l,
    const struct compat_50_sys_clock_getres_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec50 *) tp;
	} */
	struct timespec50 ats50;
	struct timespec ats;
	int error;

	error = clock_getres1(SCARG(uap, clock_id), &ats);
	if (error != 0)
		return error;

	if (SCARG(uap, tp)) {
		timespec_to_timespec50(&ats, &ats50);
		error = copyout(&ats50, SCARG(uap, tp), sizeof(ats50));
	}

	return error;
}

/*
 * nanosleep(2) with old 32-bit timespecs.  On EINTR the remaining time
 * is still copied out (when rmtp was supplied); a copyout failure takes
 * precedence over the sleep's own error.
 */
/* ARGSUSED */
int
compat_50_sys_nanosleep(struct lwp *l,
    const struct compat_50_sys_nanosleep_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct timespec50 *) rqtp;
		syscallarg(struct timespec50 *) rmtp;
	} */
	struct timespec rmt, rqt;
	struct timespec50 rmt50, rqt50;
	int error, error1;

	error = copyin(SCARG(uap, rqtp), &rqt50, sizeof(rqt50));
	if (error)
		return error;
	timespec50_to_timespec(&rqt50, &rqt);

	error = nanosleep1(l, CLOCK_MONOTONIC, 0, &rqt,
	    SCARG(uap, rmtp) ? &rmt : NULL);
	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
		return error;

	timespec_to_timespec50(&rmt, &rmt50);
	error1 = copyout(&rmt50, SCARG(uap, rmtp), sizeof(*SCARG(uap, rmtp)));
	return error1 ? error1 : error;
}

/*
 * gettimeofday(2) with the old 32-bit timeval.  Both pointers are
 * optional; the timezone, if requested, is always zeroed.
 */
/* ARGSUSED */
int
compat_50_sys_gettimeofday(struct lwp *l,
    const struct compat_50_sys_gettimeofday_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct timeval50 *) tp;
		syscallarg(void *) tzp;		really "struct timezone *";
	} */
	struct timeval atv;
	struct timeval50 atv50;
	int error = 0;
	struct timezone tzfake;

	if (SCARG(uap, tp)) {
		microtime(&atv);
		timeval_to_timeval50(&atv, &atv50);
		error = copyout(&atv50, SCARG(uap, tp),
		    sizeof(*SCARG(uap, tp)));
		if (error)
			return error;
	}
	if (SCARG(uap, tzp)) {
		/*
		 * NetBSD has no kernel notion of time zone, so we just
		 * fake up a timezone struct and return it if demanded.
		 */
		memset(&tzfake, 0, sizeof(tzfake));
		tzfake.tz_minuteswest = 0;
		tzfake.tz_dsttime = 0;
		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
	}
	return error;
}

/*
 * settimeofday(2) with the old 32-bit timeval: copy in, widen, and
 * delegate to settimeofday1() (which performs the permission check).
 */
/* ARGSUSED */
int
compat_50_sys_settimeofday(struct lwp *l,
    const struct compat_50_sys_settimeofday_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct timeval50 *) tv;
		syscallarg(const void *) tzp; really "const struct timezone *";
	} */
	struct timeval50 atv50;
	struct timeval atv;
	int error = copyin(SCARG(uap, tv), &atv50, sizeof(atv50));
	if (error)
		return error;
	timeval50_to_timeval(&atv50, &atv);
	return settimeofday1(&atv, false, SCARG(uap, tzp), l, true);
}

/*
 * adjtime(2) with old 32-bit timevals.  Requires the ADJTIME system
 * privilege; both delta and olddelta are optional.
 */
/* ARGSUSED */
int
compat_50_sys_adjtime(struct lwp *l,
    const struct compat_50_sys_adjtime_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct timeval50 *) delta;
		syscallarg(struct timeval50 *) olddelta;
	} */
	int error;
	struct timeval50 delta50, olddelta50;
	struct timeval delta, olddelta;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
		return error;

	if (SCARG(uap, delta)) {
		error = copyin(SCARG(uap, delta), &delta50,
		    sizeof(*SCARG(uap, delta)));
		if (error)
			return (error);
		timeval50_to_timeval(&delta50, &delta);
	}
	adjtime1(SCARG(uap, delta) ? &delta : NULL,
	    SCARG(uap, olddelta) ? &olddelta : NULL, l->l_proc);
	if (SCARG(uap, olddelta)) {
		timeval_to_timeval50(&olddelta, &olddelta50);
		error = copyout(&olddelta50, SCARG(uap, olddelta),
		    sizeof(*SCARG(uap, olddelta)));
	}
	return error;
}

/* BSD routine to set/arm an interval timer.
*/ /* ARGSUSED */ int compat_50_sys_getitimer(struct lwp *l, const struct compat_50_sys_getitimer_args *uap, register_t *retval) { /* { syscallarg(int) which; syscallarg(struct itimerval50 *) itv; } */ struct proc *p = l->l_proc; struct itimerval aitv; struct itimerval50 aitv50; int error; error = dogetitimer(p, SCARG(uap, which), &aitv); if (error) return error; itimerval_to_itimerval50(&aitv, &aitv50); return copyout(&aitv50, SCARG(uap, itv), sizeof(*SCARG(uap, itv))); } int compat_50_sys_setitimer(struct lwp *l, const struct compat_50_sys_setitimer_args *uap, register_t *retval) { /* { syscallarg(int) which; syscallarg(const struct itimerval50 *) itv; syscallarg(struct itimerval50 *) oitv; } */ struct proc *p = l->l_proc; int which = SCARG(uap, which); struct compat_50_sys_getitimer_args getargs; const struct itimerval50 *itvp; struct itimerval50 aitv50; struct itimerval aitv; int error; itvp = SCARG(uap, itv); if (itvp && (error = copyin(itvp, &aitv50, sizeof(aitv50))) != 0) return (error); itimerval50_to_itimerval(&aitv50, &aitv); if (SCARG(uap, oitv) != NULL) { SCARG(&getargs, which) = which; SCARG(&getargs, itv) = SCARG(uap, oitv); if ((error = compat_50_sys_getitimer(l, &getargs, retval)) != 0) return (error); } if (itvp == 0) return (0); return dosetitimer(p, which, &aitv); } int compat_50_sys_aio_suspend(struct lwp *l, const struct compat_50_sys_aio_suspend_args *uap, register_t *retval) { /* { syscallarg(const struct aiocb *const[]) list; syscallarg(int) nent; syscallarg(const struct timespec50 *) timeout; } */ #ifdef AIO struct aiocb **list; struct timespec ts; struct timespec50 ts50; int error, nent; nent = SCARG(uap, nent); if (nent <= 0 || nent > aio_listio_max) return EAGAIN; if (SCARG(uap, timeout)) { /* Convert timespec to ticks */ error = copyin(SCARG(uap, timeout), &ts50, sizeof(*SCARG(uap, timeout))); if (error) return error; timespec50_to_timespec(&ts50, &ts); } list = kmem_alloc(nent * sizeof(*list), KM_SLEEP); error = copyin(SCARG(uap, 
list), list, nent * sizeof(*list)); if (error) goto out; error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); out: kmem_free(list, nent * sizeof(*list)); return error; #else return ENOSYS; #endif } int compat_50_sys_mq_timedsend(struct lwp *l, const struct compat_50_sys_mq_timedsend_args *uap, register_t *retval) { /* { syscallarg(mqd_t) mqdes; syscallarg(const char *) msg_ptr; syscallarg(size_t) msg_len; syscallarg(unsigned) msg_prio; syscallarg(const struct timespec50 *) abs_timeout; } */ #ifdef MQUEUE struct timespec50 ts50; struct timespec ts, *tsp; int error; /* Get and convert time value */ if (SCARG(uap, abs_timeout)) { error = copyin(SCARG(uap, abs_timeout), &ts50, sizeof(ts50)); if (error) return error; timespec50_to_timespec(&ts50, &ts); tsp = &ts; } else { tsp = NULL; } return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); #else return ENOSYS; #endif } int compat_50_sys_mq_timedreceive(struct lwp *l, const struct compat_50_sys_mq_timedreceive_args *uap, register_t *retval) { /* { syscallarg(mqd_t) mqdes; syscallarg(char *) msg_ptr; syscallarg(size_t) msg_len; syscallarg(unsigned *) msg_prio; syscallarg(const struct timespec50 *) abs_timeout; } */ #ifdef MQUEUE struct timespec ts, *tsp; struct timespec50 ts50; ssize_t mlen; int error; /* Get and convert time value */ if (SCARG(uap, abs_timeout)) { error = copyin(SCARG(uap, abs_timeout), &ts50, sizeof(ts50)); if (error) return error; timespec50_to_timespec(&ts50, &ts); tsp = &ts; } else { tsp = NULL; } error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); if (error == 0) *retval = mlen; return error; #else return ENOSYS; #endif } void rusage_to_rusage50(const struct rusage *ru, struct rusage50 *ru50) { memset(ru50, 0, sizeof(*ru50)); (void)memcpy(&ru50->ru_first, &ru->ru_first, (char *)&ru50->ru_last - (char *)&ru50->ru_first + sizeof(ru50->ru_last)); ru50->ru_maxrss = 
ru->ru_maxrss; timeval_to_timeval50(&ru->ru_utime, &ru50->ru_utime); timeval_to_timeval50(&ru->ru_stime, &ru50->ru_stime); } int compat_50_sys_getrusage(struct lwp *l, const struct compat_50_sys_getrusage_args *uap, register_t *retval) { /* { syscallarg(int) who; syscallarg(struct rusage50 *) rusage; } */ int error; struct rusage ru; struct rusage50 ru50; struct proc *p = l->l_proc; error = getrusage1(p, SCARG(uap, who), &ru); if (error != 0) return error; rusage_to_rusage50(&ru, &ru50); return copyout(&ru50, SCARG(uap, rusage), sizeof(ru50)); } /* Return the time remaining until a POSIX timer fires. */ int compat_50_sys_timer_gettime(struct lwp *l, const struct compat_50_sys_timer_gettime_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; syscallarg(struct itimerspec50 *) value; } */ struct itimerspec its; struct itimerspec50 its50; int error; if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc, &its)) != 0) return error; itimerspec_to_itimerspec50(&its, &its50); return copyout(&its50, SCARG(uap, value), sizeof(its50)); } /* Set and arm a POSIX realtime timer */ int compat_50_sys_timer_settime(struct lwp *l, const struct compat_50_sys_timer_settime_args *uap, register_t *retval) { /* { syscallarg(timer_t) timerid; syscallarg(int) flags; syscallarg(const struct itimerspec50 *) value; syscallarg(struct itimerspec50 *) ovalue; } */ int error; struct itimerspec value, ovalue, *ovp = NULL; struct itimerspec50 value50, ovalue50; if ((error = copyin(SCARG(uap, value), &value50, sizeof(value50))) != 0) return error; itimerspec50_to_itimerspec(&value50, &value); if (SCARG(uap, ovalue)) ovp = &ovalue; if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp, SCARG(uap, flags), l->l_proc)) != 0) return error; if (ovp) { itimerspec_to_itimerspec50(&ovalue, &ovalue50); return copyout(&ovalue50, SCARG(uap, ovalue), sizeof(ovalue50)); } return 0; } /* * ntp_gettime() - NTP user application interface */ int compat_50_sys___ntp_gettime30(struct lwp *l, 
const struct compat_50_sys___ntp_gettime30_args *uap, register_t *retval) { if (vec_ntp_gettime == NULL) return ENOSYS; /* No NTP available in kernel */ /* { syscallarg(struct ntptimeval *) ntvp; } */ struct ntptimeval ntv; struct ntptimeval50 ntv50; int error; if (SCARG(uap, ntvp)) { (*vec_ntp_gettime)(&ntv); memset(&ntv50, 0, sizeof(ntv50)); timespec_to_timespec50(&ntv.time, &ntv50.time); ntv50.maxerror = ntv.maxerror; ntv50.esterror = ntv.esterror; ntv50.tai = ntv.tai; ntv50.time_state = ntv.time_state; error = copyout(&ntv50, SCARG(uap, ntvp), sizeof(ntv50)); if (error) return error; } *retval = (*vec_ntp_timestatus)(); return 0; } SYSCTL_SETUP(compat_sysctl_time, "Old system boottime") { struct timeval tv; getmicroboottime(&tv); timeval_to_timeval50(&tv, &boottime50); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "oboottime", SYSCTL_DESCR("System boot time"), NULL, 0, &boottime50, sizeof(boottime50), CTL_KERN, KERN_OBOOTTIME, CTL_EOL); } int kern_time_50_init(void) { int error; error = syscall_establish(NULL, kern_time_50_syscalls); return error; } int kern_time_50_fini(void) { int error; error = syscall_disestablish(NULL, kern_time_50_syscalls); return error; } |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 | /* $NetBSD: lpt.c,v 1.82 2018/09/03 16:29:31 riastradh Exp $ */ /* * Copyright (c) 1993, 1994 Charles M. Hannum. * Copyright (c) 1990 William F. Jolitz, TeleMuse * All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Device Driver for AT style parallel printer port
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lpt.c,v 1.82 2018/09/03 16:29:31 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/syslog.h>
#include <sys/intr.h>
#include <sys/bus.h>

#include <dev/ic/lptreg.h>
#include <dev/ic/lptvar.h>

#include "ioconf.h"

#define	TIMEOUT		hz*16	/* wait up to 16 seconds for a ready */
#define	STEP		hz/4	/* poll interval while waiting */

#define	LPTPRI		(PZERO+8)	/* sleep priority for tsleep() */
#define	LPT_BSIZE	1024		/* size of the per-unit write buffer */

#define LPTDEBUG

#ifndef LPTDEBUG
#define LPRINTF(a)
#else
#define LPRINTF(a)	if (lptdebug) printf a
int lptdebug = 0;	/* set non-zero to enable LPRINTF tracing */
#endif

dev_type_open(lptopen);
dev_type_close(lptclose);
dev_type_write(lptwrite);
dev_type_ioctl(lptioctl);

/* Character-device switch: write-only device, no read/poll/mmap support. */
const struct cdevsw lpt_cdevsw = {
	.d_open = lptopen,
	.d_close = lptclose,
	.d_read = noread,
	.d_write = lptwrite,
	.d_ioctl = lptioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER
};

/* Unit number and open-time flags are packed into the minor number. */
#define	LPTUNIT(s)	(minor(s) & 0x1f)
#define	LPTFLAGS(s)	(minor(s) & 0xe0)

static void	lptsoftintr(void *);

/*
 * Common attach: reset the port control register, and set up the wakeup
 * callout and the soft interrupt used to kick the top half.  Called by
 * the bus front-ends after they map sc_iot/sc_ioh.
 */
void
lpt_attach_subr(struct lpt_softc *sc)
{
	bus_space_tag_t iot;
	bus_space_handle_t ioh;

	sc->sc_state = 0;

	iot = sc->sc_iot;
	ioh = sc->sc_ioh;
	bus_space_write_1(iot, ioh, lpt_control, LPC_NINIT);

	callout_init(&sc->sc_wakeup_ch, 0);
	sc->sc_sih = softint_establish(SOFTINT_SERIAL, lptsoftintr, sc);

	sc->sc_dev_ok = 1;
}

/*
 * Common detach: mark the device unusable and tear down the soft
 * interrupt and callout established at attach time.
 */
int
lpt_detach_subr(device_t self, int flags)
{
	struct lpt_softc *sc = device_private(self);

	sc->sc_dev_ok = 0;
	softint_disestablish(sc->sc_sih);
	callout_destroy(&sc->sc_wakeup_ch);
	return 0;
}

/*
 * Reset the printer, then wait until it's selected and not busy.
 */
int
lptopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	u_char flags = LPTFLAGS(dev);
	struct lpt_softc *sc;
	bus_space_tag_t iot;
	bus_space_handle_t ioh;
	u_char control;
	int error;
	int spin;

	sc = device_lookup_private(&lpt_cd, LPTUNIT(dev));
	if (!sc || !sc->sc_dev_ok)
		return ENXIO;

#if 0	/* XXX what to do? */
	if (sc->sc_irq == IRQUNK && (flags & LPT_NOINTR) == 0)
		return ENXIO;
#endif

#ifdef DIAGNOSTIC
	if (sc->sc_state)
		aprint_verbose_dev(sc->sc_dev, "stat=0x%x not zero\n",
		    sc->sc_state);
#endif

	/* exclusive open: only one opener at a time */
	if (sc->sc_state)
		return EBUSY;

	sc->sc_state = LPT_INIT;
	sc->sc_flags = flags;
	LPRINTF(("%s: open: flags=0x%x\n", device_xname(sc->sc_dev),
	    (unsigned)flags));
	iot = sc->sc_iot;
	ioh = sc->sc_ioh;

	if ((flags & LPT_NOPRIME) == 0) {
		/* assert INIT for 100 usec to start up printer */
		bus_space_write_1(iot, ioh, lpt_control, LPC_SELECT);
		delay(100);
	}

	control = LPC_SELECT | LPC_NINIT;
	bus_space_write_1(iot, ioh, lpt_control, control);

	/* wait till ready (printer running diagnostics) */
	for (spin = 0; NOT_READY_ERR(); spin += STEP) {
		if (spin >= TIMEOUT) {
			sc->sc_state = 0;
			return EBUSY;
		}

		/* wait 1/4 second, give up if we get a signal */
		error = tsleep((void *)sc, LPTPRI | PCATCH, "lptopen", STEP);
		if (error != EWOULDBLOCK) {
			sc->sc_state = 0;
			return error;
		}
	}

	if ((flags & LPT_NOINTR) == 0)
		control |= LPC_IENABLE;
	if (flags & LPT_AUTOLF)
		control |= LPC_AUTOLF;
	sc->sc_control = control;
	bus_space_write_1(iot, ioh, lpt_control, control);

	sc->sc_inbuf = malloc(LPT_BSIZE, M_DEVBUF, M_WAITOK);
	sc->sc_count = 0;
	sc->sc_state = LPT_OPEN;

	/* start the periodic poll that substitutes for missed interrupts */
	if ((sc->sc_flags & LPT_NOINTR) == 0)
		lptwakeup(sc);

	LPRINTF(("%s: opened\n", device_xname(sc->sc_dev)));
	return 0;
}

/*
 * Decode a not-ready status byte, logging each newly-raised condition
 * (offline, out of paper, error) once.  Returns the normalized status.
 */
int
lptnotready(u_char status, struct lpt_softc *sc)
{
	u_char new;

	status = (status ^ LPS_INVERT) & LPS_MASK;
	/* only bits that just came up, so each condition is logged once */
	new = status & ~sc->sc_laststatus;
	sc->sc_laststatus = status;

	if (sc->sc_state & LPT_OPEN) {
		if (new & LPS_SELECT)
			log(LOG_NOTICE,
			    "%s: offline\n", device_xname(sc->sc_dev));
		else if (new & LPS_NOPAPER)
			log(LOG_NOTICE,
			    "%s: out of paper\n", device_xname(sc->sc_dev));
		else if (new & LPS_NERR)
			log(LOG_NOTICE,
			    "%s: output error\n", device_xname(sc->sc_dev));
	}

	return status;
}

/*
 * Periodic callout: poll the port once at splvm and re-arm, so output
 * keeps draining even if a hardware interrupt is lost.
 */
void
lptwakeup(void *arg)
{
	struct lpt_softc *sc = arg;
	int s;

	s = splvm();
	lptintr(sc);
	splx(s);

	callout_reset(&sc->sc_wakeup_ch, STEP, lptwakeup, sc);
}

/*
 * Close the device, and free the local line buffer.
 */
int
lptclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct lpt_softc *sc =
	    device_lookup_private(&lpt_cd, LPTUNIT(dev));
	bus_space_tag_t iot = sc->sc_iot;
	bus_space_handle_t ioh = sc->sc_ioh;

	/* drain whatever is still buffered before shutting down */
	if (sc->sc_count)
		(void) lptpushbytes(sc);

	if ((sc->sc_flags & LPT_NOINTR) == 0)
		callout_stop(&sc->sc_wakeup_ch);

	bus_space_write_1(iot, ioh, lpt_control, LPC_NINIT);
	sc->sc_state = 0;
	/*
	 * NOTE(review): LPC_NINIT is written a second time here; looks
	 * redundant with the write above — confirm against lpt.c history
	 * before removing.
	 */
	bus_space_write_1(iot, ioh, lpt_control, LPC_NINIT);
	free(sc->sc_inbuf, M_DEVBUF);

	LPRINTF(("%s: closed\n", device_xname(sc->sc_dev)));
	return 0;
}

/*
 * Push sc_count bytes from sc_cp out the port.  In polled (LPT_NOINTR)
 * mode this busy-waits with an adaptive spin limit and exponential-
 * backoff sleeps; in interrupt mode it kicks lptintr() and sleeps until
 * the soft interrupt wakes us for more.
 */
int
lptpushbytes(struct lpt_softc *sc)
{
	bus_space_tag_t iot = sc->sc_iot;
	bus_space_handle_t ioh = sc->sc_ioh;
	int error;

	if (sc->sc_flags & LPT_NOINTR) {
		int spin, tic;
		u_char control = sc->sc_control;

		while (sc->sc_count > 0) {
			spin = 0;
			while (NOT_READY()) {
				if (++spin < sc->sc_spinmax)
					continue;
				tic = 0;
				/* adapt busy-wait algorithm */
				sc->sc_spinmax++;
				while (NOT_READY_ERR()) {
					/* exponential backoff */
					tic = tic + tic + 1;
					if (tic > TIMEOUT)
						tic = TIMEOUT;
					error = tsleep((void *)sc,
					    LPTPRI | PCATCH, "lptpsh", tic);
					if (error != EWOULDBLOCK)
						return error;
				}
				break;
			}

			/* strobe one data byte out the port */
			bus_space_write_1(iot, ioh, lpt_data, *sc->sc_cp++);
			DELAY(1);
			bus_space_write_1(iot, ioh, lpt_control,
			    control | LPC_STROBE);
			DELAY(1);
			sc->sc_count--;
			bus_space_write_1(iot, ioh, lpt_control, control);
			DELAY(1);

			/* adapt busy-wait algorithm */
			if (spin*2 + 16 < sc->sc_spinmax)
				sc->sc_spinmax--;
		}
	} else {
		int s;

		while (sc->sc_count > 0) {
			/* if the printer is ready for a char, give it one */
			if ((sc->sc_state & LPT_OBUSY) == 0) {
				LPRINTF(("%s: write %lu\n",
				    device_xname(sc->sc_dev),
				    (u_long)sc->sc_count));
				s = splvm();
				(void) lptintr(sc);
				splx(s);
			}
			error = tsleep((void *)sc, LPTPRI | PCATCH,
			    "lptwrite2", 0);
			if (error)
				return error;
		}
	}
	return 0;
}

/*
 * Copy a line from user space to a local buffer, then call putc to get the
 * chars moved to the output queue.
 */
int
lptwrite(dev_t dev, struct uio *uio, int flags)
{
	struct lpt_softc *sc =
	    device_lookup_private(&lpt_cd, LPTUNIT(dev));
	size_t n;
	int error = 0;

	while ((n = uimin(LPT_BSIZE, uio->uio_resid)) != 0) {
		uiomove(sc->sc_cp = sc->sc_inbuf, n, uio);
		sc->sc_count = n;
		error = lptpushbytes(sc);
		if (error) {
			/*
			 * Return accurate residual if interrupted or timed
			 * out.
			 */
			uio->uio_resid += sc->sc_count;
			sc->sc_count = 0;
			return error;
		}
	}
	return 0;
}

/*
 * Handle printer interrupts which occur when the printer is ready to accept
 * another char.  Also called from the lptwakeup() poll and from
 * lptpushbytes(); always at splvm or from hard interrupt context.
 */
int
lptintr(void *arg)
{
	struct lpt_softc *sc = arg;
	bus_space_tag_t iot = sc->sc_iot;
	bus_space_handle_t ioh = sc->sc_ioh;

#if 0
	if ((sc->sc_state & LPT_OPEN) == 0)
		return 0;
#endif

	/* is printer online and ready for output */
	if (NOT_READY() && NOT_READY_ERR())
		return 0;

	if (sc->sc_count) {
		u_char control = sc->sc_control;
		/* send char */
		bus_space_write_1(iot, ioh, lpt_data, *sc->sc_cp++);
		DELAY(1);
		bus_space_write_1(iot, ioh, lpt_control,
		    control | LPC_STROBE);
		DELAY(1);
		sc->sc_count--;
		bus_space_write_1(iot, ioh, lpt_control, control);
		DELAY(1);
		sc->sc_state |= LPT_OBUSY;
	} else
		sc->sc_state &= ~LPT_OBUSY;

	if (sc->sc_count == 0) {
		/* none, wake up the top half to get more */
		softint_schedule(sc->sc_sih);
	}

	return 1;
}

/* Soft interrupt: wake the top half sleeping in lptpushbytes(). */
static void
lptsoftintr(void *cookie)
{

	wakeup(cookie);
}

/* No ioctls are supported on this device. */
int
lptioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{

	return ENODEV;
}
/* extraction artifact below — stray delimiter from the original dump */
|
| 169 169 29 29 109 644 644 643 643 731 755 753 718 754 754 752 721 720 721 717 718 721 685 685 685 278 244 279 240 279 241 225 11 279 240 240 151 62 1632 1629 628 8 8 8 625 340 337 4 336 768 767 769 48 50 1587 1557 72 72 333 337 686 333 583 1595 1590 1299 964 963 965 23 1 74 107 1 517 519 519 74 518 518 54 54 54 54 97 484 340 340 339 72 72 340 332 10 329 201 339 339 340 339 340 14 11 338 121 120 121 119 97 23 118 231 336 7 7 7 7 337 333 333 193 90 90 332 246 333 276 333 333 110 110 333 120 120 117 117 330 271 171 110 109 2 1 53 52 108 209 185 185 22 21 21 11 21 19 20 11 11 11 11 11 11 10 21 331 205 211 72 78 271 6 6 264 271 80 9 321 113 330 116 330 1575 1579 1455 1439 1439 1550 343 1700 1691 1123 1578 1438 1440 1697 145 344 222 77 77 77 78 78 77 68 78 160 222 222 230 212 17 231 230 169 78 230 230 72 229 169 222 230 214 16 7 7 5 2 1 222 165 165 164 165 165 6 6 6 6 159 6 159 150 150 159 164 2 162 6 163 164 4 2 2 4 2 161 162 5 157 162 162 12 152 152 2 150 152 152 2 150 151 152 150 1 149 152 2 150 152 152 148 7 7 7 226 226 229 210 206 15 206 3 208 209 209 207 206 207 190 206 196 68 67 137 14 130 116 116 206 206 204 205 204 202 202 202 202 202 23 23 202 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 6 5 5 5 1 5 5 5 5 5 4 1 4 4 4 4 4 4 4 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 1 5 1 20 20 21 19 2 21 20 19 19 4 3 19 20 19 19 19 19 18 16 9 3 2 18 2 2 1 18 19 20 11 11 9 8 7 1 9 9 9 9 10 9 7 1 10 10 9 9 9 9 3 1 3 3 3 3 3 3 1 3 3 2 80 80 70 80 80 80 6 60 13 13 13 10 6 9 9 8 8 8 8 6 70 70 70 70 63 4 61 51 70 70 69 14 4 4 67 67 67 67 48 2 2 2 2 1 1 2 8 48 1 14 14 5 5 5 3 9 3 9 7 7 7 7 7 6 3 6 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 9 19 18 18 18 17 9 16 16 16 7 7 7 7 7 7 7 4 3 3 6 3 2 2 5 16 8 13 71 4 10 15 14 10 10 71 71 71 71 69 2 69 71 71 71 7 71 7 7 69 69 69 69 69 69 65 65 2 2 2 71 71 71 71 65 71 65 65 2 65 421 422 422 421 422 422 422 392 70 69 65 65 65 65 65 65 65 65 422 422 161 57 55 45 1 45 45 44 1 1 1 44 44 44 44 71 18 18 12 18 17 17 62 200 50 14 4 215 85 213 195 | 1 2 3 4 5 
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 
530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 
1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 
1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 
2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 
2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 
3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 
3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 
3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 
4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 
4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 
5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 
| /* $NetBSD: uvm_map.c,v 1.402 2022/06/08 16:55:00 macallan Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
* All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * uvm_map.c: uvm map operations */ #include <sys/cdefs.h> __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.402 2022/06/08 16:55:00 macallan Exp $"); #include "opt_ddb.h" #include "opt_pax.h" #include "opt_uvmhist.h" #include "opt_uvm.h" #include "opt_sysv.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/mman.h> #include <sys/proc.h> #include <sys/pool.h> #include <sys/kernel.h> #include <sys/mount.h> #include <sys/pax.h> #include <sys/vnode.h> #include <sys/filedesc.h> #include <sys/lockdebug.h> #include <sys/atomic.h> #include <sys/sysctl.h> #ifndef __USER_VA0_IS_SAFE #include <sys/kauth.h> #include "opt_user_va0_disable_default.h" #endif #include <sys/shm.h> #include <uvm/uvm.h> #include <uvm/uvm_readahead.h> #if defined(DDB) || defined(DEBUGPRINT) #include <uvm/uvm_ddb.h> #endif #ifdef UVMHIST #ifndef UVMHIST_MAPHIST_SIZE #define UVMHIST_MAPHIST_SIZE 100 #endif static struct kern_history_ent maphistbuf[UVMHIST_MAPHIST_SIZE]; UVMHIST_DEFINE(maphist) = UVMHIST_INITIALIZER(maphist, maphistbuf); #endif #if !defined(UVMMAP_COUNTERS) #define UVMMAP_EVCNT_DEFINE(name) 
/* nothing */
#define UVMMAP_EVCNT_INCR(ev)		/* nothing */
#define UVMMAP_EVCNT_DECR(ev)		/* nothing */
#else /* defined(UVMMAP_COUNTERS) */
#include <sys/evcnt.h>
/*
 * Define, attach, and bump/drop a miscellaneous event counter named
 * "uvmmap <name>"; compiled out entirely when UVMMAP_COUNTERS is off.
 */
#define UVMMAP_EVCNT_DEFINE(name) \
struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "uvmmap", #name); \
EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
#define UVMMAP_EVCNT_INCR(ev)		uvmmap_evcnt_##ev.ev_count++
#define UVMMAP_EVCNT_DECR(ev)		uvmmap_evcnt_##ev.ev_count--
#endif /* defined(UVMMAP_COUNTERS) */

/* Entry-merge statistics: u* = user map, k* = kernel map merges. */
UVMMAP_EVCNT_DEFINE(ubackmerge)
UVMMAP_EVCNT_DEFINE(uforwmerge)
UVMMAP_EVCNT_DEFINE(ubimerge)
UVMMAP_EVCNT_DEFINE(unomerge)
UVMMAP_EVCNT_DEFINE(kbackmerge)
UVMMAP_EVCNT_DEFINE(kforwmerge)
UVMMAP_EVCNT_DEFINE(kbimerge)
UVMMAP_EVCNT_DEFINE(knomerge)
/* Map-lookup statistics (hint hit vs. tree walk). */
UVMMAP_EVCNT_DEFINE(map_call)
UVMMAP_EVCNT_DEFINE(mlk_call)
UVMMAP_EVCNT_DEFINE(mlk_hint)
UVMMAP_EVCNT_DEFINE(mlk_tree)
UVMMAP_EVCNT_DEFINE(mlk_treeloop)

/* wait-channel message for threads sleeping on a busy map */
const char vmmapbsy[] = "vmmapbsy";

/*
 * cache for vmspace structures.
 */
static struct pool_cache uvm_vmspace_cache;

/*
 * cache for dynamically-allocated map entries.
 */
static struct pool_cache uvm_map_entry_cache;

#ifdef PMAP_GROWKERNEL
/*
 * This global represents the end of the kernel virtual address
 * space.  If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;
#endif

#ifndef __USER_VA0_IS_SAFE
#ifndef __USER_VA0_DISABLE_DEFAULT
#define __USER_VA0_DISABLE_DEFAULT 1
#endif
#ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */
#undef __USER_VA0_DISABLE_DEFAULT
#define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT
#endif
/* non-zero forbids user mappings at VA 0 (NULL-deref exploit mitigation) */
int user_va0_disable = __USER_VA0_DISABLE_DEFAULT;
#endif

/*
 * macros
 */

/*
 * uvm_map_align_va: round down or up virtual address
 *
 * => align must be a power of two; 0 or 1 means "no alignment", in
 *    which case *vap is left untouched.
 * => topdown maps round down (toward lower addresses), bottom-up
 *    maps round up.
 */
static __inline void
uvm_map_align_va(vaddr_t *vap, vsize_t align, int topdown)
{
	KASSERT(powerof2(align));

	if (align != 0 && (*vap & (align - 1)) != 0) {
		if (topdown)
			*vap = rounddown2(*vap, align);
		else
			*vap = roundup2(*vap, align);
	}
}

/*
 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
 */
extern struct vm_map *pager_map;

#define	UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
    prot, maxprot, inh, adv, wire) \
	((ent)->etype == (type) && \
	(((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \
	(ent)->object.uvm_obj == (uobj) && \
	(ent)->protection == (prot) && \
	(ent)->max_protection == (maxprot) && \
	(ent)->inheritance == (inh) && \
	(ent)->advice == (adv) && \
	(ent)->wired_count == (wire))

/*
 * uvm_map_entry_link: insert entry into a map
 *
 * => map must be locked
 */
#define uvm_map_entry_link(map, after_where, entry) do { \
	uvm_mapent_check(entry); \
	(map)->nentries++; \
	(entry)->prev = (after_where); \
	(entry)->next = (after_where)->next; \
	(entry)->prev->next = (entry); \
	(entry)->next->prev = (entry); \
	uvm_rb_insert((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * uvm_map_entry_unlink: remove entry from a map
 *
 * => map must be locked
 */
#define uvm_map_entry_unlink(map, entry) do { \
	KASSERT((entry) != (map)->first_free); \
	KASSERT((entry) != (map)->hint); \
	uvm_mapent_check(entry); \
	(map)->nentries--; \
	(entry)->next->prev = (entry)->prev; \
	(entry)->prev->next = (entry)->next; \
	uvm_rb_remove((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * SAVE_HINT: saves the specified entry as the hint for
 * future lookups.
 *
 * => map need not be locked.
 */
#define SAVE_HINT(map, check, value) do { \
	if ((map)->hint == (check)) \
		(map)->hint = (value); \
} while (/*CONSTCOND*/ 0)

/*
 * clear_hints: ensure that hints don't point to the entry.
 *
 * => map must be write-locked.
 */
static void
clear_hints(struct vm_map *map, struct vm_map_entry *ent)
{

	/* Redirect both the lookup hint and first_free to our predecessor. */
	SAVE_HINT(map, ent, ent->prev);
	if (map->first_free == ent) {
		map->first_free = ent->prev;
	}
}

/*
 * VM_MAP_RANGE_CHECK: check and correct range
 *
 * => map must at least be read locked
 */
#define VM_MAP_RANGE_CHECK(map, start, end) do { \
	if (start < vm_map_min(map)) \
		start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
		end = vm_map_max(map); \
	if (start > end) \
		start = end; \
} while (/*CONSTCOND*/ 0)

/*
 * local prototypes
 */

static struct vm_map_entry *
		uvm_mapent_alloc(struct vm_map *, int);
static void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
static void	uvm_mapent_free(struct vm_map_entry *);
#if defined(DEBUG)
static void	_uvm_mapent_check(const struct vm_map_entry *, int);
#define	uvm_mapent_check(map)	_uvm_mapent_check(map, __LINE__)
#else /* defined(DEBUG) */
#define	uvm_mapent_check(e)	/* nothing */
#endif /* defined(DEBUG) */

static void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
static void	uvm_map_reference_amap(struct vm_map_entry *, int);
static int	uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
		    int, struct vm_map_entry *);
static void	uvm_map_unreference_amap(struct vm_map_entry *, int);

int _uvm_map_sanity(struct vm_map *);
int _uvm_tree_sanity(struct vm_map *);
static vsize_t uvm_rb_maxgap(const struct vm_map_entry *);

/* Typed accessors into the embedded rb_tree node of a map entry. */
#define	ROOT_ENTRY(map)		((struct vm_map_entry *)(map)->rb_tree.rbt_root)
#define	LEFT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_left)
#define	RIGHT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_right)
#define	PARENT_ENTRY(map, entry) \
	(ROOT_ENTRY(map) == (entry) \
	    ?
 NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))

/*
 * These get filled in if/when SYSVSHM shared memory code is loaded
 *
 * We do this with function pointers rather the #ifdef SYSVSHM so the
 * SYSVSHM code can be loaded and unloaded
 */
void (*uvm_shmexit)(struct vmspace *) = NULL;
void (*uvm_shmfork)(struct vmspace *, struct vmspace *) = NULL;

/*
 * rb_tree comparator: order two map entries by address.  The KASSERTs
 * document the invariant this relies on: entries in the tree never
 * overlap, so comparing the start addresses is sufficient.
 */
static int
uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey)
{
	const struct vm_map_entry *eparent = nparent;
	const struct vm_map_entry *ekey = nkey;

	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);

	if (eparent->start < ekey->start)
		return -1;
	if (eparent->end >= ekey->start)
		return 1;
	return 0;
}

/*
 * rb_tree comparator: compare a map entry against a virtual address,
 * for address lookups via rb_tree_find_node().
 */
static int
uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey)
{
	const struct vm_map_entry *eparent = nparent;
	const vaddr_t va = *(const vaddr_t *) vkey;

	if (eparent->start < va)
		return -1;
	if (eparent->end >= va)
		return 1;
	return 0;
}

static const rb_tree_ops_t uvm_map_tree_ops = {
	.rbto_compare_nodes = uvm_map_compare_nodes,
	.rbto_compare_key = uvm_map_compare_key,
	.rbto_node_offset = offsetof(struct vm_map_entry, rb_node),
	.rbto_context = NULL
};

/*
 * uvm_rb_gap: return the gap size between our entry and next entry.
 */
static inline vsize_t
uvm_rb_gap(const struct vm_map_entry *entry)
{

	KASSERT(entry->next != NULL);
	return entry->next->start - entry->end;
}

/*
 * uvm_rb_maxgap: largest gap of this entry or any entry below it in
 * the tree, computed from the children's cached maxgap values.
 */
static vsize_t
uvm_rb_maxgap(const struct vm_map_entry *entry)
{
	struct vm_map_entry *child;
	vsize_t maxgap = entry->gap;

	/*
	 * We need maxgap to be the largest gap of us or any of our
	 * descendents.  Since each of our children's maxgap is the
	 * cached value of their largest gap of themselves or their
	 * descendents, we can just use that value and avoid recursing
	 * down the tree to calculate it.
	 */
	if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;
	if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;

	return maxgap;
}

/*
 * uvm_rb_fixup: recompute the cached maxgap of the given entry and
 * propagate the change up to the root, refreshing stale siblings on
 * the way (they may be out-of-date after rotations).
 *
 * => entry->gap must already be up to date (asserted).
 */
static void
uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *parent;

	KASSERT(entry->gap == uvm_rb_gap(entry));
	entry->maxgap = uvm_rb_maxgap(entry);

	while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
		struct vm_map_entry *brother;
		vsize_t maxgap = parent->gap;
		unsigned int which;

		KDASSERT(parent->gap == uvm_rb_gap(parent));
		if (maxgap < entry->maxgap)
			maxgap = entry->maxgap;
		/*
		 * Since we work towards the root, we know entry's maxgap
		 * value is OK, but its brothers may now be out-of-date due
		 * to rebalancing.  So refresh it.
		 */
		which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER;
		brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which];
		if (brother != NULL) {
			KDASSERT(brother->gap == uvm_rb_gap(brother));
			brother->maxgap = uvm_rb_maxgap(brother);
			if (maxgap < brother->maxgap)
				maxgap = brother->maxgap;
		}

		parent->maxgap = maxgap;
		entry = parent;
	}
}

/*
 * uvm_rb_insert: insert an entry into the map's rb tree, updating the
 * gap caches of the entry and its predecessor.
 */
static void
uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *ret __diagused;

	entry->gap = entry->maxgap = uvm_rb_gap(entry);
	if (entry->prev != &map->header)
		entry->prev->gap = uvm_rb_gap(entry->prev);

	ret = rb_tree_insert_node(&map->rb_tree, entry);
	KASSERTMSG(ret == entry,
	    "uvm_rb_insert: map %p: duplicate entry %p", map, ret);

	/*
	 * If the previous entry is not our immediate left child, then it's an
	 * ancestor and will be fixed up on the way to the root.  We don't
	 * have to check entry->prev against &map->header since &map->header
	 * will never be in the tree.
	 */
	uvm_rb_fixup(map, LEFT_ENTRY(entry) == entry->prev ?
	    entry->prev : entry);
}

/*
 * uvm_rb_remove: remove an entry from the map's rb tree and repair the
 * gap caches of its neighbors and their (possibly changed) ancestors.
 */
static void
uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *prev_parent = NULL, *next_parent = NULL;

	/*
	 * If we are removing an interior node, then an adjacent node will
	 * be used to replace its position in the tree.  Therefore we will
	 * need to fixup the tree starting at the parent of the replacement
	 * node.  So record their parents for later use.
	 */
	if (entry->prev != &map->header)
		prev_parent = PARENT_ENTRY(map, entry->prev);
	if (entry->next != &map->header)
		next_parent = PARENT_ENTRY(map, entry->next);

	rb_tree_remove_node(&map->rb_tree, entry);

	/*
	 * If the previous node has a new parent, fixup the tree starting
	 * at the previous node's old parent.
	 */
	if (entry->prev != &map->header) {
		/*
		 * Update the previous entry's gap due to our absence.
		 */
		entry->prev->gap = uvm_rb_gap(entry->prev);
		uvm_rb_fixup(map, entry->prev);
		if (prev_parent != NULL
		    && prev_parent != entry
		    && prev_parent != PARENT_ENTRY(map, entry->prev))
			uvm_rb_fixup(map, prev_parent);
	}

	/*
	 * If the next node has a new parent, fixup the tree starting
	 * at the next node's old parent.
	 */
	if (entry->next != &map->header) {
		uvm_rb_fixup(map, entry->next);
		if (next_parent != NULL
		    && next_parent != entry
		    && next_parent != PARENT_ENTRY(map, entry->next))
			uvm_rb_fixup(map, next_parent);
	}
}

#if defined(DEBUG)
/* Runtime-tunable switches for the consistency checks below. */
int uvm_debug_check_map = 0;
int uvm_debug_check_rbtree = 0;
#define uvm_map_check(map, name) \
	_uvm_map_check((map), (name), __FILE__, __LINE__)
/*
 * _uvm_map_check: panic if either enabled sanity check reports a
 * corrupted map, identifying the call site that noticed it.
 */
static void
_uvm_map_check(struct vm_map *map, const char *name,
    const char *file, int line)
{

	if ((uvm_debug_check_map && _uvm_map_sanity(map)) ||
	    (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) {
		panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)",
		    name, map, file, line);
	}
}
#else /* defined(DEBUG) */
#define uvm_map_check(map, name)	/* nothing */
#endif /* defined(DEBUG) */

#if defined(DEBUG) || defined(DDB)
/*
 * _uvm_map_sanity: walk the linked list of entries and verify that the
 * first_free and hint pointers reference entries that are actually in
 * the map.  Returns 0 if consistent, -1 (after a diagnostic printf)
 * otherwise.
 */
int
_uvm_map_sanity(struct vm_map *map)
{
	bool first_free_found = false;
	bool hint_found = false;
	const struct vm_map_entry *e;
	struct vm_map_entry *hint = map->hint;

	e = &map->header;
	for (;;) {
		if (map->first_free == e) {
			first_free_found = true;
		} else if (!first_free_found && e->next->start > e->end) {
			/* a free gap exists before the recorded first_free */
			printf("first_free %p should be %p\n",
			    map->first_free, e);
			return -1;
		}
		if (hint == e) {
			hint_found = true;
		}

		e = e->next;
		if (e == &map->header) {
			break;
		}
	}
	if (!first_free_found) {
		printf("stale first_free\n");
		return -1;
	}
	if (!hint_found) {
		printf("stale hint\n");
		return -1;
	}
	return 0;
}

/*
 * _uvm_tree_sanity: verify that the rb tree agrees with the linked
 * list and that all cached gap/maxgap values are correct.  Returns 0
 * if consistent, -1 (after a diagnostic printf) otherwise.
 */
int
_uvm_tree_sanity(struct vm_map *map)
{
	struct vm_map_entry *tmp, *trtmp;
	int n = 0, i = 1;

	for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
		if (tmp->gap != uvm_rb_gap(tmp)) {
			printf("%d/%d gap %#lx != %#lx %s\n",
			    n + 1, map->nentries,
			    (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp),
			    tmp->next == &map->header ? "(last)" : "");
			goto error;
		}

		/*
		 * If any entries are out of order, tmp->gap will be unsigned
		 * and will likely exceed the size of the map.
*/ if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) { printf("too large gap %zu\n", (size_t)tmp->gap); goto error; } n++; } if (n != map->nentries) { printf("nentries: %d vs %d\n", n, map->nentries); goto error; } trtmp = NULL; for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { if (tmp->maxgap != uvm_rb_maxgap(tmp)) { printf("maxgap %#lx != %#lx\n", (ulong)tmp->maxgap, (ulong)uvm_rb_maxgap(tmp)); goto error; } if (trtmp != NULL && trtmp->start >= tmp->start) { printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n", trtmp->start, tmp->start); goto error; } trtmp = tmp; } for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next, i++) { trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT); if (trtmp == NULL) trtmp = &map->header; if (tmp->prev != trtmp) { printf("lookup: %d: %p->prev=%p: %p\n", i, tmp, tmp->prev, trtmp); goto error; } trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT); if (trtmp == NULL) trtmp = &map->header; if (tmp->next != trtmp) { printf("lookup: %d: %p->next=%p: %p\n", i, tmp, tmp->next, trtmp); goto error; } trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start); if (trtmp != tmp) { printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, PARENT_ENTRY(map, tmp)); goto error; } } return (0); error: return (-1); } #endif /* defined(DEBUG) || defined(DDB) */ /* * vm_map_lock: acquire an exclusive (write) lock on a map. * * => The locking protocol provides for guaranteed upgrade from shared -> * exclusive by whichever thread currently has the map marked busy. * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among * other problems, it defeats any fairness guarantees provided by RW * locks. 
 */

void
vm_map_lock(struct vm_map *map)
{

	for (;;) {
		rw_enter(&map->lock, RW_WRITER);
		if (map->busy == NULL || map->busy == curlwp) {
			break;
		}
		/*
		 * Another LWP has the map marked busy: drop the write
		 * lock and sleep on map->cv (under misc_lock) until it
		 * is unbusied, then retry from the top.
		 */
		mutex_enter(&map->misc_lock);
		rw_exit(&map->lock);
		if (map->busy != NULL) {
			cv_wait(&map->cv, &map->misc_lock);
		}
		mutex_exit(&map->misc_lock);
	}
	/* Bump the map's version stamp on each exclusive acquisition. */
	map->timestamp++;
}

/*
 * vm_map_lock_try: try to lock a map, failing if it is already locked.
 */

bool
vm_map_lock_try(struct vm_map *map)
{

	if (!rw_tryenter(&map->lock, RW_WRITER)) {
		return false;
	}
	/* A busy map counts as locked even if we won the rw lock. */
	if (map->busy != NULL) {
		rw_exit(&map->lock);
		return false;
	}
	map->timestamp++;
	return true;
}

/*
 * vm_map_unlock: release an exclusive lock on a map.
 */

void
vm_map_unlock(struct vm_map *map)
{

	KASSERT(rw_write_held(&map->lock));
	KASSERT(map->busy == NULL || map->busy == curlwp);
	rw_exit(&map->lock);
}

/*
 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that
 * want an exclusive lock.
 */

void
vm_map_unbusy(struct vm_map *map)
{

	KASSERT(map->busy == curlwp);

	/*
	 * Safe to clear 'busy' and 'waiters' with only a read lock held:
	 *
	 * o they can only be set with a write lock held
	 * o writers are blocked out with a read or write hold
	 * o at any time, only one thread owns the set of values
	 */
	mutex_enter(&map->misc_lock);
	map->busy = NULL;
	cv_broadcast(&map->cv);
	mutex_exit(&map->misc_lock);
}

/*
 * vm_map_lock_read: acquire a shared (read) lock on a map.
 */

void
vm_map_lock_read(struct vm_map *map)
{

	rw_enter(&map->lock, RW_READER);
}

/*
 * vm_map_unlock_read: release a shared lock on a map.
 */

void
vm_map_unlock_read(struct vm_map *map)
{

	rw_exit(&map->lock);
}

/*
 * vm_map_busy: mark a map as busy.
 *
 * => the caller must hold the map write locked
 */

void
vm_map_busy(struct vm_map *map)
{

	KASSERT(rw_write_held(&map->lock));
	KASSERT(map->busy == NULL);

	map->busy = curlwp;
}

/*
 * vm_map_locked_p: return true if the map is write locked.
 *
 * => only for debug purposes like KASSERTs.
 * => should not be used to verify that a map is not locked.
 */

bool
vm_map_locked_p(struct vm_map *map)
{

	return rw_write_held(&map->lock);
}

/*
 * uvm_mapent_alloc: allocate a map entry
 *
 * => returns NULL only when UVM_FLAG_NOWAIT is set and the pool is empty.
 */

static struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map, int flags)
{
	struct vm_map_entry *me;
	int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	me = pool_cache_get(&uvm_map_entry_cache, pflags);
	if (__predict_false(me == NULL)) {
		return NULL;
	}
	me->flags = 0;

	UVMHIST_LOG(maphist, "<- new entry=%#jx [kentry=%jd]", (uintptr_t)me,
	    (map == kernel_map), 0, 0);
	return me;
}

/*
 * uvm_mapent_free: free map entry
 */

static void
uvm_mapent_free(struct vm_map_entry *me)
{
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"<- freeing map entry=%#jx [flags=%#jx]",
	    (uintptr_t)me, me->flags, 0, 0);

	pool_cache_put(&uvm_map_entry_cache, me);
}

/*
 * uvm_mapent_copy: copy a map entry, preserving flags
 *
 * => note the destination's flags are reset to 0, not copied.
 */

static inline void
uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
{

	memcpy(dst, src, sizeof(*dst));
	dst->flags = 0;
}

#if defined(DEBUG)
/*
 * _uvm_mapent_check: sanity-check one map entry; panics (reporting the
 * call-site line) on an empty/inverted range or an object/submap pointer
 * inconsistent with the entry's etype.
 */
static void
_uvm_mapent_check(const struct vm_map_entry *entry, int line)
{

	if (entry->start >= entry->end) {
		goto bad;
	}
	if (UVM_ET_ISOBJ(entry)) {
		if (entry->object.uvm_obj == NULL) {
			goto bad;
		}
	} else if (UVM_ET_ISSUBMAP(entry)) {
		if (entry->object.sub_map == NULL) {
			goto bad;
		}
	} else {
		if (entry->object.uvm_obj != NULL ||
		    entry->object.sub_map != NULL) {
			goto bad;
		}
	}
	if (!UVM_ET_ISOBJ(entry)) {
		if (entry->offset != 0) {
			goto bad;
		}
	}

	return;

bad:
	panic("%s: bad entry %p, line %d", __func__, entry, line);
}
#endif /* defined(DEBUG) */

/*
 * uvm_map_entry_unwire: unwire a map entry
 *
 * => map should be locked by caller
 */

static inline void
uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
{

	entry->wired_count = 0;
	uvm_fault_unwire_locked(map, entry->start, entry->end);
}


/*
 * wrapper for calling amap_ref()
 */
static inline void
uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
{

	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}

/*
 * wrapper for calling amap_unref()
 */
static inline void
uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
{

	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}

/*
 * uvm_map_init: init mapping system at boot time.
 */

void
uvm_map_init(void)
{

	/*
	 * first, init logging system.
	 */

	UVMHIST_FUNC(__func__);
	UVMHIST_LINK_STATIC(maphist);
	UVMHIST_LINK_STATIC(pdhist);
	UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);

	/*
	 * initialize the global lock for kernel map entry.
	 */

	mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM);
}

/*
 * uvm_map_init_caches: init mapping system caches.
 */
void
uvm_map_init_caches(void)
{
	/*
	 * initialize caches.
	 */

	pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry),
	    coherency_unit, 0, PR_LARGECACHE, "vmmpepl", NULL, IPL_NONE, NULL,
	    NULL, NULL);
	pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace),
	    0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * clippers
 */

/*
 * uvm_mapent_splitadj: adjust map entries for splitting, after
 * uvm_mapent_copy.
 *
 * => entry1 keeps [start, splitat); entry2 (already a copy of entry1)
 *    becomes [splitat, end), with its amap/object references fixed up.
 */
static void
uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2,
    vaddr_t splitat)
{
	vaddr_t adj;

	KASSERT(entry1->start < splitat);
	KASSERT(splitat < entry1->end);

	adj = splitat - entry1->start;
	entry1->end = entry2->start = splitat;

	if (entry1->aref.ar_amap) {
		amap_splitref(&entry1->aref, &entry2->aref, adj);
	}
	if (UVM_ET_ISSUBMAP(entry1)) {
		/* ... unlikely to happen, but play it safe */
		 uvm_map_reference(entry1->object.sub_map);
	} else if (UVM_ET_ISOBJ(entry1)) {
		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
		entry2->offset += adj;
		/* the copy gained a second reference to the object */
		if (entry1->object.uvm_obj->pgops &&
		    entry1->object.uvm_obj->pgops->pgo_reference)
			entry1->object.uvm_obj->pgops->pgo_reference(
			    entry1->object.uvm_obj);
	}
}

/*
 * uvm_map_clip_start: ensure that the entry begins at or after
 *	the starting address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_START macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
    vaddr_t start)
{
	struct vm_map_entry *new_entry;

	/* uvm_map_simplify_entry(map, entry); */ /* XXX */

	uvm_map_check(map, "clip_start entry");
	uvm_mapent_check(entry);

	/*
	 * Split off the front portion.  note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	 [3] <uobj,uoffset>		== normal mapping
 *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.   note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
 *
 * => if `align' is non-zero, we align the virtual address to the specified
 *	alignment.
 *	this is provided as a mechanism for large pages.
 *
 * => XXXCDC: need way to map in external amap?
 */

int
uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
{
	struct uvm_map_args args;
	struct vm_map_entry *new_entry;
	int error;

	KASSERT((size & PAGE_MASK) == 0);
	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);

	/*
	 * for pager_map, allocate the new entry first to avoid sleeping
	 * for memory while we have the map locked.
	 */

	new_entry = NULL;
	if (map == pager_map) {
		new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
		if (__predict_false(new_entry == NULL))
			return ENOMEM;
	}
	if (map == pager_map)
		/* pager_map entries are never merged */
		flags |= UVM_FLAG_NOMERGE;

	error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
	    flags, &args);
	if (!error) {
		/* uvm_map_enter() consumes new_entry and unlocks the map */
		error = uvm_map_enter(map, &args, new_entry);
		*startp = args.uma_start;
	} else if (new_entry) {
		uvm_mapent_free(new_entry);
	}

#if defined(DEBUG)
	if (!error && VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
		uvm_km_check_empty(map, *startp, *startp + size);
	}
#endif /* defined(DEBUG) */

	return error;
}

/*
 * uvm_map_prepare:
 *
 * called with map unlocked.
 * on success, returns the map locked.
 *
 * => validates flags/protection, finds (or clips for) the insertion
 *    point, and fills *args for a subsequent uvm_map_enter().
 */

int
uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
    struct uvm_map_args *args)
{
	struct vm_map_entry *prev_entry;
	vm_prot_t prot = UVM_PROTECTION(flags);
	vm_prot_t maxprot = UVM_MAXPROTECTION(flags);

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "(map=%#jx, start=%#jx, size=%jx, flags=%#jx)",
	    (uintptr_t)map, start, size, flags);
	UVMHIST_LOG(maphist, "  uobj/offset %#jx/%jd", (uintptr_t)uobj,
	    uoffset,0,0);

	/*
	 * detect a popular device driver bug.
	 */

	KASSERT(doing_shutdown || curlwp != NULL);

	/*
	 * zero-sized mapping doesn't make any sense.
	 */
	KASSERT(size > 0);

	KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);

	uvm_map_check(map, "map entry");

	/*
	 * check sanity of protection code
	 */

	if ((prot & maxprot) != prot) {
		UVMHIST_LOG(maphist, "<- prot. failure:  prot=%#jx, max=%#jx",
		    prot, maxprot,0,0);
		return EACCES;
	}

	/*
	 * figure out where to put new VM range
	 */
retry:
	if (vm_map_lock_try(map) == false) {
		if ((flags & UVM_FLAG_TRYLOCK) != 0) {
			return EAGAIN;
		}
		vm_map_lock(map); /* could sleep here */
	}
	if (flags & UVM_FLAG_UNMAP) {
		KASSERT(flags & UVM_FLAG_FIXED);
		KASSERT((flags & UVM_FLAG_NOWAIT) == 0);

		/*
		 * Set prev_entry to what it will need to be after any existing
		 * entries are removed later in uvm_map_enter().
		 */

		if (uvm_map_lookup_entry(map, start, &prev_entry)) {
			if (start == prev_entry->start)
				prev_entry = prev_entry->prev;
			else
				UVM_MAP_CLIP_END(map, prev_entry, start);
			SAVE_HINT(map, map->hint, prev_entry);
		}
	} else {
		prev_entry = uvm_map_findspace(map, start, size, &start,
		    uobj, uoffset, align, flags);
	}
	if (prev_entry == NULL) {
		unsigned int timestamp;

		timestamp = map->timestamp;
		UVMHIST_LOG(maphist,"waiting va timestamp=%#jx",
			    timestamp,0,0,0);
		map->flags |= VM_MAP_WANTVA;
		vm_map_unlock(map);

		/*
		 * try to reclaim kva and wait until someone does unmap.
		 * fragile locking here, so we awaken every second to
		 * recheck the condition.
		 */

		mutex_enter(&map->misc_lock);
		while ((map->flags & VM_MAP_WANTVA) != 0 &&
		   map->timestamp == timestamp) {
			if ((flags & UVM_FLAG_WAITVA) == 0) {
				mutex_exit(&map->misc_lock);
				UVMHIST_LOG(maphist,
				    "<- uvm_map_findspace failed!", 0,0,0,0);
				return ENOMEM;
			} else {
				cv_timedwait(&map->cv, &map->misc_lock, hz);
			}
		}
		mutex_exit(&map->misc_lock);
		goto retry;
	}

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (map == kernel_map && uvm_maxkaddr < (start + size))
		uvm_maxkaddr = pmap_growkernel(start + size);
#endif

	UVMMAP_EVCNT_INCR(map_call);

	/*
	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
	 * either case we want to zero it before storing it in the map entry
	 * (because it looks strange and confusing when debugging...)
	 *
	 * if uobj is not null
	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
	 *      and we do not need to change uoffset.
	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
	 *      now (based on the starting address of the map).   this case is
	 *      for kernel object mappings where we don't know the offset until
	 *      the virtual address is found (with uvm_map_findspace).   the
	 *      offset is the distance we are from the start of the map.
	 */

	if (uobj == NULL) {
		uoffset = 0;
	} else {
		if (uoffset == UVM_UNKNOWN_OFFSET) {
			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
			uoffset = start - vm_map_min(kernel_map);
		}
	}

	args->uma_flags = flags;
	args->uma_prev = prev_entry;
	args->uma_start = start;
	args->uma_size = size;
	args->uma_uobj = uobj;
	args->uma_uoffset = uoffset;

	UVMHIST_LOG(maphist, "<- done!", 0,0,0,0);
	return 0;
}

/*
 * uvm_map_enter:
 *
 * called with map locked.
 * unlock the map before returning.
 *
 * => inserts the range described by args, merging with the previous
 *    and/or next entry when compatible; otherwise links in new_entry
 *    (allocating one if the caller passed NULL).
 */

int
uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
    struct vm_map_entry *new_entry)
{
	struct vm_map_entry *prev_entry = args->uma_prev;
	struct vm_map_entry *dead = NULL, *dead_entries = NULL;

	const uvm_flag_t flags = args->uma_flags;
	const vm_prot_t prot = UVM_PROTECTION(flags);
	const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
	const vm_inherit_t inherit = UVM_INHERIT(flags);
	const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
	    AMAP_EXTEND_NOWAIT : 0;
	const int advice = UVM_ADVICE(flags);

	vaddr_t start = args->uma_start;
	vsize_t size = args->uma_size;
	struct uvm_object *uobj = args->uma_uobj;
	voff_t uoffset = args->uma_uoffset;

	const int kmap = (vm_map_pmap(map) == pmap_kernel());
	int merged = 0;
	int error;
	int newetype;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "(map=%#jx, start=%#jx, size=%ju, flags=%#jx)",
	    (uintptr_t)map, start, size, flags);
	UVMHIST_LOG(maphist, "  uobj/offset %#jx/%jd", (uintptr_t)uobj,
	    uoffset,0,0);

	KASSERT(map->hint == prev_entry); /* bimerge case assumes this */
	KASSERT(vm_map_locked_p(map));
	KASSERT((flags & (UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP)) !=
		(UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP));

	if (uobj)
		newetype = UVM_ET_OBJ;
	else
		newetype = 0;

	if (flags & UVM_FLAG_COPYONW) {
		newetype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			newetype |= UVM_ET_NEEDSCOPY;
	}

	/*
	 * For mappings with unmap, remove any old entries now.  Adding the new
	 * entry cannot fail because that can only happen if UVM_FLAG_NOWAIT
	 * is set, and we do not support nowait and unmap together.
	 */

	if (flags & UVM_FLAG_UNMAP) {
		KASSERT(flags & UVM_FLAG_FIXED);
		uvm_unmap_remove(map, start, start + size, &dead_entries, 0);
#ifdef DEBUG
		struct vm_map_entry *tmp_entry __diagused;
		bool rv __diagused;

		rv = uvm_map_lookup_entry(map, start, &tmp_entry);
		KASSERT(!rv);
		KASSERTMSG(prev_entry == tmp_entry,
		    "args %p prev_entry %p tmp_entry %p",
		    args, prev_entry, tmp_entry);
#endif
		SAVE_HINT(map, map->hint, prev_entry);
	}

	/*
	 * try and insert in map by extending previous entry, if possible.
	 * XXX: we don't try and pull back the next entry.   might be useful
	 * for a stack, but we are currently allocating our stack in advance.
	 */

	if (flags & UVM_FLAG_NOMERGE)
		goto nomerge;

	if (prev_entry->end == start &&
	    prev_entry != &map->header &&
	    UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0,
	    prot, maxprot, inherit, advice, 0)) {

		/* object offsets must be contiguous for a back merge */
		if (uobj && prev_entry->offset +
		    (prev_entry->end - prev_entry->start) != uoffset)
			goto forwardmerge;

		/*
		 * can't extend a shared amap.  note: no need to lock amap to
		 * look at refs since we don't care about its exact value.
		 * if it is one (i.e. we have only reference) it will stay there
		 */

		if (prev_entry->aref.ar_amap &&
		    amap_refs(prev_entry->aref.ar_amap) != 1) {
			goto forwardmerge;
		}

		if (prev_entry->aref.ar_amap) {
			error = amap_extend(prev_entry, size,
			    amapwaitflag | AMAP_EXTEND_FORWARDS);
			if (error)
				goto nomerge;
		}

		if (kmap) {
			UVMMAP_EVCNT_INCR(kbackmerge);
		} else {
			UVMMAP_EVCNT_INCR(ubackmerge);
		}
		UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);

		/*
		 * drop our reference to uobj since we are extending a reference
		 * that we already have (the ref count can not drop to zero).
		 */

		if (uobj && uobj->pgops->pgo_detach)
			uobj->pgops->pgo_detach(uobj);

		/*
		 * Now that we've merged the entries, note that we've grown
		 * and our gap has shrunk.  Then fix the tree.
		 */
		prev_entry->end += size;
		prev_entry->gap -= size;
		uvm_rb_fixup(map, prev_entry);

		uvm_map_check(map, "map backmerged");

		UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
		merged++;
	}

forwardmerge:
	if (prev_entry->next->start == (start + size) &&
	    prev_entry->next != &map->header &&
	    UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0,
	    prot, maxprot, inherit, advice, 0)) {

		if (uobj && prev_entry->next->offset != uoffset + size)
			goto nomerge;

		/*
		 * can't extend a shared amap.  note: no need to lock amap to
		 * look at refs since we don't care about its exact value.
		 * if it is one (i.e. we have only reference) it will stay there.
		 *
		 * note that we also can't merge two amaps, so if we
		 * merged with the previous entry which has an amap,
		 * and the next entry also has an amap, we give up.
		 *
		 * Interesting cases:
		 * amap, new, amap -> give up second merge (single fwd extend)
		 * amap, new, none -> double forward extend (extend again here)
		 * none, new, amap -> double backward extend (done here)
		 * uobj, new, amap -> single backward extend (done here)
		 *
		 * XXX should we attempt to deal with someone refilling
		 * the deallocated region between two entries that are
		 * backed by the same amap (ie, arefs is 2, "prev" and
		 * "next" refer to it, and adding this allocation will
		 * close the hole, thus restoring arefs to 1 and
		 * deallocating the "next" vm_map_entry)?  -- @@@
		 */

		if (prev_entry->next->aref.ar_amap &&
		    (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
		     (merged && prev_entry->aref.ar_amap))) {
			goto nomerge;
		}

		if (merged) {
			/*
			 * Try to extend the amap of the previous entry to
			 * cover the next entry as well.  If it doesn't work
			 * just skip on, don't actually give up, since we've
			 * already completed the back merge.
			 */
			if (prev_entry->aref.ar_amap) {
				if (amap_extend(prev_entry,
				    prev_entry->next->end -
				    prev_entry->next->start,
				    amapwaitflag | AMAP_EXTEND_FORWARDS))
					goto nomerge;
			}

			/*
			 * Try to extend the amap of the *next* entry
			 * back to cover the new allocation *and* the
			 * previous entry as well (the previous merge
			 * didn't have an amap already otherwise we
			 * wouldn't be checking here for an amap).  If
			 * it doesn't work just skip on, again, don't
			 * actually give up, since we've already
			 * completed the back merge.
			 */
			else if (prev_entry->next->aref.ar_amap) {
				if (amap_extend(prev_entry->next,
				    prev_entry->end -
				    prev_entry->start,
				    amapwaitflag | AMAP_EXTEND_BACKWARDS))
					goto nomerge;
			}
		} else {
			/*
			 * Pull the next entry's amap backwards to cover this
			 * new allocation.
			 */
			if (prev_entry->next->aref.ar_amap) {
				error = amap_extend(prev_entry->next, size,
				    amapwaitflag | AMAP_EXTEND_BACKWARDS);
				if (error)
					goto nomerge;
			}
		}

		if (merged) {
			if (kmap) {
				UVMMAP_EVCNT_DECR(kbackmerge);
				UVMMAP_EVCNT_INCR(kbimerge);
			} else {
				UVMMAP_EVCNT_DECR(ubackmerge);
				UVMMAP_EVCNT_INCR(ubimerge);
			}
		} else {
			if (kmap) {
				UVMMAP_EVCNT_INCR(kforwmerge);
			} else {
				UVMMAP_EVCNT_INCR(uforwmerge);
			}
		}
		UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);

		/*
		 * drop our reference to uobj since we are extending a reference
		 * that we already have (the ref count can not drop to zero).
		 */
		if (uobj && uobj->pgops->pgo_detach)
			uobj->pgops->pgo_detach(uobj);

		if (merged) {
			/* bimerge: absorb the next entry and free it below */
			dead = prev_entry->next;
			prev_entry->end = dead->end;
			uvm_map_entry_unlink(map, dead);
			if (dead->aref.ar_amap != NULL) {
				prev_entry->aref = dead->aref;
				dead->aref.ar_amap = NULL;
			}
		} else {
			/* plain forward merge: grow the next entry backwards */
			prev_entry->next->start -= size;
			if (prev_entry != &map->header) {
				prev_entry->gap -= size;
				KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry));
				uvm_rb_fixup(map, prev_entry);
			}
			if (uobj)
				prev_entry->next->offset = uoffset;
		}

		uvm_map_check(map, "map forwardmerged");

		UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
		merged++;
	}

nomerge:
	if (!merged) {
		UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
		if (kmap) {
			UVMMAP_EVCNT_INCR(knomerge);
		} else {
			UVMMAP_EVCNT_INCR(unomerge);
		}

		/*
		 * allocate new entry and link it in.
		 */

		if (new_entry == NULL) {
			new_entry = uvm_mapent_alloc(map,
				(flags & UVM_FLAG_NOWAIT));
			if (__predict_false(new_entry == NULL)) {
				error = ENOMEM;
				goto done;
			}
		}
		new_entry->start = start;
		new_entry->end = new_entry->start + size;
		new_entry->object.uvm_obj = uobj;
		new_entry->offset = uoffset;

		new_entry->etype = newetype;

		if (flags & UVM_FLAG_NOMERGE) {
			new_entry->flags |= UVM_MAP_NOMERGE;
		}

		new_entry->protection = prot;
		new_entry->max_protection = maxprot;
		new_entry->inheritance = inherit;
		new_entry->wired_count = 0;
		new_entry->advice = advice;
		if (flags & UVM_FLAG_OVERLAY) {

			/*
			 * to_add: for BSS we overallocate a little since we
			 * are likely to extend
			 */

			vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
				UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
			struct vm_amap *amap = amap_alloc(size, to_add,
			    (flags & UVM_FLAG_NOWAIT));
			if (__predict_false(amap == NULL)) {
				error = ENOMEM;
				goto done;
			}
			new_entry->aref.ar_pageoff = 0;
			new_entry->aref.ar_amap = amap;
		} else {
			new_entry->aref.ar_pageoff = 0;
			new_entry->aref.ar_amap = NULL;
		}
		uvm_map_entry_link(map, prev_entry, new_entry);

		/*
		 * Update the free space hint
		 */

		if ((map->first_free == prev_entry) &&
		    (prev_entry->end >= new_entry->start))
			map->first_free = new_entry;

		/* ownership passed to the map; don't free it below */
		new_entry = NULL;
	}

	map->size += size;

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);

	error = 0;

done:
	vm_map_unlock(map);

	if (new_entry) {
		uvm_mapent_free(new_entry);
	}

	if (dead) {
		KDASSERT(merged);
		uvm_mapent_free(dead);
	}

	if (dead_entries)
		uvm_unmap_detach(dead_entries, 0);

	return error;
}

/*
 * uvm_map_lookup_entry_bytree: lookup an entry in tree
 *
 * => descends the rb-tree; returns true with *entry set when address
 *    falls inside an entry, otherwise false with *entry set to the
 *    last entry whose start precedes address (or the header).
 */

static inline bool
uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry	/* OUT */)
{
	struct vm_map_entry *prev = &map->header;
	struct vm_map_entry *cur = ROOT_ENTRY(map);

	while (cur) {
		UVMMAP_EVCNT_INCR(mlk_treeloop);
		if (address >= cur->start) {
			if (address < cur->end) {
				*entry = cur;
				return true;
			}
			prev = cur;
			cur = RIGHT_ENTRY(cur);
		} else
			cur = LEFT_ENTRY(cur);
	}
	*entry = prev;
	return false;
}

/*
 * uvm_map_lookup_entry: find map entry at or before an address
 *
 * => map must at least be read-locked by caller
 * => entry is returned in "entry"
 * => return value is true if address is in the returned entry
 */

bool
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry	/* OUT */)
{
	struct vm_map_entry *cur;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx,addr=%#jx,ent=%#jx)",
	    (uintptr_t)map, address, (uintptr_t)entry, 0);

	/*
	 * make a quick check to see if we are already looking at
	 * the entry we want (which is usually the case).  note also
	 * that we don't need to save the hint here...  it is the
	 * same hint (unless we are at the header, in which case the
	 * hint didn't buy us anything anyway).
	 */

	cur = map->hint;
	UVMMAP_EVCNT_INCR(mlk_call);
	if (cur != &map->header &&
	    address >= cur->start && cur->end > address) {
		UVMMAP_EVCNT_INCR(mlk_hint);
		*entry = cur;
		UVMHIST_LOG(maphist,"<- got it via hint (%#jx)",
		    (uintptr_t)cur, 0, 0, 0);
		uvm_mapent_check(*entry);
		return (true);
	}
	uvm_map_check(map, __func__);

	/*
	 * lookup in the tree.
	 */

	UVMMAP_EVCNT_INCR(mlk_tree);
	if (__predict_true(uvm_map_lookup_entry_bytree(map, address, entry))) {
		SAVE_HINT(map, map->hint, *entry);
		UVMHIST_LOG(maphist,"<- search got it (%#jx)",
		    (uintptr_t)cur, 0, 0, 0);
		KDASSERT((*entry)->start <= address);
		KDASSERT(address < (*entry)->end);
		uvm_mapent_check(*entry);
		return (true);
	}

	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
	KDASSERT((*entry)->next == &map->header ||
	    address < (*entry)->next->start);
	return (false);
}

/*
 * See if the range between start and start + length fits in the gap
 * entry->next->start and entry->end.  Returns 1 if fits, 0 if doesn't
 * fit, and -1 address wraps around.
 */
static int
uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
    vsize_t align, int flags, int topdown, struct vm_map_entry *entry)
{
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push start address forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */

	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, start, length, topdown);
#endif
	if ((flags & UVM_FLAG_COLORMATCH) != 0) {
		/* `align' carries the requested page color, not an alignment */
		KASSERT(align < uvmexp.ncolors);
		if (uvmexp.ncolors > 1) {
			const u_int colormask = uvmexp.colormask;
			const u_int colorsize = colormask + 1;
			vaddr_t hint = atop(*start);
			const u_int color = hint & colormask;
			if (color != align) {
				hint -= color;	/* adjust to color boundary */
				KASSERT((hint & colormask) == 0);
				if (topdown) {
					if (align > color)
						hint -= colorsize;
				} else {
					if (align < color)
						hint += colorsize;
				}
				*start = ptoa(hint + align); /* adjust to color */
			}
		}
	} else {
		KASSERT(powerof2(align));
		uvm_map_align_va(start, align, topdown);
		/*
		 * XXX Should we PMAP_PREFER() here again?
		 * eh...i think we're okay
		 */
	}

	/*
	 * Find the end of the proposed new region.  Be sure we didn't
	 * wrap around the address; if so, we lose.  Otherwise, if the
	 * proposed new region fits before the next entry, we win.
	 */

	end = *start + length;
	if (end < *start)
		return (-1);

	if (entry->next->start >= end && *start >= entry->end)
		return (1);

	return (0);
}

/*
 * uvm_findspace_invariants: assert that uvm_map_findspace's search has
 * remained monotonic -- the hint may only move down from the original
 * hint for topdown maps, and only up otherwise; `line' identifies the
 * uvm_map_findspace call site in any panic message.
 */
static void
uvm_findspace_invariants(struct vm_map *map, vaddr_t orig_hint, vaddr_t length,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, int flags,
    vaddr_t hint, struct vm_map_entry *entry, int line)
{
	const int topdown = map->flags & VM_MAP_TOPDOWN;
	KASSERTMSG( topdown || hint >= orig_hint,
	    "map=%p hint=%#"PRIxVADDR" orig_hint=%#"PRIxVADDR
	    " length=%#"PRIxVSIZE" uobj=%p uoffset=%#llx align=%"PRIxVSIZE
	    " flags=%#x entry=%p (uvm_map_findspace line %d)",
	    map, hint, orig_hint,
	    length, uobj, (unsigned long long)uoffset, align,
	    flags, entry, line);
	KASSERTMSG(!topdown || hint <= orig_hint,
	    "map=%p hint=%#"PRIxVADDR" orig_hint=%#"PRIxVADDR
	    " length=%#"PRIxVSIZE" uobj=%p uoffset=%#llx align=%"PRIxVSIZE
	    " flags=%#x entry=%p (uvm_map_findspace line %d)",
	    map, hint, orig_hint,
	    length, uobj, (unsigned long long)uoffset, align,
	    flags, entry, line);
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 *	set in "flags" (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if "align" is non-zero, we attempt to align to that value.
 * => caller must at least have read-locked map
 * => returns NULL on failure, or pointer to prev.
map entry if success * => note this is a cross between the old vm_map_findspace and vm_map_find */ struct vm_map_entry * uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, vsize_t align, int flags) { #define INVARIANTS() \ uvm_findspace_invariants(map, orig_hint, length, uobj, uoffset, align,\ flags, hint, entry, __LINE__) struct vm_map_entry *entry = NULL; struct vm_map_entry *child, *prev, *tmp; vaddr_t orig_hint __diagused; const int topdown = map->flags & VM_MAP_TOPDOWN; int avail; UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(maphist, "(map=%#jx, hint=%#jx, len=%ju, flags=%#jx...", (uintptr_t)map, hint, length, flags); UVMHIST_LOG(maphist, " uobj=%#jx, uoffset=%#jx, align=%#jx)", (uintptr_t)uobj, uoffset, align, 0); KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || powerof2(align)); KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors); KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); uvm_map_check(map, "map_findspace entry"); /* * Clamp the hint to the VM map's min/max address, and remmeber * the clamped original hint. Remember the original hint, * clamped to the min/max address. If we are aligning, then we * may have to try again with no alignment constraint if we * fail the first time. * * We use the original hint to verify later that the search has * been monotonic -- that is, nonincreasing or nondecreasing, * according to topdown or !topdown respectively. But the * clamping is not monotonic. */ if (hint < vm_map_min(map)) { /* check ranges ... 
*/ if (flags & UVM_FLAG_FIXED) { UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); return (NULL); } hint = vm_map_min(map); } if (hint > vm_map_max(map)) { UVMHIST_LOG(maphist,"<- VA %#jx > range [%#jx->%#jx]", hint, vm_map_min(map), vm_map_max(map), 0); return (NULL); } orig_hint = hint; INVARIANTS(); UVMHIST_LOG(maphist,"<- VA %#jx vs range [%#jx->%#jx]", hint, vm_map_min(map), vm_map_max(map), 0); /* * hint may not be aligned properly; we need round up or down it * before proceeding further. */ if ((flags & UVM_FLAG_COLORMATCH) == 0) { uvm_map_align_va(&hint, align, topdown); INVARIANTS(); } UVMHIST_LOG(maphist,"<- VA %#jx vs range [%#jx->%#jx]", hint, vm_map_min(map), vm_map_max(map), 0); /* * Look for the first possible address; if there's already * something at this address, we have to start after it. */ /* * @@@: there are four, no, eight cases to consider. * * 0: found, fixed, bottom up -> fail * 1: found, fixed, top down -> fail * 2: found, not fixed, bottom up -> start after entry->end, * loop up * 3: found, not fixed, top down -> start before entry->start, * loop down * 4: not found, fixed, bottom up -> check entry->next->start, fail * 5: not found, fixed, top down -> check entry->next->start, fail * 6: not found, not fixed, bottom up -> check entry->next->start, * loop up * 7: not found, not fixed, top down -> check entry->next->start, * loop down * * as you can see, it reduces to roughly five cases, and that * adding top down mapping only adds one unique case (without * it, there would be four cases). */ if ((flags & UVM_FLAG_FIXED) == 0 && hint == (topdown ? vm_map_max(map) : vm_map_min(map))) { /* * The uvm_map_findspace algorithm is monotonic -- for * topdown VM it starts with a high hint and returns a * lower free address; for !topdown VM it starts with a * low hint and returns a higher free address. As an * optimization, start with the first (highest for * topdown, lowest for !topdown) free address. 
* * XXX This `optimization' probably doesn't actually do * much in practice unless userland explicitly passes * the VM map's minimum or maximum address, which * varies from machine to machine (VM_MAX/MIN_ADDRESS, * e.g. 0x7fbfdfeff000 on amd64 but 0xfffffffff000 on * aarch64) and may vary according to other factors * like sysctl vm.user_va0_disable. In particular, if * the user specifies 0 as a hint to mmap, then mmap * will choose a default address which is usually _not_ * VM_MAX/MIN_ADDRESS but something else instead like * VM_MAX_ADDRESS - stack size - guard page overhead, * in which case this branch is never hit. * * In fact, this branch appears to have been broken for * two decades between when topdown was introduced in * ~2003 and when it was adapted to handle the topdown * case without violating the monotonicity assertion in * 2022. Maybe Someone^TM should either ditch the * optimization or find a better way to do it. */ entry = map->first_free; } else { if (uvm_map_lookup_entry(map, hint, &entry)) { /* "hint" address already in use ... */ if (flags & UVM_FLAG_FIXED) { UVMHIST_LOG(maphist, "<- fixed & VA in use", 0, 0, 0, 0); return (NULL); } if (topdown) /* Start from lower gap. */ entry = entry->prev; } else if (flags & UVM_FLAG_FIXED) { if (entry->next->start >= hint + length && hint + length > hint) goto found; /* "hint" address is gap but too small */ UVMHIST_LOG(maphist, "<- fixed mapping failed", 0, 0, 0, 0); return (NULL); /* only one shot at it ... */ } else { /* * See if given hint fits in this gap. */ avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, topdown, entry); INVARIANTS(); switch (avail) { case 1: goto found; case -1: goto wraparound; } if (topdown) { /* * Still there is a chance to fit * if hint > entry->end. */ } else { /* Start from higher gap. */ entry = entry->next; if (entry == &map->header) goto notfound; goto nextgap; } } } /* * Note that all UVM_FLAGS_FIXED case is already handled. 
*/ KDASSERT((flags & UVM_FLAG_FIXED) == 0); /* Try to find the space in the red-black tree */ /* Check slot before any entry */ hint = topdown ? entry->next->start - length : entry->end; INVARIANTS(); avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, topdown, entry); INVARIANTS(); switch (avail) { case 1: goto found; case -1: goto wraparound; } nextgap: KDASSERT((flags & UVM_FLAG_FIXED) == 0); /* If there is not enough space in the whole tree, we fail */ tmp = ROOT_ENTRY(map); if (tmp == NULL || tmp->maxgap < length) goto notfound; prev = NULL; /* previous candidate */ /* Find an entry close to hint that has enough space */ for (; tmp;) { KASSERT(tmp->next->start == tmp->end + tmp->gap); if (topdown) { if (tmp->next->start < hint + length && (prev == NULL || tmp->end > prev->end)) { if (tmp->gap >= length) prev = tmp; else if ((child = LEFT_ENTRY(tmp)) != NULL && child->maxgap >= length) prev = tmp; } } else { if (tmp->end >= hint && (prev == NULL || tmp->end < prev->end)) { if (tmp->gap >= length) prev = tmp; else if ((child = RIGHT_ENTRY(tmp)) != NULL && child->maxgap >= length) prev = tmp; } } if (tmp->next->start < hint + length) child = RIGHT_ENTRY(tmp); else if (tmp->end > hint) child = LEFT_ENTRY(tmp); else { if (tmp->gap >= length) break; if (topdown) child = LEFT_ENTRY(tmp); else child = RIGHT_ENTRY(tmp); } if (child == NULL || child->maxgap < length) break; tmp = child; } if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { /* * Check if the entry that we found satifies the * space requirement */ if (topdown) { if (hint > tmp->next->start - length) hint = tmp->next->start - length; } else { if (hint < tmp->end) hint = tmp->end; } INVARIANTS(); avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, topdown, tmp); INVARIANTS(); switch (avail) { case 1: entry = tmp; goto found; case -1: goto wraparound; } if (tmp->gap >= length) goto listsearch; } if (prev == NULL) goto notfound; if (topdown) { KASSERT(orig_hint >= 
prev->next->start - length || prev->next->start - length > prev->next->start); hint = prev->next->start - length; } else { KASSERT(orig_hint <= prev->end); hint = prev->end; } INVARIANTS(); avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, topdown, prev); INVARIANTS(); switch (avail) { case 1: entry = prev; goto found; case -1: goto wraparound; } if (prev->gap >= length) goto listsearch; if (topdown) tmp = LEFT_ENTRY(prev); else tmp = RIGHT_ENTRY(prev); for (;;) { KASSERT(tmp && tmp->maxgap >= length); if (topdown) child = RIGHT_ENTRY(tmp); else child = LEFT_ENTRY(tmp); if (child && child->maxgap >= length) { tmp = child; continue; } if (tmp->gap >= length) break; if (topdown) tmp = LEFT_ENTRY(tmp); else tmp = RIGHT_ENTRY(tmp); } if (topdown) { KASSERT(orig_hint >= tmp->next->start - length || tmp->next->start - length > tmp->next->start); hint = tmp->next->start - length; } else { KASSERT(orig_hint <= tmp->end); hint = tmp->end; } INVARIANTS(); avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, topdown, tmp); INVARIANTS(); switch (avail) { case 1: entry = tmp; goto found; case -1: goto wraparound; } /* * The tree fails to find an entry because of offset or alignment * restrictions. Search the list instead. */ listsearch: /* * Look through the rest of the map, trying to fit a new region in * the gap between existing regions, or after the very last region. * note: entry->end = base VA of current gap, * entry->next->start = VA of end of current gap */ INVARIANTS(); for (;;) { /* Update hint for current gap. */ hint = topdown ? entry->next->start - length : entry->end; INVARIANTS(); /* See if it fits. 
	 */
		/*
		 * (tail of uvm_map_findspace's linear list search)
		 * Try the current gap; on success jump to found, on
		 * address-space wraparound bail out entirely.
		 */
		avail = uvm_map_space_avail(&hint, length, uoffset, align,
		    flags, topdown, entry);
		INVARIANTS();
		switch (avail) {
		case 1:
			goto found;
		case -1:
			goto wraparound;
		}

		/* Advance to next/previous gap */
		if (topdown) {
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off start)",
				    0,0,0,0);
				goto notfound;
			}
			entry = entry->prev;
		} else {
			entry = entry->next;
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off end)",
				    0,0,0,0);
				goto notfound;
			}
		}
	}

 found:
	/* Record the entry preceding the found gap as the new hint. */
	SAVE_HINT(map, map->hint, entry);
	*result = hint;
	UVMHIST_LOG(maphist,"<- got it!  (result=%#jx)", hint, 0,0,0);
	INVARIANTS();
	KASSERT(entry->end <= hint);
	KASSERT(hint + length <= entry->next->start);
	return (entry);

 wraparound:
	UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
	return (NULL);

 notfound:
	UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);
	return (NULL);

#undef INVARIANTS
}

/*
 *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
 */

/*
 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be locked by caller
 * => we return a list of map entries that we've removed from the map
 *    in "entry_list"
 */

void
uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry **entry_list /* OUT */, int flags)
{
	struct vm_map_entry *entry, *first_entry, *next;
	vaddr_t len;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx, start=%#jx, end=%#jx)",
	    (uintptr_t)map, start, end, 0);
	VM_MAP_RANGE_CHECK(map, start, end);

	uvm_map_check(map, "unmap_remove entry");

	/*
	 * find first entry
	 */

	if (uvm_map_lookup_entry(map, start, &first_entry) == true) {
		/* clip and go... */
		entry = first_entry;
		UVM_MAP_CLIP_START(map, entry, start);
		/* critical!  prevents stale hint */
		SAVE_HINT(map, entry, entry->prev);
	} else {
		/*
		 * lookup failed: first_entry is the entry just before
		 * "start", so the first entry in range is its successor.
		 */
		entry = first_entry->next;
	}

	/*
	 * save the free space hint
	 */

	if (map->first_free != &map->header &&
	    map->first_free->start >= start)
		map->first_free = entry->prev;

	/*
	 * note: we now re-use first_entry for a different task.  we remove
	 * a number of map entries from the map and save them in a linked
	 * list headed by "first_entry".  once we remove them from the map
	 * the caller should unlock the map and drop the references to the
	 * backing objects [c.f. uvm_unmap_detach].  the object is to
	 * separate unmapping from reference dropping.  why?
	 *   [1] the map has to be locked for unmapping
	 *   [2] the map need not be locked for reference dropping
	 *   [3] dropping references may trigger pager I/O, and if we hit
	 *       a pager that does synchronous I/O we may have to wait for it.
	 *   [4] we would like all waiting for I/O to occur with maps unlocked
	 *       so that we don't block other threads.
	 */

	first_entry = NULL;
	*entry_list = NULL;

	/*
	 * break up the area into map entry sized regions and unmap.  note
	 * that all mappings have to be removed before we can even consider
	 * dropping references to amaps or VM objects (otherwise we could end
	 * up with a mapping to a page on the free list which would be very bad)
	 */

	while ((entry != &map->header) && (entry->start < end)) {
		KASSERT((entry->flags & UVM_MAP_STATIC) == 0);

		UVM_MAP_CLIP_END(map, entry, end);
		next = entry->next;
		len = entry->end - entry->start;

		/*
		 * unwire before removing addresses from the pmap; otherwise
		 * unwiring will put the entries back into the pmap (XXX).
		 */

		if (VM_MAPENT_ISWIRED(entry)) {
			uvm_map_entry_unwire(map, entry);
		}
		if (flags & UVM_FLAG_VAONLY) {

			/* nothing */

		} else if ((map->flags & VM_MAP_PAGEABLE) == 0) {

			/*
			 * if the map is non-pageable, any pages mapped there
			 * must be wired and entered with pmap_kenter_pa(),
			 * and we should free any such pages immediately.
			 * this is mostly used for kmem_map.
			 */
			KASSERT(vm_map_pmap(map) == pmap_kernel());

			uvm_km_pgremove_intrsafe(map, entry->start, entry->end);
		} else if (UVM_ET_ISOBJ(entry) &&
			   UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
			panic("%s: kernel object %p %p\n",
			    __func__, map, entry);
		} else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
			/*
			 * remove mappings the standard way.  lock object
			 * and/or amap to ensure vm_page state does not
			 * change while in pmap_remove().
			 */

#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
			uvm_map_lock_entry(entry, RW_WRITER);
#else
			uvm_map_lock_entry(entry, RW_READER);
#endif
			pmap_remove(map->pmap, entry->start, entry->end);

			/*
			 * note: if map is dying, leave pmap_update() for
			 * later.  if the map is to be reused (exec) then
			 * pmap_update() will be called.  if the map is
			 * being disposed of (exit) then pmap_destroy()
			 * will be called.
			 */

			if ((map->flags & VM_MAP_DYING) == 0) {
				pmap_update(vm_map_pmap(map));
			} else {
				KASSERT(vm_map_pmap(map) != pmap_kernel());
			}

			uvm_map_unlock_entry(entry);
		}

#if defined(UVMDEBUG)
		/*
		 * check if there's remaining mapping,
		 * which is a bug in caller.
		 */

		vaddr_t va;
		for (va = entry->start; va < entry->end;
		    va += PAGE_SIZE) {
			if (pmap_extract(vm_map_pmap(map), va, NULL)) {
				panic("%s: %#"PRIxVADDR" has mapping",
				    __func__, va);
			}
		}

		if (VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
			uvm_km_check_empty(map, entry->start, entry->end);
		}
#endif /* defined(UVMDEBUG) */

		/*
		 * remove entry from map and put it on our list of entries
		 * that we've nuked.  then go to next entry.
		 */

		UVMHIST_LOG(maphist, "  removed map entry %#jx",
		    (uintptr_t)entry, 0, 0, 0);

		/* critical!  prevents stale hint */
		SAVE_HINT(map, entry, entry->prev);

		uvm_map_entry_unlink(map, entry);
		KASSERT(map->size >= len);
		map->size -= len;
		entry->prev = NULL;
		entry->next = first_entry;
		first_entry = entry;
		entry = next;
	}

	uvm_map_check(map, "unmap_remove leave");

	/*
	 * now we've cleaned up the map and are ready for the caller to drop
	 * references to the mapped objects.
	 */

	*entry_list = first_entry;
	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);

	/* Wake up any waiters blocked on VA becoming available. */
	if (map->flags & VM_MAP_WANTVA) {
		mutex_enter(&map->misc_lock);
		map->flags &= ~VM_MAP_WANTVA;
		cv_broadcast(&map->cv);
		mutex_exit(&map->misc_lock);
	}
}

/*
 * uvm_unmap_detach: drop references in a chain of map entries
 *
 * => we will free the map entries as we traverse the list.
 */

void
uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
{
	struct vm_map_entry *next_entry;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	while (first_entry) {
		KASSERT(!VM_MAPENT_ISWIRED(first_entry));
		UVMHIST_LOG(maphist,
		    "  detach %#jx: amap=%#jx, obj=%#jx, submap?=%jd",
		    (uintptr_t)first_entry,
		    (uintptr_t)first_entry->aref.ar_amap,
		    (uintptr_t)first_entry->object.uvm_obj,
		    UVM_ET_ISSUBMAP(first_entry));

		/*
		 * drop reference to amap, if we've got one
		 */

		if (first_entry->aref.ar_amap)
			uvm_map_unreference_amap(first_entry, flags);

		/*
		 * drop reference to our backing object, if we've got one
		 */

		KASSERT(!UVM_ET_ISSUBMAP(first_entry));
		if (UVM_ET_ISOBJ(first_entry) &&
		    first_entry->object.uvm_obj->pgops->pgo_detach) {
			(*first_entry->object.uvm_obj->pgops->pgo_detach)
			    (first_entry->object.uvm_obj);
		}
		next_entry = first_entry->next;
		uvm_mapent_free(first_entry);
		first_entry = next_entry;
	}
	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}

/*
 *   E X T R A C T I O N   F U N C T I O N S
 */

/*
 * uvm_map_reserve: reserve space in a vm_map for future use.
 *
 * => we reserve space in a map by putting a dummy map entry in the
 *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
 * => map should be unlocked (we will write lock it)
 * => we return true if we were able to reserve space
 * => XXXCDC: should be inline?
 */

int
uvm_map_reserve(struct vm_map *map, vsize_t size,
    vaddr_t offset	/* hint for pmap_prefer */,
    vsize_t align	/* alignment */,
    vaddr_t *raddr	/* IN:hint, OUT: reserved VA */,
    uvm_flag_t flags	/* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */)
{
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "(map=%#jx, size=%#jx, offset=%#jx, addr=%#jx)",
	    (uintptr_t)map, size, offset, (uintptr_t)raddr);

	size = round_page(size);

	/*
	 * reserve some virtual space by mapping a PROT_NONE, NOMERGE
	 * dummy entry (no object, no amap) at *raddr.
	 */

	if (uvm_map(map, raddr, size, NULL, offset, align,
	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) {
		UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
		return (false);
	}

	UVMHIST_LOG(maphist, "<- done (*raddr=%#jx)", *raddr,0,0,0);
	return (true);
}

/*
 * uvm_map_replace: replace a reserved (blank) area of memory with
 * real mappings.
 *
 * => caller must WRITE-LOCK the map
 * => we return true if replacement was a success
 * => we expect the newents chain to have nnewents entries on it and
 *    we expect newents->prev to point to the last entry on the list
 * => note newents is allowed to be NULL
 */

static int
uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry *newents, int nnewents, vsize_t nsize,
    struct vm_map_entry **oldentryp)
{
	struct vm_map_entry *oldent, *last;

	uvm_map_check(map, "map_replace entry");

	/*
	 * first find the blank map entry at the specified address
	 */

	if (!uvm_map_lookup_entry(map, start, &oldent)) {
		return (false);
	}

	/*
	 * check to make sure we have a proper blank entry
	 */

	if (end < oldent->end) {
		UVM_MAP_CLIP_END(map, oldent, end);
	}
	if (oldent->start != start || oldent->end != end ||
	    oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
		return (false);
	}

#ifdef DIAGNOSTIC

	/*
	 * sanity check the newents chain: entries must be sorted,
	 * non-overlapping, within [start,end), doubly linked, and
	 * their count/total size must match nnewents/nsize.
	 */

	{
		struct vm_map_entry *tmpent = newents;
		int nent = 0;
		vsize_t sz = 0;
		vaddr_t cur = start;

		while (tmpent) {
			nent++;
			sz += tmpent->end - tmpent->start;
			if (tmpent->start < cur)
				panic("uvm_map_replace1");
			if (tmpent->start >= tmpent->end || tmpent->end > end) {
				panic("uvm_map_replace2: "
				    "tmpent->start=%#"PRIxVADDR
				    ", tmpent->end=%#"PRIxVADDR
				    ", end=%#"PRIxVADDR,
				    tmpent->start, tmpent->end, end);
			}
			cur = tmpent->end;
			if (tmpent->next) {
				if (tmpent->next->prev != tmpent)
					panic("uvm_map_replace3");
			} else {
				if (newents->prev != tmpent)
					panic("uvm_map_replace4");
			}
			tmpent = tmpent->next;
		}
		if (nent != nnewents)
			panic("uvm_map_replace5");
		if (sz != nsize)
			panic("uvm_map_replace6");
	}
#endif

	/*
	 * map entry is a valid blank!   replace it.   (this does all the
	 * work of map entry link/unlink...).
	 */

	if (newents) {
		last = newents->prev;

		/* critical: flush stale hints out of map */
		SAVE_HINT(map, map->hint, newents);
		if (map->first_free == oldent)
			map->first_free = last;

		last->next = oldent->next;
		last->next->prev = last;

		/* Fix RB tree */
		uvm_rb_remove(map, oldent);

		newents->prev = oldent->prev;
		newents->prev->next = newents;
		map->nentries = map->nentries + (nnewents - 1);

		/* Fixup the RB tree */
		{
			int i;
			struct vm_map_entry *tmp;

			tmp = newents;
			for (i = 0; i < nnewents && tmp; i++) {
				uvm_rb_insert(map, tmp);
				tmp = tmp->next;
			}
		}
	} else {
		/* NULL list of new entries: just remove the old one */
		clear_hints(map, oldent);
		uvm_map_entry_unlink(map, oldent);
	}
	map->size -= end - start - nsize;

	uvm_map_check(map, "map_replace leave");

	/*
	 * now we can free the old blank entry and return.
	 */

	/* hand the replaced blank entry back to the caller to free */
	*oldentryp = oldent;
	return (true);
}

/*
 * uvm_map_extract: extract a mapping from a map and put it somewhere
 * (maybe removing the old mapping)
 *
 * => maps should be unlocked (we will write lock them)
 * => returns 0 on success, error code otherwise
 * => start must be page aligned
 * => len must be page sized
 * => flags:
 *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
 *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
 *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
 *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
 *      UVM_EXTRACT_PROT_ALL: set prot to UVM_PROT_ALL as we go
 *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
 *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
 *             be used from within the kernel in a kernel level map <<<
 */

int
uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
    struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
{
	vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge;
	struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
	    *deadentry, *oldentry;
	struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */
	vsize_t elen __unused;
	int nchain, error, copy_ok;
	vsize_t nsize;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(srcmap=%#jx,start=%#jx, len=%#jx",
	    (uintptr_t)srcmap, start, len, 0);
	UVMHIST_LOG(maphist," ...,dstmap=%#jx, flags=%#jx)",
	    (uintptr_t)dstmap, flags, 0, 0);

	/*
	 * step 0: sanity check: start must be on a page boundary, length
	 * must be page sized.  can't ask for CONTIG/QREF if you asked for
	 * REMOVE.
	 */

	KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
	KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
	    (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);

	/*
	 * step 1: reserve space in the target map for the extracted area
	 */

	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
		dstaddr = vm_map_min(dstmap);
		/* reserve a color-matched blank range in dstmap */
		if (!uvm_map_reserve(dstmap, len, start,
		    atop(start) & uvmexp.colormask, &dstaddr,
		    UVM_FLAG_COLORMATCH))
			return (ENOMEM);
		KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0);
		*dstaddrp = dstaddr;	/* pass address back to caller */
		UVMHIST_LOG(maphist, "  dstaddr=%#jx", dstaddr,0,0,0);
	} else {
		dstaddr = *dstaddrp;
	}

	/*
	 * step 2: setup for the extraction process loop by init'ing the
	 * map entry chain, locking src map, and looking up the first useful
	 * entry in the map.
	 */

	end = start + len;
	newend = dstaddr + len;
	chain = endchain = NULL;
	nchain = 0;
	nsize = 0;
	vm_map_lock(srcmap);

	if (uvm_map_lookup_entry(srcmap, start, &entry)) {

		/* "start" is within an entry */
		if (flags & UVM_EXTRACT_QREF) {

			/*
			 * for quick references we don't clip the entry, so
			 * the entry may map space "before" the starting
			 * virtual address... this is the "fudge" factor
			 * (which can be non-zero only the first time
			 * through the "while" loop in step 3).
			 */

			fudge = start - entry->start;
		} else {

			/*
			 * normal reference: we clip the map to fit (thus
			 * fudge is zero)
			 */

			UVM_MAP_CLIP_START(srcmap, entry, start);
			SAVE_HINT(srcmap, srcmap->hint, entry->prev);
			fudge = 0;
		}
	} else {

		/* "start" is not within an entry ... skip to next entry */
		if (flags & UVM_EXTRACT_CONTIG) {
			error = EINVAL;
			goto bad;    /* definite hole here ... */
		}

		entry = entry->next;
		fudge = 0;
	}

	/* save values from srcmap for step 6 */
	orig_entry = entry;
	orig_fudge = fudge;

	/*
	 * step 3: now start looping through the map entries, extracting
	 * as we go.
	 */

	while (entry->start < end && entry != &srcmap->header) {

		/* if we are not doing a quick reference, clip it */
		if ((flags & UVM_EXTRACT_QREF) == 0)
			UVM_MAP_CLIP_END(srcmap, entry, end);

		/* clear needs_copy (allow chunking) */
		if (UVM_ET_ISNEEDSCOPY(entry)) {
			amap_copy(srcmap, entry,
			    AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end);
			if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
				error = ENOMEM;
				goto bad;
			}

			/* amap_copy could clip (during chunk)!  update fudge */
			if (fudge) {
				fudge = start - entry->start;
				orig_fudge = fudge;
			}
		}

		/* calculate the offset of this from "start" */
		oldoffset = (entry->start + fudge) - start;

		/* allocate a new map entry */
		newentry = uvm_mapent_alloc(dstmap, 0);
		if (newentry == NULL) {
			error = ENOMEM;
			goto bad;
		}

		/* set up new map entry */
		newentry->next = NULL;
		newentry->prev = endchain;
		newentry->start = dstaddr + oldoffset;
		newentry->end =
		    newentry->start + (entry->end - (entry->start + fudge));
		/* clamp to the reserved destination range */
		if (newentry->end > newend || newentry->end < newentry->start)
			newentry->end = newend;
		newentry->object.uvm_obj = entry->object.uvm_obj;
		if (newentry->object.uvm_obj) {
			if (newentry->object.uvm_obj->pgops->pgo_reference)
				newentry->object.uvm_obj->pgops->
				    pgo_reference(newentry->object.uvm_obj);
			newentry->offset = entry->offset + fudge;
		} else {
			newentry->offset = 0;
		}
		newentry->etype = entry->etype;
		if (flags & UVM_EXTRACT_PROT_ALL) {
			newentry->protection = newentry->max_protection =
			    UVM_PROT_ALL;
		} else {
			newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
			    entry->max_protection : entry->protection;
			newentry->max_protection = entry->max_protection;
		}
		newentry->inheritance = entry->inheritance;
		newentry->wired_count = 0;
		newentry->aref.ar_amap = entry->aref.ar_amap;
		if (newentry->aref.ar_amap) {
			newentry->aref.ar_pageoff =
			    entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
			uvm_map_reference_amap(newentry, AMAP_SHARED |
			    ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
		} else {
			newentry->aref.ar_pageoff = 0;
		}
		newentry->advice = entry->advice;
		if ((flags & UVM_EXTRACT_QREF) != 0) {
			newentry->flags |= UVM_MAP_NOMERGE;
		}

		/* now link it on the chain */
		nchain++;
		nsize += newentry->end - newentry->start;
		if (endchain == NULL) {
			chain = endchain = newentry;
		} else {
			endchain->next = newentry;
			endchain = newentry;
		}

		/* end of 'while' loop! */
		if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
		    (entry->next == &srcmap->header ||
		    entry->next->start != entry->end)) {
			error = EINVAL;
			goto bad;
		}
		entry = entry->next;
		fudge = 0;
	}

	/*
	 * step 4: close off chain (in format expected by uvm_map_replace)
	 */

	if (chain)
		chain->prev = endchain;

	/*
	 * step 5: attempt to lock the dest map so we can pmap_copy.
	 * note usage of copy_ok:
	 *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
	 *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
	 */

	if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) {
		copy_ok = 1;
		if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
		    nchain, nsize, &resentry)) {
			if (srcmap != dstmap)
				vm_map_unlock(dstmap);
			error = EIO;
			goto bad;
		}
	} else {
		copy_ok = 0;
		/* replace deferred until step 7 */
	}

	/*
	 * step 6: traverse the srcmap a second time to do the following:
	 *  - if we got a lock on the dstmap do pmap_copy
	 *  - if UVM_EXTRACT_REMOVE remove the entries
	 * we make use of orig_entry and orig_fudge (saved in step 2)
	 */

	if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {

		/* purge possible stale hints from srcmap */
		if (flags & UVM_EXTRACT_REMOVE) {
			SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
			if (srcmap->first_free != &srcmap->header &&
			    srcmap->first_free->start >= start)
				srcmap->first_free = orig_entry->prev;
		}

		entry = orig_entry;
		fudge = orig_fudge;
		deadentry = NULL;	/* for UVM_EXTRACT_REMOVE */

		while (entry->start < end && entry != &srcmap->header) {
			if (copy_ok) {
				oldoffset = (entry->start + fudge) - start;
				elen = MIN(end, entry->end) -
				    (entry->start + fudge);
				pmap_copy(dstmap->pmap, srcmap->pmap,
				    dstaddr + oldoffset, elen,
				    entry->start + fudge);
			}

			/* we advance "entry" in the following if statement */
			if (flags & UVM_EXTRACT_REMOVE) {
#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
				uvm_map_lock_entry(entry, RW_WRITER);
#else
				uvm_map_lock_entry(entry, RW_READER);
#endif
				pmap_remove(srcmap->pmap, entry->start,
						entry->end);
				uvm_map_unlock_entry(entry);
				oldentry = entry;	/* save entry */
				entry = entry->next;	/* advance */
				uvm_map_entry_unlink(srcmap, oldentry);
							/* add to dead list */
				oldentry->next = deadentry;
				deadentry = oldentry;
			} else {
				entry = entry->next;	/* advance */
			}

			/* end of 'while' loop */
			fudge = 0;
		}
		pmap_update(srcmap->pmap);

		/*
		 * unlock dstmap.  we will dispose of deadentry in
		 * step 7 if needed
		 */

		if (copy_ok && srcmap != dstmap)
			vm_map_unlock(dstmap);

	} else {
		deadentry = NULL;
	}

	/*
	 * step 7: we are done with the source map, unlock.   if copy_ok
	 * is 0 then we have not replaced the dummy mapping in dstmap yet
	 * and we need to do so now.
	 */

	vm_map_unlock(srcmap);
	if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
		uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */

	/* now do the replacement if we didn't do it in step 5 */
	if (copy_ok == 0) {
		vm_map_lock(dstmap);
		error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
		    nchain, nsize, &resentry);
		vm_map_unlock(dstmap);

		if (error == false) {
			error = EIO;
			goto bad2;
		}
	}

	if (resentry != NULL)
		uvm_mapent_free(resentry);

	return (0);

	/*
	 * bad: failure recovery
	 */
bad:
	vm_map_unlock(srcmap);
bad2:			/* src already unlocked */
	if (chain)
		uvm_unmap_detach(chain,
		    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);

	if (resentry != NULL)
		uvm_mapent_free(resentry);

	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
		uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ???
 */
	}
	return (error);
}

/* end of extraction functions */

/*
 * uvm_map_submap: punch down part of a map into a submap
 *
 * => only the kernel_map is allowed to be submapped
 * => the purpose of submapping is to break up the locking granularity
 *	of a larger map
 * => the range specified must have been mapped previously with a uvm_map()
 *    call [with uobj==NULL] to create a blank map entry in the main map.
 *    [And it had better still be blank!]
 * => maps which contain submaps should never be copied or forked.
 * => to remove a submap, use uvm_unmap() on the main map
 *	and then uvm_map_deallocate() the submap.
 * => main map must be unlocked.
 * => submap must have been init'd and have a zero reference count.
 *	[need not be locked as we don't actually reference it]
 */

int
uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map *submap)
{
	struct vm_map_entry *entry;
	int error;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);

	if (uvm_map_lookup_entry(map, start, &entry)) {
		UVM_MAP_CLIP_START(map, entry, start);
		UVM_MAP_CLIP_END(map, entry, end);	/* to be safe */
	} else {
		entry = NULL;
	}

	/* the entry must exactly cover [start,end) and be blank */
	if (entry != NULL &&
	    entry->start == start && entry->end == end &&
	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
		entry->etype |= UVM_ET_SUBMAP;
		entry->object.sub_map = submap;
		entry->offset = 0;
		uvm_map_reference(submap);
		error = 0;
	} else {
		error = EINVAL;
	}
	vm_map_unlock(map);

	return error;
}

/*
 * uvm_map_protect_user: change map protection on behalf of the user.
 * Enforces PAX settings as necessary.
 */

int
uvm_map_protect_user(struct lwp *l, vaddr_t start, vaddr_t end,
    vm_prot_t new_prot)
{
	int error;

	/* reject protections that PAX mprotect policy disallows */
	if ((error = PAX_MPROTECT_VALIDATE(l, new_prot)))
		return error;

	return uvm_map_protect(&l->l_proc->p_vmspace->vm_map, start, end,
	    new_prot, false);
}

/*
 * uvm_map_protect: change map protection
 *
 * => set_max means set max_protection.
 * => map must be unlocked.
 */

/* mask off write permission in the pmap for copy-on-write entries */
#define MASK(entry)	(UVM_ET_ISCOPYONWRITE(entry) ? \
			 ~VM_PROT_WRITE : VM_PROT_ALL)

int
uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t new_prot, bool set_max)
{
	struct vm_map_entry *current, *entry;
	int error = 0;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_prot=%#jx)",
	    (uintptr_t)map, start, end, new_prot);

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (uvm_map_lookup_entry(map, start, &entry)) {
		UVM_MAP_CLIP_START(map, entry, start);
	} else {
		entry = entry->next;
	}

	/*
	 * make a first pass to check for protection violations.
	 */

	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		if (UVM_ET_ISSUBMAP(current)) {
			error = EINVAL;
			goto out;
		}
		if ((new_prot & current->max_protection) != new_prot) {
			error = EACCES;
			goto out;
		}
		/*
		 * Don't allow VM_PROT_EXECUTE to be set on entries that
		 * point to vnodes that are associated with a NOEXEC file
		 * system.
		 */
		if (UVM_ET_ISOBJ(current) &&
		    UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
			struct vnode *vp =
			    (struct vnode *) current->object.uvm_obj;

			if ((new_prot & VM_PROT_EXECUTE) != 0 &&
			    (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
				error = EACCES;
				goto out;
			}
		}

		current = current->next;
	}

	/* go back and fix up protections (no need to clip this time). */

	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		vm_prot_t old_prot;

		UVM_MAP_CLIP_END(map, current, end);
		old_prot = current->protection;
		if (set_max)
			current->protection =
			    (current->max_protection = new_prot) & old_prot;
		else
			current->protection = new_prot;

		/*
		 * update physical map if necessary.  worry about copy-on-write
		 * here -- CHECK THIS XXX
		 */

		if (current->protection != old_prot) {
			/* update pmap!
			 */
#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
			uvm_map_lock_entry(current, RW_WRITER);
#else
			uvm_map_lock_entry(current, RW_READER);
#endif
			pmap_protect(map->pmap, current->start, current->end,
			    current->protection & MASK(current));
			uvm_map_unlock_entry(current);

			/*
			 * If this entry points at a vnode, and the
			 * protection includes VM_PROT_EXECUTE, mark
			 * the vnode as VEXECMAP.
			 */

			if (UVM_ET_ISOBJ(current)) {
				struct uvm_object *uobj =
				    current->object.uvm_obj;

				if (UVM_OBJ_IS_VNODE(uobj) &&
				    (current->protection & VM_PROT_EXECUTE)) {
					vn_markexec((struct vnode *) uobj);
				}
			}
		}

		/*
		 * If the map is configured to lock any future mappings,
		 * wire this entry now if the old protection was VM_PROT_NONE
		 * and the new protection is not VM_PROT_NONE.
		 */

		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
		    VM_MAPENT_ISWIRED(current) == 0 &&
		    old_prot == VM_PROT_NONE &&
		    new_prot != VM_PROT_NONE) {

			/*
			 * We must call pmap_update() here because the
			 * pmap_protect() call above might have removed some
			 * pmap entries and uvm_map_pageable() might create
			 * some new pmap entries that rely on the prior
			 * removals being completely finished.
			 */

			pmap_update(map->pmap);

			if (uvm_map_pageable(map, current->start,
			    current->end, false,
			    UVM_LK_ENTER|UVM_LK_EXIT) != 0) {

				/*
				 * If locking the entry fails, remember the
				 * error if it's the first one.  Note we
				 * still continue setting the protection in
				 * the map, but will return the error
				 * condition regardless.
				 *
				 * XXX Ignore what the actual error is,
				 * XXX just call it a resource shortage
				 * XXX so that it doesn't get confused
				 * XXX what uvm_map_protect() itself would
				 * XXX normally return.
				 */

				error = ENOMEM;
			}
		}
		current = current->next;
	}
	pmap_update(map->pmap);

 out:
	vm_map_unlock(map);

	UVMHIST_LOG(maphist, "<- done, error=%jd",error,0,0,0);
	return error;
}

#undef MASK

/*
 * uvm_map_inherit: set inheritance code for range of addrs in map.
 *
 * => map must be unlocked
 * => note that the inherit code is used during a "fork".  see fork
 *	code for details.
 */

int
uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_inherit_t new_inheritance)
{
	struct vm_map_entry *entry, *temp_entry;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_inh=%#jx)",
	    (uintptr_t)map, start, end, new_inheritance);

	/* validate the inheritance code before taking the lock */
	switch (new_inheritance) {
	case MAP_INHERIT_NONE:
	case MAP_INHERIT_COPY:
	case MAP_INHERIT_SHARE:
	case MAP_INHERIT_ZERO:
		break;
	default:
		UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
		return EINVAL;
	}

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
		UVM_MAP_CLIP_START(map, entry, start);
	}  else {
		entry = temp_entry->next;
	}

	while ((entry != &map->header) && (entry->start < end)) {
		UVM_MAP_CLIP_END(map, entry, end);
		entry->inheritance = new_inheritance;
		entry = entry->next;
	}

	vm_map_unlock(map);
	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
	return 0;
}

/*
 * uvm_map_advice: set advice code for range of addrs in map.
 *
 * => map must be unlocked
 */

int
uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
{
	struct vm_map_entry *entry, *temp_entry;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_adv=%#jx)",
	    (uintptr_t)map, start, end, new_advice);

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
		UVM_MAP_CLIP_START(map, entry, start);
	} else {
		entry = temp_entry->next;
	}

	/*
	 * XXXJRT: disallow holes?
	 */

	while ((entry != &map->header) && (entry->start < end)) {
		UVM_MAP_CLIP_END(map, entry, end);

		/* validate per-entry so earlier entries keep the new advice */
		switch (new_advice) {
		case MADV_NORMAL:
		case MADV_RANDOM:
		case MADV_SEQUENTIAL:
			/* nothing special here */
			break;

		default:
			vm_map_unlock(map);
			UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
			return EINVAL;
		}
		entry->advice = new_advice;
		entry = entry->next;
	}

	vm_map_unlock(map);
	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
	return 0;
}

/*
 * uvm_map_willneed: apply MADV_WILLNEED
 */

int
uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct vm_map_entry *entry;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx)",
	    (uintptr_t)map, start, end, 0);

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!uvm_map_lookup_entry(map, start, &entry)) {
		entry = entry->next;
	}
	while (entry->start < end) {
		struct vm_amap * const amap = entry->aref.ar_amap;
		struct uvm_object * const uobj = entry->object.uvm_obj;

		KASSERT(entry != &map->header);
		KASSERT(start < entry->end);

		/*
		 * For now, we handle only the easy but commonly-requested case.
		 * ie. start prefetching of backing uobj pages.
		 *
		 * XXX It might be useful to pmap_enter() the already-in-core
		 * pages by inventing a "weak" mode for uvm_fault() which would
		 * only do the PGO_LOCKED pgo_get().
		 */

		if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) {
			off_t offset;
			off_t size;

			offset = entry->offset;
			if (start < entry->start) {
				offset += entry->start - start;
			}
			size = entry->offset + (entry->end - entry->start);
			if (entry->end < end) {
				size -= end - entry->end;
			}
			uvm_readahead(uobj, offset, size);
		}
		entry = entry->next;
	}
	vm_map_unlock_read(map);
	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
	return 0;
}

/*
 * uvm_map_pageable: sets the pageability of a range in a map.
 *
 * => wires map entries.  should not be used for transient page locking.
 *	for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
* => regions specified as not pageable require lock-down (wired) memory * and page tables. * => map must never be read-locked * => if islocked is true, map is already write-locked * => we always unlock the map, since we must downgrade to a read-lock * to call uvm_fault_wire() * => XXXCDC: check this and try and clean it up. */ int uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, bool new_pageable, int lockflags) { struct vm_map_entry *entry, *start_entry, *failed_entry; int rv; #ifdef DIAGNOSTIC u_int timestamp_save; #endif UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_pageable=%ju)", (uintptr_t)map, start, end, new_pageable); KASSERT(map->flags & VM_MAP_PAGEABLE); if ((lockflags & UVM_LK_ENTER) == 0) vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); /* * only one pageability change may take place at one time, since * uvm_fault_wire assumes it will be called only once for each * wiring/unwiring. therefore, we have to make sure we're actually * changing the pageability for the entire region. we do so before * making any changes. */ if (uvm_map_lookup_entry(map, start, &start_entry) == false) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); return EFAULT; } entry = start_entry; if (start == end) { /* nothing required */ if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (nothing)",0,0,0,0); return 0; } /* * handle wiring and unwiring separately. */ if (new_pageable) { /* unwire */ UVM_MAP_CLIP_START(map, entry, start); /* * unwiring. first ensure that the range to be unwired is * really wired down and that there are no holes. 
*/ while ((entry != &map->header) && (entry->start < end)) { if (entry->wired_count == 0 || (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end))) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); return EINVAL; } entry = entry->next; } /* * POSIX 1003.1b - a single munlock call unlocks a region, * regardless of the number of mlock calls made on that * region. */ entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { UVM_MAP_CLIP_END(map, entry, end); if (VM_MAPENT_ISWIRED(entry)) uvm_map_entry_unwire(map, entry); entry = entry->next; } if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); return 0; } /* * wire case: in two passes [XXXCDC: ugly block of code here] * * 1: holding the write lock, we create any anonymous maps that need * to be created. then we clip each map entry to the region to * be wired and increment its wiring count. * * 2: we downgrade to a read lock, and call uvm_fault_wire to fault * in the pages for any newly wired area (wired_count == 1). * * downgrading to a read lock for uvm_fault_wire avoids a possible * deadlock with another thread that may have faulted on one of * the pages to be wired (it would mark the page busy, blocking * us, then in turn block on the map lock that we hold). because * of problems in the recursive lock package, we cannot upgrade * to a write lock in vm_map_lookup. thus, any actions that * require the write lock must be done beforehand. because we * keep the read lock on the map, the copy-on-write status of the * entries we modify here cannot change. */ while ((entry != &map->header) && (entry->start < end)) { if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ /* * perform actions of vm_map_lookup that need the * write lock on the map: create an anonymous map * for a copy-on-write region, or an anonymous map * for a zero-fill region. 
(XXXCDC: submap case * ok?) */ if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->max_protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, 0, start, end); /* XXXCDC: wait OK? */ } } } UVM_MAP_CLIP_START(map, entry, start); UVM_MAP_CLIP_END(map, entry, end); entry->wired_count++; /* * Check for holes */ if (entry->protection == VM_PROT_NONE || (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end))) { /* * found one. amap creation actions do not need to * be undone, but the wired counts need to be restored. */ while (entry != &map->header && entry->end > start) { entry->wired_count--; entry = entry->prev; } if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); return EINVAL; } entry = entry->next; } /* * Pass 2. */ #ifdef DIAGNOSTIC timestamp_save = map->timestamp; #endif vm_map_busy(map); vm_map_unlock(map); rv = 0; entry = start_entry; while (entry != &map->header && entry->start < end) { if (entry->wired_count == 1) { rv = uvm_fault_wire(map, entry->start, entry->end, entry->max_protection, 1); if (rv) { /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. */ break; } } entry = entry->next; } if (rv) { /* failed? */ /* * Get back to an exclusive (write) lock. */ vm_map_lock(map); vm_map_unbusy(map); #ifdef DIAGNOSTIC if (timestamp_save + 1 != map->timestamp) panic("uvm_map_pageable: stale map"); #endif /* * first drop the wiring count on all the entries * which haven't actually been wired yet. */ failed_entry = entry; while (entry != &map->header && entry->start < end) { entry->wired_count--; entry = entry->next; } /* * now, unwire all the entries that were successfully * wired above. 
*/ entry = start_entry; while (entry != failed_entry) { entry->wired_count--; if (VM_MAPENT_ISWIRED(entry) == 0) uvm_map_entry_unwire(map, entry); entry = entry->next; } if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist, "<- done (RV=%jd)", rv,0,0,0); return (rv); } if ((lockflags & UVM_LK_EXIT) == 0) { vm_map_unbusy(map); } else { /* * Get back to an exclusive (write) lock. */ vm_map_lock(map); vm_map_unbusy(map); } UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); return 0; } /* * uvm_map_pageable_all: special case of uvm_map_pageable - affects * all mapped regions. * * => map must not be locked. * => if no flags are specified, all regions are unwired. * => XXXJRT: has some of the same problems as uvm_map_pageable() above. */ int uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) { struct vm_map_entry *entry, *failed_entry; vsize_t size; int rv; #ifdef DIAGNOSTIC u_int timestamp_save; #endif UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(maphist,"(map=%#jx,flags=%#jx)", (uintptr_t)map, flags, 0, 0); KASSERT(map->flags & VM_MAP_PAGEABLE); vm_map_lock(map); /* * handle wiring and unwiring separately. */ if (flags == 0) { /* unwire */ /* * POSIX 1003.1b -- munlockall unlocks all regions, * regardless of how many times mlockall has been called. */ for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (VM_MAPENT_ISWIRED(entry)) uvm_map_entry_unwire(map, entry); } map->flags &= ~VM_MAP_WIREFUTURE; vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); return 0; } if (flags & MCL_FUTURE) { /* * must wire all future mappings; remember this. */ map->flags |= VM_MAP_WIREFUTURE; } if ((flags & MCL_CURRENT) == 0) { /* * no more work to do! */ UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); vm_map_unlock(map); return 0; } /* * wire case: in three passes [XXXCDC: ugly block of code here] * * 1: holding the write lock, count all pages mapped by non-wired * entries. 
if this would cause us to go over our limit, we fail. * * 2: still holding the write lock, we create any anonymous maps that * need to be created. then we increment its wiring count. * * 3: we downgrade to a read lock, and call uvm_fault_wire to fault * in the pages for any newly wired area (wired_count == 1). * * downgrading to a read lock for uvm_fault_wire avoids a possible * deadlock with another thread that may have faulted on one of * the pages to be wired (it would mark the page busy, blocking * us, then in turn block on the map lock that we hold). because * of problems in the recursive lock package, we cannot upgrade * to a write lock in vm_map_lookup. thus, any actions that * require the write lock must be done beforehand. because we * keep the read lock on the map, the copy-on-write status of the * entries we modify here cannot change. */ for (size = 0, entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->protection != VM_PROT_NONE && VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ size += entry->end - entry->start; } } if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { vm_map_unlock(map); return ENOMEM; } if (limit != 0 && (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { vm_map_unlock(map); return ENOMEM; } /* * Pass 2. */ for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->protection == VM_PROT_NONE) continue; if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ /* * perform actions of vm_map_lookup that need the * write lock on the map: create an anonymous map * for a copy-on-write region, or an anonymous map * for a zero-fill region. (XXXCDC: submap case * ok?) */ if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->max_protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, 0, entry->start, entry->end); /* XXXCDC: wait OK? */ } } } entry->wired_count++; } /* * Pass 3. 
*/ #ifdef DIAGNOSTIC timestamp_save = map->timestamp; #endif vm_map_busy(map); vm_map_unlock(map); rv = 0; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->wired_count == 1) { rv = uvm_fault_wire(map, entry->start, entry->end, entry->max_protection, 1); if (rv) { /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. */ break; } } } if (rv) { /* * Get back an exclusive (write) lock. */ vm_map_lock(map); vm_map_unbusy(map); #ifdef DIAGNOSTIC if (timestamp_save + 1 != map->timestamp) panic("uvm_map_pageable_all: stale map"); #endif /* * first drop the wiring count on all the entries * which haven't actually been wired yet. * * Skip VM_PROT_NONE entries like we did above. */ failed_entry = entry; for (/* nothing */; entry != &map->header; entry = entry->next) { if (entry->protection == VM_PROT_NONE) continue; entry->wired_count--; } /* * now, unwire all the entries that were successfully * wired above. * * Skip VM_PROT_NONE entries like we did above. 
*/ for (entry = map->header.next; entry != failed_entry; entry = entry->next) { if (entry->protection == VM_PROT_NONE) continue; entry->wired_count--; if (VM_MAPENT_ISWIRED(entry)) uvm_map_entry_unwire(map, entry); } vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (RV=%jd)", rv,0,0,0); return (rv); } vm_map_unbusy(map); UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); return 0; } /* * uvm_map_clean: clean out a map range * * => valid flags: * if (flags & PGO_CLEANIT): dirty pages are cleaned first * if (flags & PGO_SYNCIO): dirty pages are written synchronously * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean * if (flags & PGO_FREE): any cached pages are freed after clean * => returns an error if any part of the specified range isn't mapped * => never a need to flush amap layer since the anonymous memory has * no permanent home, but may deactivate pages there * => called from sys_msync() and sys_madvise() * => caller must not write-lock map (read OK). * => we may sleep while cleaning if SYNCIO [with map read-locked] */ int uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) { struct vm_map_entry *current, *entry; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; struct vm_page *pg; vaddr_t offset; vsize_t size; voff_t uoff; int error, refs; UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,flags=%#jx)", (uintptr_t)map, start, end, flags); KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != (PGO_FREE|PGO_DEACTIVATE)); vm_map_lock_read(map); VM_MAP_RANGE_CHECK(map, start, end); if (uvm_map_lookup_entry(map, start, &entry) == false) { vm_map_unlock_read(map); return EFAULT; } /* * Make a first pass to check for holes and wiring problems. 
*/ for (current = entry; current->start < end; current = current->next) { if (UVM_ET_ISSUBMAP(current)) { vm_map_unlock_read(map); return EINVAL; } if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { vm_map_unlock_read(map); return EBUSY; } if (end <= current->end) { break; } if (current->end != current->next->start) { vm_map_unlock_read(map); return EFAULT; } } error = 0; for (current = entry; start < end; current = current->next) { amap = current->aref.ar_amap; /* upper layer */ uobj = current->object.uvm_obj; /* lower layer */ KASSERT(start >= current->start); /* * No amap cleaning necessary if: * * (1) There's no amap. * * (2) We're not deactivating or freeing pages. */ if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) goto flush_object; offset = start - current->start; size = MIN(end, current->end) - start; amap_lock(amap, RW_WRITER); for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { anon = amap_lookup(¤t->aref, offset); if (anon == NULL) continue; KASSERT(anon->an_lock == amap->am_lock); pg = anon->an_page; if (pg == NULL) { continue; } if (pg->flags & PG_BUSY) { continue; } switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { /* * In these first 3 cases, we just deactivate the page. */ case PGO_CLEANIT|PGO_FREE: case PGO_CLEANIT|PGO_DEACTIVATE: case PGO_DEACTIVATE: deactivate_it: /* * skip the page if it's loaned or wired, * since it shouldn't be on a paging queue * at all in these cases. */ if (pg->loan_count != 0 || pg->wire_count != 0) { continue; } KASSERT(pg->uanon == anon); uvm_pagelock(pg); uvm_pagedeactivate(pg); uvm_pageunlock(pg); continue; case PGO_FREE: /* * If there are multiple references to * the amap, just deactivate the page. 
*/ if (amap_refs(amap) > 1) goto deactivate_it; /* skip the page if it's wired */ if (pg->wire_count != 0) { continue; } amap_unadd(¤t->aref, offset); refs = --anon->an_ref; if (refs == 0) { uvm_anfree(anon); } continue; } } amap_unlock(amap); flush_object: /* * flush pages if we've got a valid backing object. * note that we must always clean object pages before * freeing them since otherwise we could reveal stale * data from files. */ uoff = current->offset + (start - current->start); size = MIN(end, current->end) - start; if (uobj != NULL) { rw_enter(uobj->vmobjlock, RW_WRITER); if (uobj->pgops->pgo_put != NULL) error = (uobj->pgops->pgo_put)(uobj, uoff, uoff + size, flags | PGO_CLEANIT); else error = 0; } start += size; } vm_map_unlock_read(map); return (error); } /* * uvm_map_checkprot: check protection in map * * => must allow specified protection in a fully allocated region. * => map must be read or write locked by caller. */ bool uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, vm_prot_t protection) { struct vm_map_entry *entry; struct vm_map_entry *tmp_entry; if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { return (false); } entry = tmp_entry; while (start < end) { if (entry == &map->header) { return (false); } /* * no holes allowed */ if (start < entry->start) { return (false); } /* * check protection associated with entry */ if ((entry->protection & protection) != protection) { return (false); } start = entry->end; entry = entry->next; } return (true); } /* * uvmspace_alloc: allocate a vmspace structure. 
 *
 * - structure includes vm_map and pmap
 * - XXX: no locking on this structure
 * - refcnt set to 1, rest must be init'd by caller
 */

struct vmspace *
uvmspace_alloc(vaddr_t vmin, vaddr_t vmax, bool topdown)
{
	struct vmspace *vm;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/* PR_WAITOK: may sleep for memory; never returns NULL */
	vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK);
	/* NULL pmap => uvmspace_init creates a fresh one */
	uvmspace_init(vm, NULL, vmin, vmax, topdown);
	UVMHIST_LOG(maphist,"<- done (vm=%#jx)", (uintptr_t)vm, 0, 0, 0);
	return (vm);
}

/*
 * uvmspace_init: initialize a vmspace structure.
 *
 * - XXX: no locking on this structure
 * - refcnt set to 1, rest must be init'd by caller
 * - if "pmap" is non-NULL a reference is taken on it; otherwise a new
 *   pmap is created for the map.
 */

void
uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin,
    vaddr_t vmax, bool topdown)
{
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "(vm=%#jx, pmap=%#jx, vmin=%#jx, vmax=%#jx",
	    (uintptr_t)vm, (uintptr_t)pmap, vmin, vmax);
	UVMHIST_LOG(maphist, " topdown=%ju)", topdown, 0, 0, 0);

	/* zero the whole structure before wiring up the map and pmap */
	memset(vm, 0, sizeof(*vm));
	uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE
	    | (topdown ? VM_MAP_TOPDOWN : 0)
	    );
	if (pmap)
		pmap_reference(pmap);
	else
		pmap = pmap_create();
	vm->vm_map.pmap = pmap;
	vm->vm_refcnt = 1;
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}

/*
 * uvmspace_share: share a vmspace between two processes
 *
 * - used for vfork, threads(?)
 */

void
uvmspace_share(struct proc *p1, struct proc *p2)
{

	/* take a new reference before publishing the pointer in p2 */
	uvmspace_addref(p1->p_vmspace);
	p2->p_vmspace = p1->p_vmspace;
}

#if 0

/*
 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
 *
 * - XXX: no locking on vmspace
 */

void
uvmspace_unshare(struct lwp *l)
{
	struct proc *p = l->l_proc;
	struct vmspace *nvm, *ovm = p->p_vmspace;

	if (ovm->vm_refcnt == 1)
		/* nothing to do: vmspace isn't shared in the first place */
		return;

	/* make a new vmspace, still holding old one */
	nvm = uvmspace_fork(ovm);

	kpreempt_disable();
	pmap_deactivate(l);		/* unbind old vmspace */
	p->p_vmspace = nvm;
	pmap_activate(l);		/* switch to new vmspace */
	kpreempt_enable();

	uvmspace_free(ovm);		/* drop reference to old vmspace */
}
#endif


/*
 * uvmspace_spawn: a new process has been spawned and needs a vmspace
 */

void
uvmspace_spawn(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
{
	struct proc *p = l->l_proc;
	struct vmspace *nvm;

#ifdef __HAVE_CPU_VMSPACE_EXEC
	cpu_vmspace_exec(l, start, end);
#endif

	/* brand-new address space; no old vmspace to release here */
	nvm = uvmspace_alloc(start, end, topdown);
	kpreempt_disable();
	p->p_vmspace = nvm;
	pmap_activate(l);
	kpreempt_enable();
}

/*
 * uvmspace_exec: the process wants to exec a new program
 */

void
uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
{
	struct proc *p = l->l_proc;
	struct vmspace *nvm, *ovm = p->p_vmspace;
	struct vm_map *map;
	int flags;

	KASSERT(ovm != NULL);
#ifdef __HAVE_CPU_VMSPACE_EXEC
	cpu_vmspace_exec(l, start, end);
#endif

	map = &ovm->vm_map;
	/*
	 * see if more than one process is using this vmspace...
	 */

	if (ovm->vm_refcnt == 1
	    && topdown == ((ovm->vm_map.flags & VM_MAP_TOPDOWN) != 0)) {

		/*
		 * if p is the only process using its vmspace then we can safely
		 * recycle that vmspace for the program that is being exec'd.
		 * But only if TOPDOWN matches the requested value for the new
		 * vm space!
		 */

		/*
		 * SYSV SHM semantics require us to kill all segments on an exec
		 */
		if (uvm_shmexit && ovm->vm_shm)
			(*uvm_shmexit)(ovm);

		/*
		 * POSIX 1003.1b -- "lock future mappings" is revoked
		 * when a process execs another program image.
		 */

		map->flags &= ~VM_MAP_WIREFUTURE;

		/*
		 * now unmap the old program.
		 *
		 * XXX set VM_MAP_DYING for the duration, so pmap_update()
		 * is not called until the pmap has been totally cleared out
		 * after pmap_remove_all(), or it can confuse some pmap
		 * implementations.  it would be nice to handle this by
		 * deferring the pmap_update() while it is known the address
		 * space is not visible to any user LWP other than curlwp,
		 * but there isn't an elegant way of inferring that right
		 * now.
		 */

		flags = pmap_remove_all(map->pmap) ? UVM_FLAG_VAONLY : 0;
		map->flags |= VM_MAP_DYING;
		uvm_unmap1(map, vm_map_min(map), vm_map_max(map), flags);
		map->flags &= ~VM_MAP_DYING;
		pmap_update(map->pmap);
		KASSERT(map->header.prev == &map->header);
		KASSERT(map->nentries == 0);

		/*
		 * resize the map
		 */

		vm_map_setmin(map, start);
		vm_map_setmax(map, end);
	} else {

		/*
		 * p's vmspace is being shared, so we can't reuse it for p since
		 * it is still being used for others.   allocate a new vmspace
		 * for p
		 */

		nvm = uvmspace_alloc(start, end, topdown);

		/*
		 * install new vmspace and drop our ref to the old one.
		 */

		kpreempt_disable();
		pmap_deactivate(l);
		p->p_vmspace = nvm;
		pmap_activate(l);
		kpreempt_enable();

		uvmspace_free(ovm);
	}
}

/*
 * uvmspace_addref: add a reference to a vmspace.
 */

void
uvmspace_addref(struct vmspace *vm)
{

	/* a dying vmspace must never gain new references */
	KASSERT((vm->vm_map.flags & VM_MAP_DYING) == 0);
	KASSERT(vm->vm_refcnt > 0);
	atomic_inc_uint(&vm->vm_refcnt);
}

/*
 * uvmspace_free: free a vmspace data structure
 *
 * => tears everything down only when the last reference is dropped;
 *    earlier callers just decrement the refcount and return.
 */

void
uvmspace_free(struct vmspace *vm)
{
	struct vm_map_entry *dead_entries;
	struct vm_map *map = &vm->vm_map;
	int flags;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(vm=%#jx) ref=%jd", (uintptr_t)vm,
	    vm->vm_refcnt, 0, 0);

	/* release/acquire pairing orders prior stores before teardown */
	membar_release();
	if (atomic_dec_uint_nv(&vm->vm_refcnt) > 0)
		return;
	membar_acquire();

	/*
	 * at this point, there should be no other references to the map.
	 * delete all of the mappings, then destroy the pmap.
	 */

	map->flags |= VM_MAP_DYING;
	flags = pmap_remove_all(map->pmap) ? UVM_FLAG_VAONLY : 0;

	/* Get rid of any SYSV shared memory segments. */
	if (uvm_shmexit && vm->vm_shm != NULL)
		(*uvm_shmexit)(vm);

	if (map->nentries) {
		uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
		    &dead_entries, flags);
		if (dead_entries != NULL)
			uvm_unmap_detach(dead_entries, 0);
	}
	KASSERT(map->nentries == 0);
	KASSERT(map->size == 0);

	mutex_destroy(&map->misc_lock);
	rw_destroy(&map->lock);
	cv_destroy(&map->cv);
	pmap_destroy(map->pmap);
	pool_cache_put(&uvm_vmspace_cache, vm);
}

/*
 * uvm_mapent_clone: duplicate "old_entry" into "new_map", taking the
 * appropriate extra references on its amap/object backing.
 */

static struct vm_map_entry *
uvm_mapent_clone(struct vm_map *new_map, struct vm_map_entry *old_entry,
    int flags)
{
	struct vm_map_entry *new_entry;

	new_entry = uvm_mapent_alloc(new_map, 0);
	/* old_entry -> new_entry */
	uvm_mapent_copy(old_entry, new_entry);

	/* new pmap has nothing wired in it */
	new_entry->wired_count = 0;

	/*
	 * gain reference to object backing the map (can't
	 * be a submap, already checked this case).
	 */

	if (new_entry->aref.ar_amap)
		uvm_map_reference_amap(new_entry, flags);

	if (new_entry->object.uvm_obj &&
	    new_entry->object.uvm_obj->pgops->pgo_reference)
		new_entry->object.uvm_obj->pgops->pgo_reference(
			new_entry->object.uvm_obj);

	/* insert entry at end of new_map's entry list */
	uvm_map_entry_link(new_map, new_map->header.prev, new_entry);

	return new_entry;
}

/*
 * share the mapping: this means we want the old and
 * new entries to share amaps and backing objects.
 */

static void
uvm_mapent_forkshared(struct vm_map *new_map, struct vm_map *old_map,
    struct vm_map_entry *old_entry)
{
	/*
	 * if the old_entry needs a new amap (due to prev fork)
	 * then we need to allocate it now so that we have
	 * something we own to share with the new_entry.   [in
	 * other words, we need to clear needs_copy]
	 */

	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
		/* get our own amap, clears needs_copy */
		amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 0, 0);
		/* XXXCDC: WAITOK??? */
	}

	uvm_mapent_clone(new_map, old_entry, AMAP_SHARED);
}


static void
uvm_mapent_forkcopy(struct vm_map *new_map, struct vm_map *old_map,
    struct vm_map_entry *old_entry)
{
	struct vm_map_entry *new_entry;

	/*
	 * copy-on-write the mapping (using mmap's
	 * MAP_PRIVATE semantics)
	 *
	 * allocate new_entry, adjust reference counts.
	 * (note that new references are read-only).
	 */

	new_entry = uvm_mapent_clone(new_map, old_entry, 0);

	new_entry->etype |=
	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);

	/*
	 * the new entry will need an amap.  it will either
	 * need to be copied from the old entry or created
	 * from scratch (if the old entry does not have an
	 * amap).  can we defer this process until later
	 * (by setting "needs_copy") or do we need to copy
	 * the amap now?
	 *
	 * we must copy the amap now if any of the following
	 * conditions hold:
	 * 1. the old entry has an amap and that amap is
	 *    being shared.  this means that the old (parent)
	 *    process is sharing the amap with another
	 *    process.  if we do not clear needs_copy here
	 *    we will end up in a situation where both the
	 *    parent and child process are referring to the
	 *    same amap with "needs_copy" set.  if the
	 *    parent write-faults, the fault routine will
	 *    clear "needs_copy" in the parent by allocating
	 *    a new amap.   this is wrong because the
	 *    parent is supposed to be sharing the old amap
	 *    and the new amap will break that.
	 *
	 * 2. if the old entry has an amap and a non-zero
	 *    wire count then we are going to have to call
	 *    amap_cow_now to avoid page faults in the
	 *    parent process.   since amap_cow_now requires
	 *    "needs_copy" to be clear we might as well
	 *    clear it here as well.
	 *
	 */

	if (old_entry->aref.ar_amap != NULL) {
		if ((amap_flags(old_entry->aref.ar_amap) & AMAP_SHARED) != 0 ||
		    VM_MAPENT_ISWIRED(old_entry)) {

			amap_copy(new_map, new_entry,
			    AMAP_COPY_NOCHUNK, 0, 0);
			/* XXXCDC: M_WAITOK ... ok? */
		}
	}

	/*
	 * if the parent's entry is wired down, then the
	 * parent process does not want page faults on
	 * access to that memory.  this means that we
	 * cannot do copy-on-write because we can't write
	 * protect the old entry.   in this case we
	 * resolve all copy-on-write faults now, using
	 * amap_cow_now.   note that we have already
	 * allocated any needed amap (above).
	 */

	if (VM_MAPENT_ISWIRED(old_entry)) {

		/*
		 * resolve all copy-on-write faults now
		 * (note that there is nothing to do if
		 * the old mapping does not have an amap).
		 */
		if (old_entry->aref.ar_amap)
			amap_cow_now(new_map, new_entry);

	} else {
		/*
		 * setup mappings to trigger copy-on-write faults
		 * we must write-protect the parent if it has
		 * an amap and it is not already "needs_copy"...
		 * if it is already "needs_copy" then the parent
		 * has already been write-protected by a previous
		 * fork operation.
		 */
		if (old_entry->aref.ar_amap &&
		    !UVM_ET_ISNEEDSCOPY(old_entry)) {
			if (old_entry->max_protection & VM_PROT_WRITE) {
#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
				uvm_map_lock_entry(old_entry, RW_WRITER);
#else
				uvm_map_lock_entry(old_entry, RW_READER);
#endif
				pmap_protect(old_map->pmap,
				    old_entry->start, old_entry->end,
				    old_entry->protection & ~VM_PROT_WRITE);
				uvm_map_unlock_entry(old_entry);
			}
			old_entry->etype |= UVM_ET_NEEDSCOPY;
		}
	}
}

/*
 * zero the mapping: the new entry will be zero initialized
 */

static void
uvm_mapent_forkzero(struct vm_map *new_map, struct vm_map *old_map,
    struct vm_map_entry *old_entry)
{
	struct vm_map_entry *new_entry;

	new_entry = uvm_mapent_clone(new_map, old_entry, 0);

	new_entry->etype |=
	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);

	/* drop the cloned amap reference: zero-fill starts with none */
	if (new_entry->aref.ar_amap) {
		uvm_map_unreference_amap(new_entry, 0);
		new_entry->aref.ar_pageoff = 0;
		new_entry->aref.ar_amap = NULL;
	}

	/* likewise detach from the backing object, if any */
	if (UVM_ET_ISOBJ(new_entry)) {
		if (new_entry->object.uvm_obj->pgops->pgo_detach)
			new_entry->object.uvm_obj->pgops->pgo_detach(
			    new_entry->object.uvm_obj);
		new_entry->object.uvm_obj = NULL;
		new_entry->offset = 0;
		new_entry->etype &= ~UVM_ET_OBJ;
	}
}

/*
 *   F O R K   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvmspace_fork: fork a process' main map
 *
 * => create a new vmspace for child process from parent.
 * => parent's map must not be locked.
 */

struct vmspace *
uvmspace_fork(struct vmspace *vm1)
{
	struct vmspace *vm2;
	struct vm_map *old_map = &vm1->vm_map;
	struct vm_map *new_map;
	struct vm_map_entry *old_entry;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/* hold the parent's map locked across the whole copy */
	vm_map_lock(old_map);

	vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
	    vm1->vm_map.flags & VM_MAP_TOPDOWN);
	/* copy the non-map portion of the vmspace (vm_startcopy..end) */
	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
	    (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
	new_map = &vm2->vm_map;		  /* XXX */

	old_entry = old_map->header.next;
	new_map->size = old_map->size;

	/*
	 * go entry-by-entry
	 */

	while (old_entry != &old_map->header) {

		/*
		 * first, some sanity checks on the old entry
		 */

		KASSERT(!UVM_ET_ISSUBMAP(old_entry));
		KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
			!UVM_ET_ISNEEDSCOPY(old_entry));

		switch (old_entry->inheritance) {
		case MAP_INHERIT_NONE:
			/*
			 * drop the mapping, modify size
			 */
			new_map->size -= old_entry->end - old_entry->start;
			break;

		case MAP_INHERIT_SHARE:
			uvm_mapent_forkshared(new_map, old_map, old_entry);
			break;

		case MAP_INHERIT_COPY:
			uvm_mapent_forkcopy(new_map, old_map, old_entry);
			break;

		case MAP_INHERIT_ZERO:
			uvm_mapent_forkzero(new_map, old_map, old_entry);
			break;
		default:
			/* uvm_map_inherit only accepts the four codes above */
			KASSERT(0);
			break;
		}
		old_entry = old_entry->next;
	}

	/* flush any pmap changes made by the fork helpers (pmap_protect) */
	pmap_update(old_map->pmap);
	vm_map_unlock(old_map);

	if (uvm_shmfork && vm1->vm_shm)
		(*uvm_shmfork)(vm1, vm2);

#ifdef PMAP_FORK
	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
#endif

	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
	return (vm2);
}


/*
 * uvm_mapent_trymerge: try to merge an entry with its neighbors.
 *
 * => called with map locked.
 * => return non zero if successfully merged.
 */

int
uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
{
	struct uvm_object *uobj;
	struct vm_map_entry *next;
	struct vm_map_entry *prev;
	vsize_t size;
	int merged = 0;
	bool copying;
	int newetype;

	/* entries with their own amap are never merged */
	if (entry->aref.ar_amap != NULL) {
		return 0;
	}
	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
		return 0;
	}

	uobj = entry->object.uvm_obj;
	size = entry->end - entry->start;
	copying = (flags & UVM_MERGE_COPYING) != 0;
	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;

	/* try the following entry first (forward merge) */
	next = entry->next;
	if (next != &map->header &&
	    next->start == entry->end &&
	    ((copying && next->aref.ar_amap != NULL &&
	    amap_refs(next->aref.ar_amap) == 1) ||
	    (!copying && next->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(next, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL || entry->offset + size == next->offset)) {
		int error;

		if (copying) {
			error = amap_extend(next, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				/* drop one of the two object references */
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
			}

			entry->end = next->end;
			clear_hints(map, next);
			uvm_map_entry_unlink(map, next);
			if (copying) {
				entry->aref = next->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge forwardmerge");
			uvm_mapent_free(next);
			merged++;
		}
	}

	/* then try the preceding entry (backward merge) */
	prev = entry->prev;
	if (prev != &map->header &&
	    prev->end == entry->start &&
	    ((copying && !merged && prev->aref.ar_amap != NULL &&
	    amap_refs(prev->aref.ar_amap) == 1) ||
	    (!copying && prev->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(prev, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL ||
	    prev->offset + prev->end - prev->start == entry->offset)) {
		int error;

		if (copying) {
			error = amap_extend(prev, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
				entry->offset = prev->offset;
			}

			entry->start = prev->start;
			clear_hints(map, prev);
			uvm_map_entry_unlink(map, prev);
			if (copying) {
				entry->aref = prev->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge backmerge");
			uvm_mapent_free(prev);
			merged++;
		}
	}

	return merged;
}

/*
 * uvm_map_setup: init map
 *
 * => map must not be in service yet.
 */

void
uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
{

	rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->ref_count = 1;
	vm_map_setmin(map, vmin);
	vm_map_setmax(map, vmax);
	map->flags = flags;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	map->busy = NULL;

	rw_init(&map->lock);
	cv_init(&map->cv, "vm_map");
	mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE);
}

/*
 *   U N M A P   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be unlocked (we will lock it)
 * => flags is UVM_FLAG_QUANTUM or 0.
 */

void
uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{
	struct vm_map_entry *dead_entries;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (map=%#jx, start=%#jx, end=%#jx)",
	    (uintptr_t)map, start, end, 0);

	KASSERTMSG(start < end,
	    "%s: map %p: start %#jx < end %#jx", __func__, map,
	    (uintmax_t)start, (uintmax_t)end);
	if (map == kernel_map) {
		LOCKDEBUG_MEM_CHECK((void *)start, end - start);
	}

	/*
	 * work now done by helper functions.   wipe the pmap's and then
	 * detach from the dead entries...
	 */
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, flags);
	vm_map_unlock(map);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}


/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked
 */

void
uvm_map_reference(struct vm_map *map)
{

	atomic_inc_uint(&map->ref_count);
}

/*
 * uvm_map_lock_entry: lock the amap and/or object backing an entry.
 * lock order is amap first, then object; uvm_map_unlock_entry reverses it.
 */

void
uvm_map_lock_entry(struct vm_map_entry *entry, krw_t op)
{

	if (entry->aref.ar_amap != NULL) {
		amap_lock(entry->aref.ar_amap, op);
	}
	if (UVM_ET_ISOBJ(entry)) {
		rw_enter(entry->object.uvm_obj->vmobjlock, op);
	}
}

void
uvm_map_unlock_entry(struct vm_map_entry *entry)
{

	if (UVM_ET_ISOBJ(entry)) {
		rw_exit(entry->object.uvm_obj->vmobjlock);
	}
	if (entry->aref.ar_amap != NULL) {
		amap_unlock(entry->aref.ar_amap);
	}
}

/*
 * A uvm_voaddr packs a tagged pointer (uobj or anon) into its "object"
 * field; the low 2 bits hold the type tag, the rest the pointer.
 */
#define	UVM_VOADDR_TYPE_MASK	0x3UL
#define	UVM_VOADDR_TYPE_UOBJ	0x1UL
#define	UVM_VOADDR_TYPE_ANON	0x2UL
#define	UVM_VOADDR_OBJECT_MASK	~UVM_VOADDR_TYPE_MASK

#define	UVM_VOADDR_GET_TYPE(voa)					\
	((voa)->object & UVM_VOADDR_TYPE_MASK)
#define	UVM_VOADDR_GET_OBJECT(voa)					\
	((voa)->object & UVM_VOADDR_OBJECT_MASK)
#define	UVM_VOADDR_SET_OBJECT(voa, obj, type)				\
do {									\
	KASSERT(((uintptr_t)(obj) & UVM_VOADDR_TYPE_MASK) == 0);	\
	(voa)->object = ((uintptr_t)(obj)) | (type);			\
} while (/*CONSTCOND*/0)

#define	UVM_VOADDR_GET_UOBJ(voa)					\
	((struct uvm_object *)UVM_VOADDR_GET_OBJECT(voa))
#define	UVM_VOADDR_SET_UOBJ(voa, uobj)					\
	UVM_VOADDR_SET_OBJECT(voa, uobj, UVM_VOADDR_TYPE_UOBJ)
#define	UVM_VOADDR_GET_ANON(voa)					\
	((struct vm_anon *)UVM_VOADDR_GET_OBJECT(voa))
#define	UVM_VOADDR_SET_ANON(voa, anon)					\
	UVM_VOADDR_SET_OBJECT(voa, anon, UVM_VOADDR_TYPE_ANON)

/*
 * uvm_voaddr_acquire: returns the virtual object address corresponding
 * to the specified virtual address.
 *
 * => resolves COW so the true page identity is tracked.
* * => acquires a reference on the page's owner (uvm_object or vm_anon) */ bool uvm_voaddr_acquire(struct vm_map * const map, vaddr_t const va, struct uvm_voaddr * const voaddr) { struct vm_map_entry *entry; struct vm_anon *anon = NULL; bool result = false; bool exclusive = false; void (*unlock_fn)(struct vm_map *); UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=%#jx,va=%#jx)", (uintptr_t)map, va, 0, 0); const vaddr_t start = trunc_page(va); const vaddr_t end = round_page(va+1); lookup_again: if (__predict_false(exclusive)) { vm_map_lock(map); unlock_fn = vm_map_unlock; } else { vm_map_lock_read(map); unlock_fn = vm_map_unlock_read; } if (__predict_false(!uvm_map_lookup_entry(map, start, &entry))) { unlock_fn(map); UVMHIST_LOG(maphist,"<- done (no entry)",0,0,0,0); return false; } if (__predict_false(entry->protection == VM_PROT_NONE)) { unlock_fn(map); UVMHIST_LOG(maphist,"<- done (PROT_NONE)",0,0,0,0); return false; } /* * We have a fast path for the common case of "no COW resolution * needed" whereby we have taken a read lock on the map and if * we don't encounter any need to create a vm_anon then great! * But if we do, we loop around again, instead taking an exclusive * lock so that we can perform the fault. * * In the event that we have to resolve the fault, we do nearly the * same work as uvm_map_pageable() does: * * 1: holding the write lock, we create any anonymous maps that need * to be created. however, we do NOT need to clip the map entries * in this case. * * 2: we downgrade to a read lock, and call uvm_fault_wire to fault * in the page (assuming the entry is not already wired). this * is done because we need the vm_anon to be present. */ if (__predict_true(!VM_MAPENT_ISWIRED(entry))) { bool need_fault = false; /* * perform the action of vm_map_lookup that need the * write lock on the map: create an anonymous map for * a copy-on-write region, or an anonymous map for * a zero-fill region. 
*/ if (__predict_false(UVM_ET_ISSUBMAP(entry))) { unlock_fn(map); UVMHIST_LOG(maphist,"<- done (submap)",0,0,0,0); return false; } if (__predict_false(UVM_ET_ISNEEDSCOPY(entry) && ((entry->max_protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL)))) { if (!exclusive) { /* need to take the slow path */ KASSERT(unlock_fn == vm_map_unlock_read); vm_map_unlock_read(map); exclusive = true; goto lookup_again; } need_fault = true; amap_copy(map, entry, 0, start, end); /* XXXCDC: wait OK? */ } /* * do a quick check to see if the fault has already * been resolved to the upper layer. */ if (__predict_true(entry->aref.ar_amap != NULL && need_fault == false)) { amap_lock(entry->aref.ar_amap, RW_WRITER); anon = amap_lookup(&entry->aref, start - entry->start); if (__predict_true(anon != NULL)) { /* amap unlocked below */ goto found_anon; } amap_unlock(entry->aref.ar_amap); need_fault = true; } /* * we predict this test as false because if we reach * this point, then we are likely dealing with a * shared memory region backed by a uvm_object, in * which case a fault to create the vm_anon is not * necessary. */ if (__predict_false(need_fault)) { if (exclusive) { vm_map_busy(map); vm_map_unlock(map); unlock_fn = vm_map_unbusy; } if (uvm_fault_wire(map, start, end, entry->max_protection, 1)) { /* wiring failed */ unlock_fn(map); UVMHIST_LOG(maphist,"<- done (wire failed)", 0,0,0,0); return false; } /* * now that we have resolved the fault, we can unwire * the page. 
*/ if (exclusive) { vm_map_lock(map); vm_map_unbusy(map); unlock_fn = vm_map_unlock; } uvm_fault_unwire_locked(map, start, end); } } /* check the upper layer */ if (entry->aref.ar_amap) { amap_lock(entry->aref.ar_amap, RW_WRITER); anon = amap_lookup(&entry->aref, start - entry->start); if (anon) { found_anon: KASSERT(anon->an_lock == entry->aref.ar_amap->am_lock); anon->an_ref++; rw_obj_hold(anon->an_lock); KASSERT(anon->an_ref != 0); UVM_VOADDR_SET_ANON(voaddr, anon); voaddr->offset = va & PAGE_MASK; result = true; } amap_unlock(entry->aref.ar_amap); } /* check the lower layer */ if (!result && UVM_ET_ISOBJ(entry)) { struct uvm_object *uobj = entry->object.uvm_obj; KASSERT(uobj != NULL); (*uobj->pgops->pgo_reference)(uobj); UVM_VOADDR_SET_UOBJ(voaddr, uobj); voaddr->offset = entry->offset + (va - entry->start); result = true; } unlock_fn(map); if (result) { UVMHIST_LOG(maphist, "<- done OK (type=%jd,owner=%#jx,offset=%#jx)", UVM_VOADDR_GET_TYPE(voaddr), UVM_VOADDR_GET_OBJECT(voaddr), voaddr->offset, 0); } else { UVMHIST_LOG(maphist,"<- done (failed)",0,0,0,0); } return result; } /* * uvm_voaddr_release: release the references held by the * vitual object address. */ void uvm_voaddr_release(struct uvm_voaddr * const voaddr) { switch (UVM_VOADDR_GET_TYPE(voaddr)) { case UVM_VOADDR_TYPE_UOBJ: { struct uvm_object * const uobj = UVM_VOADDR_GET_UOBJ(voaddr); KASSERT(uobj != NULL); KASSERT(uobj->pgops->pgo_detach != NULL); (*uobj->pgops->pgo_detach)(uobj); break; } case UVM_VOADDR_TYPE_ANON: { struct vm_anon * const anon = UVM_VOADDR_GET_ANON(voaddr); krwlock_t *lock; KASSERT(anon != NULL); rw_enter((lock = anon->an_lock), RW_WRITER); KASSERT(anon->an_ref > 0); if (--anon->an_ref == 0) { uvm_anfree(anon); } rw_exit(lock); rw_obj_free(lock); break; } default: panic("uvm_voaddr_release: bad type"); } memset(voaddr, 0, sizeof(*voaddr)); } /* * uvm_voaddr_compare: compare two uvm_voaddr objects. 
* * => memcmp() semantics */ int uvm_voaddr_compare(const struct uvm_voaddr * const voaddr1, const struct uvm_voaddr * const voaddr2) { const uintptr_t type1 = UVM_VOADDR_GET_TYPE(voaddr1); const uintptr_t type2 = UVM_VOADDR_GET_TYPE(voaddr2); KASSERT(type1 == UVM_VOADDR_TYPE_UOBJ || type1 == UVM_VOADDR_TYPE_ANON); KASSERT(type2 == UVM_VOADDR_TYPE_UOBJ || type2 == UVM_VOADDR_TYPE_ANON); if (type1 < type2) return -1; if (type1 > type2) return 1; const uintptr_t addr1 = UVM_VOADDR_GET_OBJECT(voaddr1); const uintptr_t addr2 = UVM_VOADDR_GET_OBJECT(voaddr2); if (addr1 < addr2) return -1; if (addr1 > addr2) return 1; if (voaddr1->offset < voaddr2->offset) return -1; if (voaddr1->offset > voaddr2->offset) return 1; return 0; } #if defined(DDB) || defined(DEBUGPRINT) /* * uvm_map_printit: actually prints the map */ void uvm_map_printit(struct vm_map *map, bool full, void (*pr)(const char *, ...)) { struct vm_map_entry *entry; (*pr)("MAP %p: [%#lx->%#lx]\n", map, vm_map_min(map), vm_map_max(map)); (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=%#x\n", map->nentries, map->size, map->ref_count, map->timestamp, map->flags); (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap, pmap_resident_count(map->pmap), pmap_wired_count(map->pmap)); if (!full) return; for (entry = map->header.next; entry != &map->header; entry = entry->next) { (*pr)(" - %p: %#lx->%#lx: obj=%p/%#llx, amap=%p/%d\n", entry, entry->start, entry->end, entry->object.uvm_obj, (long long)entry->offset, entry->aref.ar_amap, entry->aref.ar_pageoff); (*pr)( "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " "wc=%d, adv=%d%s\n", (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', entry->protection, entry->max_protection, entry->inheritance, entry->wired_count, entry->advice, entry == map->first_free ? 
" (first_free)" : ""); } } void uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...)) { struct vm_map *map; for (map = kernel_map;;) { struct vm_map_entry *entry; if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) { break; } (*pr)("%p is %p+%zu from VMMAP %p\n", (void *)addr, (void *)entry->start, (size_t)(addr - (uintptr_t)entry->start), map); if (!UVM_ET_ISSUBMAP(entry)) { break; } map = entry->object.sub_map; } } #endif /* DDB || DEBUGPRINT */ #ifndef __USER_VA0_IS_SAFE static int sysctl_user_va0_disable(SYSCTLFN_ARGS) { struct sysctlnode node; int t, error; node = *rnode; node.sysctl_data = &t; t = user_va0_disable; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return (error); if (!t && user_va0_disable && kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0, NULL, NULL, NULL)) return EPERM; user_va0_disable = !!t; return 0; } #endif static int fill_vmentry(struct lwp *l, struct proc *p, struct kinfo_vmentry *kve, struct vm_map *m, struct vm_map_entry *e) { #ifndef _RUMPKERNEL int error; memset(kve, 0, sizeof(*kve)); KASSERT(e != NULL); if (UVM_ET_ISOBJ(e)) { struct uvm_object *uobj = e->object.uvm_obj; KASSERT(uobj != NULL); kve->kve_ref_count = uobj->uo_refs; kve->kve_count = uobj->uo_npages; if (UVM_OBJ_IS_VNODE(uobj)) { struct vattr va; struct vnode *vp = (struct vnode *)uobj; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &va, l->l_cred); VOP_UNLOCK(vp); kve->kve_type = KVME_TYPE_VNODE; if (error == 0) { kve->kve_vn_size = vp->v_size; kve->kve_vn_type = (int)vp->v_type; kve->kve_vn_mode = va.va_mode; kve->kve_vn_rdev = va.va_rdev; kve->kve_vn_fileid = va.va_fileid; kve->kve_vn_fsid = va.va_fsid; error = vnode_to_path(kve->kve_path, sizeof(kve->kve_path) / 2, vp, l, p); } } else if (UVM_OBJ_IS_KERN_OBJECT(uobj)) { kve->kve_type = KVME_TYPE_KERN; } else if (UVM_OBJ_IS_DEVICE(uobj)) { kve->kve_type = KVME_TYPE_DEVICE; } else if (UVM_OBJ_IS_AOBJ(uobj)) { kve->kve_type = KVME_TYPE_ANON; } else { 
kve->kve_type = KVME_TYPE_OBJECT; } } else if (UVM_ET_ISSUBMAP(e)) { struct vm_map *map = e->object.sub_map; KASSERT(map != NULL); kve->kve_ref_count = map->ref_count; kve->kve_count = map->nentries; kve->kve_type = KVME_TYPE_SUBMAP; } else kve->kve_type = KVME_TYPE_UNKNOWN; kve->kve_start = e->start; kve->kve_end = e->end; kve->kve_offset = e->offset; kve->kve_wired_count = e->wired_count; kve->kve_inheritance = e->inheritance; kve->kve_attributes = 0; /* unused */ kve->kve_advice = e->advice; #define PROT(p) (((p) & VM_PROT_READ) ? KVME_PROT_READ : 0) | \ (((p) & VM_PROT_WRITE) ? KVME_PROT_WRITE : 0) | \ (((p) & VM_PROT_EXECUTE) ? KVME_PROT_EXEC : 0) kve->kve_protection = PROT(e->protection); kve->kve_max_protection = PROT(e->max_protection); kve->kve_flags |= (e->etype & UVM_ET_COPYONWRITE) ? KVME_FLAG_COW : 0; kve->kve_flags |= (e->etype & UVM_ET_NEEDSCOPY) ? KVME_FLAG_NEEDS_COPY : 0; kve->kve_flags |= (m->flags & VM_MAP_TOPDOWN) ? KVME_FLAG_GROWS_DOWN : KVME_FLAG_GROWS_UP; kve->kve_flags |= (m->flags & VM_MAP_PAGEABLE) ? 
KVME_FLAG_PAGEABLE : 0; #endif return 0; } static int fill_vmentries(struct lwp *l, pid_t pid, u_int elem_size, void *oldp, size_t *oldlenp) { int error; struct proc *p; struct kinfo_vmentry *vme; struct vmspace *vm; struct vm_map *map; struct vm_map_entry *entry; char *dp; size_t count, vmesize; if (elem_size == 0 || elem_size > 2 * sizeof(*vme)) return EINVAL; if (oldp) { if (*oldlenp > 10UL * 1024UL * 1024UL) return E2BIG; count = *oldlenp / elem_size; if (count == 0) return ENOMEM; vmesize = count * sizeof(*vme); } else vmesize = 0; if ((error = proc_find_locked(l, &p, pid)) != 0) return error; vme = NULL; count = 0; if ((error = proc_vmspace_getref(p, &vm)) != 0) goto out; map = &vm->vm_map; vm_map_lock_read(map); dp = oldp; if (oldp) vme = kmem_alloc(vmesize, KM_SLEEP); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (oldp && (dp - (char *)oldp) < vmesize) { error = fill_vmentry(l, p, &vme[count], map, entry); if (error) goto out; dp += elem_size; } count++; } vm_map_unlock_read(map); uvmspace_free(vm); out: if (pid != -1) mutex_exit(p->p_lock); if (error == 0) { const u_int esize = uimin(sizeof(*vme), elem_size); dp = oldp; for (size_t i = 0; i < count; i++) { if (oldp && (dp - (char *)oldp) < vmesize) { error = sysctl_copyout(l, &vme[i], dp, esize); if (error) break; dp += elem_size; } else break; } count *= elem_size; if (oldp != NULL && *oldlenp < count) error = ENOSPC; *oldlenp = count; } if (vme) kmem_free(vme, vmesize); return error; } static int sysctl_vmproc(SYSCTLFN_ARGS) { int error; if (namelen == 1 && name[0] == CTL_QUERY) return (sysctl_query(SYSCTLFN_CALL(rnode))); if (namelen == 0) return EINVAL; switch (name[0]) { case VM_PROC_MAP: if (namelen != 3) return EINVAL; sysctl_unlock(); error = fill_vmentries(l, name[1], name[2], oldp, oldlenp); sysctl_relock(); return error; default: return EINVAL; } } SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup") { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, 
CTLTYPE_STRUCT, "proc", SYSCTL_DESCR("Process vm information"), sysctl_vmproc, 0, NULL, 0, CTL_VM, VM_PROC, CTL_EOL); #ifndef __USER_VA0_IS_SAFE sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "user_va0_disable", SYSCTL_DESCR("Disable VA 0"), sysctl_user_va0_disable, 0, &user_va0_disable, 0, CTL_VM, CTL_CREATE, CTL_EOL); #endif } |
| 58 40 387 426 77 23 11 54 67 357 26 26 26 6 21 358 4 1 1 134 134 4 3 132 132 1 4 131 2 1 107 140 7 138 5 136 96 64 1 1 95 95 133 3 1 131 2 129 129 1 45 45 127 2 132 87 132 88 133 26 19 25 1 24 2 22 22 1 21 9 9 6 6 6 6 6 22 22 3 19 19 16 4 4 4 1 4 2 4 22 22 4 2 3 5 3 5 14 14 14 10 8 7 11 9 1 1 1 9 4 5 10 6 5 4 4 3 1 2 2 2 2 3 5 5 4 4 3 2 1 2 2 3 4 5 15 14 14 6 1 8 4 7 19 18 16 4 11 10 2 9 663 5 662 662 663 659 104 555 199 197 358 353 669 5 5 669 664 135 6 4 662 4 655 658 500 410 491 184 149 182 3 3 3 3 2 3 6 6 5 5 5 6 1 1 32 29 23 13 12 2 17 31 10 9 8 8 1 6 5 4 4 3 5 18 17 3 17 16 18 13 5 4 13 9 6 14 13 13 5 3 8 8 6 5 6 3 3 1 1 17 16 77 69 24 71 148 142 147 145 145 137 135 3 111 3 7 122 116 7 121 125 9 1 8 9 2 140 16 13 15 3 12 2 1 1 2 10 8 8 20 11 9 12 10 8 6 5 5 10 11 18 3 1 2 3 2 9 11 37 37 35 29 34 33 2 1 1 2 31 31 31 34 37 15 22 7 6 6 1 4 2 1 1 2 2 2 2 2 10 10 32 32 29 30 30 19 20 20 10 8 9 8 4 1 3 4 28 12 10 11 10 9 8 19 18 2 16 14 13 3 15 104 28 28 1 27 25 24 3 21 24 4 13 11 11 11 10 9 10 2 6 4 4 1 4 3 4 3 1 7 8 9 34 58 34 58 48 55 17 31 8 7 6 5 10 10 9 42 41 41 1 39 42 3 13 12 11 5 5 4 4 4 3 5 10 5 3 4 5 3 5 11 8 10 10 11 4 7 10 18 5 3 4 11 6 3 5 7 11 3 8 10 14 9 7 8 4 2 4 10 4 4 8 5 7 3 2 23 23 7 2 7 2 16 10 6 14 17 23 21 5 5 4 5 6 16 16 33 43 43 43 43 18 25 11 14 41 2 1 39 1 41 26 18 26 26 25 26 26 8 26 14 26 39 20 33 20 7 6 19 2 17 16 18 18 14 14 32 30 27 28 27 26 25 24 10 9 6 4 4 4 3 5 8 5 3 4 6 4 4 37 14 8 59 60 57 57 55 56 50 50 50 50 50 50 1 1 48 44 44 44 23 44 44 43 44 44 44 44 44 24 44 43 24 14 42 40 12 10 8 31 31 31 31 13 12 13 13 12 19 53 54 56 81 34 113 106 111 7 104 2 1 1 2 103 98 98 12 13 11 10 9 9 10 11 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 
1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 
1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 
1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 
2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 254 |