/*	$OpenBSD: uipc_mbuf2.c,v 1.45 2020/12/12 11:48:54 jan Exp $	*/
/*	$KAME: uipc_mbuf2.c,v 1.29 2001/02/14 13:42:10 itojun Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $	*/

/*
 * Copyright (C) 1999 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/pool.h> #include <sys/mbuf.h> extern struct pool mtagpool; /* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */ static struct mbuf *m_dup1(struct mbuf *, int, int, int); /* * ensure that [off, off + len] is contiguous on the mbuf chain "m". * packet chain before "off" is kept untouched. * if offp == NULL, the target will start at <retval, 0> on resulting chain. * if offp != NULL, the target will start at <retval, *offp> on resulting chain. * * on error return (NULL return value), original "m" will be freed. * * XXX m_trailingspace/m_leadingspace on shared cluster (sharedcluster) */ struct mbuf * m_pulldown(struct mbuf *m, int off, int len, int *offp) { struct mbuf *n, *o; int hlen, tlen, olen; int sharedcluster; /* check invalid arguments. */ if (m == NULL) panic("m == NULL in m_pulldown()"); if ((n = m_getptr(m, off, &off)) == NULL) { m_freem(m); return (NULL); /* mbuf chain too short */ } sharedcluster = M_READONLY(n); /* * the target data is on <n, off>. * if we got enough data on the mbuf "n", we're done. */ if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) goto ok; /* * when len <= n->m_len - off and off != 0, it is a special case. * len bytes from <n, off> sits in single mbuf, but the caller does * not like the starting position (off). * chop the current mbuf into two pieces, set off to 0. */ if (len <= n->m_len - off) { struct mbuf *mlast; o = m_dup1(n, off, n->m_len - off, M_DONTWAIT); if (o == NULL) { m_freem(m); return (NULL); /* ENOBUFS */ } for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) ; n->m_len = off; mlast->m_next = n->m_next; n->m_next = o; n = o; off = 0; goto ok; } /* * we need to take hlen from <n, off> and tlen from <n->m_next, 0>, * and construct contiguous mbuf with m_len == len. * note that hlen + tlen == len, and tlen > 0. */ hlen = n->m_len - off; tlen = len - hlen; /* * ensure that we have enough trailing data on mbuf chain. * if not, we can do nothing about the chain. 
*/ olen = 0; for (o = n->m_next; o != NULL; o = o->m_next) olen += o->m_len; if (hlen + olen < len) { m_freem(m); return (NULL); /* mbuf chain too short */ } /* * easy cases first. * we need to use m_copydata() to get data from <n->m_next, 0>. */ if ((off == 0 || offp) && m_trailingspace(n) >= tlen && !sharedcluster) { m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len); n->m_len += tlen; m_adj(n->m_next, tlen); goto ok; } if ((off == 0 || offp) && m_leadingspace(n->m_next) >= hlen && !sharedcluster && n->m_next->m_len >= tlen) { n->m_next->m_data -= hlen; n->m_next->m_len += hlen; memmove(mtod(n->m_next, caddr_t), mtod(n, caddr_t) + off, hlen); n->m_len -= hlen; n = n->m_next; off = 0; goto ok; } /* * now, we need to do the hard way. don't m_copym as there's no room * on both ends. */ if (len > MAXMCLBYTES) { m_freem(m); return (NULL); } MGET(o, M_DONTWAIT, m->m_type); if (o && len > MLEN) { MCLGETL(o, M_DONTWAIT, len); if ((o->m_flags & M_EXT) == 0) { m_free(o); o = NULL; } } if (!o) { m_freem(m); return (NULL); /* ENOBUFS */ } /* get hlen from <n, off> into <o, 0> */ o->m_len = hlen; memmove(mtod(o, caddr_t), mtod(n, caddr_t) + off, hlen); n->m_len -= hlen; /* get tlen from <n->m_next, 0> into <o, hlen> */ m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len); o->m_len += tlen; m_adj(n->m_next, tlen); o->m_next = n->m_next; n->m_next = o; n = o; off = 0; ok: if (offp) *offp = off; return (n); } static struct mbuf * m_dup1(struct mbuf *m, int off, int len, int wait) { struct mbuf *n; int l; if (len > MAXMCLBYTES) return (NULL); if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { MGETHDR(n, wait, m->m_type); if (n == NULL) return (NULL); if (m_dup_pkthdr(n, m, wait)) { m_free(n); return (NULL); } l = MHLEN; } else { MGET(n, wait, m->m_type); l = MLEN; } if (n && len > l) { MCLGETL(n, wait, len); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!n) return (NULL); m_copydata(m, off, len, mtod(n, caddr_t)); n->m_len = len; return (n); } /* Get a packet tag structure along with specified data following. */ struct m_tag * m_tag_get(int type, int len, int wait) { struct m_tag *t; if (len < 0) return (NULL); if (len > PACKET_TAG_MAXSIZE) panic("requested tag size for pool %#x is too big", type); t = pool_get(&mtagpool, wait == M_WAITOK ? PR_WAITOK : PR_NOWAIT); if (t == NULL) return (NULL); t->m_tag_id = type; t->m_tag_len = len; return (t); } /* Prepend a packet tag. */ void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.ph_tags, t, m_tag_link); m->m_pkthdr.ph_tagsset |= t->m_tag_id; } /* Unlink and free a packet tag. */ void m_tag_delete(struct mbuf *m, struct m_tag *t) { u_int32_t ph_tagsset = 0; struct m_tag *p; SLIST_REMOVE(&m->m_pkthdr.ph_tags, t, m_tag, m_tag_link); pool_put(&mtagpool, t); SLIST_FOREACH(p, &m->m_pkthdr.ph_tags, m_tag_link) ph_tagsset |= p->m_tag_id; m->m_pkthdr.ph_tagsset = ph_tagsset; } /* Unlink and free a packet tag chain. */ void m_tag_delete_chain(struct mbuf *m) { struct m_tag *p; while ((p = SLIST_FIRST(&m->m_pkthdr.ph_tags)) != NULL) { SLIST_REMOVE_HEAD(&m->m_pkthdr.ph_tags, m_tag_link); pool_put(&mtagpool, p); } m->m_pkthdr.ph_tagsset = 0; } /* Find a tag, starting from a given position. 
*/ struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *t) { struct m_tag *p; if (!(m->m_pkthdr.ph_tagsset & type)) return (NULL); if (t == NULL) p = SLIST_FIRST(&m->m_pkthdr.ph_tags); else p = SLIST_NEXT(t, m_tag_link); while (p != NULL) { if (p->m_tag_id == type) return (p); p = SLIST_NEXT(p, m_tag_link); } return (NULL); } /* Copy a single tag. */ struct m_tag * m_tag_copy(struct m_tag *t, int wait) { struct m_tag *p; p = m_tag_get(t->m_tag_id, t->m_tag_len, wait); if (p == NULL) return (NULL); memcpy(p + 1, t + 1, t->m_tag_len); /* Copy the data */ return (p); } /* * Copy two tag chains. The destination mbuf (to) loses any attached * tags even if the operation fails. This should not be a problem, as * m_tag_copy_chain() is typically called with a newly-allocated * destination mbuf. */ int m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int wait) { struct m_tag *p, *t, *tprev = NULL; m_tag_delete_chain(to); SLIST_FOREACH(p, &from->m_pkthdr.ph_tags, m_tag_link) { t = m_tag_copy(p, wait); if (t == NULL) { m_tag_delete_chain(to); return (ENOBUFS); } if (tprev == NULL) SLIST_INSERT_HEAD(&to->m_pkthdr.ph_tags, t, m_tag_link); else SLIST_INSERT_AFTER(tprev, t, m_tag_link); tprev = t; to->m_pkthdr.ph_tagsset |= t->m_tag_id; } return (0); } /* Initialize tags on an mbuf. */ void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.ph_tags); } /* Get first tag in chain. */ struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.ph_tags)); } /* Get next tag in chain. */ struct m_tag * m_tag_next(struct mbuf *m, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); }
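/*
 * Editor's illustrative sketch, not part of the original file: a minimal
 * example of how a protocol input routine might combine m_pulldown() with
 * the m_tag_*() API defined above.  The function example_input(), struct
 * example_hdr and the tag type PACKET_TAG_EXAMPLE are hypothetical and
 * exist only for illustration, which is why the block is disabled with
 * "#ifdef notyet".
 */
#ifdef notyet
struct example_hdr {
	u_int8_t	eh_type;
	u_int8_t	eh_len;
	u_int16_t	eh_value;
};

static int
example_input(struct mbuf *m, int off)
{
	struct example_hdr *eh;
	struct m_tag *mtag;
	struct mbuf *n;
	int newoff;

	/*
	 * Make sizeof(*eh) bytes starting at "off" contiguous.  On failure
	 * m_pulldown() has already freed the whole chain "m".
	 */
	n = m_pulldown(m, off, sizeof(*eh), &newoff);
	if (n == NULL)
		return (ENOBUFS);
	eh = (struct example_hdr *)(mtod(n, caddr_t) + newoff);

	/* Remember what was seen by attaching a tag to the packet header. */
	mtag = m_tag_get(PACKET_TAG_EXAMPLE, sizeof(u_int16_t), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		return (ENOBUFS);
	}
	*(u_int16_t *)(mtag + 1) = eh->eh_value;
	m_tag_prepend(m, mtag);

	return (0);
}
#endif /* notyet */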
/*	$OpenBSD: pf_if.c,v 1.111 2023/06/30 09:58:30 mvs Exp $	*/

/*
 * Copyright 2005 Henning Brauer <henning@openbsd.org>
 * Copyright 2005 Ryan McBride <mcbride@openbsd.org>
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2003 Cedric Berger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/filio.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/kernel.h> #include <sys/device.h> #include <sys/time.h> #include <sys/pool.h> #include <sys/syslog.h> #include <net/if.h> #include <net/if_var.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <net/pfvar.h> #include <netinet/ip_icmp.h> #include <netinet/tcp.h> #include <netinet/udp.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet/icmp6.h> #endif /* INET6 */ #include <net/pfvar_priv.h> #define isupper(c) ((c) >= 'A' && (c) <= 'Z') #define islower(c) ((c) >= 'a' && (c) <= 'z') #define isalpha(c) (isupper(c)||islower(c)) struct pfi_kif *pfi_all = NULL; struct pool pfi_addr_pl; struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; void pfi_kif_update(struct pfi_kif *); void pfi_dynaddr_update(struct pfi_dynaddr *dyn); void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, u_int8_t, int); void pfi_kifaddr_update(void *); void pfi_instance_add(struct ifnet *, u_int8_t, int); void pfi_address_add(struct sockaddr *, sa_family_t, u_int8_t); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); int pfi_skip_if(const char *, struct pfi_kif *); int pfi_unmask(void *); void pfi_group_change(const char *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); #define PFI_BUFFER_MAX 0x10000 #define PFI_MTYPE M_PF struct pfi_kif * pfi_kif_alloc(const char *kif_name, int mflags) { struct pfi_kif *kif; kif = malloc(sizeof(*pfi_all), PFI_MTYPE, mflags|M_ZERO); if (kif == NULL) return (NULL); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); kif->pfik_tzero = gettime(); TAILQ_INIT(&kif->pfik_dynaddrs); if (!strcmp(kif->pfik_name, "any")) { /* both so it works in the ioctl and the regular case */ kif->pfik_flags |= PFI_IFLAG_ANY; kif->pfik_flags_new |= PFI_IFLAG_ANY; } return (kif); } void pfi_kif_free(struct pfi_kif *kif) { if (kif == NULL) return; if (kif->pfik_rules || kif->pfik_states || kif->pfik_routes || kif->pfik_srcnodes || kif->pfik_flagrefs) panic("kif is still alive"); free(kif, PFI_MTYPE, sizeof(*kif)); } void pfi_initialize(void) { /* * The first time we arrive here is during kernel boot, * when if_attachsetup() for the first time. No locking * is needed in this case, because it's granted there * is a single thread, which sets pfi_all global var. 
*/ if (pfi_all != NULL) /* already initialized */ return; pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, IPL_SOFTNET, 0, "pfiaddrpl", NULL); pfi_buffer_max = 64; pfi_buffer = mallocarray(pfi_buffer_max, sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); pfi_all = pfi_kif_alloc(IFG_ALL, M_WAITOK); if (RB_INSERT(pfi_ifhead, &pfi_ifs, pfi_all) != NULL) panic("IFG_ALL kif found already"); } struct pfi_kif * pfi_kif_find(const char *kif_name) { struct pfi_kif_cmp s; PF_ASSERT_LOCKED(); memset(&s, 0, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); return (RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)); } struct pfi_kif * pfi_kif_get(const char *kif_name, struct pfi_kif **prealloc) { struct pfi_kif *kif; PF_ASSERT_LOCKED(); if ((kif = pfi_kif_find(kif_name))) return (kif); /* create new one */ if ((prealloc == NULL) || (*prealloc == NULL)) { kif = pfi_kif_alloc(kif_name, M_NOWAIT); if (kif == NULL) return (NULL); } else { kif = *prealloc; *prealloc = NULL; } RB_INSERT(pfi_ifhead, &pfi_ifs, kif); return (kif); } void pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) { PF_ASSERT_LOCKED(); switch (what) { case PFI_KIF_REF_RULE: kif->pfik_rules++; break; case PFI_KIF_REF_STATE: kif->pfik_states++; break; case PFI_KIF_REF_ROUTE: kif->pfik_routes++; break; case PFI_KIF_REF_SRCNODE: kif->pfik_srcnodes++; break; case PFI_KIF_REF_FLAG: kif->pfik_flagrefs++; break; default: panic("pfi_kif_ref with unknown type"); } } void pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) { if (kif == NULL) return; PF_ASSERT_LOCKED(); switch (what) { case PFI_KIF_REF_NONE: break; case PFI_KIF_REF_RULE: if (kif->pfik_rules <= 0) { DPFPRINTF(LOG_ERR, "pfi_kif_unref (%s): rules refcount <= 0", kif->pfik_name); return; } kif->pfik_rules--; break; case PFI_KIF_REF_STATE: if (kif->pfik_states <= 0) { DPFPRINTF(LOG_ERR, "pfi_kif_unref (%s): state refcount <= 0", kif->pfik_name); return; } kif->pfik_states--; break; case PFI_KIF_REF_ROUTE: if (kif->pfik_routes <= 0) { DPFPRINTF(LOG_ERR, "pfi_kif_unref (%s): route refcount <= 0", kif->pfik_name); return; } kif->pfik_routes--; break; case PFI_KIF_REF_SRCNODE: if (kif->pfik_srcnodes <= 0) { DPFPRINTF(LOG_ERR, "pfi_kif_unref (%s): src-node refcount <= 0", kif->pfik_name); return; } kif->pfik_srcnodes--; break; case PFI_KIF_REF_FLAG: if (kif->pfik_flagrefs <= 0) { DPFPRINTF(LOG_ERR, "pfi_kif_unref (%s): flags refcount <= 0", kif->pfik_name); return; } kif->pfik_flagrefs--; break; default: panic("pfi_kif_unref (%s) with unknown type", kif->pfik_name); } if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) return; if (kif->pfik_rules || kif->pfik_states || kif->pfik_routes || kif->pfik_srcnodes || kif->pfik_flagrefs) return; RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); free(kif, PFI_MTYPE, sizeof(*kif)); } int pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) { struct ifg_list *p; if (rule_kif == NULL || rule_kif == packet_kif) return (1); if (rule_kif->pfik_group != NULL) TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) if (p->ifgl_group == rule_kif->pfik_group) return (1); if (rule_kif->pfik_flags & PFI_IFLAG_ANY && packet_kif->pfik_ifp && !(packet_kif->pfik_ifp->if_flags & IFF_LOOPBACK)) return (1); return (0); } void pfi_attach_ifnet(struct ifnet *ifp) { struct pfi_kif *kif; struct task *t; PF_LOCK(); pfi_initialize(); pfi_update++; if ((kif = pfi_kif_get(ifp->if_xname, NULL)) == NULL) panic("%s: pfi_kif_get failed", __func__); kif->pfik_ifp = ifp; ifp->if_pf_kif = (caddr_t)kif; t = malloc(sizeof(*t), 
PFI_MTYPE, M_WAITOK); task_set(t, pfi_kifaddr_update, kif); if_addrhook_add(ifp, t); kif->pfik_ah_cookie = t; pfi_kif_update(kif); PF_UNLOCK(); } void pfi_detach_ifnet(struct ifnet *ifp) { struct pfi_kif *kif; struct task *t; if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) return; PF_LOCK(); pfi_update++; t = kif->pfik_ah_cookie; kif->pfik_ah_cookie = NULL; if_addrhook_del(ifp, t); free(t, PFI_MTYPE, sizeof(*t)); pfi_kif_update(kif); kif->pfik_ifp = NULL; ifp->if_pf_kif = NULL; pfi_kif_unref(kif, PFI_KIF_REF_NONE); PF_UNLOCK(); } void pfi_attach_ifgroup(struct ifg_group *ifg) { struct pfi_kif *kif; PF_LOCK(); pfi_initialize(); pfi_update++; if ((kif = pfi_kif_get(ifg->ifg_group, NULL)) == NULL) panic("%s: pfi_kif_get failed", __func__); kif->pfik_group = ifg; ifg->ifg_pf_kif = (caddr_t)kif; PF_UNLOCK(); } void pfi_detach_ifgroup(struct ifg_group *ifg) { struct pfi_kif *kif; if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) return; PF_LOCK(); pfi_update++; kif->pfik_group = NULL; ifg->ifg_pf_kif = NULL; pfi_kif_unref(kif, PFI_KIF_REF_NONE); PF_UNLOCK(); } void pfi_group_change(const char *group) { struct pfi_kif *kif; pfi_update++; if ((kif = pfi_kif_get(group, NULL)) == NULL) panic("%s: pfi_kif_get failed", __func__); pfi_kif_update(kif); } void pfi_group_delmember(const char *group) { PF_LOCK(); pfi_group_change(group); pfi_xcommit(); PF_UNLOCK(); } void pfi_group_addmember(const char *group) { PF_LOCK(); pfi_group_change(group); pfi_xcommit(); PF_UNLOCK(); } int pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { switch (af) { case AF_INET: switch (dyn->pfid_acnt4) { case 0: return (0); case 1: return (pf_match_addr(0, &dyn->pfid_addr4, &dyn->pfid_mask4, a, AF_INET)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); } break; #ifdef INET6 case AF_INET6: switch (dyn->pfid_acnt6) { case 0: return (0); case 1: return (pf_match_addr(0, &dyn->pfid_addr6, &dyn->pfid_mask6, a, AF_INET6)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); } break; #endif /* INET6 */ default: return (0); } } int pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af, int wait) { struct pfi_dynaddr *dyn; char tblname[PF_TABLE_NAME_SIZE]; struct pf_ruleset *ruleset = NULL; int rv = 0; if (aw->type != PF_ADDR_DYNIFTL) return (0); if ((dyn = pool_get(&pfi_addr_pl, wait|PR_LIMITFAIL|PR_ZERO)) == NULL) return (1); if (!strcmp(aw->v.ifname, "self")) dyn->pfid_kif = pfi_kif_get(IFG_ALL, NULL); else dyn->pfid_kif = pfi_kif_get(aw->v.ifname, NULL); if (dyn->pfid_kif == NULL) { rv = 1; goto _bad; } pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) dyn->pfid_net = 128; strlcpy(tblname, aw->v.ifname, sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NETWORK) strlcat(tblname, ":network", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_BROADCAST) strlcat(tblname, ":broadcast", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_PEER) strlcat(tblname, ":peer", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NOALIAS) strlcat(tblname, ":0", sizeof(tblname)); if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { rv = 1; goto _bad; } if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, wait)) == NULL) { rv = 1; goto _bad; } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, 
entry); aw->p.dyn = dyn; pfi_kif_update(dyn->pfid_kif); return (0); _bad: if (dyn->pfid_kt != NULL) pfr_detach_table(dyn->pfid_kt); if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); pool_put(&pfi_addr_pl, dyn); return (rv); } void pfi_kif_update(struct pfi_kif *kif) { struct ifg_list *ifgl; struct pfi_dynaddr *p; /* update all dynaddr */ TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) pfi_dynaddr_update(p); /* again for all groups kif is member of */ if (kif->pfik_ifp != NULL) TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) pfi_kif_update((struct pfi_kif *) ifgl->ifgl_group->ifg_pf_kif); } void pfi_dynaddr_update(struct pfi_dynaddr *dyn) { struct pfi_kif *kif; struct pfr_ktable *kt; if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) panic("pfi_dynaddr_update"); kif = dyn->pfid_kif; kt = dyn->pfid_kt; if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); kt->pfrkt_larg = pfi_update; } pfr_dynaddr_update(kt, dyn); } void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, u_int8_t net, int flags) { int e, size2 = 0; struct ifg_member *ifgm; pfi_buffer_cnt = 0; if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); else if (kif->pfik_group != NULL) TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) DPFPRINTF(LOG_ERR, "pfi_table_update: cannot set %d new addresses " "into table %s: %d", pfi_buffer_cnt, kt->pfrkt_name, e); } void pfi_instance_add(struct ifnet *ifp, u_int8_t net, int flags) { struct ifaddr *ifa; int got4 = 0, got6 = 0; int net2, af; if (ifp == NULL) return; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) continue; af = ifa->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && !(ifp->if_flags & IFF_BROADCAST)) continue; if ((flags & PFI_AFLAG_PEER) && !(ifp->if_flags & IFF_POINTOPOINT)) continue; if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL( &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) continue; if (flags & PFI_AFLAG_NOALIAS) { if (af == AF_INET && got4) continue; if (af == AF_INET6 && got6) continue; } if (af == AF_INET) got4 = 1; else if (af == AF_INET6) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { if (af == AF_INET) net2 = pfi_unmask(&((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr); else if (af == AF_INET6) net2 = pfi_unmask(&((struct sockaddr_in6 *) ifa->ifa_netmask)->sin6_addr); } if (af == AF_INET && net2 > 32) net2 = 32; if (flags & PFI_AFLAG_BROADCAST) pfi_address_add(ifa->ifa_broadaddr, af, net2); else if (flags & PFI_AFLAG_PEER) pfi_address_add(ifa->ifa_dstaddr, af, net2); else pfi_address_add(ifa->ifa_addr, af, net2); } } void pfi_address_add(struct sockaddr *sa, sa_family_t af, u_int8_t net) { struct pfr_addr *p; int i; if (pfi_buffer_cnt >= pfi_buffer_max) { int new_max = pfi_buffer_max * 2; if (new_max > PFI_BUFFER_MAX) { DPFPRINTF(LOG_ERR, "pfi_address_add: address buffer full (%d/%d)", pfi_buffer_cnt, PFI_BUFFER_MAX); return; } p = mallocarray(new_max, sizeof(*pfi_buffer), PFI_MTYPE, M_DONTWAIT); if (p == NULL) { DPFPRINTF(LOG_ERR, "pfi_address_add: no memory to grow 
buffer " "(%d/%d)", pfi_buffer_cnt, PFI_BUFFER_MAX); return; } memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer)); /* no need to zero buffer */ free(pfi_buffer, PFI_MTYPE, pfi_buffer_max * sizeof(*pfi_buffer)); pfi_buffer = p; pfi_buffer_max = new_max; } if (af == AF_INET && net > 32) net = 128; p = pfi_buffer + pfi_buffer_cnt++; memset(p, 0, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; if (af == AF_INET) p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; else if (af == AF_INET6) { p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) p->pfra_ip6addr.s6_addr16[1] = 0; } /* mask network address bits */ if (net < 128) ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++) ((caddr_t)p)[i] = 0; } void pfi_dynaddr_remove(struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL) return; TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; pool_put(&pfi_addr_pl, aw->p.dyn); aw->p.dyn = NULL; } void pfi_dynaddr_copyout(struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) return; aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; } void pfi_kifaddr_update(void *v) { struct pfi_kif *kif = (struct pfi_kif *)v; NET_ASSERT_LOCKED(); PF_LOCK(); pfi_update++; pfi_kif_update(kif); PF_UNLOCK(); } int pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) { return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } void pfi_update_status(const char *name, struct pf_status *pfs) { struct pfi_kif *p; struct pfi_kif_cmp key; struct ifg_member p_member, *ifgm; TAILQ_HEAD(, ifg_member) ifg_members; int i, j, k; if (*name == '\0' && pfs == NULL) { RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { memset(p->pfik_packets, 0, sizeof(p->pfik_packets)); memset(p->pfik_bytes, 0, sizeof(p->pfik_bytes)); p->pfik_tzero = gettime(); } return; } strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) { return; } if (p->pfik_group != NULL) { memcpy(&ifg_members, &p->pfik_group->ifg_members, sizeof(ifg_members)); } else { /* build a temporary list for p only */ memset(&p_member, 0, sizeof(p_member)); p_member.ifgm_ifp = p->pfik_ifp; TAILQ_INIT(&ifg_members); TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); } if (pfs) { memset(pfs->pcounters, 0, sizeof(pfs->pcounters)); memset(pfs->bcounters, 0, sizeof(pfs->bcounters)); } TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { if (ifgm->ifgm_ifp == NULL) continue; p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; /* just clear statistics */ if (pfs == NULL) { memset(p->pfik_packets, 0, sizeof(p->pfik_packets)); memset(p->pfik_bytes, 0, sizeof(p->pfik_bytes)); p->pfik_tzero = gettime(); continue; } for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) { pfs->pcounters[i][j][k] += p->pfik_packets[i][j][k]; pfs->bcounters[i][j] += p->pfik_bytes[i][j][k]; } } } void pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { struct pfi_kif *p; int n = 0; RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { if (pfi_skip_if(name, p)) continue; if (*size <= ++n) break; if (!p->pfik_tzero) p->pfik_tzero = gettime(); memcpy(buf++, p, sizeof(*buf)); } *size = n; } int pfi_skip_if(const char 
*filter, struct pfi_kif *p) { struct ifg_list *i; int n; PF_ASSERT_LOCKED(); if (filter == NULL || !*filter) return (0); if (!strcmp(p->pfik_name, filter)) return (0); /* exact match */ n = strlen(filter); if (n < 1 || n >= IFNAMSIZ) return (1); /* sanity check */ if (filter[n-1] >= '0' && filter[n-1] <= '9') return (1); /* group names may not end in a digit */ if (p->pfik_ifp != NULL) TAILQ_FOREACH(i, &p->pfik_ifp->if_groups, ifgl_next) if (!strncmp(i->ifgl_group->ifg_group, filter, IFNAMSIZ)) return (0); /* iface is in group "filter" */ return (1); } int pfi_set_flags(const char *name, int flags) { struct pfi_kif *p; size_t n; PF_ASSERT_LOCKED(); if (name != NULL && name[0] != '\0') { p = pfi_kif_find(name); if (p == NULL) { n = strlen(name); if (n < 1 || n >= IFNAMSIZ) return (EINVAL); if (!isalpha(name[0])) return (EINVAL); p = pfi_kif_get(name, NULL); if (p != NULL) { p->pfik_flags_new = p->pfik_flags | flags; /* * We use pfik_flagrefs counter as an * indication whether the kif has been created * on behalf of 'pfi_set_flags()' or not. */ KASSERT(p->pfik_flagrefs == 0); if (ISSET(p->pfik_flags_new, PFI_IFLAG_SKIP)) pfi_kif_ref(p, PFI_KIF_REF_FLAG); } else panic("%s pfi_kif_get() returned NULL\n", __func__); } else p->pfik_flags_new = p->pfik_flags | flags; } else { RB_FOREACH(p, pfi_ifhead, &pfi_ifs) p->pfik_flags_new = p->pfik_flags | flags; } return (0); } int pfi_clear_flags(const char *name, int flags) { struct pfi_kif *p, *w; PF_ASSERT_LOCKED(); if (name != NULL && name[0] != '\0') { p = pfi_kif_find(name); if (p != NULL) { p->pfik_flags_new = p->pfik_flags & ~flags; KASSERT((p->pfik_flagrefs == 0) || (p->pfik_flagrefs == 1)); if (!ISSET(p->pfik_flags_new, PFI_IFLAG_SKIP) && (p->pfik_flagrefs == 1)) pfi_kif_unref(p, PFI_KIF_REF_FLAG); } else return (ESRCH); } else RB_FOREACH_SAFE(p, pfi_ifhead, &pfi_ifs, w) { p->pfik_flags_new = p->pfik_flags & ~flags; KASSERT((p->pfik_flagrefs == 0) || (p->pfik_flagrefs == 1)); if (!ISSET(p->pfik_flags_new, PFI_IFLAG_SKIP) && (p->pfik_flagrefs == 1)) pfi_kif_unref(p, PFI_KIF_REF_FLAG); } return (0); } void pfi_xcommit(void) { struct pfi_kif *p, *gkif; struct ifg_list *g; struct ifnet *ifp; size_t n; PF_ASSERT_LOCKED(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { p->pfik_flags = p->pfik_flags_new; n = strlen(p->pfik_name); ifp = p->pfik_ifp; /* * if kif is backed by existing interface, then we must use * skip flags found in groups. We use pfik_flags_new, otherwise * we would need to do two RB_FOREACH() passes: the first to * commit group changes the second to commit flag changes for * interfaces. */ if (ifp != NULL) TAILQ_FOREACH(g, &ifp->if_groups, ifgl_next) { gkif = (struct pfi_kif *)g->ifgl_group->ifg_pf_kif; KASSERT(gkif != NULL); p->pfik_flags |= gkif->pfik_flags_new; } } } /* from pf_print_state.c */ int pfi_unmask(void *addr) { struct pf_addr *m = addr; int i = 31, j = 0, b = 0; u_int32_t tmp; while (j < 4 && m->addr32[j] == 0xffffffff) { b += 32; j++; } if (j < 4) { tmp = ntohl(m->addr32[j]); for (i = 31; tmp & (1 << i); --i) b++; } return (b); }
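/*
 * Editor's illustrative sketch, not part of the original file: the typical
 * lookup-and-reference pattern for kifs, using the pfi_*() functions
 * defined above.  The function example_rule_bind_kif() and its "ifname"
 * parameter are hypothetical, hence the "#ifdef notyet".
 */
#ifdef notyet
static struct pfi_kif *
example_rule_bind_kif(const char *ifname)
{
	struct pfi_kif *kif;

	PF_ASSERT_LOCKED();

	/* Look up an existing kif or create one (M_NOWAIT allocation). */
	if ((kif = pfi_kif_get(ifname, NULL)) == NULL)
		return (NULL);

	/*
	 * Hold a rule reference so the kif is not reclaimed while the rule
	 * exists; release it later with pfi_kif_unref(kif, PFI_KIF_REF_RULE).
	 */
	pfi_kif_ref(kif, PFI_KIF_REF_RULE);
	return (kif);
}
#endif /* notyet */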
/*	$OpenBSD: uvm_aobj.c,v 1.110 2024/04/13 23:44:11 jsg Exp $	*/
/*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
 *	Washington University.
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp */ /* * uvm_aobj.c: anonymous memory uvm_object pager * * author: Chuck Silvers <chuq@chuq.com> * started: Jan-1998 * * - design mostly from Chuck Cranor */ #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/kernel.h> #include <sys/pool.h> #include <sys/stdint.h> #include <sys/atomic.h> #include <uvm/uvm.h> /* * An anonymous UVM object (aobj) manages anonymous-memory. In addition to * keeping the list of resident pages, it may also keep a list of allocated * swap blocks. Depending on the size of the object, this list is either * stored in an array (small objects) or in a hash table (large objects). */ /* * Note: for hash tables, we break the address space of the aobj into blocks * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two. */ #define UAO_SWHASH_CLUSTER_SHIFT 4 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT) /* Get the "tag" for this page index. */ #define UAO_SWHASH_ELT_TAG(idx) ((idx) >> UAO_SWHASH_CLUSTER_SHIFT) #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \ ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1)) /* Given an ELT and a page index, find the swap slot. */ #define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \ ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)]) /* Given an ELT, return its pageidx base. */ #define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \ ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT) /* The hash function. */ #define UAO_SWHASH_HASH(aobj, idx) \ (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \ & (aobj)->u_swhashmask)]) /* * The threshold which determines whether we will use an array or a * hash table to store the list of allocated swap blocks. */ #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4) #define UAO_USES_SWHASH(aobj) \ ((aobj)->u_pages > UAO_SWHASH_THRESHOLD) /* The number of buckets in a hash, with an upper bound. */ #define UAO_SWHASH_MAXBUCKETS 256 #define UAO_SWHASH_BUCKETS(pages) \ (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS)) /* * uao_swhash_elt: when a hash table is being used, this structure defines * the format of an entry in the bucket list. 
*/ struct uao_swhash_elt { LIST_ENTRY(uao_swhash_elt) list; /* the hash list */ voff_t tag; /* our 'tag' */ int count; /* our number of active slots */ int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */ }; /* * uao_swhash: the swap hash table structure */ LIST_HEAD(uao_swhash, uao_swhash_elt); /* * uao_swhash_elt_pool: pool of uao_swhash_elt structures */ struct pool uao_swhash_elt_pool; /* * uvm_aobj: the actual anon-backed uvm_object * * => the uvm_object is at the top of the structure, this allows * (struct uvm_aobj *) == (struct uvm_object *) * => only one of u_swslots and u_swhash is used in any given aobj */ struct uvm_aobj { struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */ int u_pages; /* number of pages in entire object */ int u_flags; /* the flags (see uvm_aobj.h) */ /* * Either an array or hashtable (array of bucket heads) of * offset -> swapslot mappings for the aobj. */ #define u_swslots u_swap.slot_array #define u_swhash u_swap.slot_hash union swslots { int *slot_array; struct uao_swhash *slot_hash; } u_swap; u_long u_swhashmask; /* mask for hashtable */ LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */ }; struct pool uvm_aobj_pool; static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int, boolean_t); static boolean_t uao_flush(struct uvm_object *, voff_t, voff_t, int); static void uao_free(struct uvm_aobj *); static int uao_get(struct uvm_object *, voff_t, vm_page_t *, int *, int, vm_prot_t, int, int); static boolean_t uao_pagein(struct uvm_aobj *, int, int); static boolean_t uao_pagein_page(struct uvm_aobj *, int); void uao_dropswap_range(struct uvm_object *, voff_t, voff_t); void uao_shrink_flush(struct uvm_object *, int, int); int uao_shrink_hash(struct uvm_object *, int); int uao_shrink_array(struct uvm_object *, int); int uao_shrink_convert(struct uvm_object *, int); int uao_grow_hash(struct uvm_object *, int); int uao_grow_array(struct uvm_object *, int); int uao_grow_convert(struct uvm_object *, int); /* * aobj_pager * * note that some functions (e.g. put) are handled elsewhere */ const struct uvm_pagerops aobj_pager = { .pgo_reference = uao_reference, .pgo_detach = uao_detach, .pgo_flush = uao_flush, .pgo_get = uao_get, }; /* * uao_list: global list of active aobjs, locked by uao_list_lock * * Lock ordering: generally the locking order is object lock, then list lock. * in the case of swap off we have to iterate over the list, and thus the * ordering is reversed. In that case we must use trylocking to prevent * deadlock. */ static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list); static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); /* * functions */ /* * hash table/array related functions */ /* * uao_find_swhash_elt: find (or create) a hash table entry for a page * offset. */ static struct uao_swhash_elt * uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create) { struct uao_swhash *swhash; struct uao_swhash_elt *elt; voff_t page_tag; swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */ page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */ /* * now search the bucket for the requested tag */ LIST_FOREACH(elt, swhash, list) { if (elt->tag == page_tag) return elt; } if (!create) return NULL; /* * allocate a new entry for the bucket and init/insert it in */ elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO); /* * XXX We cannot sleep here as the hash table might disappear * from under our feet. And we run the risk of deadlocking * the pagedeamon. 
In fact this code will only be called by * the pagedaemon and allocation will only fail if we * exhausted the pagedeamon reserve. In that case we're * doomed anyway, so panic. */ if (elt == NULL) panic("%s: can't allocate entry", __func__); LIST_INSERT_HEAD(swhash, elt, list); elt->tag = page_tag; return elt; } /* * uao_find_swslot: find the swap slot number for an aobj/pageidx */ int uao_find_swslot(struct uvm_object *uobj, int pageidx) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); /* * if noswap flag is set, then we never return a slot */ if (aobj->u_flags & UAO_FLAG_NOSWAP) return 0; /* * if hashing, look in hash table. */ if (UAO_USES_SWHASH(aobj)) { struct uao_swhash_elt *elt = uao_find_swhash_elt(aobj, pageidx, FALSE); if (elt) return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); else return 0; } /* * otherwise, look in the array */ return aobj->u_swslots[pageidx]; } /* * uao_set_swslot: set the swap slot for a page in an aobj. * * => setting a slot to zero frees the slot * => object must be locked by caller * => we return the old slot number, or -1 if we failed to allocate * memory to record the new slot number */ int uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int oldslot; KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0); KASSERT(UVM_OBJ_IS_AOBJ(uobj)); /* * if noswap flag is set, then we can't set a slot */ if (aobj->u_flags & UAO_FLAG_NOSWAP) { if (slot == 0) return 0; /* a clear is ok */ /* but a set is not */ printf("uao_set_swslot: uobj = %p\n", uobj); panic("uao_set_swslot: attempt to set a slot on a NOSWAP object"); } /* * are we using a hash table? if so, add it in the hash. */ if (UAO_USES_SWHASH(aobj)) { /* * Avoid allocating an entry just to free it again if * the page had not swap slot in the first place, and * we are freeing. */ struct uao_swhash_elt *elt = uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE); if (elt == NULL) { KASSERT(slot == 0); return 0; } oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot; /* * now adjust the elt's reference counter and free it if we've * dropped it to zero. */ if (slot) { if (oldslot == 0) elt->count++; } else { if (oldslot) elt->count--; if (elt->count == 0) { LIST_REMOVE(elt, list); pool_put(&uao_swhash_elt_pool, elt); } } } else { /* we are using an array */ oldslot = aobj->u_swslots[pageidx]; aobj->u_swslots[pageidx] = slot; } return oldslot; } /* * end of hash/array functions */ /* * uao_free: free all resources held by an aobj, and then free the aobj * * => the aobj should be dead */ static void uao_free(struct uvm_aobj *aobj) { struct uvm_object *uobj = &aobj->u_obj; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); KASSERT(rw_write_held(uobj->vmobjlock)); uao_dropswap_range(uobj, 0, 0); rw_exit(uobj->vmobjlock); if (UAO_USES_SWHASH(aobj)) { /* * free the hash table itself. */ hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); } else { free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); } /* * finally free the aobj itself */ uvm_obj_destroy(uobj); pool_put(&uvm_aobj_pool, aobj); } /* * pager functions */ #ifdef TMPFS /* * Shrink an aobj to a given number of pages. The procedure is always the same: * assess the necessity of data structure conversion (hash to array), secure * resources, flush pages and drop swap slots. 
* */ void uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg) { KASSERT(startpg < endpg); KASSERT(uobj->uo_refs == 1); uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT, (voff_t)endpg << PAGE_SHIFT, PGO_FREE); uao_dropswap_range(uobj, startpg, endpg); } int uao_shrink_hash(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; struct uao_swhash *new_swhash; struct uao_swhash_elt *elt; unsigned long new_hashmask; int i; KASSERT(UAO_USES_SWHASH(aobj)); /* * If the size of the hash table doesn't change, all we need to do is * to adjust the page count. */ if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { uao_shrink_flush(uobj, pages, aobj->u_pages); aobj->u_pages = pages; return 0; } new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, M_WAITOK | M_CANFAIL, &new_hashmask); if (new_swhash == NULL) return ENOMEM; uao_shrink_flush(uobj, pages, aobj->u_pages); /* * Even though the hash table size is changing, the hash of the buckets * we are interested in copying should not change. */ for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) { while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { elt = LIST_FIRST(&aobj->u_swhash[i]); LIST_REMOVE(elt, list); LIST_INSERT_HEAD(&new_swhash[i], elt, list); } } hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); aobj->u_swhash = new_swhash; aobj->u_pages = pages; aobj->u_swhashmask = new_hashmask; return 0; } int uao_shrink_convert(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; struct uao_swhash_elt *elt; int i, *new_swslots; new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, M_WAITOK | M_CANFAIL | M_ZERO); if (new_swslots == NULL) return ENOMEM; uao_shrink_flush(uobj, pages, aobj->u_pages); /* Convert swap slots from hash to array. */ for (i = 0; i < pages; i++) { elt = uao_find_swhash_elt(aobj, i, FALSE); if (elt != NULL) { new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i); if (new_swslots[i] != 0) elt->count--; if (elt->count == 0) { LIST_REMOVE(elt, list); pool_put(&uao_swhash_elt_pool, elt); } } } hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); aobj->u_swslots = new_swslots; aobj->u_pages = pages; return 0; } int uao_shrink_array(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int i, *new_swslots; new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, M_WAITOK | M_CANFAIL | M_ZERO); if (new_swslots == NULL) return ENOMEM; uao_shrink_flush(uobj, pages, aobj->u_pages); for (i = 0; i < pages; i++) new_swslots[i] = aobj->u_swslots[i]; free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); aobj->u_swslots = new_swslots; aobj->u_pages = pages; return 0; } int uao_shrink(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; KASSERT(pages < aobj->u_pages); /* * Distinguish between three possible cases: * 1. aobj uses hash and must be converted to array. * 2. aobj uses array and array size needs to be adjusted. * 3. aobj uses hash and hash size needs to be adjusted. */ if (pages > UAO_SWHASH_THRESHOLD) return uao_shrink_hash(uobj, pages); /* case 3 */ else if (aobj->u_pages > UAO_SWHASH_THRESHOLD) return uao_shrink_convert(uobj, pages); /* case 1 */ else return uao_shrink_array(uobj, pages); /* case 2 */ } /* * Grow an aobj to a given number of pages. Right now we only adjust the swap * slots. We could additionally handle page allocation directly, so that they * don't happen through uvm_fault(). 
That would allow us to use another * mechanism for the swap slots other than malloc(). It is thus mandatory that * the caller of these functions does not allow faults to happen in case of * growth error. */ int uao_grow_array(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int i, *new_swslots; KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD); new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, M_WAITOK | M_CANFAIL | M_ZERO); if (new_swslots == NULL) return ENOMEM; for (i = 0; i < aobj->u_pages; i++) new_swslots[i] = aobj->u_swslots[i]; free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); aobj->u_swslots = new_swslots; aobj->u_pages = pages; return 0; } int uao_grow_hash(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; struct uao_swhash *new_swhash; struct uao_swhash_elt *elt; unsigned long new_hashmask; int i; KASSERT(pages > UAO_SWHASH_THRESHOLD); /* * If the size of the hash table doesn't change, all we need to do is * to adjust the page count. */ if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { aobj->u_pages = pages; return 0; } KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages)); new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, M_WAITOK | M_CANFAIL, &new_hashmask); if (new_swhash == NULL) return ENOMEM; for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) { while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { elt = LIST_FIRST(&aobj->u_swhash[i]); LIST_REMOVE(elt, list); LIST_INSERT_HEAD(&new_swhash[i], elt, list); } } hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); aobj->u_swhash = new_swhash; aobj->u_pages = pages; aobj->u_swhashmask = new_hashmask; return 0; } int uao_grow_convert(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; struct uao_swhash *new_swhash; struct uao_swhash_elt *elt; unsigned long new_hashmask; int i, *old_swslots; new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, M_WAITOK | M_CANFAIL, &new_hashmask); if (new_swhash == NULL) return ENOMEM; /* Set these now, so we can use uao_find_swhash_elt(). */ old_swslots = aobj->u_swslots; aobj->u_swhash = new_swhash; aobj->u_swhashmask = new_hashmask; for (i = 0; i < aobj->u_pages; i++) { if (old_swslots[i] != 0) { elt = uao_find_swhash_elt(aobj, i, TRUE); elt->count++; UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i]; } } free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); aobj->u_pages = pages; return 0; } int uao_grow(struct uvm_object *uobj, int pages) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; KASSERT(pages > aobj->u_pages); /* * Distinguish between three possible cases: * 1. aobj uses hash and hash size needs to be adjusted. * 2. aobj uses array and array size needs to be adjusted. * 3. aobj uses array and must be converted to hash. */ if (pages <= UAO_SWHASH_THRESHOLD) return uao_grow_array(uobj, pages); /* case 2 */ else if (aobj->u_pages > UAO_SWHASH_THRESHOLD) return uao_grow_hash(uobj, pages); /* case 1 */ else return uao_grow_convert(uobj, pages); } #endif /* TMPFS */ /* * uao_create: create an aobj of the given size and return its uvm_object. * * => for normal use, flags are zero or UAO_FLAG_CANFAIL. 
* => for the kernel object, the flags are: * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once) * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ") */ struct uvm_object * uao_create(vsize_t size, int flags) { static struct uvm_aobj kernel_object_store; static struct rwlock bootstrap_kernel_object_lock; static int kobj_alloced = 0; int pages = round_page(size) >> PAGE_SHIFT; struct uvm_aobj *aobj; int refs; /* * Allocate a new aobj, unless kernel object is requested. */ if (flags & UAO_FLAG_KERNOBJ) { KASSERT(!kobj_alloced); aobj = &kernel_object_store; aobj->u_pages = pages; aobj->u_flags = UAO_FLAG_NOSWAP; refs = UVM_OBJ_KERN; kobj_alloced = UAO_FLAG_KERNOBJ; } else if (flags & UAO_FLAG_KERNSWAP) { KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ); aobj = &kernel_object_store; kobj_alloced = UAO_FLAG_KERNSWAP; } else { aobj = pool_get(&uvm_aobj_pool, PR_WAITOK); aobj->u_pages = pages; aobj->u_flags = 0; refs = 1; } /* * allocate hash/array if necessary */ if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) { int mflags; if (flags) mflags = M_NOWAIT; else mflags = M_WAITOK; /* allocate hash table or array depending on object size */ if (UAO_USES_SWHASH(aobj)) { aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, mflags, &aobj->u_swhashmask); if (aobj->u_swhash == NULL) { if (flags & UAO_FLAG_CANFAIL) { pool_put(&uvm_aobj_pool, aobj); return NULL; } panic("uao_create: hashinit swhash failed"); } } else { aobj->u_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, mflags|M_ZERO); if (aobj->u_swslots == NULL) { if (flags & UAO_FLAG_CANFAIL) { pool_put(&uvm_aobj_pool, aobj); return NULL; } panic("uao_create: malloc swslots failed"); } } if (flags & UAO_FLAG_KERNSWAP) { aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */ return &aobj->u_obj; /* done! */ } } /* * Initialise UVM object. */ uvm_obj_init(&aobj->u_obj, &aobj_pager, refs); if (flags & UAO_FLAG_KERNOBJ) { /* Use a temporary static lock for kernel_object. */ rw_init(&bootstrap_kernel_object_lock, "kobjlk"); uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock); } /* * now that aobj is ready, add it to the global list */ mtx_enter(&uao_list_lock); LIST_INSERT_HEAD(&uao_list, aobj, u_list); mtx_leave(&uao_list_lock); return &aobj->u_obj; } /* * uao_init: set up aobj pager subsystem * * => called at boot time from uvm_pager_init() */ void uao_init(void) { /* * NOTE: Pages for this pool must not come from a pageable * kernel map! */ pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, IPL_NONE, PR_WAITOK, "uaoeltpl", NULL); pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, IPL_NONE, PR_WAITOK, "aobjpl", NULL); } /* * uao_reference: hold a reference to an anonymous UVM object. */ void uao_reference(struct uvm_object *uobj) { /* Kernel object is persistent. */ if (UVM_OBJ_IS_KERN_OBJECT(uobj)) return; atomic_inc_int(&uobj->uo_refs); } /* * uao_detach: drop a reference to an anonymous UVM object. */ void uao_detach(struct uvm_object *uobj) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; struct vm_page *pg; /* * Detaching from kernel_object is a NOP. */ if (UVM_OBJ_IS_KERN_OBJECT(uobj)) return; /* * Drop the reference. If it was the last one, destroy the object. */ if (atomic_dec_int_nv(&uobj->uo_refs) > 0) { return; } /* * Remove the aobj from the global list. */ mtx_enter(&uao_list_lock); LIST_REMOVE(aobj, u_list); mtx_leave(&uao_list_lock); /* * Free all the pages left in the aobj. 
For each page, when the * page is no longer busy (and thus after any disk I/O that it is * involved in is complete), release any swap resources and free * the page itself. */ rw_enter(uobj->vmobjlock, RW_WRITE); while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) { pmap_page_protect(pg, PROT_NONE); if (pg->pg_flags & PG_BUSY) { uvm_pagewait(pg, uobj->vmobjlock, "uao_det"); rw_enter(uobj->vmobjlock, RW_WRITE); continue; } uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); uvm_lock_pageq(); uvm_pagefree(pg); uvm_unlock_pageq(); } /* * Finally, free the anonymous UVM object itself. */ uao_free(aobj); } /* * uao_flush: flush pages out of a uvm object * * => if PGO_CLEANIT is not set, then we will not block. * => if PGO_ALLPAGE is set, then all pages in the object are valid targets * for flushing. * => NOTE: we are allowed to lock the page queues, so the caller * must not be holding the lock on them [e.g. pagedaemon had * better not call us with the queues locked] * => we return TRUE unless we encountered some sort of I/O error * XXXJRT currently never happens, as we never directly initiate * XXXJRT I/O */ boolean_t uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) { struct uvm_aobj *aobj = (struct uvm_aobj *) uobj; struct vm_page *pg; voff_t curoff; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); KASSERT(rw_write_held(uobj->vmobjlock)); if (flags & PGO_ALLPAGES) { start = 0; stop = (voff_t)aobj->u_pages << PAGE_SHIFT; } else { start = trunc_page(start); stop = round_page(stop); if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) { printf("uao_flush: strange, got an out of range " "flush (fixed)\n"); stop = (voff_t)aobj->u_pages << PAGE_SHIFT; } } /* * Don't need to do any work here if we're not freeing * or deactivating pages. */ if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { return TRUE; } curoff = start; for (;;) { if (curoff < stop) { pg = uvm_pagelookup(uobj, curoff); curoff += PAGE_SIZE; if (pg == NULL) continue; } else { break; } /* Make sure page is unbusy, else wait for it. */ if (pg->pg_flags & PG_BUSY) { uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh"); rw_enter(uobj->vmobjlock, RW_WRITE); curoff -= PAGE_SIZE; continue; } switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { /* * XXX In these first 3 cases, we always just * XXX deactivate the page. We may want to * XXX handle the different cases more specifically * XXX in the future. */ case PGO_CLEANIT|PGO_FREE: /* FALLTHROUGH */ case PGO_CLEANIT|PGO_DEACTIVATE: /* FALLTHROUGH */ case PGO_DEACTIVATE: deactivate_it: if (pg->wire_count != 0) continue; uvm_lock_pageq(); pmap_page_protect(pg, PROT_NONE); uvm_pagedeactivate(pg); uvm_unlock_pageq(); continue; case PGO_FREE: /* * If there are multiple references to * the object, just deactivate the page. */ if (uobj->uo_refs > 1) goto deactivate_it; /* XXX skip the page if it's wired */ if (pg->wire_count != 0) continue; /* * free the swap slot and the page. */ pmap_page_protect(pg, PROT_NONE); /* * freeing swapslot here is not strictly necessary. * however, leaving it here doesn't save much * because we need to update swap accounting anyway. */ uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); uvm_lock_pageq(); uvm_pagefree(pg); uvm_unlock_pageq(); continue; default: panic("uao_flush: weird flags"); } } return TRUE; } /* * uao_get: fetch me a page * * we have three cases: * 1: page is resident -> just return the page. * 2: page is zero-fill -> allocate a new page and zero it. * 3: page is swapped out -> fetch the page from swap. 
* * cases 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot. * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), * then we will need to return VM_PAGER_UNLOCK. * * => flags: PGO_ALLPAGES: get all of the pages * PGO_LOCKED: fault data structures are locked * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] * => NOTE: caller must check for released pages!! */ static int uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; voff_t current_offset; vm_page_t ptmp; int lcv, gotpages, maxpages, swslot, rv, pageidx; boolean_t done; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); KASSERT(rw_write_held(uobj->vmobjlock)); /* * get number of pages */ maxpages = *npagesp; if (flags & PGO_LOCKED) { /* * step 1a: get pages that are already resident. only do * this if the data structures are locked (i.e. the first * time through). */ done = TRUE; /* be optimistic */ gotpages = 0; /* # of pages we got so far */ for (lcv = 0, current_offset = offset ; lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) { /* do we care about this page? if not, skip it */ if (pps[lcv] == PGO_DONTCARE) continue; ptmp = uvm_pagelookup(uobj, current_offset); /* * if page is new, attempt to allocate the page, * zero-fill'd. */ if (ptmp == NULL && uao_find_swslot(uobj, current_offset >> PAGE_SHIFT) == 0) { ptmp = uvm_pagealloc(uobj, current_offset, NULL, UVM_PGA_ZERO); if (ptmp) { /* new page */ atomic_clearbits_int(&ptmp->pg_flags, PG_BUSY|PG_FAKE); atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ); UVM_PAGE_OWN(ptmp, NULL); } } /* * to be useful must get a non-busy page */ if (ptmp == NULL || (ptmp->pg_flags & PG_BUSY) != 0) { if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0) /* need to do a wait or I/O! */ done = FALSE; continue; } /* * useful page: plug it in our result array */ atomic_setbits_int(&ptmp->pg_flags, PG_BUSY); UVM_PAGE_OWN(ptmp, "uao_get1"); pps[lcv] = ptmp; gotpages++; } /* * step 1b: now we've either done everything needed or we * to unlock and do some waiting or I/O. */ *npagesp = gotpages; if (done) /* bingo! */ return VM_PAGER_OK; else /* EEK! Need to unlock and I/O */ return VM_PAGER_UNLOCK; } /* * step 2: get non-resident or busy pages. * data structures are unlocked. */ for (lcv = 0, current_offset = offset ; lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) { /* * - skip over pages we've already gotten or don't want * - skip over pages we don't _have_ to get */ if (pps[lcv] != NULL || (lcv != centeridx && (flags & PGO_ALLPAGES) == 0)) continue; pageidx = current_offset >> PAGE_SHIFT; /* * we have yet to locate the current page (pps[lcv]). we * first look for a page that is already at the current offset. * if we find a page, we check to see if it is busy or * released. if that is the case, then we sleep on the page * until it is no longer busy or released and repeat the lookup. * if the page we found is neither busy nor released, then we * busy it (so we own it) and plug it into pps[lcv]. this * 'break's the following while loop and indicates we are * ready to move on to the next page in the "lcv" loop above. * * if we exit the while loop with pps[lcv] still set to NULL, * then it means that we allocated a new busy/fake/clean page * ptmp in the object and we need to do I/O to fill in the data. 
*/ /* top of "pps" while loop */ while (pps[lcv] == NULL) { /* look for a resident page */ ptmp = uvm_pagelookup(uobj, current_offset); /* not resident? allocate one now (if we can) */ if (ptmp == NULL) { ptmp = uvm_pagealloc(uobj, current_offset, NULL, 0); /* out of RAM? */ if (ptmp == NULL) { rw_exit(uobj->vmobjlock); uvm_wait("uao_getpage"); rw_enter(uobj->vmobjlock, RW_WRITE); /* goto top of pps while loop */ continue; } /* * safe with PQ's unlocked: because we just * alloc'd the page */ atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ); /* * got new page ready for I/O. break pps while * loop. pps[lcv] is still NULL. */ break; } /* page is there, see if we need to wait on it */ if ((ptmp->pg_flags & PG_BUSY) != 0) { uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get"); rw_enter(uobj->vmobjlock, RW_WRITE); continue; /* goto top of pps while loop */ } /* * if we get here then the page is resident and * unbusy. we busy it now (so we own it). */ /* we own it, caller must un-busy */ atomic_setbits_int(&ptmp->pg_flags, PG_BUSY); UVM_PAGE_OWN(ptmp, "uao_get2"); pps[lcv] = ptmp; } /* * if we own the valid page at the correct offset, pps[lcv] will * point to it. nothing more to do except go to the next page. */ if (pps[lcv]) continue; /* next lcv */ /* * we have a "fake/busy/clean" page that we just allocated. * do the needed "i/o", either reading from swap or zeroing. */ swslot = uao_find_swslot(uobj, pageidx); /* just zero the page if there's nothing in swap. */ if (swslot == 0) { /* page hasn't existed before, just zero it. */ uvm_pagezero(ptmp); } else { /* * page in the swapped-out page. * unlock object for i/o, relock when done. */ rw_exit(uobj->vmobjlock); rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); rw_enter(uobj->vmobjlock, RW_WRITE); /* * I/O done. check for errors. */ if (rv != VM_PAGER_OK) { /* * remove the swap slot from the aobj * and mark the aobj as having no real slot. * don't free the swap slot, thus preventing * it from being used again. */ swslot = uao_set_swslot(&aobj->u_obj, pageidx, SWSLOT_BAD); uvm_swap_markbad(swslot, 1); if (ptmp->pg_flags & PG_WANTED) wakeup(ptmp); atomic_clearbits_int(&ptmp->pg_flags, PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); uvm_lock_pageq(); uvm_pagefree(ptmp); uvm_unlock_pageq(); rw_exit(uobj->vmobjlock); return rv; } } /* * we got the page! clear the fake flag (indicates valid * data now in page) and plug into our result array. note * that page is still busy. * * it is the callers job to: * => check if the page is released * => unbusy the page * => activate the page */ atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE); pmap_clear_modify(ptmp); /* ... and clean */ pps[lcv] = ptmp; } /* lcv loop */ rw_exit(uobj->vmobjlock); return VM_PAGER_OK; } /* * uao_dropswap: release any swap resources from this aobj page. * * => aobj must be locked or have a reference count of 0. */ int uao_dropswap(struct uvm_object *uobj, int pageidx) { int slot; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); slot = uao_set_swslot(uobj, pageidx, 0); if (slot) { uvm_swap_free(slot, 1); } return slot; } /* * page in every page in every aobj that is paged-out to a range of swslots. * * => aobj must be locked and is returned locked. * => returns TRUE if pagein was aborted due to lack of memory. */ boolean_t uao_swap_off(int startslot, int endslot) { struct uvm_aobj *aobj; /* * Walk the list of all anonymous UVM objects. Grab the first. 
*/ mtx_enter(&uao_list_lock); if ((aobj = LIST_FIRST(&uao_list)) == NULL) { mtx_leave(&uao_list_lock); return FALSE; } uao_reference(&aobj->u_obj); do { struct uvm_aobj *nextaobj; boolean_t rv; /* * Prefetch the next object and immediately hold a reference * on it, so neither the current nor the next entry could * disappear while we are iterating. */ if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) { uao_reference(&nextaobj->u_obj); } mtx_leave(&uao_list_lock); /* * Page in all pages in the swap slot range. */ rw_enter(aobj->u_obj.vmobjlock, RW_WRITE); rv = uao_pagein(aobj, startslot, endslot); rw_exit(aobj->u_obj.vmobjlock); /* Drop the reference of the current object. */ uao_detach(&aobj->u_obj); if (rv) { if (nextaobj) { uao_detach(&nextaobj->u_obj); } return rv; } aobj = nextaobj; mtx_enter(&uao_list_lock); } while (aobj); /* * done with traversal, unlock the list */ mtx_leave(&uao_list_lock); return FALSE; } /* * page in any pages from aobj in the given range. * * => returns TRUE if pagein was aborted due to lack of memory. */ static boolean_t uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot) { boolean_t rv; if (UAO_USES_SWHASH(aobj)) { struct uao_swhash_elt *elt; int bucket; restart: for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) { for (elt = LIST_FIRST(&aobj->u_swhash[bucket]); elt != NULL; elt = LIST_NEXT(elt, list)) { int i; for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) { int slot = elt->slots[i]; /* * if the slot isn't in range, skip it. */ if (slot < startslot || slot >= endslot) { continue; } /* * process the page, * the start over on this object * since the swhash elt * may have been freed. */ rv = uao_pagein_page(aobj, UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i); if (rv) { return rv; } goto restart; } } } } else { int i; for (i = 0; i < aobj->u_pages; i++) { int slot = aobj->u_swslots[i]; /* * if the slot isn't in range, skip it */ if (slot < startslot || slot >= endslot) { continue; } /* * process the page. */ rv = uao_pagein_page(aobj, i); if (rv) { return rv; } } } return FALSE; } /* * uao_pagein_page: page in a single page from an anonymous UVM object. * * => Returns TRUE if pagein was aborted due to lack of memory. */ static boolean_t uao_pagein_page(struct uvm_aobj *aobj, int pageidx) { struct uvm_object *uobj = &aobj->u_obj; struct vm_page *pg; int rv, npages; pg = NULL; npages = 1; KASSERT(rw_write_held(uobj->vmobjlock)); rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0); /* * relock and finish up. */ rw_enter(uobj->vmobjlock, RW_WRITE); switch (rv) { case VM_PAGER_OK: break; case VM_PAGER_ERROR: case VM_PAGER_REFAULT: /* * nothing more to do on errors. * VM_PAGER_REFAULT can only mean that the anon was freed, * so again there's nothing to do. */ return FALSE; } /* * ok, we've got the page now. * mark it as dirty, clear its swslot and un-busy it. */ uao_dropswap(&aobj->u_obj, pageidx); atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE); UVM_PAGE_OWN(pg, NULL); /* * deactivate the page (to put it on a page queue). */ pmap_clear_reference(pg); uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); return FALSE; } /* * uao_dropswap_range: drop swapslots in the range. * * => aobj must be locked and is returned locked. * => start is inclusive. end is exclusive. 
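 * => an end of 0 means "up to the end of the object"; uao_free() uses
 *    uao_dropswap_range(uobj, 0, 0) to release every slot.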
*/ void uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int swpgonlydelta = 0; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); KASSERT(rw_write_held(uobj->vmobjlock)); if (end == 0) { end = INT64_MAX; } if (UAO_USES_SWHASH(aobj)) { int i, hashbuckets = aobj->u_swhashmask + 1; voff_t taghi; voff_t taglo; taglo = UAO_SWHASH_ELT_TAG(start); taghi = UAO_SWHASH_ELT_TAG(end); for (i = 0; i < hashbuckets; i++) { struct uao_swhash_elt *elt, *next; for (elt = LIST_FIRST(&aobj->u_swhash[i]); elt != NULL; elt = next) { int startidx, endidx; int j; next = LIST_NEXT(elt, list); if (elt->tag < taglo || taghi < elt->tag) { continue; } if (elt->tag == taglo) { startidx = UAO_SWHASH_ELT_PAGESLOT_IDX(start); } else { startidx = 0; } if (elt->tag == taghi) { endidx = UAO_SWHASH_ELT_PAGESLOT_IDX(end); } else { endidx = UAO_SWHASH_CLUSTER_SIZE; } for (j = startidx; j < endidx; j++) { int slot = elt->slots[j]; KASSERT(uvm_pagelookup(&aobj->u_obj, (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + j) << PAGE_SHIFT) == NULL); if (slot > 0) { uvm_swap_free(slot, 1); swpgonlydelta++; KASSERT(elt->count > 0); elt->slots[j] = 0; elt->count--; } } if (elt->count == 0) { LIST_REMOVE(elt, list); pool_put(&uao_swhash_elt_pool, elt); } } } } else { int i; if (aobj->u_pages < end) { end = aobj->u_pages; } for (i = start; i < end; i++) { int slot = aobj->u_swslots[i]; if (slot > 0) { uvm_swap_free(slot, 1); swpgonlydelta++; } } } /* * adjust the counter of pages only in swap for all * the swap slots we've freed. */ if (swpgonlydelta > 0) { KASSERT(uvmexp.swpgonly >= swpgonlydelta); atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta); } }
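/*
 * Illustrative sketch (not part of the original file): a small userland
 * program showing the bookkeeping idea used by the aobj swap code above.
 * Small objects keep one swap slot per page in a flat array; large objects
 * group page indices into fixed-size clusters addressed by a tag, which is
 * then hashed into a bucket.  The constants and helpers below
 * (DEMO_SWHASH_THRESHOLD, DEMO_CLUSTER_SIZE, demo_tag(), demo_slot_idx())
 * are assumed values for the example and do not reproduce the kernel's
 * UAO_SWHASH_* macros exactly.
 */
#include <stdio.h>

#define DEMO_SWHASH_THRESHOLD	64	/* pages; assumed value */
#define DEMO_CLUSTER_SIZE	4	/* slots per hash element; assumed */

/* Tag identifying the cluster a page index belongs to. */
static unsigned long
demo_tag(unsigned long pageidx)
{
	return pageidx / DEMO_CLUSTER_SIZE;
}

/* Index of the page's slot within its cluster. */
static unsigned long
demo_slot_idx(unsigned long pageidx)
{
	return pageidx % DEMO_CLUSTER_SIZE;
}

int
main(void)
{
	unsigned long npages = 200, pageidx = 131;

	if (npages <= DEMO_SWHASH_THRESHOLD)
		printf("small object: slot lives in swslots[%lu]\n", pageidx);
	else
		printf("large object: cluster tag %lu, index %lu in cluster\n",
		    demo_tag(pageidx), demo_slot_idx(pageidx));
	return 0;
}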
/*	$OpenBSD: video.c,v 1.57 2022/07/02 08:50:41 visa Exp $	*/
/*
 * Copyright (c) 2008 Robert Nagy <robert@openbsd.org>
 * Copyright (c) 2008 Marcus Glocker <mglocker@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/errno.h> #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/device.h> #include <sys/vnode.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/conf.h> #include <sys/proc.h> #include <sys/videoio.h> #include <dev/video_if.h> #include <uvm/uvm_extern.h> #ifdef VIDEO_DEBUG int video_debug = 1; #define DPRINTF(l, x...) do { if ((l) <= video_debug) printf(x); } while (0) #else #define DPRINTF(l, x...) #endif struct video_softc { struct device dev; void *hw_hdl; /* hardware driver handle */ struct device *sc_dev; /* hardware device struct */ const struct video_hw_if *hw_if; /* hardware interface */ char sc_dying; /* device detached */ struct process *sc_owner; /* owner process */ uint8_t sc_open; /* device opened */ int sc_fsize; uint8_t *sc_fbuffer; caddr_t sc_fbuffer_mmap; size_t sc_fbufferlen; int sc_vidmode; /* access mode */ #define VIDMODE_NONE 0 #define VIDMODE_MMAP 1 #define VIDMODE_READ 2 int sc_frames_ready; struct selinfo sc_rsel; /* read selector */ }; int videoprobe(struct device *, void *, void *); void videoattach(struct device *, struct device *, void *); int videodetach(struct device *, int); int videoactivate(struct device *, int); int videoprint(void *, const char *); void video_intr(void *); int video_stop(struct video_softc *); int video_claim(struct video_softc *, struct process *); const struct cfattach video_ca = { sizeof(struct video_softc), videoprobe, videoattach, videodetach, videoactivate }; struct cfdriver video_cd = { NULL, "video", DV_DULL }; /* * Global flag to control if video recording is enabled by kern.video.record. */ int video_record_enable = 0; int videoprobe(struct device *parent, void *match, void *aux) { return (1); } void videoattach(struct device *parent, struct device *self, void *aux) { struct video_softc *sc = (void *)self; struct video_attach_args *sa = aux; printf("\n"); sc->hw_if = sa->hwif; sc->hw_hdl = sa->hdl; sc->sc_dev = parent; sc->sc_fbufferlen = 0; sc->sc_owner = NULL; if (sc->hw_if->get_bufsize) sc->sc_fbufferlen = (sc->hw_if->get_bufsize)(sc->hw_hdl); if (sc->sc_fbufferlen == 0) { printf("video: could not request frame buffer size\n"); return; } sc->sc_fbuffer = malloc(sc->sc_fbufferlen, M_DEVBUF, M_NOWAIT); if (sc->sc_fbuffer == NULL) { printf("video: could not allocate frame buffer\n"); return; } } int videoopen(dev_t dev, int flags, int fmt, struct proc *p) { int unit = VIDEOUNIT(dev); struct video_softc *sc; int error = 0; KERNEL_ASSERT_LOCKED(); if (unit >= video_cd.cd_ndevs || (sc = video_cd.cd_devs[unit]) == NULL || sc->hw_if == NULL) return (ENXIO); if (sc->sc_open) { DPRINTF(1, "%s: device already open\n", __func__); return (0); } sc->sc_vidmode = VIDMODE_NONE; sc->sc_frames_ready = 0; if (sc->hw_if->open != NULL) { error = sc->hw_if->open(sc->hw_hdl, flags, &sc->sc_fsize, sc->sc_fbuffer, video_intr, sc); } if (error == 0) { sc->sc_open = 1; DPRINTF(1, "%s: set device to open\n", __func__); } return (error); } int videoclose(dev_t dev, int flags, int fmt, struct proc *p) { struct video_softc *sc; int error = 0; KERNEL_ASSERT_LOCKED(); DPRINTF(1, "%s: last close\n", __func__); sc = video_cd.cd_devs[VIDEOUNIT(dev)]; error = video_stop(sc); sc->sc_open = 0; return (error); } int videoread(dev_t dev, struct uio *uio, int ioflag) { int unit = VIDEOUNIT(dev); struct video_softc *sc; int error; size_t size; KERNEL_ASSERT_LOCKED(); if (unit >= video_cd.cd_ndevs || (sc = video_cd.cd_devs[unit]) == NULL) return (ENXIO); if (sc->sc_dying) return (EIO); if 
(sc->sc_vidmode == VIDMODE_MMAP) return (EBUSY); if ((error = video_claim(sc, curproc->p_p))) return (error); /* start the stream if not already started */ if (sc->sc_vidmode == VIDMODE_NONE && sc->hw_if->start_read) { error = sc->hw_if->start_read(sc->hw_hdl); if (error) return (error); sc->sc_vidmode = VIDMODE_READ; } DPRINTF(1, "resid=%zu\n", uio->uio_resid); if (sc->sc_frames_ready < 1) { /* block userland read until a frame is ready */ error = tsleep_nsec(sc, PWAIT | PCATCH, "vid_rd", INFSLP); if (sc->sc_dying) error = EIO; if (error) return (error); } /* move no more than 1 frame to userland, as per specification */ size = ulmin(uio->uio_resid, sc->sc_fsize); if (!video_record_enable) bzero(sc->sc_fbuffer, size); error = uiomove(sc->sc_fbuffer, size, uio); sc->sc_frames_ready--; if (error) return (error); DPRINTF(1, "uiomove successfully done (%zu bytes)\n", size); return (0); } int videoioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p) { int unit = VIDEOUNIT(dev); struct video_softc *sc; struct v4l2_buffer *vb = (struct v4l2_buffer *)data; int error; KERNEL_ASSERT_LOCKED(); if (unit >= video_cd.cd_ndevs || (sc = video_cd.cd_devs[unit]) == NULL || sc->hw_if == NULL) return (ENXIO); DPRINTF(3, "video_ioctl(%zu, '%c', %zu)\n", IOCPARM_LEN(cmd), (int) IOCGROUP(cmd), cmd & 0xff); error = EOPNOTSUPP; switch (cmd) { case VIDIOC_G_CTRL: if (sc->hw_if->g_ctrl) error = (sc->hw_if->g_ctrl)(sc->hw_hdl, (struct v4l2_control *)data); break; case VIDIOC_S_CTRL: if (sc->hw_if->s_ctrl) error = (sc->hw_if->s_ctrl)(sc->hw_hdl, (struct v4l2_control *)data); break; default: error = (ENOTTY); } if (error != ENOTTY) return (error); if ((error = video_claim(sc, p->p_p))) return (error); /* * The following IOCTLs can only be called by the device owner. * For further shared IOCTLs please move it up. 
*/ error = EOPNOTSUPP; switch (cmd) { case VIDIOC_QUERYCAP: if (sc->hw_if->querycap) error = (sc->hw_if->querycap)(sc->hw_hdl, (struct v4l2_capability *)data); break; case VIDIOC_ENUM_FMT: if (sc->hw_if->enum_fmt) error = (sc->hw_if->enum_fmt)(sc->hw_hdl, (struct v4l2_fmtdesc *)data); break; case VIDIOC_ENUM_FRAMESIZES: if (sc->hw_if->enum_fsizes) error = (sc->hw_if->enum_fsizes)(sc->hw_hdl, (struct v4l2_frmsizeenum *)data); break; case VIDIOC_ENUM_FRAMEINTERVALS: if (sc->hw_if->enum_fivals) error = (sc->hw_if->enum_fivals)(sc->hw_hdl, (struct v4l2_frmivalenum *)data); break; case VIDIOC_S_FMT: if (!(flags & FWRITE)) return (EACCES); if (sc->hw_if->s_fmt) error = (sc->hw_if->s_fmt)(sc->hw_hdl, (struct v4l2_format *)data); break; case VIDIOC_G_FMT: if (sc->hw_if->g_fmt) error = (sc->hw_if->g_fmt)(sc->hw_hdl, (struct v4l2_format *)data); break; case VIDIOC_S_PARM: if (sc->hw_if->s_parm) error = (sc->hw_if->s_parm)(sc->hw_hdl, (struct v4l2_streamparm *)data); break; case VIDIOC_G_PARM: if (sc->hw_if->g_parm) error = (sc->hw_if->g_parm)(sc->hw_hdl, (struct v4l2_streamparm *)data); break; case VIDIOC_ENUMINPUT: if (sc->hw_if->enum_input) error = (sc->hw_if->enum_input)(sc->hw_hdl, (struct v4l2_input *)data); break; case VIDIOC_S_INPUT: if (sc->hw_if->s_input) error = (sc->hw_if->s_input)(sc->hw_hdl, (int)*data); break; case VIDIOC_G_INPUT: if (sc->hw_if->g_input) error = (sc->hw_if->g_input)(sc->hw_hdl, (int *)data); break; case VIDIOC_REQBUFS: if (sc->hw_if->reqbufs) error = (sc->hw_if->reqbufs)(sc->hw_hdl, (struct v4l2_requestbuffers *)data); break; case VIDIOC_QUERYBUF: if (sc->hw_if->querybuf) error = (sc->hw_if->querybuf)(sc->hw_hdl, (struct v4l2_buffer *)data); break; case VIDIOC_QBUF: if (sc->hw_if->qbuf) error = (sc->hw_if->qbuf)(sc->hw_hdl, (struct v4l2_buffer *)data); break; case VIDIOC_DQBUF: if (!sc->hw_if->dqbuf) break; /* should have called mmap() before now */ if (sc->sc_vidmode != VIDMODE_MMAP) { error = EINVAL; break; } error = (sc->hw_if->dqbuf)(sc->hw_hdl, (struct v4l2_buffer *)data); if (!video_record_enable) bzero(sc->sc_fbuffer_mmap + vb->m.offset, vb->length); sc->sc_frames_ready--; break; case VIDIOC_STREAMON: if (sc->hw_if->streamon) error = (sc->hw_if->streamon)(sc->hw_hdl, (int)*data); break; case VIDIOC_STREAMOFF: if (sc->hw_if->streamoff) error = (sc->hw_if->streamoff)(sc->hw_hdl, (int)*data); if (!error) { /* Release device ownership and streaming buffers. 
*/ error = video_stop(sc); } break; case VIDIOC_TRY_FMT: if (sc->hw_if->try_fmt) error = (sc->hw_if->try_fmt)(sc->hw_hdl, (struct v4l2_format *)data); break; case VIDIOC_QUERYCTRL: if (sc->hw_if->queryctrl) error = (sc->hw_if->queryctrl)(sc->hw_hdl, (struct v4l2_queryctrl *)data); break; default: error = (ENOTTY); } return (error); } paddr_t videommap(dev_t dev, off_t off, int prot) { int unit = VIDEOUNIT(dev); struct video_softc *sc; caddr_t p; paddr_t pa; KERNEL_ASSERT_LOCKED(); DPRINTF(2, "%s: off=%lld, prot=%d\n", __func__, off, prot); if (unit >= video_cd.cd_ndevs || (sc = video_cd.cd_devs[unit]) == NULL) return (-1); if (sc->sc_dying) return (-1); if (sc->hw_if->mappage == NULL) return (-1); p = sc->hw_if->mappage(sc->hw_hdl, off, prot); if (p == NULL) return (-1); if (pmap_extract(pmap_kernel(), (vaddr_t)p, &pa) == FALSE) panic("videommap: invalid page"); sc->sc_vidmode = VIDMODE_MMAP; /* store frame buffer base address for later blanking */ if (off == 0) sc->sc_fbuffer_mmap = p; return (pa); } void filt_videodetach(struct knote *kn) { struct video_softc *sc = kn->kn_hook; int s; s = splhigh(); klist_remove_locked(&sc->sc_rsel.si_note, kn); splx(s); } int filt_videoread(struct knote *kn, long hint) { struct video_softc *sc = kn->kn_hook; if (sc->sc_frames_ready > 0) return (1); return (0); } const struct filterops video_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_videodetach, .f_event = filt_videoread, }; int videokqfilter(dev_t dev, struct knote *kn) { int unit = VIDEOUNIT(dev); struct video_softc *sc; int s, error; KERNEL_ASSERT_LOCKED(); if (unit >= video_cd.cd_ndevs || (sc = video_cd.cd_devs[unit]) == NULL) return (ENXIO); if (sc->sc_dying) return (ENXIO); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &video_filtops; kn->kn_hook = sc; break; default: return (EINVAL); } if ((error = video_claim(sc, curproc->p_p))) return (error); /* * Start the stream in read() mode if not already started. If * the user wanted mmap() mode, he should have called mmap() * before now. */ if (sc->sc_vidmode == VIDMODE_NONE && sc->hw_if->start_read) { if (sc->hw_if->start_read(sc->hw_hdl)) return (ENXIO); sc->sc_vidmode = VIDMODE_READ; } s = splhigh(); klist_insert_locked(&sc->sc_rsel.si_note, kn); splx(s); return (0); } int video_submatch(struct device *parent, void *match, void *aux) { struct cfdata *cf = match; return (cf->cf_driver == &video_cd); } /* * Called from hardware driver. 
This is where the MI video driver gets * probed/attached to the hardware driver */ struct device * video_attach_mi(const struct video_hw_if *rhwp, void *hdlp, struct device *dev) { struct video_attach_args arg; arg.hwif = rhwp; arg.hdl = hdlp; return (config_found_sm(dev, &arg, videoprint, video_submatch)); } void video_intr(void *addr) { struct video_softc *sc = (struct video_softc *)addr; DPRINTF(3, "video_intr sc=%p\n", sc); if (sc->sc_vidmode != VIDMODE_NONE) sc->sc_frames_ready++; else printf("%s: interrupt but no streams!\n", __func__); if (sc->sc_vidmode == VIDMODE_READ) wakeup(sc); selwakeup(&sc->sc_rsel); } int video_stop(struct video_softc *sc) { int error = 0; DPRINTF(1, "%s: stream close\n", __func__); if (sc->hw_if->close != NULL) error = sc->hw_if->close(sc->hw_hdl); sc->sc_vidmode = VIDMODE_NONE; sc->sc_frames_ready = 0; sc->sc_owner = NULL; return (error); } int video_claim(struct video_softc *sc, struct process *pr) { if (sc->sc_owner != NULL && sc->sc_owner != pr) { DPRINTF(1, "%s: already owned=%p\n", __func__, sc->sc_owner); return (EBUSY); } if (sc->sc_owner == NULL) { sc->sc_owner = pr; DPRINTF(1, "%s: new owner=%p\n", __func__, sc->sc_owner); } return (0); } int videoprint(void *aux, const char *pnp) { if (pnp != NULL) printf("video at %s", pnp); return (UNCONF); } int videodetach(struct device *self, int flags) { struct video_softc *sc = (struct video_softc *)self; int s, maj, mn; /* locate the major number */ for (maj = 0; maj < nchrdev; maj++) if (cdevsw[maj].d_open == videoopen) break; /* Nuke the vnodes for any open instances (calls close). */ mn = self->dv_unit; vdevgone(maj, mn, mn, VCHR); s = splhigh(); klist_invalidate(&sc->sc_rsel.si_note); splx(s); free(sc->sc_fbuffer, M_DEVBUF, sc->sc_fbufferlen); return (0); } int videoactivate(struct device *self, int act) { struct video_softc *sc = (struct video_softc *)self; switch (act) { case DVACT_DEACTIVATE: sc->sc_dying = 1; break; } return (0); }
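/*
 * Illustrative sketch (not part of the original file): a minimal userland
 * client for the read(2) path implemented by videoread() above.  The device
 * path and the 4 MB buffer size are assumptions; a real client would first
 * negotiate the pixel format and frame size with VIDIOC_S_FMT/VIDIOC_G_FMT.
 */
#include <sys/types.h>

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define DEMO_BUFSIZE	(4 * 1024 * 1024)	/* assumed upper bound */

int
main(void)
{
	char *buf;
	ssize_t n;
	int fd;

	if ((fd = open("/dev/video0", O_RDONLY)) == -1) {
		perror("open");
		return 1;
	}
	if ((buf = malloc(DEMO_BUFSIZE)) == NULL) {
		close(fd);
		return 1;
	}
	/* videoread() blocks until a frame is ready and returns at most one. */
	n = read(fd, buf, DEMO_BUFSIZE);
	if (n == -1)
		perror("read");
	else
		printf("got one frame, %zd bytes\n", n);
	free(buf);
	close(fd);
	return 0;
}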
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
* * Authors: * Eric Anholt <eric@anholt.net> * */ #include <linux/dma-buf.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/iosys-map.h> #include <linux/mem_encrypt.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include <linux/types.h> #include <linux/uaccess.h> #include <drm/drm.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> #include <drm/drm_vma_manager.h> #include "drm_internal.h" #include <sys/conf.h> #include <uvm/uvm.h> void drm_unref(struct uvm_object *); void drm_ref(struct uvm_object *); boolean_t drm_flush(struct uvm_object *, voff_t, voff_t, int); int drm_fault(struct uvm_faultinfo *, vaddr_t, vm_page_t *, int, int, vm_fault_t, vm_prot_t, int); const struct uvm_pagerops drm_pgops = { .pgo_reference = drm_ref, .pgo_detach = drm_unref, .pgo_fault = drm_fault, .pgo_flush = drm_flush, }; void drm_ref(struct uvm_object *uobj) { struct drm_gem_object *obj = container_of(uobj, struct drm_gem_object, uobj); drm_gem_object_get(obj); } void drm_unref(struct uvm_object *uobj) { struct drm_gem_object *obj = container_of(uobj, struct drm_gem_object, uobj); drm_gem_object_put(obj); } int drm_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx, vm_fault_t fault_type, vm_prot_t access_type, int flags) { struct vm_map_entry *entry = ufi->entry; struct uvm_object *uobj = entry->object.uvm_obj; struct drm_gem_object *obj = container_of(uobj, struct drm_gem_object, uobj); struct drm_device *dev = obj->dev; int ret; /* * we do not allow device mappings to be mapped copy-on-write * so we kill any attempt to do so here. */ if (UVM_ET_ISCOPYONWRITE(entry)) { uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); return(VM_PAGER_ERROR); } /* * We could end up here as the result of a copyin(9) or * copyout(9) while handling an ioctl. So we must be careful * not to deadlock. Therefore we only block if the quiesce * count is zero, which guarantees we didn't enter from within * an ioctl code path. 
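 * If the device is quiescing and we did not enter from an ioctl, the code
 * below drops the fault locks, sleeps until the quiesce ends and returns
 * VM_PAGER_REFAULT so that uvm_fault() retries the access.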
*/ mtx_enter(&dev->quiesce_mtx); if (dev->quiesce && dev->quiesce_count == 0) { mtx_leave(&dev->quiesce_mtx); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); mtx_enter(&dev->quiesce_mtx); while (dev->quiesce) { msleep_nsec(&dev->quiesce, &dev->quiesce_mtx, PZERO, "drmflt", INFSLP); } mtx_leave(&dev->quiesce_mtx); return(VM_PAGER_REFAULT); } dev->quiesce_count++; mtx_leave(&dev->quiesce_mtx); /* Call down into driver to do the magic */ ret = dev->driver->gem_fault(obj, ufi, entry->offset + (vaddr - entry->start), vaddr, pps, npages, centeridx, access_type, flags); mtx_enter(&dev->quiesce_mtx); dev->quiesce_count--; if (dev->quiesce) wakeup(&dev->quiesce_count); mtx_leave(&dev->quiesce_mtx); return (ret); } boolean_t drm_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) { return (TRUE); } struct uvm_object * udv_attach_drm(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size) { struct drm_device *dev = drm_get_device_from_kdev(device); struct drm_gem_object *obj = NULL; struct drm_vma_offset_node *node; struct drm_file *priv; struct file *filp; if (cdevsw[major(device)].d_mmap != drmmmap) return NULL; if (dev == NULL) return NULL; mutex_lock(&dev->filelist_mutex); priv = drm_find_file_by_minor(dev, minor(device)); if (priv == NULL) { mutex_unlock(&dev->filelist_mutex); return NULL; } filp = priv->filp; mutex_unlock(&dev->filelist_mutex); if (dev->driver->mmap) return dev->driver->mmap(filp, accessprot, off, size); drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, off >> PAGE_SHIFT, atop(round_page(size))); if (likely(node)) { obj = container_of(node, struct drm_gem_object, vma_node); /* * When the object is being freed, after it hits 0-refcnt it * proceeds to tear down the object. In the process it will * attempt to remove the VMA offset and so acquire this * mgr->vm_lock. Therefore if we find an object with a 0-refcnt * that matches our range, we know it is in the process of being * destroyed and will be freed as soon as we release the lock - * so we have to check for the 0-refcnted object and treat it as * invalid. */ if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); if (!obj) return NULL; if (!drm_vma_node_is_allowed(node, priv)) { drm_gem_object_put(obj); return NULL; } return &obj->uobj; } /** @file drm_gem.c * * This file provides some of the base ioctls and library routines for * the graphics memory manager implemented by each device driver. * * Because various devices have different requirements in terms of * synchronization and migration strategies, implementing that is left up to * the driver, and all that the general API provides should be generic -- * allocating objects, reading/writing data with the cpu, freeing objects. * Even there, platform-dependent optimizations for reading/writing data with * the CPU mean we'll likely hook those out to driver-specific calls. However, * the DRI2 implementation wants to have at least allocate/mmap be generic. * * The goal was to have swap-backed object allocation managed through * struct file. However, file descriptors as handles to a struct file have * two major failings: * - Process limits prevent more than 1024 or so being used at a time by * default. * - Inability to allocate high fds will aggravate the X Server's select() * handling, and likely that of many GL client applications as well. 
* * This led to a plan of using our own integer IDs (called handles, following * DRM terminology) to mimic fds, and implement the fd syscalls we need as * ioctls. The objects themselves will still include the struct file so * that we can transition to fds if the required kernel infrastructure shows * up at a later date, and as our interface with shmfs for memory allocation. */ static void drm_gem_init_release(struct drm_device *dev, void *ptr) { drm_vma_offset_manager_destroy(dev->vma_offset_manager); } /** * drm_gem_init - Initialize the GEM device fields * @dev: drm_devic structure to initialize */ int drm_gem_init(struct drm_device *dev) { struct drm_vma_offset_manager *vma_offset_manager; rw_init(&dev->object_name_lock, "drmonl"); idr_init_base(&dev->object_name_idr, 1); vma_offset_manager = drmm_kzalloc(dev, sizeof(*vma_offset_manager), GFP_KERNEL); if (!vma_offset_manager) { DRM_ERROR("out of memory\n"); return -ENOMEM; } dev->vma_offset_manager = vma_offset_manager; drm_vma_offset_manager_init(vma_offset_manager, DRM_FILE_PAGE_OFFSET_START, DRM_FILE_PAGE_OFFSET_SIZE); return drmm_add_action(dev, drm_gem_init_release, NULL); } #ifdef __linux__ /** * drm_gem_object_init - initialize an allocated shmem-backed GEM object * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size * * Initialize an already allocated GEM object of the specified size with * shmfs backing store. */ int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size) { struct file *filp; drm_gem_private_object_init(dev, obj, size); filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); if (IS_ERR(filp)) return PTR_ERR(filp); obj->filp = filp; return 0; } EXPORT_SYMBOL(drm_gem_object_init); #else int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size) { drm_gem_private_object_init(dev, obj, size); if (size > (512 * 1024 * 1024)) { printf("%s size too big %lu\n", __func__, size); return -ENOMEM; } obj->uao = uao_create(size, 0); uvm_obj_init(&obj->uobj, &drm_pgops, 1); return 0; } #endif /** * drm_gem_private_object_init - initialize an allocated private GEM object * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size * * Initialize an already allocated GEM object of the specified size with * no GEM provided backing store. Instead the caller is responsible for * backing the object and handling it. */ void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size) { BUG_ON((size & (PAGE_SIZE - 1)) != 0); obj->dev = dev; #ifdef __linux__ obj->filp = NULL; #else obj->uao = NULL; obj->uobj.pgops = NULL; #endif kref_init(&obj->refcount); obj->handle_count = 0; obj->size = size; dma_resv_init(&obj->_resv); if (!obj->resv) obj->resv = &obj->_resv; if (drm_core_check_feature(dev, DRIVER_GEM_GPUVA)) drm_gem_gpuva_init(obj); drm_vma_node_reset(&obj->vma_node); INIT_LIST_HEAD(&obj->lru_node); } EXPORT_SYMBOL(drm_gem_private_object_init); /** * drm_gem_private_object_fini - Finalize a failed drm_gem_object * @obj: drm_gem_object * * Uninitialize an already allocated GEM object when it initialized failed */ void drm_gem_private_object_fini(struct drm_gem_object *obj) { WARN_ON(obj->dma_buf); dma_resv_fini(&obj->_resv); } EXPORT_SYMBOL(drm_gem_private_object_fini); /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. 
* * Called after the last handle to the object has been closed * * Removes any name for the object. Note that this must be * called before drm_gem_object_free or we'll be touching * freed memory */ static void drm_gem_object_handle_free(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; /* Remove any name for this object */ if (obj->name) { idr_remove(&dev->object_name_idr, obj->name); obj->name = 0; } } static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) { /* Unbreak the reference cycle if we have an exported dma_buf. */ if (obj->dma_buf) { dma_buf_put(obj->dma_buf); obj->dma_buf = NULL; } } static void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { drm_gem_object_handle_free(obj); drm_gem_object_exported_dma_buf_free(obj); final = true; } mutex_unlock(&dev->object_name_lock); if (final) drm_gem_object_put(obj); } /* * Called at device or object close to release the file's * handle references on objects. */ static int drm_gem_object_release_handle(int id, void *ptr, void *data) { struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; if (obj->funcs->close) obj->funcs->close(obj, file_priv); drm_prime_remove_buf_handle(&file_priv->prime, id); drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); return 0; } /** * drm_gem_handle_delete - deletes the given file-private handle * @filp: drm file-private structure to use for the handle look up * @handle: userspace handle to delete * * Removes the GEM handle from the @filp lookup table which has been added with * drm_gem_handle_create(). If this is the last handle also cleans up linked * resources like GEM names. */ int drm_gem_handle_delete(struct drm_file *filp, u32 handle) { struct drm_gem_object *obj; spin_lock(&filp->table_lock); /* Check if we currently have a reference on the object */ obj = idr_replace(&filp->object_idr, NULL, handle); spin_unlock(&filp->table_lock); if (IS_ERR_OR_NULL(obj)) return -EINVAL; /* Release driver's reference and decrement refcount. */ drm_gem_object_release_handle(handle, obj, filp); /* And finally make the handle available for future allocations. */ spin_lock(&filp->table_lock); idr_remove(&filp->object_idr, handle); spin_unlock(&filp->table_lock); return 0; } EXPORT_SYMBOL(drm_gem_handle_delete); /** * drm_gem_dumb_map_offset - return the fake mmap offset for a gem object * @file: drm file-private structure containing the gem object * @dev: corresponding drm_device * @handle: gem object handle * @offset: return location for the fake mmap offset * * This implements the &drm_driver.dumb_map_offset kms driver callback for * drivers which use gem to manage their backing storage. * * Returns: * 0 on success or a negative error code on failure. 
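 * (From userspace, the offset returned here is typically what a client
 * passes to mmap(2) on the DRM file descriptor, e.g. the offset filled in
 * by DRM_IOCTL_MODE_MAP_DUMB.)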
*/ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset) { struct drm_gem_object *obj; int ret; obj = drm_gem_object_lookup(file, handle); if (!obj) return -ENOENT; /* Don't allow imported objects to be mapped */ if (obj->import_attach) { ret = -EINVAL; goto out; } ret = drm_gem_create_mmap_offset(obj); if (ret) goto out; *offset = drm_vma_node_offset_addr(&obj->vma_node); out: drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL_GPL(drm_gem_dumb_map_offset); /** * drm_gem_handle_create_tail - internal functions to create a handle * @file_priv: drm file-private structure to register the handle for * @obj: object to register * @handlep: pointer to return the created handle to the caller * * This expects the &drm_device.object_name_lock to be held already and will * drop it before returning. Used to avoid races in establishing new handles * when importing an object from either an flink name or a dma-buf. * * Handles must be release again through drm_gem_handle_delete(). This is done * when userspace closes @file_priv for all attached handles, or through the * GEM_CLOSE ioctl for individual handles. */ int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep) { struct drm_device *dev = obj->dev; u32 handle; int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); if (obj->handle_count++ == 0) drm_gem_object_get(obj); /* * Get the user-visible handle using idr. Preload and perform * allocation under our spinlock. */ idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); mutex_unlock(&dev->object_name_lock); if (ret < 0) goto err_unref; handle = ret; ret = drm_vma_node_allow(&obj->vma_node, file_priv); if (ret) goto err_remove; if (obj->funcs->open) { ret = obj->funcs->open(obj, file_priv); if (ret) goto err_revoke; } *handlep = handle; return 0; err_revoke: drm_vma_node_revoke(&obj->vma_node, file_priv); err_remove: spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); spin_unlock(&file_priv->table_lock); err_unref: drm_gem_object_handle_put_unlocked(obj); return ret; } /** * drm_gem_handle_create - create a gem handle for an object * @file_priv: drm file-private structure to register the handle for * @obj: object to register * @handlep: pointer to return the created handle to the caller * * Create a handle for this object. This adds a handle reference to the object, * which includes a regular reference count. Callers will likely want to * dereference the object afterwards. * * Since this publishes @obj to userspace it must be fully set up by this point, * drivers must call this last in their buffer object creation callbacks. */ int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep) { mutex_lock(&obj->dev->object_name_lock); return drm_gem_handle_create_tail(file_priv, obj, handlep); } EXPORT_SYMBOL(drm_gem_handle_create); /** * drm_gem_free_mmap_offset - release a fake mmap offset for an object * @obj: obj in question * * This routine frees fake offsets allocated by drm_gem_create_mmap_offset(). * * Note that drm_gem_object_release() already calls this function, so drivers * don't have to take care of releasing the mmap offset themselves when freeing * the GEM object. 
*/ void drm_gem_free_mmap_offset(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; drm_vma_offset_remove(dev->vma_offset_manager, &obj->vma_node); } EXPORT_SYMBOL(drm_gem_free_mmap_offset); /** * drm_gem_create_mmap_offset_size - create a fake mmap offset for an object * @obj: obj in question * @size: the virtual size * * GEM memory mapping works by handing back to userspace a fake mmap offset * it can use in a subsequent mmap(2) call. The DRM core code then looks * up the object based on the offset and sets up the various memory mapping * structures. * * This routine allocates and attaches a fake offset for @obj, in cases where * the virtual size differs from the physical size (ie. &drm_gem_object.size). * Otherwise just use drm_gem_create_mmap_offset(). * * This function is idempotent and handles an already allocated mmap offset * transparently. Drivers do not need to check for this case. */ int drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size) { struct drm_device *dev = obj->dev; return drm_vma_offset_add(dev->vma_offset_manager, &obj->vma_node, size / PAGE_SIZE); } EXPORT_SYMBOL(drm_gem_create_mmap_offset_size); /** * drm_gem_create_mmap_offset - create a fake mmap offset for an object * @obj: obj in question * * GEM memory mapping works by handing back to userspace a fake mmap offset * it can use in a subsequent mmap(2) call. The DRM core code then looks * up the object based on the offset and sets up the various memory mapping * structures. * * This routine allocates and attaches a fake offset for @obj. * * Drivers can call drm_gem_free_mmap_offset() before freeing @obj to release * the fake offset again. */ int drm_gem_create_mmap_offset(struct drm_gem_object *obj) { return drm_gem_create_mmap_offset_size(obj, obj->size); } EXPORT_SYMBOL(drm_gem_create_mmap_offset); #ifdef notyet /* * Move folios to appropriate lru and release the folios, decrementing the * ref count of those folios. */ static void drm_gem_check_release_batch(struct folio_batch *fbatch) { check_move_unevictable_folios(fbatch); __folio_batch_release(fbatch); cond_resched(); } #endif /** * drm_gem_get_pages - helper to allocate backing pages for a GEM object * from shmem * @obj: obj in question * * This reads the page-array of the shmem-backing storage of the given gem * object. An array of pages is returned. If a page is not allocated or * swapped-out, this will allocate/swap-in the required pages. Note that the * whole object is covered by the page-array and pinned in memory. * * Use drm_gem_put_pages() to release the array and unpin all pages. * * This uses the GFP-mask set on the shmem-mapping (see mapping_set_gfp_mask()). * If you require other GFP-masks, you have to do those allocations yourself. * * Note that you are not allowed to change gfp-zones during runtime. That is, * shmem_read_mapping_page_gfp() must be called with the same gfp_zone(gfp) as * set during initialization. If you have special zone constraints, set them * after drm_gem_object_init() via mapping_set_gfp_mask(). shmem-core takes care * to keep pages in the required zone during swap-in. * * This function is only valid on objects initialized with * drm_gem_object_init(), but not for those initialized with * drm_gem_private_object_init() only. 
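 *
 * A usage sketch (assumed driver code, not from this file) pairing this
 * helper with drm_gem_put_pages() below; "obj" is a &drm_gem_object that was
 * set up with drm_gem_object_init():
 *
 *	struct vm_page **pages = drm_gem_get_pages(obj);
 *
 *	if (IS_ERR(pages))
 *		return PTR_ERR(pages);
 *	... read or write the backing pages ...
 *	drm_gem_put_pages(obj, pages, true, false);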
*/ struct vm_page **drm_gem_get_pages(struct drm_gem_object *obj) { STUB(); return ERR_PTR(-ENOSYS); #ifdef notyet struct address_space *mapping; struct vm_page **pages; struct folio *folio; struct folio_batch fbatch; long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); /* This is the shared memory object that backs the GEM resource */ mapping = obj->filp->f_mapping; /* We already BUG_ON() for non-page-aligned sizes in * drm_gem_object_init(), so we should never hit this unless * driver author is doing something really wrong: */ WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0); npages = obj->size >> PAGE_SHIFT; pages = kvmalloc_array(npages, sizeof(struct vm_page *), GFP_KERNEL); if (pages == NULL) return ERR_PTR(-ENOMEM); mapping_set_unevictable(mapping); i = 0; while (i < npages) { long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) goto fail; nr = min(npages - i, folio_nr_pages(folio)); for (j = 0; j < nr; j++, i++) pages[i] = folio_file_page(folio, i); /* Make sure shmem keeps __GFP_DMA32 allocated pages in the * correct region during swapin. Note that this requires * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) * so shmem can relocate pages during swapin if required. */ BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) && (folio_pfn(folio) >= 0x00100000UL)); } return pages; fail: mapping_clear_unevictable(mapping); folio_batch_init(&fbatch); j = 0; while (j < i) { struct folio *f = page_folio(pages[j]); if (!folio_batch_add(&fbatch, f)) drm_gem_check_release_batch(&fbatch); j += folio_nr_pages(f); } if (fbatch.nr) drm_gem_check_release_batch(&fbatch); kvfree(pages); return ERR_CAST(folio); #endif } EXPORT_SYMBOL(drm_gem_get_pages); /** * drm_gem_put_pages - helper to free backing pages for a GEM object * @obj: obj in question * @pages: pages to free * @dirty: if true, pages will be marked as dirty * @accessed: if true, the pages will be marked as accessed */ void drm_gem_put_pages(struct drm_gem_object *obj, struct vm_page **pages, bool dirty, bool accessed) { STUB(); #ifdef notyet int i, npages; struct address_space *mapping; struct folio_batch fbatch; mapping = file_inode(obj->filp)->i_mapping; mapping_clear_unevictable(mapping); /* We already BUG_ON() for non-page-aligned sizes in * drm_gem_object_init(), so we should never hit this unless * driver author is doing something really wrong: */ WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0); npages = obj->size >> PAGE_SHIFT; folio_batch_init(&fbatch); for (i = 0; i < npages; i++) { struct folio *folio; if (!pages[i]) continue; folio = page_folio(pages[i]); if (dirty) folio_mark_dirty(folio); if (accessed) folio_mark_accessed(folio); /* Undo the reference we took when populating the table */ if (!folio_batch_add(&fbatch, folio)) drm_gem_check_release_batch(&fbatch); i += folio_nr_pages(folio) - 1; } if (folio_batch_count(&fbatch)) drm_gem_check_release_batch(&fbatch); kvfree(pages); #endif } EXPORT_SYMBOL(drm_gem_put_pages); static int objects_lookup(struct drm_file *filp, u32 *handle, int count, struct drm_gem_object **objs) { int i, ret = 0; struct drm_gem_object *obj; spin_lock(&filp->table_lock); for (i = 0; i < count; i++) { /* Check if we currently have a reference on the object */ obj = idr_find(&filp->object_idr, handle[i]); if (!obj) { ret = -ENOENT; break; } drm_gem_object_get(obj); objs[i] = obj; } spin_unlock(&filp->table_lock); return ret; } /** * drm_gem_objects_lookup - look up GEM objects from an array of handles * @filp: DRM file private date * 
@bo_handles: user pointer to array of userspace handle * @count: size of handle array * @objs_out: returned pointer to array of drm_gem_object pointers * * Takes an array of userspace handles and returns a newly allocated array of * GEM objects. * * For a single handle lookup, use drm_gem_object_lookup(). * * Returns: * * @objs filled in with GEM object pointers. Returned GEM objects need to be * released with drm_gem_object_put(). -ENOENT is returned on a lookup * failure. 0 is returned on success. * */ int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, int count, struct drm_gem_object ***objs_out) { int ret; u32 *handles; struct drm_gem_object **objs; if (!count) return 0; objs = kvmalloc_array(count, sizeof(struct drm_gem_object *), GFP_KERNEL | __GFP_ZERO); if (!objs) return -ENOMEM; *objs_out = objs; handles = kvmalloc_array(count, sizeof(u32), GFP_KERNEL); if (!handles) { ret = -ENOMEM; goto out; } if (copy_from_user(handles, bo_handles, count * sizeof(u32))) { ret = -EFAULT; DRM_DEBUG("Failed to copy in GEM handles\n"); goto out; } ret = objects_lookup(filp, handles, count, objs); out: kvfree(handles); return ret; } EXPORT_SYMBOL(drm_gem_objects_lookup); /** * drm_gem_object_lookup - look up a GEM object from its handle * @filp: DRM file private date * @handle: userspace handle * * Returns: * * A reference to the object named by the handle if such exists on @filp, NULL * otherwise. * * If looking up an array of handles, use drm_gem_objects_lookup(). */ struct drm_gem_object * drm_gem_object_lookup(struct drm_file *filp, u32 handle) { struct drm_gem_object *obj = NULL; objects_lookup(filp, &handle, 1, &obj); return obj; } EXPORT_SYMBOL(drm_gem_object_lookup); /** * drm_gem_dma_resv_wait - Wait on GEM object's reservation's objects * shared and/or exclusive fences. * @filep: DRM file private date * @handle: userspace handle * @wait_all: if true, wait on all fences, else wait on just exclusive fence * @timeout: timeout value in jiffies or zero to return immediately * * Returns: * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than 0 on success. */ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, bool wait_all, unsigned long timeout) { long ret; struct drm_gem_object *obj; obj = drm_gem_object_lookup(filep, handle); if (!obj) { DRM_DEBUG("Failed to look up GEM BO %d\n", handle); return -EINVAL; } ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all), true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) ret = 0; drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_dma_resv_wait); /** * drm_gem_close_ioctl - implementation of the GEM_CLOSE ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Releases the handle to an mm object. */ int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_close *args = data; int ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; ret = drm_gem_handle_delete(file_priv, args->handle); return ret; } /** * drm_gem_flink_ioctl - implementation of the GEM_FLINK ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Create a global name for an object, returning the name. * * Note that the name does not hold a reference; when the object * is freed, the name goes away. 
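 *
 * Illustrative userspace round trip (a sketch of the standard GEM uapi, not
 * taken from this file): the name created here can be passed to another DRM
 * file descriptor and turned back into a handle with GEM_OPEN:
 *
 *	struct drm_gem_flink flink = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
 *	struct drm_gem_open op = { .name = flink.name };
 *	ioctl(other_fd, DRM_IOCTL_GEM_OPEN, &op);
 *	... op.handle and op.size now describe the shared object ...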
*/ int drm_gem_flink_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_flink *args = data; struct drm_gem_object *obj; int ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; obj = drm_gem_object_lookup(file_priv, args->handle); if (obj == NULL) return -ENOENT; mutex_lock(&dev->object_name_lock); /* prevent races with concurrent gem_close. */ if (obj->handle_count == 0) { ret = -ENOENT; goto err; } if (!obj->name) { ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_KERNEL); if (ret < 0) goto err; obj->name = ret; } args->name = (uint64_t) obj->name; ret = 0; err: mutex_unlock(&dev->object_name_lock); drm_gem_object_put(obj); return ret; } /** * drm_gem_open_ioctl - implementation of the GEM_OPEN ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Open an object using the global name, returning a handle and the size. * * This handle (of course) holds a reference to the object, so the object * will not go away until the handle is deleted. */ int drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_open *args = data; struct drm_gem_object *obj; int ret; u32 handle; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; mutex_lock(&dev->object_name_lock); obj = idr_find(&dev->object_name_idr, (int) args->name); if (obj) { drm_gem_object_get(obj); } else { mutex_unlock(&dev->object_name_lock); return -ENOENT; } /* drm_gem_handle_create_tail unlocks dev->object_name_lock. */ ret = drm_gem_handle_create_tail(file_priv, obj, &handle); if (ret) goto err; args->handle = handle; args->size = obj->size; err: drm_gem_object_put(obj); return ret; } /** * drm_gem_open - initializes GEM file-private structures at devnode open time * @dev: drm_device which is being opened by userspace * @file_private: drm file-private structure to set up * * Called at device open time, sets up the structure for handling refcounting * of mm objects. */ void drm_gem_open(struct drm_device *dev, struct drm_file *file_private) { idr_init_base(&file_private->object_idr, 1); mtx_init(&file_private->table_lock, IPL_NONE); } /** * drm_gem_release - release file-private GEM resources * @dev: drm_device which is being closed by userspace * @file_private: drm file-private structure to clean up * * Called at close time when the filp is going away. * * Releases any remaining references on objects by this filp. */ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private) { idr_for_each(&file_private->object_idr, &drm_gem_object_release_handle, file_private); idr_destroy(&file_private->object_idr); } /** * drm_gem_object_release - release GEM buffer object resources * @obj: GEM buffer object * * This releases any structures and resources used by @obj and is the inverse of * drm_gem_object_init(). */ void drm_gem_object_release(struct drm_gem_object *obj) { #ifdef __linux__ if (obj->filp) fput(obj->filp); #else if (obj->uao) uao_detach(obj->uao); if (obj->uobj.pgops) uvm_obj_destroy(&obj->uobj); #endif drm_gem_private_object_fini(obj); drm_gem_free_mmap_offset(obj); drm_gem_lru_remove(obj); } EXPORT_SYMBOL(drm_gem_object_release); /** * drm_gem_object_free - free a GEM object * @kref: kref of the object to free * * Called after the last reference to the object has been lost. 
* * Frees the object */ void drm_gem_object_free(struct kref *kref) { struct drm_gem_object *obj = container_of(kref, struct drm_gem_object, refcount); if (WARN_ON(!obj->funcs->free)) return; obj->funcs->free(obj); } EXPORT_SYMBOL(drm_gem_object_free); #ifdef __linux__ /** * drm_gem_vm_open - vma->ops->open implementation for GEM * @vma: VM area structure * * This function implements the #vm_operations_struct open() callback for GEM * drivers. This must be used together with drm_gem_vm_close(). */ void drm_gem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_get(obj); } EXPORT_SYMBOL(drm_gem_vm_open); /** * drm_gem_vm_close - vma->ops->close implementation for GEM * @vma: VM area structure * * This function implements the #vm_operations_struct close() callback for GEM * drivers. This must be used together with drm_gem_vm_open(). */ void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_put(obj); } EXPORT_SYMBOL(drm_gem_vm_close); /** * drm_gem_mmap_obj - memory map a GEM object * @obj: the GEM object to map * @obj_size: the object size to be mapped, in bytes * @vma: VMA for the area to be mapped * * Set up the VMA to prepare mapping of the GEM object using the GEM object's * vm_ops. Depending on their requirements, GEM objects can either * provide a fault handler in their vm_ops (in which case any accesses to * the object will be trapped, to perform migration, GTT binding, surface * register allocation, or performance monitoring), or mmap the buffer memory * synchronously after calling drm_gem_mmap_obj. * * This function is mainly intended to implement the DMABUF mmap operation, when * the GEM object is not looked up based on its fake offset. To implement the * DRM mmap operation, drivers should use the drm_gem_mmap() function. * * drm_gem_mmap_obj() assumes the user is granted access to the buffer while * drm_gem_mmap() prevents unprivileged users from mapping random objects. So * callers must verify access restrictions before calling this helper. * * Return 0 or success or -EINVAL if the object size is smaller than the VMA * size, or if no vm_ops are provided. */ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma) { int ret; /* Check for valid size. */ if (obj_size < vma->vm_end - vma->vm_start) return -EINVAL; /* Take a ref for this mapping of the object, so that the fault * handler can dereference the mmap offset's pointer to the object. * This reference is cleaned up by the corresponding vm_close * (which should happen whether the vma was created by this call, or * by a vm_open due to mremap or partial unmap or whatever). 
*/ drm_gem_object_get(obj); vma->vm_private_data = obj; vma->vm_ops = obj->funcs->vm_ops; if (obj->funcs->mmap) { ret = obj->funcs->mmap(obj, vma); if (ret) goto err_drm_gem_object_put; WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); } else { if (!vma->vm_ops) { ret = -EINVAL; goto err_drm_gem_object_put; } vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); } return 0; err_drm_gem_object_put: drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_mmap_obj); /** * drm_gem_mmap - memory map routine for GEM objects * @filp: DRM file pointer * @vma: VMA for the area to be mapped * * If a driver supports GEM object mapping, mmap calls on the DRM file * descriptor will end up here. * * Look up the GEM object based on the offset passed in (vma->vm_pgoff will * contain the fake offset we created when the GTT map ioctl was called on * the object) and map it with a call to drm_gem_mmap_obj(). * * If the caller is not granted access to the buffer object, the mmap will fail * with EACCES. Please see the vma manager for more information. */ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_file *priv = filp->private_data; struct drm_device *dev = priv->minor->dev; struct drm_gem_object *obj = NULL; struct drm_vma_offset_node *node; int ret; if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, vma->vm_pgoff, vma_pages(vma)); if (likely(node)) { obj = container_of(node, struct drm_gem_object, vma_node); /* * When the object is being freed, after it hits 0-refcnt it * proceeds to tear down the object. In the process it will * attempt to remove the VMA offset and so acquire this * mgr->vm_lock. Therefore if we find an object with a 0-refcnt * that matches our range, we know it is in the process of being * destroyed and will be freed as soon as we release the lock - * so we have to check for the 0-refcnted object and treat it as * invalid. */ if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); if (!obj) return -EINVAL; if (!drm_vma_node_is_allowed(node, priv)) { drm_gem_object_put(obj); return -EACCES; } ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma); drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_mmap); #else /* ! __linux__ */ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, vm_prot_t accessprot, voff_t off, vsize_t size) { int ret; /* Check for valid size. */ if (obj_size < size) return -EINVAL; /* Take a ref for this mapping of the object, so that the fault * handler can dereference the mmap offset's pointer to the object. * This reference is cleaned up by the corresponding vm_close * (which should happen whether the vma was created by this call, or * by a vm_open due to mremap or partial unmap or whatever). 
*/ drm_gem_object_get(obj); #ifdef __linux__ vma->vm_private_data = obj; vma->vm_ops = obj->funcs->vm_ops; #else if (obj->uobj.pgops == NULL) uvm_obj_init(&obj->uobj, obj->funcs->vm_ops, 1); #endif if (obj->funcs->mmap) { ret = obj->funcs->mmap(obj, accessprot, off, size); if (ret) goto err_drm_gem_object_put; #ifdef notyet WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); #endif } else { #ifdef notyet if (!vma->vm_ops) { ret = -EINVAL; goto err_drm_gem_object_put; } vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); #else ret = -EINVAL; goto err_drm_gem_object_put; #endif } return 0; err_drm_gem_object_put: drm_gem_object_put(obj); return ret; } struct uvm_object * drm_gem_mmap(struct file *filp, vm_prot_t accessprot, voff_t off, vsize_t size) { struct drm_file *priv = (void *)filp; struct drm_device *dev = priv->minor->dev; struct drm_gem_object *obj = NULL; struct drm_vma_offset_node *node; int ret; if (drm_dev_is_unplugged(dev)) return NULL; drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, off >> PAGE_SHIFT, atop(round_page(size))); if (likely(node)) { obj = container_of(node, struct drm_gem_object, vma_node); /* * When the object is being freed, after it hits 0-refcnt it * proceeds to tear down the object. In the process it will * attempt to remove the VMA offset and so acquire this * mgr->vm_lock. Therefore if we find an object with a 0-refcnt * that matches our range, we know it is in the process of being * destroyed and will be freed as soon as we release the lock - * so we have to check for the 0-refcnted object and treat it as * invalid. */ if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); if (!obj) return NULL; if (!drm_vma_node_is_allowed(node, priv)) { drm_gem_object_put(obj); return NULL; } ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, accessprot, off, size); drm_gem_object_put(obj); return &obj->uobj; } #endif /* __linux__ */ void drm_gem_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj) { drm_printf_indent(p, indent, "name=%d\n", obj->name); drm_printf_indent(p, indent, "refcount=%u\n", kref_read(&obj->refcount)); drm_printf_indent(p, indent, "start=%08lx\n", drm_vma_node_start(&obj->vma_node)); drm_printf_indent(p, indent, "size=%zu\n", obj->size); drm_printf_indent(p, indent, "imported=%s\n", str_yes_no(obj->import_attach)); if (obj->funcs->print_info) obj->funcs->print_info(p, indent, obj); } int drm_gem_pin(struct drm_gem_object *obj) { if (obj->funcs->pin) return obj->funcs->pin(obj); return 0; } void drm_gem_unpin(struct drm_gem_object *obj) { if (obj->funcs->unpin) obj->funcs->unpin(obj); } int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) { int ret; dma_resv_assert_held(obj->resv); if (!obj->funcs->vmap) return -EOPNOTSUPP; ret = obj->funcs->vmap(obj, map); if (ret) return ret; else if (iosys_map_is_null(map)) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_gem_vmap); void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_assert_held(obj->resv); if (iosys_map_is_null(map)) return; if (obj->funcs->vunmap) obj->funcs->vunmap(obj, map); /* Always set the mapping to NULL. Callers may rely on this. 
*/ iosys_map_clear(map); } EXPORT_SYMBOL(drm_gem_vunmap); int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) { int ret; dma_resv_lock(obj->resv, NULL); ret = drm_gem_vmap(obj, map); dma_resv_unlock(obj->resv); return ret; } EXPORT_SYMBOL(drm_gem_vmap_unlocked); void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_lock(obj->resv, NULL); drm_gem_vunmap(obj, map); dma_resv_unlock(obj->resv); } EXPORT_SYMBOL(drm_gem_vunmap_unlocked); /** * drm_gem_lock_reservations - Sets up the ww context and acquires * the lock on an array of GEM objects. * * Once you've locked your reservations, you'll want to set up space * for your shared fences (if applicable), submit your job, then * drm_gem_unlock_reservations(). * * @objs: drm_gem_objects to lock * @count: Number of objects in @objs * @acquire_ctx: struct ww_acquire_ctx that will be initialized as * part of tracking this set of locked reservations. */ int drm_gem_lock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx) { int contended = -1; int i, ret; ww_acquire_init(acquire_ctx, &reservation_ww_class); retry: if (contended != -1) { struct drm_gem_object *obj = objs[contended]; ret = dma_resv_lock_slow_interruptible(obj->resv, acquire_ctx); if (ret) { ww_acquire_fini(acquire_ctx); return ret; } } for (i = 0; i < count; i++) { if (i == contended) continue; ret = dma_resv_lock_interruptible(objs[i]->resv, acquire_ctx); if (ret) { int j; for (j = 0; j < i; j++) dma_resv_unlock(objs[j]->resv); if (contended != -1 && contended >= i) dma_resv_unlock(objs[contended]->resv); if (ret == -EDEADLK) { contended = i; goto retry; } ww_acquire_fini(acquire_ctx); return ret; } } ww_acquire_done(acquire_ctx); return 0; } EXPORT_SYMBOL(drm_gem_lock_reservations); void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx) { int i; for (i = 0; i < count; i++) dma_resv_unlock(objs[i]->resv); ww_acquire_fini(acquire_ctx); } EXPORT_SYMBOL(drm_gem_unlock_reservations); /** * drm_gem_lru_init - initialize a LRU * * @lru: The LRU to initialize * @lock: The lock protecting the LRU */ void drm_gem_lru_init(struct drm_gem_lru *lru, struct rwlock *lock) { lru->lock = lock; lru->count = 0; INIT_LIST_HEAD(&lru->list); } EXPORT_SYMBOL(drm_gem_lru_init); static void drm_gem_lru_remove_locked(struct drm_gem_object *obj) { obj->lru->count -= obj->size >> PAGE_SHIFT; WARN_ON(obj->lru->count < 0); list_del(&obj->lru_node); obj->lru = NULL; } /** * drm_gem_lru_remove - remove object from whatever LRU it is in * * If the object is currently in any LRU, remove it. * * @obj: The GEM object to remove from current LRU */ void drm_gem_lru_remove(struct drm_gem_object *obj) { struct drm_gem_lru *lru = obj->lru; if (!lru) return; mutex_lock(lru->lock); drm_gem_lru_remove_locked(obj); mutex_unlock(lru->lock); } EXPORT_SYMBOL(drm_gem_lru_remove); /** * drm_gem_lru_move_tail_locked - move the object to the tail of the LRU * * Like &drm_gem_lru_move_tail but lru lock must be held * * @lru: The LRU to move the object into. 
* @obj: The GEM object to move into this LRU */ void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj) { lockdep_assert_held_once(lru->lock); if (obj->lru) drm_gem_lru_remove_locked(obj); lru->count += obj->size >> PAGE_SHIFT; list_add_tail(&obj->lru_node, &lru->list); obj->lru = lru; } EXPORT_SYMBOL(drm_gem_lru_move_tail_locked); /** * drm_gem_lru_move_tail - move the object to the tail of the LRU * * If the object is already in this LRU it will be moved to the * tail. Otherwise it will be removed from whichever other LRU * it is in (if any) and moved into this LRU. * * @lru: The LRU to move the object into. * @obj: The GEM object to move into this LRU */ void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj) { mutex_lock(lru->lock); drm_gem_lru_move_tail_locked(lru, obj); mutex_unlock(lru->lock); } EXPORT_SYMBOL(drm_gem_lru_move_tail); /** * drm_gem_lru_scan - helper to implement shrinker.scan_objects * * If the shrink callback succeeds, it is expected that the driver * move the object out of this LRU. * * If the LRU possibly contain active buffers, it is the responsibility * of the shrink callback to check for this (ie. dma_resv_test_signaled()) * or if necessary block until the buffer becomes idle. * * @lru: The LRU to scan * @nr_to_scan: The number of pages to try to reclaim * @remaining: The number of pages left to reclaim, should be initialized by caller * @shrink: Callback to try to shrink/reclaim the object. */ unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)) { struct drm_gem_lru still_in_lru; struct drm_gem_object *obj; unsigned freed = 0; drm_gem_lru_init(&still_in_lru, lru->lock); mutex_lock(lru->lock); while (freed < nr_to_scan) { obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node); if (!obj) break; drm_gem_lru_move_tail_locked(&still_in_lru, obj); /* * If it's in the process of being freed, gem_object->free() * may be blocked on lock waiting to remove it. So just * skip it. */ if (!kref_get_unless_zero(&obj->refcount)) continue; /* * Now that we own a reference, we can drop the lock for the * rest of the loop body, to reduce contention with other * code paths that need the LRU lock */ mutex_unlock(lru->lock); /* * Note that this still needs to be trylock, since we can * hit shrinker in response to trying to get backing pages * for this obj (ie. 
while its lock is already held) */ if (!dma_resv_trylock(obj->resv)) { *remaining += obj->size >> PAGE_SHIFT; goto tail; } if (shrink(obj)) { freed += obj->size >> PAGE_SHIFT; /* * If we succeeded in releasing the object's backing * pages, we expect the driver to have moved the object * out of this LRU */ WARN_ON(obj->lru == &still_in_lru); WARN_ON(obj->lru == lru); } dma_resv_unlock(obj->resv); tail: drm_gem_object_put(obj); mutex_lock(lru->lock); } /* * Move objects we've skipped over out of the temporary still_in_lru * back into this LRU */ list_for_each_entry (obj, &still_in_lru.list, lru_node) obj->lru = lru; list_splice_tail(&still_in_lru.list, &lru->list); lru->count += still_in_lru.count; mutex_unlock(lru->lock); return freed; } EXPORT_SYMBOL(drm_gem_lru_scan); /** * drm_gem_evict - helper to evict backing pages for a GEM object * @obj: obj in question */ int drm_gem_evict(struct drm_gem_object *obj) { dma_resv_assert_held(obj->resv); if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)) return -EBUSY; if (obj->funcs->evict) return obj->funcs->evict(obj); return 0; } EXPORT_SYMBOL(drm_gem_evict);
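/*
 * Illustrative sketch (not part of the original file): one way a driver-side
 * shrinker could drive drm_gem_lru_scan() above.  "struct my_device", its
 * "lru" member and my_shrinker_scan() are hypothetical names.  The shrink
 * callback runs with the object's reservation already trylocked by
 * drm_gem_lru_scan(); a successful eviction is expected to have moved the
 * object out of the LRU.
 */
#ifdef notyet
struct my_device {
	struct drm_gem_lru lru;
};

static bool
my_try_evict(struct drm_gem_object *obj)
{
	dma_resv_assert_held(obj->resv);

	/* drm_gem_evict() bails out with -EBUSY if the object is still busy */
	return drm_gem_evict(obj) == 0;
}

static unsigned long
my_shrinker_scan(struct my_device *mydev, unsigned int nr_to_scan)
{
	unsigned long remaining = 0;

	return drm_gem_lru_scan(&mydev->lru, nr_to_scan, &remaining,
	    my_try_evict);
}
#endif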
/* $OpenBSD: kern_descrip.c,v 1.207 2022/12/05 23:18:37 deraadt Exp $ */ /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/filedesc.h> #include <sys/vnode.h> #include <sys/proc.h> #include <sys/file.h> #include <sys/socket.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/ucred.h> #include <sys/unistd.h> #include <sys/resourcevar.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <sys/event.h> #include <sys/pool.h> #include <sys/ktrace.h> #include <sys/pledge.h> /* * Descriptor management. * * We need to block interrupts as long as `fhdlk' is being taken * with and without the KERNEL_LOCK(). 
*/ struct mutex fhdlk = MUTEX_INITIALIZER(IPL_MPFLOOR); struct filelist filehead; /* head of list of open files */ int numfiles; /* actual number of open files */ static __inline void fd_used(struct filedesc *, int); static __inline void fd_unused(struct filedesc *, int); static __inline int find_next_zero(u_int *, int, u_int); static __inline int fd_inuse(struct filedesc *, int); int finishdup(struct proc *, struct file *, int, int, register_t *, int); int find_last_set(struct filedesc *, int); int dodup3(struct proc *, int, int, int, register_t *); #define DUPF_CLOEXEC 0x01 #define DUPF_DUP2 0x02 struct pool file_pool; struct pool fdesc_pool; void filedesc_init(void) { pool_init(&file_pool, sizeof(struct file), 0, IPL_MPFLOOR, PR_WAITOK, "filepl", NULL); pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, IPL_NONE, PR_WAITOK, "fdescpl", NULL); LIST_INIT(&filehead); } static __inline int find_next_zero (u_int *bitmap, int want, u_int bits) { int i, off, maxoff; u_int sub; if (want > bits) return -1; off = want >> NDENTRYSHIFT; i = want & NDENTRYMASK; if (i) { sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); if (sub != ~0) goto found; off++; } maxoff = NDLOSLOTS(bits); while (off < maxoff) { if ((sub = bitmap[off]) != ~0) goto found; off++; } return -1; found: return (off << NDENTRYSHIFT) + ffs(~sub) - 1; } int find_last_set(struct filedesc *fd, int last) { int off, i; u_int *bitmap = fd->fd_lomap; off = (last - 1) >> NDENTRYSHIFT; while (off >= 0 && !bitmap[off]) off--; if (off < 0) return 0; i = ((off + 1) << NDENTRYSHIFT) - 1; if (i >= last) i = last - 1; while (i > 0 && !fd_inuse(fd, i)) i--; return i; } static __inline int fd_inuse(struct filedesc *fdp, int fd) { u_int off = fd >> NDENTRYSHIFT; if (fdp->fd_lomap[off] & (1U << (fd & NDENTRYMASK))) return 1; return 0; } static __inline void fd_used(struct filedesc *fdp, int fd) { u_int off = fd >> NDENTRYSHIFT; fdp->fd_lomap[off] |= 1U << (fd & NDENTRYMASK); if (fdp->fd_lomap[off] == ~0) fdp->fd_himap[off >> NDENTRYSHIFT] |= 1U << (off & NDENTRYMASK); if (fd > fdp->fd_lastfile) fdp->fd_lastfile = fd; fdp->fd_openfd++; } static __inline void fd_unused(struct filedesc *fdp, int fd) { u_int off = fd >> NDENTRYSHIFT; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; if (fdp->fd_lomap[off] == ~0) fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1U << (off & NDENTRYMASK)); fdp->fd_lomap[off] &= ~(1U << (fd & NDENTRYMASK)); #ifdef DIAGNOSTIC if (fd > fdp->fd_lastfile) panic("fd_unused: fd_lastfile inconsistent"); #endif if (fd == fdp->fd_lastfile) fdp->fd_lastfile = find_last_set(fdp, fd); fdp->fd_openfd--; } struct file * fd_iterfile(struct file *fp, struct proc *p) { struct file *nfp; unsigned int count; mtx_enter(&fhdlk); if (fp == NULL) nfp = LIST_FIRST(&filehead); else nfp = LIST_NEXT(fp, f_list); /* don't refcount when f_count == 0 to avoid race in fdrop() */ while (nfp != NULL) { count = nfp->f_count; if (count == 0) { nfp = LIST_NEXT(nfp, f_list); continue; } if (atomic_cas_uint(&nfp->f_count, count, count + 1) == count) break; } mtx_leave(&fhdlk); if (fp != NULL) FRELE(fp, p); return nfp; } struct file * fd_getfile(struct filedesc *fdp, int fd) { struct file *fp; vfs_stall_barrier(); if ((u_int)fd >= fdp->fd_nfiles) return (NULL); mtx_enter(&fdp->fd_fplock); fp = fdp->fd_ofiles[fd]; if (fp != NULL) atomic_inc_int(&fp->f_count); mtx_leave(&fdp->fd_fplock); return (fp); } struct file * fd_getfile_mode(struct filedesc *fdp, int fd, int mode) { struct file *fp; KASSERT(mode != 0); fp = fd_getfile(fdp, fd); if (fp == NULL) return (NULL); if 
((fp->f_flag & mode) == 0) { FRELE(fp, curproc); return (NULL); } return (fp); } int fd_checkclosed(struct filedesc *fdp, int fd, struct file *fp) { int closed; mtx_enter(&fdp->fd_fplock); KASSERT(fd < fdp->fd_nfiles); closed = (fdp->fd_ofiles[fd] != fp); mtx_leave(&fdp->fd_fplock); return (closed); } /* * System calls on descriptors. */ /* * Duplicate a file descriptor. */ int sys_dup(struct proc *p, void *v, register_t *retval) { struct sys_dup_args /* { syscallarg(int) fd; } */ *uap = v; struct filedesc *fdp = p->p_fd; int old = SCARG(uap, fd); struct file *fp; int new; int error; restart: if ((fp = fd_getfile(fdp, old)) == NULL) return (EBADF); fdplock(fdp); if ((error = fdalloc(p, 0, &new)) != 0) { if (error == ENOSPC) { fdexpand(p); fdpunlock(fdp); FRELE(fp, p); goto restart; } fdpunlock(fdp); FRELE(fp, p); return (error); } /* No need for FRELE(), finishdup() uses current ref. */ return (finishdup(p, fp, old, new, retval, 0)); } /* * Duplicate a file descriptor to a particular value. */ int sys_dup2(struct proc *p, void *v, register_t *retval) { struct sys_dup2_args /* { syscallarg(int) from; syscallarg(int) to; } */ *uap = v; return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval)); } int sys_dup3(struct proc *p, void *v, register_t *retval) { struct sys_dup3_args /* { syscallarg(int) from; syscallarg(int) to; syscallarg(int) flags; } */ *uap = v; if (SCARG(uap, from) == SCARG(uap, to)) return (EINVAL); if (SCARG(uap, flags) & ~O_CLOEXEC) return (EINVAL); return (dodup3(p, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), retval)); } int dodup3(struct proc *p, int old, int new, int flags, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; int dupflags, error, i; restart: if ((fp = fd_getfile(fdp, old)) == NULL) return (EBADF); if (old == new) { /* * NOTE! This doesn't clear the close-on-exec flag. This might * or might not be the intended behavior from the start, but * this is what everyone else does. */ *retval = new; FRELE(fp, p); return (0); } if ((u_int)new >= lim_cur(RLIMIT_NOFILE) || (u_int)new >= maxfiles) { FRELE(fp, p); return (EBADF); } fdplock(fdp); if (new >= fdp->fd_nfiles) { if ((error = fdalloc(p, new, &i)) != 0) { if (error == ENOSPC) { fdexpand(p); fdpunlock(fdp); FRELE(fp, p); goto restart; } fdpunlock(fdp); FRELE(fp, p); return (error); } if (new != i) panic("dup2: fdalloc"); fd_unused(fdp, new); } dupflags = DUPF_DUP2; if (flags & O_CLOEXEC) dupflags |= DUPF_CLOEXEC; /* No need for FRELE(), finishdup() uses current ref. */ return (finishdup(p, fp, old, new, retval, dupflags)); } /* * The file control system call. 
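 *
 * An illustrative sketch (not from this file) of one request handled by the
 * switch below: duplicate a descriptor onto the lowest free slot at or above
 * a minimum and mark the copy close-on-exec in the same call,
 *
 *	newfd = fcntl(oldfd, F_DUPFD_CLOEXEC, 10);
 *
 * which lands in the F_DUPFD_CLOEXEC case and, like dup2(2), is completed by
 * finishdup().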
*/ int sys_fcntl(struct proc *p, void *v, register_t *retval) { struct sys_fcntl_args /* { syscallarg(int) fd; syscallarg(int) cmd; syscallarg(void *) arg; } */ *uap = v; int fd = SCARG(uap, fd); struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; int i, prev, tmp, newmin, flg = F_POSIX; struct flock fl; int error = 0; error = pledge_fcntl(p, SCARG(uap, cmd)); if (error) return (error); restart: if ((fp = fd_getfile(fdp, fd)) == NULL) return (EBADF); switch (SCARG(uap, cmd)) { case F_DUPFD: case F_DUPFD_CLOEXEC: newmin = (long)SCARG(uap, arg); if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) || (u_int)newmin >= maxfiles) { error = EINVAL; break; } fdplock(fdp); if ((error = fdalloc(p, newmin, &i)) != 0) { if (error == ENOSPC) { fdexpand(p); fdpunlock(fdp); FRELE(fp, p); goto restart; } fdpunlock(fdp); FRELE(fp, p); } else { int dupflags = 0; if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC) dupflags |= DUPF_CLOEXEC; /* No need for FRELE(), finishdup() uses current ref. */ error = finishdup(p, fp, fd, i, retval, dupflags); } return (error); case F_GETFD: fdplock(fdp); *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; fdpunlock(fdp); break; case F_SETFD: fdplock(fdp); if ((long)SCARG(uap, arg) & 1) fdp->fd_ofileflags[fd] |= UF_EXCLOSE; else fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; fdpunlock(fdp); break; case F_GETFL: *retval = OFLAGS(fp->f_flag); break; case F_ISATTY: vp = fp->f_data; if (fp->f_type == DTYPE_VNODE && (vp->v_flag & VISTTY)) *retval = 1; else { *retval = 0; error = ENOTTY; } break; case F_SETFL: do { tmp = prev = fp->f_flag; tmp &= ~FCNTLFLAGS; tmp |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; } while (atomic_cas_uint(&fp->f_flag, prev, tmp) != prev); tmp = fp->f_flag & FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); if (error) break; tmp = fp->f_flag & FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) break; atomic_clearbits_int(&fp->f_flag, FNONBLOCK); tmp = 0; (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); break; case F_GETOWN: tmp = 0; error = (*fp->f_ops->fo_ioctl) (fp, FIOGETOWN, (caddr_t)&tmp, p); *retval = tmp; break; case F_SETOWN: tmp = (long)SCARG(uap, arg); error = ((*fp->f_ops->fo_ioctl) (fp, FIOSETOWN, (caddr_t)&tmp, p)); break; case F_SETLKW: flg |= F_WAIT; /* FALLTHROUGH */ case F_SETLK: error = pledge_flock(p); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EINVAL; break; } vp = fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, sizeof (fl)); if (error) break; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrflock(p, &fl); #endif if (fl.l_whence == SEEK_CUR) { off_t offset = foffset(fp); if (fl.l_start == 0 && fl.l_len < 0) { /* lockf(3) compliance hack */ fl.l_len = -fl.l_len; fl.l_start = offset - fl.l_len; } else fl.l_start += offset; } switch (fl.l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) { error = EBADF; goto out; } atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); break; case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) { error = EBADF; goto out; } atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); break; case F_UNLCK: error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); goto out; default: error = EINVAL; goto out; } if (fd_checkclosed(fdp, fd, fp)) { /* * We have lost the race with close() or dup2(); * unlock, pretend that we've won the race and that * lock had been removed by close() */ fl.l_whence = SEEK_SET; 
fl.l_start = 0; fl.l_len = 0; VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); fl.l_type = F_UNLCK; } goto out; case F_GETLK: error = pledge_flock(p); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EINVAL; break; } vp = fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, sizeof (fl)); if (error) break; if (fl.l_whence == SEEK_CUR) { off_t offset = foffset(fp); if (fl.l_start == 0 && fl.l_len < 0) { /* lockf(3) compliance hack */ fl.l_len = -fl.l_len; fl.l_start = offset - fl.l_len; } else fl.l_start += offset; } if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && fl.l_type != F_UNLCK && fl.l_type != 0) { error = EINVAL; break; } error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); if (error) break; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrflock(p, &fl); #endif error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), sizeof (fl))); break; default: error = EINVAL; break; } out: FRELE(fp, p); return (error); } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ int finishdup(struct proc *p, struct file *fp, int old, int new, register_t *retval, int dupflags) { struct file *oldfp; struct filedesc *fdp = p->p_fd; int error; fdpassertlocked(fdp); KASSERT(fp->f_iflags & FIF_INSERTED); if (fp->f_count >= FDUP_MAX_COUNT) { error = EDEADLK; goto fail; } oldfp = fd_getfile(fdp, new); if ((dupflags & DUPF_DUP2) && oldfp == NULL) { if (fd_inuse(fdp, new)) { error = EBUSY; goto fail; } fd_used(fdp, new); } /* * Use `fd_fplock' to synchronize with fd_getfile() so that * the function no longer creates a new reference to the old file. */ mtx_enter(&fdp->fd_fplock); fdp->fd_ofiles[new] = fp; mtx_leave(&fdp->fd_fplock); fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; if (dupflags & DUPF_CLOEXEC) fdp->fd_ofileflags[new] |= UF_EXCLOSE; *retval = new; if (oldfp != NULL) { knote_fdclose(p, new); fdpunlock(fdp); closef(oldfp, p); } else { fdpunlock(fdp); } return (0); fail: fdpunlock(fdp); FRELE(fp, p); return (error); } void fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp) { struct file *fq; fdpassertlocked(fdp); mtx_enter(&fhdlk); if ((fp->f_iflags & FIF_INSERTED) == 0) { atomic_setbits_int(&fp->f_iflags, FIF_INSERTED); if ((fq = fdp->fd_ofiles[0]) != NULL) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } } mtx_leave(&fhdlk); mtx_enter(&fdp->fd_fplock); KASSERT(fdp->fd_ofiles[fd] == NULL); fdp->fd_ofiles[fd] = fp; mtx_leave(&fdp->fd_fplock); fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE); } void fdremove(struct filedesc *fdp, int fd) { fdpassertlocked(fdp); /* * Use `fd_fplock' to synchronize with fd_getfile() so that * the function no longer creates a new reference to the file. */ mtx_enter(&fdp->fd_fplock); fdp->fd_ofiles[fd] = NULL; mtx_leave(&fdp->fd_fplock); fdp->fd_ofileflags[fd] = 0; fd_unused(fdp, fd); } int fdrelease(struct proc *p, int fd) { struct filedesc *fdp = p->p_fd; struct file *fp; fdpassertlocked(fdp); fp = fd_getfile(fdp, fd); if (fp == NULL) { fdpunlock(fdp); return (EBADF); } fdremove(fdp, fd); knote_fdclose(p, fd); fdpunlock(fdp); return (closef(fp, p)); } /* * Close a file descriptor. */ int sys_close(struct proc *p, void *v, register_t *retval) { struct sys_close_args /* { syscallarg(int) fd; } */ *uap = v; int fd = SCARG(uap, fd), error; struct filedesc *fdp = p->p_fd; fdplock(fdp); /* fdrelease unlocks fdp. */ error = fdrelease(p, fd); return (error); } /* * Return status information about a file descriptor. 
*/ int sys_fstat(struct proc *p, void *v, register_t *retval) { struct sys_fstat_args /* { syscallarg(int) fd; syscallarg(struct stat *) sb; } */ *uap = v; int fd = SCARG(uap, fd); struct filedesc *fdp = p->p_fd; struct file *fp; struct stat ub; int error; if ((fp = fd_getfile(fdp, fd)) == NULL) return (EBADF); error = (*fp->f_ops->fo_stat)(fp, &ub, p); FRELE(fp, p); if (error == 0) { /* * Don't let non-root see generation numbers * (for NFS security) */ if (suser(p)) ub.st_gen = 0; error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), sizeof (ub)); } #ifdef KTRACE if (error == 0 && KTRPOINT(p, KTR_STRUCT)) ktrstat(p, &ub); #endif return (error); } /* * Return pathconf information about a file descriptor. */ int sys_fpathconf(struct proc *p, void *v, register_t *retval) { struct sys_fpathconf_args /* { syscallarg(int) fd; syscallarg(int) name; } */ *uap = v; int fd = SCARG(uap, fd); struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; int error; if ((fp = fd_getfile(fdp, fd)) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (SCARG(uap, name) != _PC_PIPE_BUF) { error = EINVAL; break; } *retval = PIPE_BUF; error = 0; break; case DTYPE_VNODE: vp = fp->f_data; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_PATHCONF(vp, SCARG(uap, name), retval); VOP_UNLOCK(vp); break; default: error = EOPNOTSUPP; break; } FRELE(fp, p); return (error); } /* * Allocate a file descriptor for the process. */ int fdalloc(struct proc *p, int want, int *result) { struct filedesc *fdp = p->p_fd; int lim, last, i; u_int new, off; fdpassertlocked(fdp); /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider * expanding the ofile array. */ restart: lim = min((int)lim_cur(RLIMIT_NOFILE), maxfiles); last = min(fdp->fd_nfiles, lim); if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; off = i >> NDENTRYSHIFT; new = find_next_zero(fdp->fd_himap, off, (last + NDENTRIES - 1) >> NDENTRYSHIFT); if (new != -1) { i = find_next_zero(&fdp->fd_lomap[new], new > off ? 0 : i & NDENTRYMASK, NDENTRIES); if (i == -1) { /* * Free file descriptor in this block was * below want, try again with higher want. */ want = (new + 1) << NDENTRYSHIFT; goto restart; } i += (new << NDENTRYSHIFT); if (i < last) { fd_used(fdp, i); if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; fdp->fd_ofileflags[i] = 0; if (ISSET(p->p_p->ps_flags, PS_PLEDGE)) fdp->fd_ofileflags[i] |= UF_PLEDGED; return (0); } } if (fdp->fd_nfiles >= lim) return (EMFILE); return (ENOSPC); } void fdexpand(struct proc *p) { struct filedesc *fdp = p->p_fd; int nfiles, oldnfiles; size_t copylen; struct file **newofile, **oldofile; char *newofileflags; u_int *newhimap, *newlomap; fdpassertlocked(fdp); oldnfiles = fdp->fd_nfiles; oldofile = fdp->fd_ofiles; /* * No space in current array. */ if (fdp->fd_nfiles < NDEXTENT) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); /* * Allocate all required chunks before calling free(9) to make * sure that ``fd_ofiles'' stays valid if we go to sleep. */ if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), M_FILEDESC, M_WAITOK); newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), M_FILEDESC, M_WAITOK); } newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays * and zero the new portion of each array. 
*/ copylen = sizeof(struct file *) * fdp->fd_nfiles; memcpy(newofile, fdp->fd_ofiles, copylen); memset((char *)newofile + copylen, 0, nfiles * sizeof(struct file *) - copylen); copylen = sizeof(char) * fdp->fd_nfiles; memcpy(newofileflags, fdp->fd_ofileflags, copylen); memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); memcpy(newhimap, fdp->fd_himap, copylen); memset((char *)newhimap + copylen, 0, NDHISLOTS(nfiles) * sizeof(u_int) - copylen); copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); memcpy(newlomap, fdp->fd_lomap, copylen); memset((char *)newlomap + copylen, 0, NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { free(fdp->fd_himap, M_FILEDESC, NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); free(fdp->fd_lomap, M_FILEDESC, NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); } fdp->fd_himap = newhimap; fdp->fd_lomap = newlomap; } mtx_enter(&fdp->fd_fplock); fdp->fd_ofiles = newofile; mtx_leave(&fdp->fd_fplock); fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; if (oldnfiles > NDFILE) free(oldofile, M_FILEDESC, oldnfiles * OFILESIZE); } /* * Create a new open file structure and allocate * a file descriptor for the process that refers to it. */ int falloc(struct proc *p, struct file **resultfp, int *resultfd) { struct file *fp; int error, i; KASSERT(resultfp != NULL); KASSERT(resultfd != NULL); fdpassertlocked(p->p_fd); restart: if ((error = fdalloc(p, 0, &i)) != 0) { if (error == ENOSPC) { fdexpand(p); goto restart; } return (error); } fp = fnew(p); if (fp == NULL) { fd_unused(p->p_fd, i); return (ENFILE); } FREF(fp); *resultfp = fp; *resultfd = i; return (0); } struct file * fnew(struct proc *p) { struct file *fp; int nfiles; nfiles = atomic_inc_int_nv(&numfiles); if (nfiles > maxfiles) { atomic_dec_int(&numfiles); tablefull("file"); return (NULL); } fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); /* * We need to block interrupts as long as `f_mtx' is being taken * with and without the KERNEL_LOCK(). */ mtx_init(&fp->f_mtx, IPL_MPFLOOR); fp->f_count = 1; fp->f_cred = p->p_ucred; crhold(fp->f_cred); return (fp); } /* * Build a new filedesc structure. */ struct filedesc * fdinit(void) { struct filedesc0 *newfdp; newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); mtx_init(&newfdp->fd_fd.fd_fplock, IPL_MPFLOOR); LIST_INIT(&newfdp->fd_fd.fd_kqlist); /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_cmask = S_IWGRP|S_IWOTH; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; newfdp->fd_fd.fd_freefile = 0; newfdp->fd_fd.fd_lastfile = 0; return (&newfdp->fd_fd); } /* * Share a filedesc structure. */ struct filedesc * fdshare(struct process *pr) { pr->ps_fd->fd_refcnt++; return (pr->ps_fd); } /* * Copy a filedesc structure. 
*/ struct filedesc * fdcopy(struct process *pr) { struct filedesc *newfdp, *fdp = pr->ps_fd; int i; newfdp = fdinit(); fdplock(fdp); if (fdp->fd_cdir) { vref(fdp->fd_cdir); newfdp->fd_cdir = fdp->fd_cdir; } if (fdp->fd_rdir) { vref(fdp->fd_rdir); newfdp->fd_rdir = fdp->fd_rdir; } /* * If the number of open files fits in the internal arrays * of the open file structure, use them, otherwise allocate * additional memory for the number of descriptors currently * in use. */ if (fdp->fd_lastfile >= NDFILE) { /* * Compute the smallest multiple of NDEXTENT needed * for the file descriptors currently in use, * allowing the table to shrink. */ i = fdp->fd_nfiles; while (i >= 2 * NDEXTENT && i > fdp->fd_lastfile * 2) i /= 2; newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK | M_ZERO); newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; newfdp->fd_nfiles = i; } if (NDHISLOTS(newfdp->fd_nfiles) > NDHISLOTS(NDFILE)) { newfdp->fd_himap = mallocarray(NDHISLOTS(newfdp->fd_nfiles), sizeof(u_int), M_FILEDESC, M_WAITOK | M_ZERO); newfdp->fd_lomap = mallocarray(NDLOSLOTS(newfdp->fd_nfiles), sizeof(u_int), M_FILEDESC, M_WAITOK | M_ZERO); } newfdp->fd_freefile = fdp->fd_freefile; newfdp->fd_flags = fdp->fd_flags; newfdp->fd_cmask = fdp->fd_cmask; for (i = 0; i <= fdp->fd_lastfile; i++) { struct file *fp = fdp->fd_ofiles[i]; if (fp != NULL) { /* * XXX Gruesome hack. If count gets too high, fail * to copy an fd, since fdcopy()'s callers do not * permit it to indicate failure yet. * Meanwhile, kqueue files have to be * tied to the process that opened them to enforce * their internal consistency, so close them here. */ if (fp->f_count >= FDUP_MAX_COUNT || fp->f_type == DTYPE_KQUEUE) { if (i < newfdp->fd_freefile) newfdp->fd_freefile = i; continue; } FREF(fp); newfdp->fd_ofiles[i] = fp; newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; fd_used(newfdp, i); } } fdpunlock(fdp); return (newfdp); } /* * Release a filedesc structure. */ void fdfree(struct proc *p) { struct filedesc *fdp = p->p_fd; struct file *fp; int fd; if (--fdp->fd_refcnt > 0) return; for (fd = 0; fd <= fdp->fd_lastfile; fd++) { fp = fdp->fd_ofiles[fd]; if (fp != NULL) { fdp->fd_ofiles[fd] = NULL; knote_fdclose(p, fd); /* closef() expects a refcount of 2 */ FREF(fp); (void) closef(fp, p); } } p->p_fd = NULL; if (fdp->fd_nfiles > NDFILE) free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE); if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { free(fdp->fd_himap, M_FILEDESC, NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); free(fdp->fd_lomap, M_FILEDESC, NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); } if (fdp->fd_cdir) vrele(fdp->fd_cdir); if (fdp->fd_rdir) vrele(fdp->fd_rdir); pool_put(&fdesc_pool, fdp); } /* * Internal form of close. * Decrement reference count on file structure. * Note: p may be NULL when closing a file * that was being passed in a message. * * The fp must have its usecount bumped and will be FRELEd here. */ int closef(struct file *fp, struct proc *p) { struct filedesc *fdp; if (fp == NULL) return (0); KASSERTMSG(fp->f_count >= 2, "count (%u) < 2", fp->f_count); atomic_dec_int(&fp->f_count); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. 
*/ if (p && ((fdp = p->p_fd) != NULL) && (fdp->fd_flags & FD_ADVLOCK) && fp->f_type == DTYPE_VNODE) { struct vnode *vp = fp->f_data; struct flock lf; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); } return (FRELE(fp, p)); } int fdrop(struct file *fp, struct proc *p) { int error; KASSERTMSG(fp->f_count == 0, "count (%u) != 0", fp->f_count); mtx_enter(&fhdlk); if (fp->f_iflags & FIF_INSERTED) LIST_REMOVE(fp, f_list); mtx_leave(&fhdlk); if (fp->f_ops) error = (*fp->f_ops->fo_close)(fp, p); else error = 0; crfree(fp->f_cred); atomic_dec_int(&numfiles); pool_put(&file_pool, fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ int sys_flock(struct proc *p, void *v, register_t *retval) { struct sys_flock_args /* { syscallarg(int) fd; syscallarg(int) how; } */ *uap = v; int fd = SCARG(uap, fd); int how = SCARG(uap, how); struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct flock lf; int error; if ((fp = fd_getfile(fdp, fd)) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) { error = EOPNOTSUPP; goto out; } vp = fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (how & LOCK_UN) { lf.l_type = F_UNLCK; atomic_clearbits_int(&fp->f_iflags, FIF_HASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto out; } if (how & LOCK_EX) lf.l_type = F_WRLCK; else if (how & LOCK_SH) lf.l_type = F_RDLCK; else { error = EINVAL; goto out; } atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK); if (how & LOCK_NB) error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); else error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); out: FRELE(fp, p); return (error); } /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ int filedescopen(dev_t dev, int mode, int type, struct proc *p) { /* * XXX Kludge: set curproc->p_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ p->p_dupfd = minor(dev); return (ENODEV); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(struct proc *p, int indx, int mode) { struct filedesc *fdp = p->p_fd; int dupfd = p->p_dupfd; struct file *wfp; fdpassertlocked(fdp); /* * Assume that the filename was user-specified; applications do * not tend to open /dev/fd/# when they can just call dup() */ if ((p->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { if (p->p_descfd == 255) return (EPERM); if (p->p_descfd != dupfd) return (EPERM); } /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, reject. Note, there is no need to check for new == old * because fd_getfile will return NULL if the file at indx is * newly created by falloc. 
*/ if ((wfp = fd_getfile(fdp, dupfd)) == NULL) return (EBADF); /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { FRELE(wfp, p); return (EACCES); } if (wfp->f_count >= FDUP_MAX_COUNT) { FRELE(wfp, p); return (EDEADLK); } KASSERT(wfp->f_iflags & FIF_INSERTED); mtx_enter(&fdp->fd_fplock); KASSERT(fdp->fd_ofiles[indx] == NULL); fdp->fd_ofiles[indx] = wfp; mtx_leave(&fdp->fd_fplock); fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE); return (0); } /* * Close any files on exec? */ void fdcloseexec(struct proc *p) { struct filedesc *fdp = p->p_fd; int fd; fdplock(fdp); for (fd = 0; fd <= fdp->fd_lastfile; fd++) { fdp->fd_ofileflags[fd] &= ~UF_PLEDGED; if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) { /* fdrelease() unlocks fdp. */ (void) fdrelease(p, fd); fdplock(fdp); } } fdpunlock(fdp); } int sys_closefrom(struct proc *p, void *v, register_t *retval) { struct sys_closefrom_args *uap = v; struct filedesc *fdp = p->p_fd; u_int startfd, i; startfd = SCARG(uap, fd); fdplock(fdp); if (startfd > fdp->fd_lastfile) { fdpunlock(fdp); return (EBADF); } for (i = startfd; i <= fdp->fd_lastfile; i++) { /* fdrelease() unlocks fdp. */ fdrelease(p, i); fdplock(fdp); } fdpunlock(fdp); return (0); } int sys_getdtablecount(struct proc *p, void *v, register_t *retval) { *retval = p->p_fd->fd_openfd; return (0); }
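/*
 * Illustrative userland sketch, not part of the kernel sources above: it
 * exercises the descriptor-duplication path of finishdup() (F_DUPFD_CLOEXEC),
 * the F_GETLK case of sys_fcntl(), and sys_flock().  The lock file path is
 * hypothetical.
 */
#include <sys/file.h>	/* flock(), LOCK_EX, LOCK_UN */

#include <err.h>
#include <fcntl.h>	/* open(), fcntl(), F_DUPFD_CLOEXEC, F_GETLK */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd, dupfd;

	if ((fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0600)) == -1)
		err(1, "open");

	/* finishdup() runs with DUPF_CLOEXEC and sets UF_EXCLOSE on the copy. */
	if ((dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 10)) == -1)
		err(1, "fcntl(F_DUPFD_CLOEXEC)");

	/* F_GETLK: probe for a conflicting POSIX lock on the whole file. */
	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;		/* l_start = l_len = 0: whole file */
	if (fcntl(fd, F_GETLK, &fl) == -1)
		err(1, "fcntl(F_GETLK)");
	printf("F_GETLK result: l_type=%d\n", fl.l_type);

	/* sys_flock(): advisory whole-file lock, dropped again right away. */
	if (flock(fd, LOCK_EX) == -1)
		err(1, "flock(LOCK_EX)");
	if (flock(fd, LOCK_UN) == -1)
		err(1, "flock(LOCK_UN)");

	close(dupfd);
	close(fd);
	return 0;
}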
/* $OpenBSD: vfs_vops.c,v 1.36 2024/05/13 11:17:40 semarie Exp $ */ /* * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include <sys/param.h> #include <sys/vnode.h> #include <sys/unistd.h> #include <sys/systm.h> #ifdef VFSLCKDEBUG #define ASSERT_VP_ISLOCKED(vp) do { \ if (((vp)->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp)) { \ VOP_PRINT(vp); \ panic("vp not locked"); \ } \ } while (0) #else #define ASSERT_VP_ISLOCKED(vp) /* nothing */ #endif int VOP_ISLOCKED(struct vnode *vp) { struct vop_islocked_args a; a.a_vp = vp; if (vp->v_op->vop_islocked == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_islocked)(&a)); } int VOP_LOOKUP(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct vop_lookup_args a; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; if (dvp->v_op->vop_lookup == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_lookup)(&a)); } int VOP_CREATE(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct vop_create_args a; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; ASSERT_VP_ISLOCKED(dvp); if (dvp->v_op->vop_create == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_create)(&a)); } int VOP_MKNOD(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct vop_mknod_args a; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; ASSERT_VP_ISLOCKED(dvp); if (dvp->v_op->vop_mknod == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_mknod)(&a)); } int VOP_OPEN(struct vnode *vp, int mode, struct ucred *cred, struct proc *p) { struct vop_open_args a; a.a_vp = vp; a.a_mode = mode; a.a_cred = cred; a.a_p = p; KASSERT(p == curproc); if (vp->v_op->vop_open == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_open)(&a)); } int VOP_CLOSE(struct vnode *vp, int fflag, struct ucred *cred, struct proc *p) { struct vop_close_args a; a.a_vp = vp; a.a_fflag = fflag; a.a_cred = cred; a.a_p = p; KASSERT(p == NULL || p == curproc); ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_close == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_close)(&a)); } int VOP_ACCESS(struct vnode *vp, int mode, struct ucred *cred, struct proc *p) { struct vop_access_args a; a.a_vp = vp; a.a_mode = mode; a.a_cred = cred; a.a_p = p; KASSERT(p == curproc); ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_access == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_access)(&a)); } int VOP_GETATTR(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct proc *p) { struct vop_getattr_args a; a.a_vp = vp; a.a_vap = vap; a.a_cred = cred; a.a_p = p; KASSERT(p == curproc); if (vp->v_op->vop_getattr == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_getattr)(&a)); } int VOP_SETATTR(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct proc *p) { struct vop_setattr_args a; a.a_vp = vp; a.a_vap = vap; a.a_cred = cred; a.a_p = p; KASSERT(p == curproc); ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_setattr == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_setattr)(&a)); } int VOP_READ(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) { struct vop_read_args a; a.a_vp = vp; a.a_uio = uio; a.a_ioflag = ioflag; a.a_cred = cred; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_read == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_read)(&a)); } int VOP_WRITE(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) { struct vop_write_args a; a.a_vp = vp; a.a_uio = uio; a.a_ioflag = ioflag; a.a_cred = cred; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_write == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_write)(&a)); } int VOP_IOCTL(struct vnode *vp, u_long command, void *data, int fflag, struct ucred *cred, struct proc 
*p) { struct vop_ioctl_args a; a.a_vp = vp; a.a_command = command; a.a_data = data; a.a_fflag = fflag; a.a_cred = cred; a.a_p = p; KASSERT(p == curproc); if (vp->v_op->vop_ioctl == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_ioctl)(&a)); } int VOP_KQFILTER(struct vnode *vp, int fflag, struct knote *kn) { struct vop_kqfilter_args a; a.a_vp = vp; a.a_fflag = fflag; a.a_kn = kn; if (vp->v_op->vop_kqfilter == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_kqfilter)(&a)); } int VOP_REVOKE(struct vnode *vp, int flags) { struct vop_revoke_args a; a.a_vp = vp; a.a_flags = flags; if (vp->v_op->vop_revoke == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_revoke)(&a)); } int VOP_FSYNC(struct vnode *vp, struct ucred *cred, int waitfor, struct proc *p) { int r, s; struct vop_fsync_args a; a.a_vp = vp; a.a_cred = cred; a.a_waitfor = waitfor; a.a_p = p; KASSERT(p == curproc); ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_fsync == NULL) return (EOPNOTSUPP); r = (vp->v_op->vop_fsync)(&a); s = splbio(); if (r == 0 && vp->v_bioflag & VBIOERROR) r = EIO; splx(s); return r; } int VOP_REMOVE(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { int error; struct vop_remove_args a; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; ASSERT_VP_ISLOCKED(dvp); ASSERT_VP_ISLOCKED(vp); error = dvp->v_op->vop_remove(&a); if (dvp == vp) vrele(vp); else vput(vp); vput(dvp); return error; } int VOP_LINK(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct vop_link_args a; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; ASSERT_VP_ISLOCKED(dvp); if (dvp->v_op->vop_link == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_link)(&a)); } int VOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp) { struct vop_rename_args a; a.a_fdvp = fdvp; a.a_fvp = fvp; a.a_fcnp = fcnp; a.a_tdvp = tdvp; a.a_tvp = tvp; a.a_tcnp = tcnp; ASSERT_VP_ISLOCKED(tdvp); if (fdvp->v_op->vop_rename == NULL) return (EOPNOTSUPP); return ((fdvp->v_op->vop_rename)(&a)); } int VOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct vop_mkdir_args a; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; ASSERT_VP_ISLOCKED(dvp); if (dvp->v_op->vop_mkdir == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_mkdir)(&a)); } int VOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct vop_rmdir_args a; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; ASSERT_VP_ISLOCKED(dvp); ASSERT_VP_ISLOCKED(vp); KASSERT(dvp != vp); if (dvp->v_op->vop_rmdir == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_rmdir)(&a)); } int VOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap, char *target) { struct vop_symlink_args a; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; a.a_target = target; ASSERT_VP_ISLOCKED(dvp); if (dvp->v_op->vop_symlink == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_symlink)(&a)); } int VOP_READDIR(struct vnode *vp, struct uio *uio, struct ucred *cred, int *eofflag) { struct vop_readdir_args a; a.a_vp = vp; a.a_uio = uio; a.a_cred = cred; a.a_eofflag = eofflag; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_readdir == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_readdir)(&a)); } int VOP_READLINK(struct vnode *vp, struct uio *uio, struct ucred *cred) { struct vop_readlink_args a; a.a_vp = vp; a.a_uio = uio; a.a_cred = cred; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_readlink == NULL) return (EOPNOTSUPP); 
return ((vp->v_op->vop_readlink)(&a)); } int VOP_ABORTOP(struct vnode *dvp, struct componentname *cnp) { struct vop_abortop_args a; a.a_dvp = dvp; a.a_cnp = cnp; if (dvp->v_op->vop_abortop == NULL) return (EOPNOTSUPP); return ((dvp->v_op->vop_abortop)(&a)); } int VOP_INACTIVE(struct vnode *vp, struct proc *p) { struct vop_inactive_args a; a.a_vp = vp; a.a_p = p; KASSERT(p == curproc); ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_inactive == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_inactive)(&a)); } int VOP_RECLAIM(struct vnode *vp, struct proc *p) { struct vop_reclaim_args a; a.a_vp = vp; a.a_p = p; KASSERT(p == curproc); if (vp->v_op->vop_reclaim == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_reclaim)(&a)); } int VOP_LOCK(struct vnode *vp, int flags) { struct vop_lock_args a; a.a_vp = vp; a.a_flags = flags; MUTEX_ASSERT_UNLOCKED(&vnode_mtx); if (vp->v_op->vop_lock == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_lock)(&a)); } int VOP_UNLOCK(struct vnode *vp) { struct vop_unlock_args a; a.a_vp = vp; if (vp->v_op->vop_unlock == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_unlock)(&a)); } int VOP_BMAP(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr_t *bnp, int *runp) { struct vop_bmap_args a; a.a_vp = vp; a.a_bn = bn; a.a_vpp = vpp; a.a_bnp = bnp; a.a_runp = runp; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_bmap == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_bmap)(&a)); } int VOP_PRINT(struct vnode *vp) { struct vop_print_args a; a.a_vp = vp; if (vp->v_op->vop_print == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_print)(&a)); } int VOP_PATHCONF(struct vnode *vp, int name, register_t *retval) { struct vop_pathconf_args a; /* * Handle names that are constant across filesystem */ switch (name) { case _PC_PATH_MAX: *retval = PATH_MAX; return (0); case _PC_PIPE_BUF: *retval = PIPE_BUF; return (0); case _PC_ASYNC_IO: case _PC_PRIO_IO: case _PC_SYNC_IO: *retval = 0; return (0); } a.a_vp = vp; a.a_name = name; a.a_retval = retval; ASSERT_VP_ISLOCKED(vp); if (vp->v_op->vop_pathconf == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_pathconf)(&a)); } int VOP_ADVLOCK(struct vnode *vp, void *id, int op, struct flock *fl, int flags) { struct vop_advlock_args a; a.a_vp = vp; a.a_id = id; a.a_op = op; a.a_fl = fl; a.a_flags = flags; if (vp->v_op->vop_advlock == NULL) return (EOPNOTSUPP); return (vp->v_op->vop_advlock)(&a); } int VOP_STRATEGY(struct vnode *vp, struct buf *bp) { struct vop_strategy_args a; a.a_vp = vp; a.a_bp = bp; if ((ISSET(bp->b_flags, B_BC)) && (!ISSET(bp->b_flags, B_DMA))) panic("Non dma reachable buffer passed to VOP_STRATEGY"); if (vp->v_op->vop_strategy == NULL) return (EOPNOTSUPP); return ((vp->v_op->vop_strategy)(&a)); } int VOP_BWRITE(struct buf *bp) { struct vop_bwrite_args a; a.a_bp = bp; if (bp->b_vp->v_op->vop_bwrite == NULL) return (EOPNOTSUPP); return ((bp->b_vp->v_op->vop_bwrite)(&a)); }
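/*
 * Illustrative sketch, not part of vfs_vops.c: a self-contained userland
 * model of the dispatch convention the VOP_*() wrappers above follow --
 * pack the arguments into an args struct, return EOPNOTSUPP when the
 * filesystem leaves the slot NULL, otherwise call through the
 * per-filesystem operations table.  The names toy_*, toyfs_* and
 * TOY_PRINT are hypothetical and only mirror the real pattern.
 */
#include <errno.h>	/* EOPNOTSUPP */
#include <stdio.h>

struct toy_vnode;

struct toy_print_args {
	struct toy_vnode *a_vp;
};

struct toy_ops {
	int (*vop_print)(struct toy_print_args *);
	int (*vop_islocked)(void *);	/* left NULL by this toy filesystem */
};

struct toy_vnode {
	const struct toy_ops *v_op;
};

static int
toyfs_print(struct toy_print_args *ap)
{
	printf("toyfs vnode %p\n", (void *)ap->a_vp);
	return 0;
}

static const struct toy_ops toyfs_vops = {
	.vop_print = toyfs_print,
	/* .vop_islocked omitted: the wrapper would return EOPNOTSUPP */
};

/* Same shape as VOP_PRINT() above: pack args, NULL-check, dispatch. */
static int
TOY_PRINT(struct toy_vnode *vp)
{
	struct toy_print_args a;

	a.a_vp = vp;
	if (vp->v_op->vop_print == NULL)
		return (EOPNOTSUPP);
	return ((vp->v_op->vop_print)(&a));
}

int
main(void)
{
	struct toy_vnode vn = { .v_op = &toyfs_vops };

	return TOY_PRINT(&vn);
}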
/* $OpenBSD: rtsock.c,v 1.373 2023/12/03 10:51:17 mvs Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/sysctl.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/domain.h> #include <sys/pool.h> #include <sys/protosw.h> #include <sys/srp.h> #include <net/if.h> #include <net/if_dl.h> #include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> #ifdef MPLS #include <netmpls/mpls.h> #endif #ifdef IPSEC #include <netinet/ip_ipsp.h> #include <net/if_enc.h> #endif #ifdef BFD #include <net/bfd.h> #endif #include <sys/stdarg.h> #include <sys/kernel.h> #include <sys/timeout.h> #define ROUTESNDQ 8192 #define ROUTERCVQ 8192 const struct sockaddr route_src = { 2, PF_ROUTE, }; struct walkarg { int w_op, w_arg, w_tmemsize; size_t w_given, w_needed; caddr_t w_where, w_tmem; }; void route_prinit(void); void rcb_ref(void *, void *); void rcb_unref(void *, void *); int route_output(struct mbuf *, struct socket *); int route_ctloutput(int, struct socket *, int, int, struct mbuf *); int route_attach(struct socket *, int, int); int route_detach(struct socket *); int route_disconnect(struct socket *); int route_shutdown(struct socket *); void route_rcvd(struct socket *); int route_send(struct socket *, struct mbuf *, struct mbuf *, struct mbuf *); int route_sockaddr(struct socket *, struct mbuf *); int route_peeraddr(struct socket *, struct mbuf *); void route_input(struct mbuf *m0, struct socket *, sa_family_t); int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); int route_cleargateway(struct rtentry *, void *, unsigned int); void rtm_senddesync_timer(void *); void rtm_senddesync(struct socket *); int rtm_sendup(struct socket *, struct mbuf *); int rtm_getifa(struct rt_addrinfo *, unsigned int); int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, uint8_t, unsigned int); struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); struct mbuf *rtm_msg1(int, struct rt_addrinfo *); int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, struct walkarg *); int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); int rtm_validate_proposal(struct rt_addrinfo *); void rtm_setmetrics(u_long, const struct rt_metrics *, struct rt_kmetrics *); void rtm_getmetrics(const struct rtentry *, struct rt_metrics *); int sysctl_iflist(int, struct walkarg *); int sysctl_ifnames(struct walkarg *); int sysctl_rtable_rtstat(void *, size_t *, void *); int rt_setsource(unsigned int, const struct sockaddr *); /* * Locks used to protect struct members * I immutable after creation * s solock */ struct rtpcb { struct socket *rop_socket; /* [I] */ SRPL_ENTRY(rtpcb) rop_list; struct refcnt rop_refcnt; struct timeout rop_timeout; unsigned int rop_msgfilter; /* [s] */ unsigned int 
rop_flagfilter; /* [s] */ unsigned int rop_flags; /* [s] */ u_int rop_rtableid; /* [s] */ unsigned short rop_proto; /* [I] */ u_char rop_priority; /* [s] */ }; #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) struct rtptable { SRPL_HEAD(, rtpcb) rtp_list; struct srpl_rc rtp_rc; struct rwlock rtp_lk; unsigned int rtp_count; }; struct pool rtpcb_pool; struct rtptable rtptable; /* * These flags and timeout are used for indicating to userland (via a * RTM_DESYNC msg) when the route socket has overflowed and messages * have been lost. */ #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before queueing more packets */ #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ void route_prinit(void) { srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); rw_init(&rtptable.rtp_lk, "rtsock"); SRPL_INIT(&rtptable.rtp_list); pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); } void rcb_ref(void *null, void *v) { struct rtpcb *rop = v; refcnt_take(&rop->rop_refcnt); } void rcb_unref(void *null, void *v) { struct rtpcb *rop = v; refcnt_rele_wake(&rop->rop_refcnt); } int route_attach(struct socket *so, int proto, int wait) { struct rtpcb *rop; int error; error = soreserve(so, ROUTESNDQ, ROUTERCVQ); if (error) return (error); /* * use the rawcb but allocate a rtpcb, this * code does not care about the additional fields * and works directly on the raw socket. */ rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) | PR_ZERO); if (rop == NULL) return (ENOBUFS); so->so_pcb = rop; /* Init the timeout structure */ timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so, KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); refcnt_init(&rop->rop_refcnt); rop->rop_socket = so; rop->rop_proto = proto; rop->rop_rtableid = curproc->p_p->ps_rtableid; soisconnected(so); so->so_options |= SO_USELOOPBACK; rw_enter(&rtptable.rtp_lk, RW_WRITE); SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rop_list); rtptable.rtp_count++; rw_exit(&rtptable.rtp_lk); return (0); } int route_detach(struct socket *so) { struct rtpcb *rop; soassertlocked(so); rop = sotortpcb(so); if (rop == NULL) return (EINVAL); rw_enter(&rtptable.rtp_lk, RW_WRITE); rtptable.rtp_count--; SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, rop_list); rw_exit(&rtptable.rtp_lk); sounlock(so); /* wait for all references to drop */ refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); timeout_del_barrier(&rop->rop_timeout); solock(so); so->so_pcb = NULL; KASSERT((so->so_state & SS_NOFDREF) == 0); pool_put(&rtpcb_pool, rop); return (0); } int route_disconnect(struct socket *so) { soisdisconnected(so); return (0); } int route_shutdown(struct socket *so) { socantsendmore(so); return (0); } void route_rcvd(struct socket *so) { struct rtpcb *rop = sotortpcb(so); soassertlocked(so); /* * If we are in a FLUSH state, check if the buffer is * empty so that we can clear the flag. 
*/ if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == rop->rop_socket->so_rcv.sb_hiwat))) rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; } int route_send(struct socket *so, struct mbuf *m, struct mbuf *nam, struct mbuf *control) { int error; soassertlocked(so); if (control && control->m_len) { error = EOPNOTSUPP; goto out; } if (nam) { error = EISCONN; goto out; } error = route_output(m, so); m = NULL; out: m_freem(control); m_freem(m); return (error); } int route_sockaddr(struct socket *so, struct mbuf *nam) { return (EINVAL); } int route_peeraddr(struct socket *so, struct mbuf *nam) { /* minimal support, just implement a fake peer address */ bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); nam->m_len = route_src.sa_len; return (0); } int route_ctloutput(int op, struct socket *so, int level, int optname, struct mbuf *m) { struct rtpcb *rop = sotortpcb(so); int error = 0; unsigned int tid, prio; if (level != AF_ROUTE) return (EINVAL); switch (op) { case PRCO_SETOPT: switch (optname) { case ROUTE_MSGFILTER: if (m == NULL || m->m_len != sizeof(unsigned int)) error = EINVAL; else rop->rop_msgfilter = *mtod(m, unsigned int *); break; case ROUTE_TABLEFILTER: if (m == NULL || m->m_len != sizeof(unsigned int)) { error = EINVAL; break; } tid = *mtod(m, unsigned int *); if (tid != RTABLE_ANY && !rtable_exists(tid)) error = ENOENT; else rop->rop_rtableid = tid; break; case ROUTE_PRIOFILTER: if (m == NULL || m->m_len != sizeof(unsigned int)) { error = EINVAL; break; } prio = *mtod(m, unsigned int *); if (prio > RTP_MAX) error = EINVAL; else rop->rop_priority = prio; break; case ROUTE_FLAGFILTER: if (m == NULL || m->m_len != sizeof(unsigned int)) error = EINVAL; else rop->rop_flagfilter = *mtod(m, unsigned int *); break; default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ROUTE_MSGFILTER: m->m_len = sizeof(unsigned int); *mtod(m, unsigned int *) = rop->rop_msgfilter; break; case ROUTE_TABLEFILTER: m->m_len = sizeof(unsigned int); *mtod(m, unsigned int *) = rop->rop_rtableid; break; case ROUTE_PRIOFILTER: m->m_len = sizeof(unsigned int); *mtod(m, unsigned int *) = rop->rop_priority; break; case ROUTE_FLAGFILTER: m->m_len = sizeof(unsigned int); *mtod(m, unsigned int *) = rop->rop_flagfilter; break; default: error = ENOPROTOOPT; break; } } return (error); } void rtm_senddesync_timer(void *xso) { struct socket *so = xso; solock(so); rtm_senddesync(so); sounlock(so); } void rtm_senddesync(struct socket *so) { struct rtpcb *rop = sotortpcb(so); struct mbuf *desync_mbuf; soassertlocked(so); /* * Dying socket is disconnected by upper layer and there is * no reason to send packet. Also we shouldn't reschedule * timeout(9), otherwise timeout_del_barrier(9) can't help us. */ if ((so->so_state & SS_ISCONNECTED) == 0 || (so->so_rcv.sb_state & SS_CANTRCVMORE)) return; /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) return; /* * If we fail to alloc memory or if sbappendaddr() * fails, re-add timeout and try again. 
*/ desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); if (desync_mbuf != NULL) { if (sbappendaddr(so, &so->so_rcv, &route_src, desync_mbuf, NULL) != 0) { rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; sorwakeup(rop->rop_socket); return; } m_freem(desync_mbuf); } /* Re-add timeout to try sending msg again */ timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); } void route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) { struct socket *so; struct rtpcb *rop; struct rt_msghdr *rtm; struct mbuf *m = m0; struct srp_ref sr; /* ensure that we can access the rtm_type via mtod() */ if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { m_freem(m); return; } SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { /* * If route socket is bound to an address family only send * messages that match the address family. Address family * agnostic messages are always sent. */ if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && rop->rop_proto != sa_family) continue; so = rop->rop_socket; solock(so); /* * Check to see if we don't want our own messages and * if we can receive anything. */ if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || !(so->so_state & SS_ISCONNECTED) || (so->so_rcv.sb_state & SS_CANTRCVMORE)) goto next; /* filter messages that the process does not want */ rtm = mtod(m, struct rt_msghdr *); /* but RTM_DESYNC can't be filtered */ if (rtm->rtm_type != RTM_DESYNC) { if (rop->rop_msgfilter != 0 && !(rop->rop_msgfilter & (1U << rtm->rtm_type))) goto next; if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) goto next; } switch (rtm->rtm_type) { case RTM_IFANNOUNCE: case RTM_DESYNC: /* no tableid */ break; case RTM_RESOLVE: case RTM_NEWADDR: case RTM_DELADDR: case RTM_IFINFO: case RTM_80211INFO: case RTM_BFD: /* check against rdomain id */ if (rop->rop_rtableid != RTABLE_ANY && rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) goto next; break; default: if (rop->rop_priority != 0 && rop->rop_priority < rtm->rtm_priority) goto next; /* check against rtable id */ if (rop->rop_rtableid != RTABLE_ANY && rop->rop_rtableid != rtm->rtm_tableid) goto next; break; } /* * Check to see if the flush flag is set. If so, don't queue * any more messages until the flag is cleared. 
*/ if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) goto next; rtm_sendup(so, m); next: sounlock(so); } SRPL_LEAVE(&sr); m_freem(m); } int rtm_sendup(struct socket *so, struct mbuf *m0) { struct rtpcb *rop = sotortpcb(so); struct mbuf *m; soassertlocked(so); m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); if (m == NULL) return (ENOMEM); if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { /* Flag socket as desync'ed and flush required */ rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; rtm_senddesync(so); m_freem(m); return (ENOBUFS); } sorwakeup(so); return (0); } struct rt_msghdr * rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) { struct rt_msghdr *rtm; struct rt_addrinfo info; struct sockaddr_rtlabel sa_rl; struct sockaddr_in6 sa_mask; #ifdef BFD struct sockaddr_bfd sa_bfd; #endif struct ifnet *ifp = NULL; int len; bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); #ifdef BFD if (rt->rt_flags & RTF_BFD) { KERNEL_LOCK(); info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); KERNEL_UNLOCK(); } #endif #ifdef MPLS if (rt->rt_flags & RTF_MPLS) { struct sockaddr_mpls sa_mpls; bzero(&sa_mpls, sizeof(sa_mpls)); sa_mpls.smpls_family = AF_MPLS; sa_mpls.smpls_len = sizeof(sa_mpls); sa_mpls.smpls_label = ((struct rt_mpls *) rt->rt_llinfo)->mpls_label; info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; info.rti_mpls = ((struct rt_mpls *) rt->rt_llinfo)->mpls_operation; } #endif ifp = if_get(rt->rt_ifidx); if (ifp != NULL) { info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); info.rti_info[RTAX_IFA] = rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family); if (info.rti_info[RTAX_IFA] == NULL) info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } if_put(ifp); /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ /* build new route message */ len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); rtm->rtm_type = type; rtm->rtm_index = rt->rt_ifidx; rtm->rtm_tableid = tableid; rtm->rtm_priority = rt->rt_priority & RTP_MASK; rtm->rtm_flags = rt->rt_flags; rtm->rtm_pid = curproc->p_p->ps_pid; rtm->rtm_seq = seq; rtm_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; #ifdef MPLS rtm->rtm_mpls = info.rti_mpls; #endif return rtm; } int route_output(struct mbuf *m, struct socket *so) { struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct rt_addrinfo info; struct ifnet *ifp; int len, seq, useloopback, error = 0; u_int tableid; u_int8_t prio; u_char vers, type; if (m == NULL || ((m->m_len < sizeof(int32_t)) && (m = m_pullup(m, sizeof(int32_t))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); useloopback = so->so_options & SO_USELOOPBACK; /* * The socket can't be closed concurrently because the file * descriptor reference is still held. 
*/ sounlock(so); len = m->m_pkthdr.len; if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + sizeof(rtm->rtm_hdrlen) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { error = EINVAL; goto fail; } vers = mtod(m, struct rt_msghdr *)->rtm_version; switch (vers) { case RTM_VERSION: if (len < sizeof(struct rt_msghdr)) { error = EINVAL; goto fail; } if (len > RTM_MAXSIZE) { error = EMSGSIZE; goto fail; } rtm = malloc(len, M_RTABLE, M_WAITOK); m_copydata(m, 0, len, rtm); break; default: error = EPROTONOSUPPORT; goto fail; } /* Verify that the caller is sending an appropriate message early */ switch (rtm->rtm_type) { case RTM_ADD: case RTM_DELETE: case RTM_GET: case RTM_CHANGE: case RTM_PROPOSAL: case RTM_SOURCE: break; default: error = EOPNOTSUPP; goto fail; } /* * Verify that the header length is valid. * All messages from userland start with a struct rt_msghdr. */ if (rtm->rtm_hdrlen == 0) /* old client */ rtm->rtm_hdrlen = sizeof(struct rt_msghdr); if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || len < rtm->rtm_hdrlen) { error = EINVAL; goto fail; } rtm->rtm_pid = curproc->p_p->ps_pid; /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { error = EACCES; goto fail; } tableid = rtm->rtm_tableid; if (!rtable_exists(tableid)) { if (rtm->rtm_type == RTM_ADD) { if ((error = rtable_add(tableid)) != 0) goto fail; } else { error = EINVAL; goto fail; } } /* Do not let userland play with kernel-only flags. */ if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { error = EINVAL; goto fail; } /* make sure that kernel-only bits are not set */ rtm->rtm_priority &= RTP_MASK; rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); rtm->rtm_fmask &= RTF_FMASK; if (rtm->rtm_priority != 0) { if (rtm->rtm_priority > RTP_MAX || rtm->rtm_priority == RTP_LOCAL) { error = EINVAL; goto fail; } prio = rtm->rtm_priority; } else if (rtm->rtm_type != RTM_ADD) prio = RTP_ANY; else if (rtm->rtm_flags & RTF_STATIC) prio = 0; else prio = RTP_DEFAULT; bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info)) != 0) goto fail; info.rti_flags = rtm->rtm_flags; if (rtm->rtm_type != RTM_SOURCE && rtm->rtm_type != RTM_PROPOSAL && (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || info.rti_info[RTAX_GENMASK] != NULL)) { error = EINVAL; goto fail; } #ifdef MPLS info.rti_mpls = rtm->rtm_mpls; #endif if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (info.rti_flags & RTF_CLONING) == 0) { info.rti_flags |= RTF_LLINFO; } /* * Validate RTM_PROPOSAL and pass it along or error out. */ if (rtm->rtm_type == RTM_PROPOSAL) { if (rtm_validate_proposal(&info) == -1) { error = EINVAL; goto fail; } /* * If this is a solicitation proposal forward request to * all interfaces. Most handlers will ignore it but at least * umb(4) will send a response to this event. 
*/ if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { NET_LOCK(); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); } NET_UNLOCK(); } } else if (rtm->rtm_type == RTM_SOURCE) { if (info.rti_info[RTAX_IFA] == NULL) { error = EINVAL; goto fail; } NET_LOCK(); error = rt_setsource(tableid, info.rti_info[RTAX_IFA]); NET_UNLOCK(); if (error) goto fail; } else { error = rtm_output(rtm, &rt, &info, prio, tableid); if (!error) { type = rtm->rtm_type; seq = rtm->rtm_seq; free(rtm, M_RTABLE, len); NET_LOCK_SHARED(); rtm = rtm_report(rt, type, seq, tableid); NET_UNLOCK_SHARED(); len = rtm->rtm_msglen; } } rtfree(rt); if (error) { rtm->rtm_errno = error; } else { rtm->rtm_flags |= RTF_DONE; } /* * Check to see if we don't want our own messages. */ if (!useloopback) { if (rtptable.rtp_count == 0) { /* no other listener and no loopback of messages */ goto fail; } } if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > len) m_adj(m, len - m->m_pkthdr.len); free(rtm, M_RTABLE, len); if (m) route_input(m, so, info.rti_info[RTAX_DST] ? info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); solock(so); return (error); fail: free(rtm, M_RTABLE, len); m_freem(m); solock(so); return (error); } int rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) { struct rtentry *rt = *prt; struct ifnet *ifp = NULL; int plen, newgate = 0, error = 0; switch (rtm->rtm_type) { case RTM_ADD: if (info->rti_info[RTAX_GATEWAY] == NULL) { error = EINVAL; break; } rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); if ((error = route_arp_conflict(rt, info))) { rtfree(rt); rt = NULL; break; } /* * We cannot go through a delete/create/insert cycle for * cached route because this can lead to races in the * receive path. Instead we update the L2 cache. */ if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) { ifp = if_get(rt->rt_ifidx); if (ifp == NULL) { rtfree(rt); rt = NULL; error = ESRCH; break; } goto change; } rtfree(rt); rt = NULL; NET_LOCK(); if ((error = rtm_getifa(info, tableid)) != 0) { NET_UNLOCK(); break; } error = rtrequest(RTM_ADD, info, prio, &rt, tableid); NET_UNLOCK(); if (error == 0) rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); break; case RTM_DELETE: rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio); if (rt == NULL) { error = ESRCH; break; } /* * If we got multipath routes, we require users to specify * a matching gateway. */ if (ISSET(rt->rt_flags, RTF_MPATH) && info->rti_info[RTAX_GATEWAY] == NULL) { error = ESRCH; break; } ifp = if_get(rt->rt_ifidx); if (ifp == NULL) { rtfree(rt); rt = NULL; error = ESRCH; break; } /* * Invalidate the cache of automagically created and * referenced L2 entries to make sure that ``rt_gwroute'' * pointer stays valid for other CPUs. */ if ((ISSET(rt->rt_flags, RTF_CACHED))) { NET_LOCK(); ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); /* Reset the MTU of the gateway route. */ rtable_walk(tableid, rt_key(rt)->sa_family, NULL, route_cleargateway, rt); NET_UNLOCK(); break; } /* * Make sure that local routes are only modified by the * kernel. 
*/ if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { error = EINVAL; break; } rtfree(rt); rt = NULL; NET_LOCK(); error = rtrequest_delete(info, prio, ifp, &rt, tableid); NET_UNLOCK(); break; case RTM_CHANGE: rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio); /* * If we got multipath routes, we require users to specify * a matching gateway. */ if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && (info->rti_info[RTAX_GATEWAY] == NULL)) { rtfree(rt); rt = NULL; } /* * If RTAX_GATEWAY is the argument we're trying to * change, try to find a compatible route. */ if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], NULL, prio); /* Ensure we don't pick a multipath one. */ if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { rtfree(rt); rt = NULL; } } if (rt == NULL) { error = ESRCH; break; } /* * Make sure that local routes are only modified by the * kernel. */ if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { error = EINVAL; break; } ifp = if_get(rt->rt_ifidx); if (ifp == NULL) { rtfree(rt); rt = NULL; error = ESRCH; break; } /* * RTM_CHANGE needs a perfect match. */ plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, info->rti_info[RTAX_NETMASK]); if (rt_plen(rt) != plen) { error = ESRCH; break; } if (info->rti_info[RTAX_GATEWAY] != NULL) if (rt->rt_gateway == NULL || bcmp(rt->rt_gateway, info->rti_info[RTAX_GATEWAY], info->rti_info[RTAX_GATEWAY]->sa_len)) { newgate = 1; } /* * Check reachable gateway before changing the route. * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous. */ if (newgate || info->rti_info[RTAX_IFP] != NULL || info->rti_info[RTAX_IFA] != NULL) { struct ifaddr *ifa = NULL; NET_LOCK(); if ((error = rtm_getifa(info, tableid)) != 0) { NET_UNLOCK(); break; } ifa = info->rti_ifa; if (rt->rt_ifa != ifa) { ifp->if_rtrequest(ifp, RTM_DELETE, rt); ifafree(rt->rt_ifa); rt->rt_ifa = ifaref(ifa); rt->rt_ifidx = ifa->ifa_ifp->if_index; /* recheck link state after ifp change */ rt_if_linkstate_change(rt, ifa->ifa_ifp, tableid); } NET_UNLOCK(); } change: if (info->rti_info[RTAX_GATEWAY] != NULL) { /* When updating the gateway, make sure it is valid. 
*/ if (!newgate && rt->rt_gateway->sa_family != info->rti_info[RTAX_GATEWAY]->sa_family) { error = EINVAL; break; } NET_LOCK(); error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY], tableid); NET_UNLOCK(); if (error) break; } #ifdef MPLS if (rtm->rtm_flags & RTF_MPLS) { NET_LOCK(); error = rt_mpls_set(rt, info->rti_info[RTAX_SRC], info->rti_mpls); NET_UNLOCK(); if (error) break; } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { NET_LOCK(); /* if gateway changed remove MPLS information */ rt_mpls_clear(rt); NET_UNLOCK(); } #endif #ifdef BFD if (ISSET(rtm->rtm_flags, RTF_BFD)) { KERNEL_LOCK(); error = bfdset(rt); KERNEL_UNLOCK(); if (error) break; } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && ISSET(rtm->rtm_fmask, RTF_BFD)) { KERNEL_LOCK(); bfdclear(rt); KERNEL_UNLOCK(); } #endif NET_LOCK(); /* Hack to allow some flags to be toggled */ if (rtm->rtm_fmask) { /* MPLS flag it is set by rt_mpls_set() */ rtm->rtm_fmask &= ~RTF_MPLS; rtm->rtm_flags &= ~RTF_MPLS; rt->rt_flags = (rt->rt_flags & ~rtm->rtm_fmask) | (rtm->rtm_flags & rtm->rtm_fmask); } rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); ifp->if_rtrequest(ifp, RTM_ADD, rt); if (info->rti_info[RTAX_LABEL] != NULL) { const char *rtlabel = ((const struct sockaddr_rtlabel *) info->rti_info[RTAX_LABEL])->sr_label; rtlabel_unref(rt->rt_labelid); rt->rt_labelid = rtlabel_name2id(rtlabel); } if_group_routechange(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK]); rt->rt_locks &= ~(rtm->rtm_inits); rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); NET_UNLOCK(); break; case RTM_GET: rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio); if (rt == NULL) error = ESRCH; break; } if_put(ifp); *prt = rt; return (error); } struct ifaddr * ifa_ifwithroute(int flags, const struct sockaddr *dst, const struct sockaddr *gateway, unsigned int rtableid) { struct ifaddr *ifa; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, rtableid); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway, rtableid); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, rtableid); } if (ifa == NULL) { if (gateway->sa_family == AF_LINK) { const struct sockaddr_dl *sdl; struct ifnet *ifp; sdl = satosdl_const(gateway); ifp = if_get(sdl->sdl_index); if (ifp != NULL) ifa = ifaof_ifpforaddr(dst, ifp); if_put(ifp); } else { struct rtentry *rt; rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); if (rt != NULL) ifa = rt->rt_ifa; rtfree(rt); } } if (ifa == NULL) return (NULL); if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; } return (ifa); } int rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) { struct ifnet *ifp = NULL; /* * The "returned" `ifa' is guaranteed to be alive only if * the NET_LOCK() is held. 
*/ NET_ASSERT_LOCKED(); /* * ifp may be specified by sockaddr_dl when protocol address * is ambiguous */ if (info->rti_info[RTAX_IFP] != NULL) { const struct sockaddr_dl *sdl; sdl = satosdl_const(info->rti_info[RTAX_IFP]); ifp = if_get(sdl->sdl_index); } #ifdef IPSEC /* * If the destination is a PF_KEY address, we'll look * for the existence of a encap interface number or address * in the options list of the gateway. By default, we'll return * enc0. */ if (info->rti_info[RTAX_DST] && info->rti_info[RTAX_DST]->sa_family == PF_KEY) info->rti_ifa = enc_getifa(rtid, 0); #endif if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); if (info->rti_ifa == NULL) { const struct sockaddr *sa; if ((sa = info->rti_info[RTAX_IFA]) == NULL) if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) sa = info->rti_info[RTAX_DST]; if (sa != NULL && ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, ifp); else if (info->rti_info[RTAX_DST] != NULL && info->rti_info[RTAX_GATEWAY] != NULL) info->rti_ifa = ifa_ifwithroute(info->rti_flags, info->rti_info[RTAX_DST], info->rti_info[RTAX_GATEWAY], rtid); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(info->rti_flags, sa, sa, rtid); } if_put(ifp); if (info->rti_ifa == NULL) return (ENETUNREACH); return (0); } int route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) { struct rtentry *nhrt = arg; if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && !ISSET(rt->rt_locks, RTV_MTU)) rt->rt_mtu = 0; return (0); } /* * Check if the user request to insert an ARP entry does not conflict * with existing ones. * * Only two entries are allowed for a given IP address: a private one * (priv) and a public one (pub). */ int route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) { int proxy = (info->rti_flags & RTF_ANNOUNCE); if ((info->rti_flags & RTF_LLINFO) == 0 || (info->rti_info[RTAX_DST]->sa_family != AF_INET)) return (0); if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) return (0); /* If the entry is cached, it can be updated. */ if (ISSET(rt->rt_flags, RTF_CACHED)) return (0); /* * Same destination, not cached and both "priv" or "pub" conflict. * If a second entry exists, it always conflict. */ if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || ISSET(rt->rt_flags, RTF_MPATH)) return (EEXIST); /* No conflict but an entry exist so we need to force mpath. */ info->rti_flags |= RTF_MPATH; return (0); } void rtm_setmetrics(u_long which, const struct rt_metrics *in, struct rt_kmetrics *out) { int64_t expire; if (which & RTV_MTU) out->rmx_mtu = in->rmx_mtu; if (which & RTV_EXPIRE) { expire = in->rmx_expire; if (expire != 0) { expire -= gettime(); expire += getuptime(); } out->rmx_expire = expire; } } void rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { const struct rt_kmetrics *in = &rt->rt_rmx; int64_t expire; expire = in->rmx_expire; if (expire == 0) expire = rt_timer_get_expire(rt); if (expire != 0) { expire -= getuptime(); expire += gettime(); } bzero(out, sizeof(*out)); out->rmx_locks = in->rmx_locks; out->rmx_mtu = in->rmx_mtu; out->rmx_expire = expire; out->rmx_pksent = in->rmx_pksent; } #define ROUNDUP(a) \ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) int rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { int i; /* * Parse address bits, split address storage in chunks, and * set info pointers. 
Use sa_len for traversing the memory * and check that we stay within in the limit. */ bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { struct sockaddr *sa; if ((rtinfo->rti_addrs & (1U << i)) == 0) continue; if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) return (EINVAL); sa = (struct sockaddr *)cp; if (cp + sa->sa_len > cplim) return (EINVAL); rtinfo->rti_info[i] = sa; ADVANCE(cp, sa); } /* * Check that the address family is suitable for the route address * type. Check that each address has a size that fits its family * and its length is within the size. Strings within addresses must * be NUL terminated. */ for (i = 0; i < RTAX_MAX; i++) { const struct sockaddr *sa; size_t len, maxlen, size; sa = rtinfo->rti_info[i]; if (sa == NULL) continue; maxlen = size = 0; switch (i) { case RTAX_DST: case RTAX_GATEWAY: case RTAX_SRC: switch (sa->sa_family) { case AF_INET: size = sizeof(struct sockaddr_in); break; case AF_LINK: size = sizeof(struct sockaddr_dl); break; #ifdef INET6 case AF_INET6: size = sizeof(struct sockaddr_in6); break; #endif #ifdef MPLS case AF_MPLS: size = sizeof(struct sockaddr_mpls); break; #endif } break; case RTAX_IFP: if (sa->sa_family != AF_LINK) return (EAFNOSUPPORT); /* * XXX Should be sizeof(struct sockaddr_dl), but * route(8) has a bug and provides less memory. * arp(8) has another bug and uses sizeof pointer. */ size = 4; break; case RTAX_IFA: switch (sa->sa_family) { case AF_INET: size = sizeof(struct sockaddr_in); break; #ifdef INET6 case AF_INET6: size = sizeof(struct sockaddr_in6); break; #endif default: return (EAFNOSUPPORT); } break; case RTAX_LABEL: if (sa->sa_family != AF_UNSPEC) return (EAFNOSUPPORT); maxlen = RTLABEL_LEN; size = sizeof(struct sockaddr_rtlabel); break; #ifdef BFD case RTAX_BFD: if (sa->sa_family != AF_UNSPEC) return (EAFNOSUPPORT); size = sizeof(struct sockaddr_bfd); break; #endif case RTAX_DNS: /* more validation in rtm_validate_proposal */ if (sa->sa_len > sizeof(struct sockaddr_rtdns)) return (EINVAL); if (sa->sa_len < offsetof(struct sockaddr_rtdns, sr_dns)) return (EINVAL); switch (sa->sa_family) { case AF_INET: #ifdef INET6 case AF_INET6: #endif break; default: return (EAFNOSUPPORT); } break; case RTAX_STATIC: switch (sa->sa_family) { case AF_INET: #ifdef INET6 case AF_INET6: #endif break; default: return (EAFNOSUPPORT); } maxlen = RTSTATIC_LEN; size = sizeof(struct sockaddr_rtstatic); break; case RTAX_SEARCH: if (sa->sa_family != AF_UNSPEC) return (EAFNOSUPPORT); maxlen = RTSEARCH_LEN; size = sizeof(struct sockaddr_rtsearch); break; } if (size) { /* memory for the full struct must be provided */ if (sa->sa_len < size) return (EINVAL); } if (maxlen) { /* this should not happen */ if (2 + maxlen > size) return (EINVAL); /* strings must be NUL terminated within the struct */ len = strnlen(sa->sa_data, maxlen); if (len >= maxlen || 2 + len >= sa->sa_len) return (EINVAL); break; } } return (0); } struct mbuf * rtm_msg1(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; const struct sockaddr *sa; int len, dlen, hlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: hlen = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: hlen = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: hlen = sizeof(struct if_announcemsghdr); break; #ifdef BFD case RTM_BFD: hlen = sizeof(struct bfd_msghdr); break; #endif case RTM_80211INFO: hlen = sizeof(struct if_ieee80211_msghdr); break; default: hlen = sizeof(struct rt_msghdr); break; } len = hlen; for (i = 0; i 
< RTAX_MAX; i++) { if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) continue; len += ROUNDUP(sa->sa_len); } if (len > MCLBYTES) panic("rtm_msg1"); m = m_gethdr(M_DONTWAIT, MT_DATA); if (m && len > MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m = NULL; } } if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.ph_ifidx = 0; rtm = mtod(m, struct rt_msghdr *); bzero(rtm, len); len = hlen; for (i = 0; i < RTAX_MAX; i++) { if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1U << i); dlen = ROUNDUP(sa->sa_len); if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) { m_freem(m); return (NULL); } len += dlen; } rtm->rtm_msglen = len; rtm->rtm_hdrlen = hlen; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } int rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) { int i; int len, dlen, hlen, second_time = 0; caddr_t cp0; rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; default: len = sizeof(struct rt_msghdr); break; } hlen = len; if ((cp0 = cp) != NULL) cp += len; for (i = 0; i < RTAX_MAX; i++) { const struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1U << i); dlen = ROUNDUP(sa->sa_len); if (cp) { bcopy(sa, cp, sa->sa_len); bzero(cp + sa->sa_len, dlen - sa->sa_len); cp += dlen; } len += dlen; } /* align message length to the next natural boundary */ len = ALIGN(len); if (cp == 0 && w != NULL && !second_time) { w->w_needed += len; if (w->w_needed <= w->w_given && w->w_where) { if (w->w_tmemsize < len) { free(w->w_tmem, M_RTABLE, w->w_tmemsize); w->w_tmem = malloc(len, M_RTABLE, M_NOWAIT | M_ZERO); if (w->w_tmem) w->w_tmemsize = len; } if (w->w_tmem) { cp = w->w_tmem; second_time = 1; goto again; } else w->w_where = 0; } } if (cp && w) /* clear the message header */ bzero(cp0, hlen); if (cp) { struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; rtm->rtm_hdrlen = hlen; } return (len); } void rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) { struct rt_addrinfo info; struct ifnet *ifp; struct sockaddr_rtlabel sa_rl; struct sockaddr_in6 sa_mask; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if (!ISSET(rt->rt_flags, RTF_HOST)) info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); ifp = if_get(rt->rt_ifidx); if (ifp != NULL) { info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); info.rti_info[RTAX_IFA] = rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family); if (info.rti_info[RTAX_IFA] == NULL) info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; } rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, rtableid); if_put(ifp); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. 
*/ void rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, u_int ifidx, int error, u_int tableid) { struct rt_msghdr *rtm; struct mbuf *m; const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (rtptable.rtp_count == 0) return; m = rtm_msg1(type, rtinfo); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_priority = prio; rtm->rtm_errno = error; rtm->rtm_tableid = tableid; rtm->rtm_addrs = rtinfo->rti_addrs; rtm->rtm_index = ifidx; route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rtm_ifchg(struct ifnet *ifp) { struct rt_addrinfo info; struct if_msghdr *ifm; struct mbuf *m; if (rtptable.rtp_count == 0) return; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); m = rtm_msg1(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_tableid = ifp->if_rdomain; ifm->ifm_flags = ifp->if_flags; ifm->ifm_xflags = ifp->if_xflags; if_getdata(ifp, &ifm->ifm_data); ifm->ifm_addrs = info.rti_addrs; route_input(m, NULL, AF_UNSPEC); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. * if we ever reverse the logic and replace messages TO the routing * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. */ void rtm_addr(int cmd, struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct mbuf *m; struct rt_addrinfo info; struct ifa_msghdr *ifam; if (rtptable.rtp_count == 0) return; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rtm_msg1(cmd, &info)) == NULL) return; ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; ifam->ifam_tableid = ifp->if_rdomain; route_input(m, NULL, ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rtm_ifannounce(struct ifnet *ifp, int what) { struct if_announcemsghdr *ifan; struct mbuf *m; if (rtptable.rtp_count == 0) return; m = rtm_msg1(RTM_IFANNOUNCE, NULL); if (m == NULL) return; ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; route_input(m, NULL, AF_UNSPEC); } #ifdef BFD /* * This is used to generate routing socket messages indicating * the state of a BFD session. 
*/ void rtm_bfd(struct bfd_config *bfd) { struct bfd_msghdr *bfdm; struct sockaddr_bfd sa_bfd; struct mbuf *m; struct rt_addrinfo info; if (rtptable.rtp_count == 0) return; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; m = rtm_msg1(RTM_BFD, &info); if (m == NULL) return; bfdm = mtod(m, struct bfd_msghdr *); bfdm->bm_addrs = info.rti_addrs; KERNEL_ASSERT_LOCKED(); bfd2sa(bfd->bc_rt, &sa_bfd); memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); } #endif /* BFD */ /* * This is used to generate routing socket messages indicating * the state of an ieee80211 interface. */ void rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) { struct if_ieee80211_msghdr *ifim; struct mbuf *m; if (rtptable.rtp_count == 0) return; m = rtm_msg1(RTM_80211INFO, NULL); if (m == NULL) return; ifim = mtod(m, struct if_ieee80211_msghdr *); ifim->ifim_index = ifp->if_index; ifim->ifim_tableid = ifp->if_rdomain; memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); route_input(m, NULL, AF_UNSPEC); } /* * This is used to generate routing socket messages indicating * the address selection proposal from an interface. */ void rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, uint8_t prio) { struct rt_msghdr *rtm; struct mbuf *m; m = rtm_msg1(RTM_PROPOSAL, rtinfo); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_priority = prio; rtm->rtm_tableid = ifp->if_rdomain; rtm->rtm_index = ifp->if_index; rtm->rtm_addrs = rtinfo->rti_addrs; route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); } /* * This is used in dumping the kernel table via sysctl(). */ int sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) { struct walkarg *w = v; int error = 0, size; struct rt_addrinfo info; struct ifnet *ifp; #ifdef BFD struct sockaddr_bfd sa_bfd; #endif struct sockaddr_rtlabel sa_rl; struct sockaddr_in6 sa_mask; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; if (w->w_op == NET_RT_DUMP && w->w_arg) { u_int8_t prio = w->w_arg & RTP_MASK; if (w->w_arg < 0) { prio = (-w->w_arg) & RTP_MASK; /* Show all routes that are not this priority */ if (prio == (rt->rt_priority & RTP_MASK)) return 0; } else { if (prio != (rt->rt_priority & RTP_MASK) && prio != RTP_ANY) return 0; } } bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); ifp = if_get(rt->rt_ifidx); if (ifp != NULL) { info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); info.rti_info[RTAX_IFA] = rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); if (info.rti_info[RTAX_IFA] == NULL) info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } if_put(ifp); info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); #ifdef BFD if (rt->rt_flags & RTF_BFD) { KERNEL_ASSERT_LOCKED(); info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); } #endif #ifdef MPLS if (rt->rt_flags & RTF_MPLS) { struct sockaddr_mpls sa_mpls; bzero(&sa_mpls, sizeof(sa_mpls)); sa_mpls.smpls_family = AF_MPLS; sa_mpls.smpls_len = sizeof(sa_mpls); sa_mpls.smpls_label = ((struct rt_mpls *) rt->rt_llinfo)->mpls_label; info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; info.rti_mpls = ((struct rt_mpls *) rt->rt_llinfo)->mpls_operation; } #endif size = rtm_msg2(RTM_GET, 
RTM_VERSION, &info, NULL, w); if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_pid = curproc->p_p->ps_pid; rtm->rtm_flags = RTF_DONE | rt->rt_flags; rtm->rtm_priority = rt->rt_priority & RTP_MASK; rtm_getmetrics(rt, &rtm->rtm_rmx); /* Do not account the routing table's reference. */ rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1; rtm->rtm_index = rt->rt_ifidx; rtm->rtm_addrs = info.rti_addrs; rtm->rtm_tableid = id; #ifdef MPLS rtm->rtm_mpls = info.rti_mpls; #endif if ((error = copyout(rtm, w->w_where, size)) != 0) w->w_where = NULL; else w->w_where += size; } return (error); } int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero(&info, sizeof(info)); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { if (w->w_arg && w->w_arg != ifp->if_index) continue; /* Copy the link-layer address first */ info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { struct if_msghdr *ifm; ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_tableid = ifp->if_rdomain; ifm->ifm_flags = ifp->if_flags; if_getdata(ifp, &ifm->ifm_data); ifm->ifm_addrs = info.rti_addrs; error = copyout(ifm, w->w_where, len); if (error) return (error); w->w_where += len; } info.rti_info[RTAX_IFP] = NULL; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { KASSERT(ifa->ifa_addr->sa_family != AF_LINK); if (af && af != ifa->ifa_addr->sa_family) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; error = copyout(w->w_tmem, w->w_where, len); if (error) return (error); w->w_where += len; } } info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = info.rti_info[RTAX_BRD] = NULL; } return (0); } int sysctl_ifnames(struct walkarg *w) { struct if_nameindex_msg ifn; struct ifnet *ifp; int error = 0; /* XXX ignore tableid for now */ TAILQ_FOREACH(ifp, &ifnetlist, if_list) { if (w->w_arg && w->w_arg != ifp->if_index) continue; w->w_needed += sizeof(ifn); if (w->w_where && w->w_needed <= w->w_given) { memset(&ifn, 0, sizeof(ifn)); ifn.if_index = ifp->if_index; strlcpy(ifn.if_name, ifp->if_xname, sizeof(ifn.if_name)); error = copyout(&ifn, w->w_where, sizeof(ifn)); if (error) return (error); w->w_where += sizeof(ifn); } } return (0); } int sysctl_source(int af, u_int tableid, struct walkarg *w) { struct sockaddr *sa; int size, error = 0; sa = rtable_getsource(tableid, af); if (sa) { switch (sa->sa_family) { case AF_INET: size = sizeof(struct sockaddr_in); break; #ifdef INET6 case AF_INET6: size = sizeof(struct sockaddr_in6); break; #endif default: return (0); } w->w_needed += size; if (w->w_where && w->w_needed <= w->w_given) { if ((error = copyout(sa, w->w_where, size))) return (error); w->w_where += size; } } return (0); } int sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, size_t newlen) { int i, error = EINVAL; u_char af; struct walkarg w; struct rt_tableinfo tableinfo; u_int tableid = 0; if (new) return (EPERM); if 
(namelen < 3 || namelen > 4) return (EINVAL); af = name[0]; bzero(&w, sizeof(w)); w.w_where = where; w.w_given = *given; w.w_op = name[1]; w.w_arg = name[2]; if (namelen == 4) { tableid = name[3]; if (!rtable_exists(tableid)) return (ENOENT); } else tableid = curproc->p_p->ps_rtableid; switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: NET_LOCK_SHARED(); for (i = 1; i <= AF_MAX; i++) { if (af != 0 && af != i) continue; error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, &w); if (error == EAFNOSUPPORT) error = 0; if (error) break; } NET_UNLOCK_SHARED(); break; case NET_RT_IFLIST: NET_LOCK_SHARED(); error = sysctl_iflist(af, &w); NET_UNLOCK_SHARED(); break; case NET_RT_STATS: return (sysctl_rtable_rtstat(where, given, new)); case NET_RT_TABLE: tableid = w.w_arg; if (!rtable_exists(tableid)) return (ENOENT); memset(&tableinfo, 0, sizeof tableinfo); tableinfo.rti_tableid = tableid; tableinfo.rti_domainid = rtable_l2(tableid); error = sysctl_rdstruct(where, given, new, &tableinfo, sizeof(tableinfo)); return (error); case NET_RT_IFNAMES: NET_LOCK_SHARED(); error = sysctl_ifnames(&w); NET_UNLOCK_SHARED(); break; case NET_RT_SOURCE: tableid = w.w_arg; if (!rtable_exists(tableid)) return (ENOENT); NET_LOCK_SHARED(); for (i = 1; i <= AF_MAX; i++) { if (af != 0 && af != i) continue; error = sysctl_source(i, tableid, &w); if (error == EAFNOSUPPORT) error = 0; if (error) break; } NET_UNLOCK_SHARED(); break; } free(w.w_tmem, M_RTABLE, w.w_tmemsize); if (where) { *given = w.w_where - (caddr_t)where; if (w.w_needed > w.w_given) return (ENOMEM); } else if (w.w_needed == 0) { *given = 0; } else { *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024), PAGE_SIZE); } return (error); } int sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) { extern struct cpumem *rtcounters; uint64_t counters[rts_ncounters]; struct rtstat rtstat; uint32_t *words = (uint32_t *)&rtstat; int i; CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); memset(&rtstat, 0, sizeof rtstat); counters_read(rtcounters, counters, nitems(counters), NULL); for (i = 0; i < nitems(counters); i++) words[i] = (uint32_t)counters[i]; return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); } int rtm_validate_proposal(struct rt_addrinfo *info) { if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | RTA_SEARCH)) { return -1; } if (ISSET(info->rti_addrs, RTA_NETMASK)) { const struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; if (sa == NULL) return -1; switch (sa->sa_family) { case AF_INET: if (sa->sa_len != sizeof(struct sockaddr_in)) return -1; break; case AF_INET6: if (sa->sa_len != sizeof(struct sockaddr_in6)) return -1; break; default: return -1; } } if (ISSET(info->rti_addrs, RTA_IFA)) { const struct sockaddr *sa = info->rti_info[RTAX_IFA]; if (sa == NULL) return -1; switch (sa->sa_family) { case AF_INET: if (sa->sa_len != sizeof(struct sockaddr_in)) return -1; break; case AF_INET6: if (sa->sa_len != sizeof(struct sockaddr_in6)) return -1; break; default: return -1; } } if (ISSET(info->rti_addrs, RTA_DNS)) { const struct sockaddr_rtdns *rtdns = (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; if (rtdns == NULL) return -1; if (rtdns->sr_len > sizeof(*rtdns)) return -1; if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) return -1; switch (rtdns->sr_family) { case AF_INET: if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, sr_dns)) % sizeof(struct in_addr) != 0) return -1; break; #ifdef INET6 case AF_INET6: if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 
sr_dns)) % sizeof(struct in6_addr) != 0) return -1; break; #endif default: return -1; } } if (ISSET(info->rti_addrs, RTA_STATIC)) { const struct sockaddr_rtstatic *rtstatic = (const struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; if (rtstatic == NULL) return -1; if (rtstatic->sr_len > sizeof(*rtstatic)) return -1; if (rtstatic->sr_len <= offsetof(struct sockaddr_rtstatic, sr_static)) return -1; } if (ISSET(info->rti_addrs, RTA_SEARCH)) { const struct sockaddr_rtsearch *rtsearch = (const struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; if (rtsearch == NULL) return -1; if (rtsearch->sr_len > sizeof(*rtsearch)) return -1; if (rtsearch->sr_len <= offsetof(struct sockaddr_rtsearch, sr_search)) return -1; } return 0; } int rt_setsource(unsigned int rtableid, const struct sockaddr *src) { struct ifaddr *ifa; /* * If source address is 0.0.0.0 or :: * use automatic source selection */ switch(src->sa_family) { case AF_INET: if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) { rtable_setsource(rtableid, AF_INET, NULL); return (0); } break; #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) { rtable_setsource(rtableid, AF_INET6, NULL); return (0); } break; #endif default: return (EAFNOSUPPORT); } /* * Check if source address is assigned to an interface in the * same rdomain */ if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) return (EINVAL); return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); } /* * Definitions of protocols supported in the ROUTE domain. */ const struct pr_usrreqs route_usrreqs = { .pru_attach = route_attach, .pru_detach = route_detach, .pru_disconnect = route_disconnect, .pru_shutdown = route_shutdown, .pru_rcvd = route_rcvd, .pru_send = route_send, .pru_sockaddr = route_sockaddr, .pru_peeraddr = route_peeraddr, }; const struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, .pr_ctloutput = route_ctloutput, .pr_usrreqs = &route_usrreqs, .pr_init = route_prinit, .pr_sysctl = sysctl_rtable } }; const struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_init = route_init, .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[nitems(routesw)] };
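/*
 * Editor's illustrative sketch, not part of rtsock.c: a minimal userland
 * client for the PF_ROUTE interface served by route_output() above.  It
 * issues an RTM_GET for a destination and walks the addresses in the reply
 * using the same sa_len/ROUNDUP convention as rtm_xaddrs() and rtm_msg2().
 * The helper name rtget_example() is hypothetical and error handling is
 * trimmed.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define RT_ROUNDUP(n) \
	((n) > 0 ? (1 + (((n) - 1) | (sizeof(long) - 1))) : sizeof(long))

static int
rtget_example(const char *dst)
{
	struct {
		struct rt_msghdr	rtm;
		struct sockaddr_in	sin;
	} msg;
	char buf[2048];
	ssize_t n;
	int i, s;

	if ((s = socket(AF_ROUTE, SOCK_RAW, AF_INET)) == -1)
		return (-1);

	memset(&msg, 0, sizeof(msg));
	msg.rtm.rtm_version = RTM_VERSION;
	msg.rtm.rtm_type = RTM_GET;
	msg.rtm.rtm_hdrlen = sizeof(msg.rtm);
	msg.rtm.rtm_msglen = sizeof(msg);
	msg.rtm.rtm_seq = 1;
	msg.rtm.rtm_addrs = RTA_DST;		/* only RTAX_DST is supplied */
	msg.sin.sin_len = sizeof(msg.sin);
	msg.sin.sin_family = AF_INET;
	inet_pton(AF_INET, dst, &msg.sin.sin_addr);

	if (write(s, &msg, sizeof(msg)) == -1) {
		close(s);
		return (-1);
	}

	/* Wait for the kernel to echo our request back with the answer. */
	while ((n = read(s, buf, sizeof(buf))) > 0) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)buf;
		char *cp;

		if (rtm->rtm_version != RTM_VERSION || rtm->rtm_seq != 1 ||
		    rtm->rtm_pid != getpid())
			continue;
		if (rtm->rtm_errno != 0) {
			errno = rtm->rtm_errno;
			break;
		}
		/*
		 * Addresses follow the header in RTAX_* bit order, each
		 * padded to a long boundary, mirroring rtm_msg2() above.
		 */
		cp = (char *)rtm + rtm->rtm_hdrlen;
		for (i = 0; i < RTAX_MAX; i++) {
			struct sockaddr *sa;

			if ((rtm->rtm_addrs & (1 << i)) == 0)
				continue;
			sa = (struct sockaddr *)cp;
			printf("rtax %d: family %d, len %d\n",
			    i, sa->sa_family, sa->sa_len);
			cp += RT_ROUNDUP(sa->sa_len);
		}
		close(s);
		return (0);
	}
	close(s);
	return (-1);
}

/*
 * Usage: rtget_example("192.0.2.1") prints the sockaddrs (RTAX_DST,
 * RTAX_GATEWAY, RTAX_NETMASK, ...) that rtm_report() packed into the reply;
 * if no matching entry exists, rtm_output() above answers with ESRCH.
 */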
/* $OpenBSD: ffs_vnops.c,v 1.102 2024/02/03 18:51:58 beck Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.10 (Berkeley) 8/10/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/resourcevar.h> #include <sys/kernel.h> #include <sys/stat.h> #include <sys/buf.h> #include <sys/mount.h> #include <sys/vnode.h> #include <sys/malloc.h> #include <sys/signalvar.h> #include <sys/pool.h> #include <sys/event.h> #include <sys/specdev.h> #include <miscfs/fifofs/fifo.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/dir.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> const struct vops ffs_vops = { .vop_lookup = ufs_lookup, .vop_create = ufs_create, .vop_mknod = ufs_mknod, .vop_open = ufs_open, .vop_close = ufs_close, .vop_access = ufs_access, .vop_getattr = ufs_getattr, .vop_setattr = ufs_setattr, .vop_read = ffs_read, .vop_write = ffs_write, .vop_ioctl = ufs_ioctl, .vop_kqfilter = ufs_kqfilter, .vop_revoke = vop_generic_revoke, .vop_fsync = ffs_fsync, .vop_remove = ufs_remove, .vop_link = ufs_link, .vop_rename = ufs_rename, .vop_mkdir = ufs_mkdir, .vop_rmdir = ufs_rmdir, .vop_symlink = ufs_symlink, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_abortop = vop_generic_abortop, .vop_inactive = ufs_inactive, .vop_reclaim = ffs_reclaim, .vop_lock = ufs_lock, .vop_unlock = ufs_unlock, .vop_bmap = ufs_bmap, .vop_strategy = ufs_strategy, .vop_print = ufs_print, .vop_islocked = ufs_islocked, .vop_pathconf = ufs_pathconf, .vop_advlock = ufs_advlock, .vop_bwrite = vop_generic_bwrite }; const struct vops ffs_specvops = { .vop_close = ufsspec_close, .vop_access = ufs_access, .vop_getattr = ufs_getattr, .vop_setattr = ufs_setattr, .vop_read = ufsspec_read, .vop_write = ufsspec_write, .vop_fsync = ffs_fsync, .vop_inactive = ufs_inactive, .vop_reclaim = ffs_reclaim, .vop_lock = ufs_lock, .vop_unlock = ufs_unlock, .vop_print = ufs_print, .vop_islocked = ufs_islocked, /* XXX: Keep in sync with spec_vops */ .vop_lookup = vop_generic_lookup, .vop_create = vop_generic_badop, .vop_mknod = vop_generic_badop, .vop_open = spec_open, .vop_ioctl = spec_ioctl, .vop_kqfilter = spec_kqfilter, .vop_revoke = vop_generic_revoke, .vop_remove = vop_generic_badop, .vop_link = vop_generic_badop, .vop_rename = vop_generic_badop, .vop_mkdir = vop_generic_badop, .vop_rmdir = vop_generic_badop, .vop_symlink = vop_generic_badop, .vop_readdir = vop_generic_badop, .vop_readlink = vop_generic_badop, .vop_abortop = vop_generic_badop, .vop_bmap = vop_generic_bmap, .vop_strategy = spec_strategy, .vop_pathconf = spec_pathconf, .vop_advlock = spec_advlock, .vop_bwrite = vop_generic_bwrite, }; #ifdef FIFO const struct vops ffs_fifovops = { .vop_close = ufsfifo_close, .vop_access = ufs_access, .vop_getattr = ufs_getattr, .vop_setattr = ufs_setattr, .vop_read = ufsfifo_read, .vop_write = ufsfifo_write, .vop_fsync = ffs_fsync, .vop_inactive = ufs_inactive, .vop_reclaim = ffsfifo_reclaim, .vop_lock = ufs_lock, .vop_unlock = ufs_unlock, .vop_print = ufs_print, .vop_islocked = ufs_islocked, .vop_bwrite = 
vop_generic_bwrite, /* XXX: Keep in sync with fifo_vops */ .vop_lookup = vop_generic_lookup, .vop_create = vop_generic_badop, .vop_mknod = vop_generic_badop, .vop_open = fifo_open, .vop_ioctl = fifo_ioctl, .vop_kqfilter = fifo_kqfilter, .vop_revoke = vop_generic_revoke, .vop_remove = vop_generic_badop, .vop_link = vop_generic_badop, .vop_rename = vop_generic_badop, .vop_mkdir = vop_generic_badop, .vop_rmdir = vop_generic_badop, .vop_symlink = vop_generic_badop, .vop_readdir = vop_generic_badop, .vop_readlink = vop_generic_badop, .vop_abortop = vop_generic_badop, .vop_bmap = vop_generic_bmap, .vop_strategy = vop_generic_badop, .vop_pathconf = fifo_pathconf, .vop_advlock = fifo_advlock }; #endif /* FIFO */ /* * Vnode op for reading. */ int ffs_read(void *v) { struct vop_read_args *ap = v; struct vnode *vp; struct inode *ip; struct uio *uio; struct fs *fs; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; int size, xfersize, blkoffset; mode_t mode; int error; vp = ap->a_vp; ip = VTOI(vp); mode = DIP(ip, mode); uio = ap->a_uio; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("ffs_read: mode"); if (vp->v_type == VLNK) { if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen) panic("ffs_read: short symlink"); } else if (vp->v_type != VREG && vp->v_type != VDIR) panic("ffs_read: type %d", vp->v_type); #endif fs = ip->i_fs; if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = fs->fs_bsize; /* WAS blksize(fs, ip, lbn); */ blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= DIP(ip, size)) error = bread(vp, lbn, size, &bp); else if (lbn - 1 == ip->i_ci.ci_lastr || uio->uio_resid > xfersize) { error = bread_cluster(vp, lbn, size, &bp); } else error = bread(vp, lbn, size, &bp); if (error) break; ip->i_ci.ci_lastr = lbn; /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. */ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } error = uiomove(bp->b_data + blkoffset, xfersize, uio); if (error) break; brelse(bp); } if (bp != NULL) brelse(bp); if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) { ip->i_flag |= IN_ACCESS; } return (error); } /* * Vnode op for writing. 
*/ int ffs_write(void *v) { struct vop_write_args *ap = v; struct vnode *vp; struct uio *uio; struct inode *ip; struct fs *fs; struct buf *bp; daddr_t lbn; off_t osize; int blkoffset, error, extended, flags, ioflag, size, xfersize; size_t resid; ssize_t overrun; extended = 0; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("ffs_write: mode"); #endif /* * If writing 0 bytes, succeed and do not change * update time or file offset (standards compliance) */ if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = DIP(ip, size); if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size)) return (EPERM); /* FALLTHROUGH */ case VLNK: break; case VDIR: if ((ioflag & IO_SYNC) == 0) panic("ffs_write: nonsync dir write"); break; default: panic("ffs_write: type %d", vp->v_type); } fs = ip->i_fs; if (uio->uio_offset < 0 || (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) return (EFBIG); /* do the filesize rlimit check */ if ((error = vn_fsizechk(vp, uio, ioflag, &overrun))) return (error); resid = uio->uio_resid; osize = DIP(ip, size); flags = ioflag & IO_SYNC ? B_SYNC : 0; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize, ap->a_cred, flags, &bp)) != 0) break; if (uio->uio_offset + xfersize > DIP(ip, size)) { DIP_ASSIGN(ip, size, uio->uio_offset + xfersize); uvm_vnp_setsize(vp, DIP(ip, size)); extended = 1; } (void)uvm_vnp_uncache(vp); size = blksize(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; error = uiomove(bp->b_data + blkoffset, xfersize, uio); /* * If the buffer is not already filled and we encounter an * error while trying to fill it, we have to clear out any * garbage data from the pages instantiated for the buffer. * If we do not, a failed uiomove() during a write can leave * the prior contents of the pages exposed to a userland mmap. * * Note that we don't need to clear buffers that were * allocated with the B_CLRBUF flag set. */ if (error != 0 && !(flags & B_CLRBUF)) memset(bp->b_data + blkoffset, 0, xfersize); if (ioflag & IO_NOCACHE) bp->b_flags |= B_NOCACHE; if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->fs_bsize) { bawrite(bp); } else bdwrite(bp); if (error || xfersize == 0) break; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0 && !vnoperm(vp)) DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID)); if (resid > uio->uio_resid) VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); if (error) { if (ioflag & IO_UNIT) { (void)UFS_TRUNCATE(ip, osize, ioflag & IO_SYNC, ap->a_cred); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { error = UFS_UPDATE(ip, 1); } /* correct the result for writes clamped by vn_fsizechk() */ uio->uio_resid += overrun; return (error); } /* * Synch an open file. 
*/ int ffs_fsync(void *v) { struct vop_fsync_args *ap = v; struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; int s, error, passes, skipmeta; /* * Flush all dirty buffers associated with a vnode. */ passes = NIADDR + 1; skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; s = splbio(); loop: LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { bp->b_flags &= ~B_SCANNED; } LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) { /* * Reasons to skip this buffer: it has already been considered * on this pass, this pass is the first time through on a * synchronous flush request and the buffer being considered * is metadata, the buffer has dependencies that will cause * it to be redirtied and it has not already been deferred, * or it is already being written. */ if (bp->b_flags & (B_BUSY | B_SCANNED)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); if (skipmeta && bp->b_lblkno < 0) continue; bremfree(bp); buf_acquire(bp); bp->b_flags |= B_SCANNED; splx(s); /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ if (passes > 0 || ap->a_waitfor != MNT_WAIT) (void) bawrite(bp); else if ((error = bwrite(bp)) != 0) return (error); s = splbio(); /* * Since we may have slept during the I/O, we need * to start from a known point. */ nbp = LIST_FIRST(&vp->v_dirtyblkhd); } if (skipmeta) { skipmeta = 0; goto loop; } if (ap->a_waitfor == MNT_WAIT) { vwaitforio(vp, 0, "ffs_fsync", INFSLP); /* * Ensure that any filesystem metadata associated * with the vnode has been written. */ splx(s); /* XXX softdep was here. reconsider this locking dance */ s = splbio(); if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may * have new I/O requests posted for them even if * the vnode is locked, so no amount of trying will * get them clean. Thus we give block devices a * good effort, then just give up. For all other file * types, go around and try again until it is clean. */ if (passes > 0) { passes -= 1; goto loop; } #ifdef DIAGNOSTIC if (vp->v_type != VBLK) vprint("ffs_fsync: dirty", vp); #endif } } splx(s); return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); } /* * Reclaim an inode so that it can be used for other purposes. */ int ffs_reclaim(void *v) { struct vop_reclaim_args *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); int error; if ((error = ufs_reclaim(vp)) != 0) return (error); if (ip->i_din1 != NULL) { #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) pool_put(&ffs_dinode2_pool, ip->i_din2); else #endif pool_put(&ffs_dinode1_pool, ip->i_din1); } pool_put(&ffs_ino_pool, ip); vp->v_data = NULL; return (0); } #ifdef FIFO int ffsfifo_reclaim(void *v) { fifo_reclaim(v); return (ffs_reclaim(v)); } #endif
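/*
 * Editor's illustrative sketch, not part of ffs_vnops.c: the per-iteration
 * arithmetic that ffs_read() above uses to turn a byte offset into a logical
 * block number, an offset within that block and a transfer size.  It assumes
 * only that bsize is the filesystem block size (fs->fs_bsize); the helper
 * name ffs_xfer_sketch() is hypothetical.
 */
#include <sys/types.h>

struct xfer_sketch {
	long long	lbn;		/* logical block, cf. lblkno() */
	int		blkoffset;	/* byte offset inside it, cf. blkoff() */
	int		xfersize;	/* bytes for one uiomove() */
};

static struct xfer_sketch
ffs_xfer_sketch(off_t offset, off_t resid, off_t filesize, int bsize)
{
	struct xfer_sketch x;
	off_t bytesinfile = filesize - offset;

	x.lbn = offset / bsize;			/* lblkno(fs, offset) */
	x.blkoffset = offset % bsize;		/* blkoff(fs, offset) */
	x.xfersize = bsize - x.blkoffset;	/* rest of this block */
	if (resid < x.xfersize)			/* caller asked for less */
		x.xfersize = resid;
	if (bytesinfile < x.xfersize)		/* do not run past EOF */
		x.xfersize = bytesinfile;
	return (x);
}

/*
 * For example, with bsize = 16384, offset = 20000, resid = 100000 and
 * filesize = 25000 this yields lbn 1, blkoffset 3616 and xfersize 5000:
 * the transfer stops at end of file inside the second block.
 */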
/* $OpenBSD: wskbdutil.c,v 1.19 2021/12/30 06:55:11 anton Exp $ */ /* $NetBSD: wskbdutil.c,v 1.7 1999/12/21 11:59:13 drochner Exp $ */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Juergen Hannken-Illjes. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #include <sys/param.h> #include <sys/errno.h> #include <sys/systm.h> #include <sys/malloc.h> #include <dev/wscons/wsksymdef.h> #include <dev/wscons/wsksymvar.h> static struct compose_tab_s { keysym_t elem[2]; keysym_t result; } compose_tab[] = { { { KS_plus, KS_plus }, KS_numbersign }, { { KS_a, KS_a }, KS_at }, { { KS_parenleft, KS_parenleft }, KS_bracketleft }, { { KS_slash, KS_slash }, KS_backslash }, { { KS_parenright, KS_parenright }, KS_bracketright }, { { KS_parenleft, KS_minus }, KS_braceleft }, { { KS_slash, KS_minus }, KS_bar }, { { KS_parenright, KS_minus }, KS_braceright }, { { KS_exclam, KS_exclam }, KS_exclamdown }, { { KS_c, KS_slash }, KS_cent }, { { KS_l, KS_minus }, KS_sterling }, { { KS_y, KS_minus }, KS_yen }, { { KS_s, KS_o }, KS_section }, { { KS_x, KS_o }, KS_currency }, { { KS_c, KS_o }, KS_copyright }, { { KS_less, KS_less }, KS_guillemotleft }, { { KS_greater, KS_greater }, KS_guillemotright }, { { KS_question, KS_question }, KS_questiondown }, { { KS_dead_acute, KS_space }, KS_apostrophe }, { { KS_dead_grave, KS_space }, KS_grave }, { { KS_dead_tilde, KS_space }, KS_asciitilde }, { { KS_dead_circumflex, KS_space }, KS_asciicircum }, { { KS_dead_diaeresis, KS_space }, KS_quotedbl }, { { KS_dead_cedilla, KS_space }, KS_comma }, { { KS_dead_circumflex, KS_A }, KS_Acircumflex }, { { KS_dead_diaeresis, KS_A }, KS_Adiaeresis }, { { KS_dead_grave, KS_A }, KS_Agrave }, { { KS_dead_abovering, KS_A }, KS_Aring }, { { KS_dead_tilde, KS_A }, KS_Atilde }, { { KS_dead_cedilla, KS_C }, KS_Ccedilla }, { { KS_dead_acute, KS_E }, KS_Eacute }, { { KS_dead_circumflex, KS_E }, KS_Ecircumflex }, { { KS_dead_diaeresis, KS_E }, KS_Ediaeresis }, { { KS_dead_grave, KS_E }, KS_Egrave }, { { KS_dead_acute, KS_I }, KS_Iacute }, { { KS_dead_circumflex, KS_I }, KS_Icircumflex }, { { KS_dead_diaeresis, KS_I }, KS_Idiaeresis }, { { KS_dead_grave, KS_I }, KS_Igrave }, { { KS_dead_tilde, KS_N }, KS_Ntilde }, { { KS_dead_acute, KS_O }, KS_Oacute }, { { KS_dead_circumflex, KS_O }, KS_Ocircumflex }, { { KS_dead_diaeresis, KS_O }, KS_Odiaeresis }, { { KS_dead_grave, KS_O }, KS_Ograve }, { { KS_dead_tilde, KS_O }, KS_Otilde }, { { KS_dead_acute, KS_U }, KS_Uacute }, { { KS_dead_circumflex, KS_U }, KS_Ucircumflex }, { { KS_dead_diaeresis, KS_U }, KS_Udiaeresis }, { { KS_dead_grave, KS_U }, KS_Ugrave }, { { KS_dead_acute, KS_Y }, KS_Yacute }, { { KS_dead_acute, KS_a }, KS_aacute }, { { KS_dead_circumflex, KS_a }, KS_acircumflex }, { { KS_dead_diaeresis, KS_a }, KS_adiaeresis }, { { KS_dead_grave, KS_a }, KS_agrave }, { { KS_dead_abovering, KS_a }, KS_aring }, { { KS_dead_tilde, KS_a }, KS_atilde }, { { KS_dead_cedilla, KS_c }, KS_ccedilla }, { { KS_dead_acute, KS_e }, KS_eacute }, { { KS_dead_circumflex, KS_e }, KS_ecircumflex }, { { KS_dead_diaeresis, KS_e }, KS_ediaeresis }, { { KS_dead_grave, KS_e }, KS_egrave }, { { KS_dead_acute, KS_i }, KS_iacute }, { { KS_dead_circumflex, KS_i }, KS_icircumflex }, { { KS_dead_diaeresis, KS_i }, KS_idiaeresis }, { { KS_dead_grave, KS_i }, KS_igrave }, { { KS_dead_tilde, KS_n }, KS_ntilde }, { { KS_dead_acute, KS_o }, KS_oacute }, { { KS_dead_circumflex, KS_o }, KS_ocircumflex }, { { KS_dead_diaeresis, KS_o }, KS_odiaeresis }, { { KS_dead_grave, KS_o }, KS_ograve }, { { KS_dead_tilde, KS_o }, KS_otilde }, { { KS_dead_acute, KS_u }, KS_uacute }, { { KS_dead_circumflex, KS_u }, KS_ucircumflex }, { { KS_dead_diaeresis, KS_u }, KS_udiaeresis }, { { KS_dead_grave, KS_u }, KS_ugrave }, { { KS_dead_acute, KS_y }, KS_yacute }, { { KS_dead_diaeresis, KS_y }, KS_ydiaeresis 
}, { { KS_quotedbl, KS_A }, KS_Adiaeresis }, { { KS_quotedbl, KS_E }, KS_Ediaeresis }, { { KS_quotedbl, KS_I }, KS_Idiaeresis }, { { KS_quotedbl, KS_O }, KS_Odiaeresis }, { { KS_quotedbl, KS_U }, KS_Udiaeresis }, { { KS_quotedbl, KS_a }, KS_adiaeresis }, { { KS_quotedbl, KS_e }, KS_ediaeresis }, { { KS_quotedbl, KS_i }, KS_idiaeresis }, { { KS_quotedbl, KS_o }, KS_odiaeresis }, { { KS_quotedbl, KS_u }, KS_udiaeresis }, { { KS_quotedbl, KS_y }, KS_ydiaeresis }, { { KS_acute, KS_A }, KS_Aacute }, { { KS_asciicircum, KS_A }, KS_Acircumflex }, { { KS_grave, KS_A }, KS_Agrave }, { { KS_asterisk, KS_A }, KS_Aring }, { { KS_asciitilde, KS_A }, KS_Atilde }, { { KS_cedilla, KS_C }, KS_Ccedilla }, { { KS_acute, KS_E }, KS_Eacute }, { { KS_asciicircum, KS_E }, KS_Ecircumflex }, { { KS_grave, KS_E }, KS_Egrave }, { { KS_acute, KS_I }, KS_Iacute }, { { KS_asciicircum, KS_I }, KS_Icircumflex }, { { KS_grave, KS_I }, KS_Igrave }, { { KS_asciitilde, KS_N }, KS_Ntilde }, { { KS_acute, KS_O }, KS_Oacute }, { { KS_asciicircum, KS_O }, KS_Ocircumflex }, { { KS_grave, KS_O }, KS_Ograve }, { { KS_asciitilde, KS_O }, KS_Otilde }, { { KS_acute, KS_U }, KS_Uacute }, { { KS_asciicircum, KS_U }, KS_Ucircumflex }, { { KS_grave, KS_U }, KS_Ugrave }, { { KS_acute, KS_Y }, KS_Yacute }, { { KS_acute, KS_a }, KS_aacute }, { { KS_asciicircum, KS_a }, KS_acircumflex }, { { KS_grave, KS_a }, KS_agrave }, { { KS_asterisk, KS_a }, KS_aring }, { { KS_asciitilde, KS_a }, KS_atilde }, { { KS_cedilla, KS_c }, KS_ccedilla }, { { KS_acute, KS_e }, KS_eacute }, { { KS_asciicircum, KS_e }, KS_ecircumflex }, { { KS_grave, KS_e }, KS_egrave }, { { KS_acute, KS_i }, KS_iacute }, { { KS_asciicircum, KS_i }, KS_icircumflex }, { { KS_grave, KS_i }, KS_igrave }, { { KS_asciitilde, KS_n }, KS_ntilde }, { { KS_acute, KS_o }, KS_oacute }, { { KS_asciicircum, KS_o }, KS_ocircumflex }, { { KS_grave, KS_o }, KS_ograve }, { { KS_asciitilde, KS_o }, KS_otilde }, { { KS_acute, KS_u }, KS_uacute }, { { KS_asciicircum, KS_u }, KS_ucircumflex }, { { KS_grave, KS_u }, KS_ugrave }, { { KS_acute, KS_y }, KS_yacute }, { { KS_dead_caron, KS_space }, KS_L2_caron }, { { KS_dead_caron, KS_S }, KS_L2_Scaron }, { { KS_dead_caron, KS_Z }, KS_L2_Zcaron }, { { KS_dead_caron, KS_s }, KS_L2_scaron }, { { KS_dead_caron, KS_z }, KS_L2_zcaron } }; #define COMPOSE_SIZE nitems(compose_tab) static int compose_tab_inorder = 0; keysym_t ksym_upcase(keysym_t); void fillmapentry(const keysym_t *, int, struct wscons_keymap *); static inline int compose_tab_cmp(struct compose_tab_s *i, struct compose_tab_s *j) { if (i->elem[0] == j->elem[0]) return(i->elem[1] - j->elem[1]); else return(i->elem[0] - j->elem[0]); } keysym_t wskbd_compose_value(keysym_t *compose_buf) { int i, j, r; struct compose_tab_s v; if (!compose_tab_inorder) { /* Insertion sort. 
*/ for (i = 1; i < COMPOSE_SIZE; i++) { v = compose_tab[i]; /* find correct slot, moving others up */ for (j = i; --j >= 0 && compose_tab_cmp(&v, &compose_tab[j]) < 0;) compose_tab[j + 1] = compose_tab[j]; compose_tab[j + 1] = v; } compose_tab_inorder = 1; } for (j = 0, i = COMPOSE_SIZE; i != 0; i /= 2) { if (compose_tab[j + i/2].elem[0] == compose_buf[0]) { if (compose_tab[j + i/2].elem[1] == compose_buf[1]) return(compose_tab[j + i/2].result); r = compose_tab[j + i/2].elem[1] < compose_buf[1]; } else r = compose_tab[j + i/2].elem[0] < compose_buf[0]; if (r) { j += i/2 + 1; i--; } } return(KS_voidSymbol); } static const u_char latin1_to_upper[256] = { /* 0 8 1 9 2 a 3 b 4 c 5 d 6 e 7 f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 2 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 2 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 4 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 4 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 5 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 5 */ 0x00, 'A', 'B', 'C', 'D', 'E', 'F', 'G', /* 6 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 6 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', /* 7 */ 'X', 'Y', 'Z', 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 9 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 9 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* a */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* a */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* e */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* e */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0x00, /* f */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x00, /* f */ }; keysym_t ksym_upcase(keysym_t ksym) { if (ksym >= KS_f1 && ksym <= KS_f20) return(KS_F1 - KS_f1 + ksym); if (KS_GROUP(ksym) == KS_GROUP_Ascii && ksym <= 0xff && latin1_to_upper[ksym] != 0x00) return(latin1_to_upper[ksym]); return(ksym); } void fillmapentry(const keysym_t *kp, int len, struct wscons_keymap *mapentry) { switch (len) { case 0: mapentry->group1[0] = KS_voidSymbol; mapentry->group1[1] = KS_voidSymbol; mapentry->group2[0] = KS_voidSymbol; mapentry->group2[1] = KS_voidSymbol; break; case 1: mapentry->group1[0] = kp[0]; mapentry->group1[1] = ksym_upcase(kp[0]); mapentry->group2[0] = mapentry->group1[0]; mapentry->group2[1] = mapentry->group1[1]; break; case 2: mapentry->group1[0] = kp[0]; mapentry->group1[1] = kp[1]; mapentry->group2[0] = mapentry->group1[0]; mapentry->group2[1] = mapentry->group1[1]; break; case 3: mapentry->group1[0] = kp[0]; mapentry->group1[1] = kp[1]; mapentry->group2[0] = kp[2]; mapentry->group2[1] = ksym_upcase(kp[2]); break; case 4: mapentry->group1[0] = kp[0]; mapentry->group1[1] = kp[1]; mapentry->group2[0] = kp[2]; 
mapentry->group2[1] = kp[3]; break; } } void wskbd_get_mapentry(const struct wskbd_mapdata *mapdata, int kc, struct wscons_keymap *mapentry) { kbd_t cur; const keysym_t *kp; const struct wscons_keydesc *mp; int l; keysym_t ksg; mapentry->command = KS_voidSymbol; mapentry->group1[0] = KS_voidSymbol; mapentry->group1[1] = KS_voidSymbol; mapentry->group2[0] = KS_voidSymbol; mapentry->group2[1] = KS_voidSymbol; for (cur = mapdata->layout & ~KB_HANDLEDBYWSKBD; cur != 0; ) { mp = mapdata->keydesc; while (mp->map_size > 0) { if (mp->name == cur) break; mp++; } /* If map not found, return */ if (mp->map_size <= 0) return; for (kp = mp->map; kp < mp->map + mp->map_size; kp++) { ksg = KS_GROUP(*kp); if (ksg == KS_GROUP_Keycode && KS_VALUE(*kp) == kc) { /* First skip keycode and possible command */ kp++; if (KS_GROUP(*kp) == KS_GROUP_Command || *kp == KS_Cmd || *kp == KS_Cmd1 || *kp == KS_Cmd2) mapentry->command = *kp++; for (l = 0; kp + l < mp->map + mp->map_size; l++) { ksg = KS_GROUP(kp[l]); if (ksg == KS_GROUP_Keycode) break; } if (l > 4) panic("wskbd_get_mapentry: %d(%d): bad entry", mp->name, *kp); fillmapentry(kp, l, mapentry); return; } } cur = mp->base; } } struct wscons_keymap * wskbd_init_keymap(int maplen) { struct wscons_keymap *map; int i; map = mallocarray(maplen, sizeof(*map), M_DEVBUF, M_WAITOK); for (i = 0; i < maplen; i++) { map[i].command = KS_voidSymbol; map[i].group1[0] = KS_voidSymbol; map[i].group1[1] = KS_voidSymbol; map[i].group2[0] = KS_voidSymbol; map[i].group2[1] = KS_voidSymbol; } return map; } int wskbd_load_keymap(const struct wskbd_mapdata *mapdata, kbd_t layout, struct wscons_keymap **map, int *maplen) { int i, s, kc, stack_ptr; const keysym_t *kp; const struct wscons_keydesc *mp, *stack[10]; kbd_t cur; keysym_t ksg; for (cur = layout & ~KB_HANDLEDBYWSKBD, stack_ptr = 0; cur != 0; stack_ptr++) { mp = mapdata->keydesc; while (mp->map_size > 0) { if (cur == 0 || mp->name == cur) { break; } mp++; } if (stack_ptr == nitems(stack)) panic("wskbd_load_keymap: %d: recursion too deep", mapdata->layout); if (mp->map_size <= 0) return(EINVAL); stack[stack_ptr] = mp; cur = mp->base; } for (i = 0, s = stack_ptr - 1; s >= 0; s--) { mp = stack[s]; for (kp = mp->map; kp < mp->map + mp->map_size; kp++) { ksg = KS_GROUP(*kp); if (ksg == KS_GROUP_Keycode && KS_VALUE(*kp) > i) i = KS_VALUE(*kp); } } *map = wskbd_init_keymap(i + 1); *maplen = i + 1; for (s = stack_ptr - 1; s >= 0; s--) { mp = stack[s]; for (kp = mp->map; kp < mp->map + mp->map_size; ) { ksg = KS_GROUP(*kp); if (ksg != KS_GROUP_Keycode) panic("wskbd_load_keymap: %d(%d): bad entry", mp->name, *kp); kc = KS_VALUE(*kp); kp++; if (KS_GROUP(*kp) == KS_GROUP_Command || *kp == KS_Cmd || *kp == KS_Cmd1 || *kp == KS_Cmd2) { (*map)[kc].command = *kp; kp++; } for (i = 0; kp + i < mp->map + mp->map_size; i++) { ksg = KS_GROUP(kp[i]); if (ksg == KS_GROUP_Keycode) break; } if (i > 4) panic("wskbd_load_keymap: %d(%d): bad entry", mp->name, *kp); fillmapentry(kp, i, &(*map)[kc]); kp += i; } } return(0); }
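/*
 * Illustrative sketch (not part of wskbdutil.c): wskbd_compose_value()
 * above lazily sorts compose_tab and then binary-searches it by the
 * two-keysym prefix.  The small standalone program below shows the same
 * sort-then-binary-search idea on a toy table; all names in it are
 * hypothetical, and it uses plain qsort() instead of the driver's
 * insertion sort.
 */
#include <stdio.h>
#include <stdlib.h>

struct pair {
	int a, b;		/* the two "keysyms" forming the key */
	int result;		/* the composed value */
};

static int
pair_cmp(const void *x, const void *y)
{
	const struct pair *i = x, *j = y;

	if (i->a != j->a)
		return (i->a - j->a);
	return (i->b - j->b);
}

/* binary search over a table sorted by (a, b); -1 plays the KS_voidSymbol role */
static int
pair_lookup(const struct pair *tab, size_t n, int a, int b)
{
	size_t lo = 0, hi = n, mid;

	while (lo < hi) {
		mid = lo + (hi - lo) / 2;
		if (tab[mid].a == a && tab[mid].b == b)
			return (tab[mid].result);
		if (tab[mid].a < a || (tab[mid].a == a && tab[mid].b < b))
			lo = mid + 1;
		else
			hi = mid;
	}
	return (-1);
}

int
main(void)
{
	struct pair tab[] = { { 2, 2, 20 }, { 1, 1, 10 }, { 1, 3, 13 } };
	size_t n = sizeof(tab) / sizeof(tab[0]);

	qsort(tab, n, sizeof(tab[0]), pair_cmp);	/* sort once, on first use */
	printf("%d\n", pair_lookup(tab, n, 1, 3));	/* prints 13 */
	return (0);
}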
/* $OpenBSD: midi.c,v 1.57 2024/05/13 01:15:50 jsg Exp $ */ /* * Copyright (c) 2003, 2004 Alexandre Ratchov * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ #include <sys/param.h> #include <sys/fcntl.h> #include <sys/systm.h> #include <sys/ioctl.h> #include <sys/conf.h> #include <sys/kernel.h> #include <sys/timeout.h> #include <sys/vnode.h> #include <sys/signalvar.h> #include <sys/device.h> #include <dev/midi_if.h> #include <dev/audio_if.h> #include <dev/midivar.h> #define DEVNAME(sc) ((sc)->dev.dv_xname) int midiopen(dev_t, int, int, struct proc *); int midiclose(dev_t, int, int, struct proc *); int midiread(dev_t, struct uio *, int); int midiwrite(dev_t, struct uio *, int); int midikqfilter(dev_t, struct knote *); int midiioctl(dev_t, u_long, caddr_t, int, struct proc *); int midiprobe(struct device *, void *, void *); void midiattach(struct device *, struct device *, void *); int mididetach(struct device *, int); int midiprint(void *, const char *); void midi_iintr(void *, int); void midi_ointr(void *); void midi_timeout(void *); void midi_out_start(struct midi_softc *); void midi_out_stop(struct midi_softc *); void midi_out_do(struct midi_softc *); const struct cfattach midi_ca = { sizeof(struct midi_softc), midiprobe, midiattach, mididetach }; struct cfdriver midi_cd = { NULL, "midi", DV_DULL }; void filt_midiwdetach(struct knote *); int filt_midiwrite(struct knote *, long); int filt_midimodify(struct kevent *, struct knote *); int filt_midiprocess(struct knote *, struct kevent *); const struct filterops midiwrite_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_midiwdetach, .f_event = filt_midiwrite, .f_modify = filt_midimodify, .f_process = filt_midiprocess, }; void filt_midirdetach(struct knote *); int filt_midiread(struct knote *, long); const struct filterops midiread_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_midirdetach, .f_event = filt_midiread, .f_modify = filt_midimodify, .f_process = filt_midiprocess, }; void midi_buf_wakeup(struct midi_buffer *buf) { if (buf->blocking) { wakeup(&buf->blocking); buf->blocking = 0; } knote_locked(&buf->klist, 0); } void midi_iintr(void *addr, int data) { struct midi_softc *sc = (struct midi_softc *)addr; struct midi_buffer *mb = &sc->inbuf; MUTEX_ASSERT_LOCKED(&audio_lock); if (!(sc->dev.dv_flags & DVF_ACTIVE) || !(sc->flags & FREAD)) return; if (MIDIBUF_ISFULL(mb)) return; /* discard data */ MIDIBUF_WRITE(mb, data); midi_buf_wakeup(mb); } int midiread(dev_t dev, struct uio *uio, int ioflag) { struct midi_softc *sc; struct midi_buffer *mb; size_t count; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; if (!(sc->flags & FREAD)) { error = ENXIO; goto done; } mb = &sc->inbuf; /* if there is no data then sleep (unless IO_NDELAY flag is set) */ error = 0; mtx_enter(&audio_lock); while (MIDIBUF_ISEMPTY(mb)) { if (ioflag & IO_NDELAY) { error = EWOULDBLOCK; goto done_mtx; } sc->inbuf.blocking = 1; error = msleep_nsec(&sc->inbuf.blocking, &audio_lock, PWAIT | PCATCH, "mid_rd", INFSLP); if (!(sc->dev.dv_flags & DVF_ACTIVE)) error = EIO; if (error) goto done_mtx; } /* at this stage, there is at least 1 byte */ while (uio->uio_resid > 0 && mb->used > 0) { count = MIDIBUF_SIZE - mb->start; if (count > mb->used) count = mb->used; if (count > uio->uio_resid) count = uio->uio_resid; mtx_leave(&audio_lock); error = uiomove(mb->data + mb->start, count, uio); if (error) goto done; mtx_enter(&audio_lock); MIDIBUF_REMOVE(mb, count); } done_mtx: mtx_leave(&audio_lock); done: device_unref(&sc->dev); return error; } void midi_ointr(void *addr) { struct midi_softc *sc = 
(struct midi_softc *)addr; struct midi_buffer *mb; MUTEX_ASSERT_LOCKED(&audio_lock); if (!(sc->dev.dv_flags & DVF_ACTIVE) || !(sc->flags & FWRITE)) return; mb = &sc->outbuf; if (mb->used > 0) { #ifdef MIDI_DEBUG if (!sc->isbusy) { printf("midi_ointr: output must be busy\n"); } #endif midi_out_do(sc); } else if (sc->isbusy) midi_out_stop(sc); } void midi_timeout(void *addr) { mtx_enter(&audio_lock); midi_ointr(addr); mtx_leave(&audio_lock); } void midi_out_start(struct midi_softc *sc) { if (!sc->isbusy) { sc->isbusy = 1; midi_out_do(sc); } } void midi_out_stop(struct midi_softc *sc) { sc->isbusy = 0; midi_buf_wakeup(&sc->outbuf); } void midi_out_do(struct midi_softc *sc) { struct midi_buffer *mb = &sc->outbuf; while (mb->used > 0) { if (!sc->hw_if->output(sc->hw_hdl, mb->data[mb->start])) break; MIDIBUF_REMOVE(mb, 1); if (MIDIBUF_ISEMPTY(mb)) { if (sc->hw_if->flush != NULL) sc->hw_if->flush(sc->hw_hdl); midi_out_stop(sc); return; } } if (!(sc->props & MIDI_PROP_OUT_INTR)) { if (MIDIBUF_ISEMPTY(mb)) midi_out_stop(sc); else timeout_add(&sc->timeo, 1); } } int midiwrite(dev_t dev, struct uio *uio, int ioflag) { struct midi_softc *sc; struct midi_buffer *mb; size_t count; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; if (!(sc->flags & FWRITE)) { error = ENXIO; goto done; } mb = &sc->outbuf; /* * If IO_NDELAY flag is set then check if there is enough room * in the buffer to store at least one byte. If not then dont * start the write process. */ error = 0; mtx_enter(&audio_lock); if ((ioflag & IO_NDELAY) && MIDIBUF_ISFULL(mb) && (uio->uio_resid > 0)) { error = EWOULDBLOCK; goto done_mtx; } while (uio->uio_resid > 0) { while (MIDIBUF_ISFULL(mb)) { if (ioflag & IO_NDELAY) { /* * At this stage at least one byte is already * moved so we do not return EWOULDBLOCK */ goto done_mtx; } sc->outbuf.blocking = 1; error = msleep_nsec(&sc->outbuf.blocking, &audio_lock, PWAIT | PCATCH, "mid_wr", INFSLP); if (!(sc->dev.dv_flags & DVF_ACTIVE)) error = EIO; if (error) goto done_mtx; } count = MIDIBUF_SIZE - MIDIBUF_END(mb); if (count > MIDIBUF_AVAIL(mb)) count = MIDIBUF_AVAIL(mb); if (count > uio->uio_resid) count = uio->uio_resid; mtx_leave(&audio_lock); error = uiomove(mb->data + MIDIBUF_END(mb), count, uio); if (error) goto done; mtx_enter(&audio_lock); mb->used += count; midi_out_start(sc); } done_mtx: mtx_leave(&audio_lock); done: device_unref(&sc->dev); return error; } int midikqfilter(dev_t dev, struct knote *kn) { struct midi_softc *sc; struct klist *klist; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; error = 0; switch (kn->kn_filter) { case EVFILT_READ: klist = &sc->inbuf.klist; kn->kn_fop = &midiread_filtops; break; case EVFILT_WRITE: klist = &sc->outbuf.klist; kn->kn_fop = &midiwrite_filtops; break; default: error = EINVAL; goto done; } kn->kn_hook = (void *)sc; klist_insert(klist, kn); done: device_unref(&sc->dev); return error; } void filt_midirdetach(struct knote *kn) { struct midi_softc *sc = (struct midi_softc *)kn->kn_hook; klist_remove(&sc->inbuf.klist, kn); } int filt_midiread(struct knote *kn, long hint) { struct midi_softc *sc = (struct midi_softc *)kn->kn_hook; return (!MIDIBUF_ISEMPTY(&sc->inbuf)); } void filt_midiwdetach(struct knote *kn) { struct midi_softc *sc = (struct midi_softc *)kn->kn_hook; klist_remove(&sc->outbuf.klist, kn); } int filt_midiwrite(struct knote *kn, long hint) { struct midi_softc *sc = (struct midi_softc *)kn->kn_hook; return 
(!MIDIBUF_ISFULL(&sc->outbuf)); } int filt_midimodify(struct kevent *kev, struct knote *kn) { int active; mtx_enter(&audio_lock); active = knote_modify(kev, kn); mtx_leave(&audio_lock); return active; } int filt_midiprocess(struct knote *kn, struct kevent *kev) { int active; mtx_enter(&audio_lock); active = knote_process(kn, kev); mtx_leave(&audio_lock); return active; } int midiioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) { struct midi_softc *sc; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; error = 0; switch(cmd) { case FIONBIO: /* All handled in the upper FS layer */ break; default: error = ENOTTY; } device_unref(&sc->dev); return error; } int midiopen(dev_t dev, int flags, int mode, struct proc *p) { struct midi_softc *sc; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; error = 0; if (sc->flags) { error = EBUSY; goto done; } MIDIBUF_INIT(&sc->inbuf); MIDIBUF_INIT(&sc->outbuf); sc->isbusy = 0; sc->inbuf.blocking = sc->outbuf.blocking = 0; sc->flags = flags; error = sc->hw_if->open(sc->hw_hdl, flags, midi_iintr, midi_ointr, sc); if (error) sc->flags = 0; done: device_unref(&sc->dev); return error; } int midiclose(dev_t dev, int fflag, int devtype, struct proc *p) { struct midi_softc *sc; struct midi_buffer *mb; int error; sc = (struct midi_softc *)device_lookup(&midi_cd, minor(dev)); if (sc == NULL) return ENXIO; /* start draining output buffer */ error = 0; mb = &sc->outbuf; mtx_enter(&audio_lock); if (!MIDIBUF_ISEMPTY(mb)) midi_out_start(sc); while (sc->isbusy) { sc->outbuf.blocking = 1; error = msleep_nsec(&sc->outbuf.blocking, &audio_lock, PWAIT, "mid_dr", SEC_TO_NSEC(5)); if (!(sc->dev.dv_flags & DVF_ACTIVE)) error = EIO; if (error) break; } mtx_leave(&audio_lock); /* * some hw_if->close() reset immediately the midi uart * which flushes the internal buffer of the uart device, * so we may lose some (important) data. To avoid this, * sleep 20ms (around 64 bytes) to give the time to the * uart to drain its internal buffers. */ tsleep_nsec(&sc->outbuf.blocking, PWAIT, "mid_cl", MSEC_TO_NSEC(20)); sc->hw_if->close(sc->hw_hdl); sc->flags = 0; device_unref(&sc->dev); return 0; } int midiprobe(struct device *parent, void *match, void *aux) { struct audio_attach_args *sa = aux; return (sa != NULL && (sa->type == AUDIODEV_TYPE_MIDI) ? 1 : 0); } void midiattach(struct device *parent, struct device *self, void *aux) { struct midi_info mi; struct midi_softc *sc = (struct midi_softc *)self; struct audio_attach_args *sa = (struct audio_attach_args *)aux; const struct midi_hw_if *hwif = sa->hwif; void *hdl = sa->hdl; #ifdef DIAGNOSTIC if (hwif == 0 || hwif->open == 0 || hwif->close == 0 || hwif->output == 0 || hwif->getinfo == 0) { printf("%s: missing method\n", DEVNAME(sc)); return; } #endif klist_init_mutex(&sc->inbuf.klist, &audio_lock); klist_init_mutex(&sc->outbuf.klist, &audio_lock); sc->hw_if = hwif; sc->hw_hdl = hdl; sc->hw_if->getinfo(sc->hw_hdl, &mi); sc->props = mi.props; sc->flags = 0; timeout_set(&sc->timeo, midi_timeout, sc); printf(": <%s>\n", mi.name); } int mididetach(struct device *self, int flags) { struct midi_softc *sc = (struct midi_softc *)self; int maj, mn; /* locate the major number */ for (maj = 0; maj < nchrdev; maj++) { if (cdevsw[maj].d_open == midiopen) { /* Nuke the vnodes for any open instances (calls close). 
*/ mn = self->dv_unit; vdevgone(maj, mn, mn, VCHR); } } /* * The close() method did nothing (device_lookup() returns * NULL), so quickly halt transfers (normally parent is already * gone, and code below is no-op), and wake-up user-land blocked * in read/write/ioctl, which return EIO. */ if (sc->flags) { KERNEL_ASSERT_LOCKED(); if (sc->flags & FREAD) wakeup(&sc->inbuf.blocking); if (sc->flags & FWRITE) wakeup(&sc->outbuf.blocking); sc->hw_if->close(sc->hw_hdl); sc->flags = 0; } klist_invalidate(&sc->inbuf.klist); klist_invalidate(&sc->outbuf.klist); klist_free(&sc->inbuf.klist); klist_free(&sc->outbuf.klist); return 0; } int midiprint(void *aux, const char *pnp) { if (pnp) printf("midi at %s", pnp); return (UNCONF); } struct device * midi_attach_mi(const struct midi_hw_if *hwif, void *hdl, struct device *dev) { struct audio_attach_args arg; arg.type = AUDIODEV_TYPE_MIDI; arg.hwif = hwif; arg.hdl = hdl; return config_found(dev, &arg, midiprint); }
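/*
 * Illustrative userland sketch (not part of midi.c): bytes written to the
 * character device end up in midiwrite() above, which copies them into the
 * output ring buffer and starts transmission via midi_out_start().  The
 * device path "/dev/rmidi0" and the note/velocity values are assumptions
 * for the example only.
 */
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	/* 0x90 = Note On (channel 0), 0x80 = Note Off (channel 0) */
	const unsigned char note_on[]  = { 0x90, 60, 100 };
	const unsigned char note_off[] = { 0x80, 60, 0 };
	int fd;

	if ((fd = open("/dev/rmidi0", O_WRONLY)) == -1)	/* assumed device node */
		err(1, "open");
	if (write(fd, note_on, sizeof(note_on)) == -1)
		err(1, "write");
	sleep(1);					/* hold the note briefly */
	if (write(fd, note_off, sizeof(note_off)) == -1)
		err(1, "write");
	close(fd);
	return (0);
}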
/* $OpenBSD: route.h,v 1.210 2024/03/31 15:53:12 bluhm Exp $ */ /* $NetBSD: route.h,v 1.9 1996/02/13 22:00:49 christos Exp $ */ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.3 (Berkeley) 4/19/94 */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ /* * Locks used to protect struct members in this file: * I immutable after creation * N net lock * X exclusive net lock, or shared net lock + kernel lock * R art (rtable) lock * L arp/nd6/etc lock for updates, net lock for reads * T rttimer_mtx route timer lists */ /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ #ifdef _KERNEL /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ struct rt_kmetrics { u_int64_t rmx_pksent; /* packets sent using this route */ int64_t rmx_expire; /* lifetime for route, e.g. redirect */ u_int rmx_locks; /* Kernel must leave these values */ u_int rmx_mtu; /* MTU for this path */ }; #endif /* * Huge version for userland compatibility. */ struct rt_metrics { u_int64_t rmx_pksent; /* packets sent using this route */ int64_t rmx_expire; /* lifetime for route, e.g. redirect */ u_int rmx_locks; /* Kernel must leave these values */ u_int rmx_mtu; /* MTU for this path */ u_int rmx_refcnt; /* # references hold */ /* some apps may still need these no longer used metrics */ u_int rmx_hopcount; /* max hops expected */ u_int rmx_recvpipe; /* inbound delay-bandwidth product */ u_int rmx_sendpipe; /* outbound delay-bandwidth product */ u_int rmx_ssthresh; /* outbound gateway buffer limit */ u_int rmx_rtt; /* estimated round trip time */ u_int rmx_rttvar; /* estimated rtt variance */ u_int rmx_pad; }; #ifdef _KERNEL /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) #include <sys/queue.h> #include <net/rtable.h> struct rttimer; /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked with RTF_GATEWAY so that the output routines * know to address the gateway rather than the ultimate destination. * * How the RT_gw union is used also depends on RTF_GATEWAY. With * RTF_GATEWAY set, rt_gwroute points at the rtentry for the rt_gateway * address. If RTF_GATEWAY is not set, rt_cachecnt contains the * number of RTF_GATEWAY rtentry structs with their rt_gwroute pointing * at this rtentry. 
*/ struct rtentry { struct sockaddr *rt_dest; /* [I] destination */ SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath entry to our dst */ struct sockaddr *rt_gateway; /* [X] gateway address */ struct ifaddr *rt_ifa; /* [N] interface addr to use */ caddr_t rt_llinfo; /* [L] pointer to link level info or an MPLS structure */ union { struct rtentry *_nh; /* [X] rtentry for rt_gateway */ unsigned int _ref; /* [X] # gateway rtentry refs */ } RT_gw; #define rt_gwroute RT_gw._nh #define rt_cachecnt RT_gw._ref struct rtentry *rt_parent; /* [N] if cloned, parent rtentry */ LIST_HEAD(, rttimer) rt_timer; /* queue of timeouts for misc funcs */ struct rt_kmetrics rt_rmx; /* metrics used by rx'ing protocols */ unsigned int rt_ifidx; /* [N] interface to use */ unsigned int rt_flags; /* [X] up/down?, host/net */ struct refcnt rt_refcnt; /* # held references */ int rt_plen; /* [I] prefix length */ uint16_t rt_labelid; /* [N] route label ID */ uint8_t rt_priority; /* [N] routing priority to use */ }; #define rt_use rt_rmx.rmx_pksent #define rt_expire rt_rmx.rmx_expire #define rt_locks rt_rmx.rmx_locks #define rt_mtu rt_rmx.rmx_mtu #endif /* _KERNEL */ /* bitmask values for rtm_flags */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ #define RTF_CLONING 0x100 /* generate new routes on use */ #define RTF_MULTICAST 0x200 /* route associated to a mcast addr. */ #define RTF_LLINFO 0x400 /* generated by ARP or ND */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO3 0x2000 /* protocol specific routing flag */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_ANNOUNCE RTF_PROTO2 /* announce L2 entry */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ #define RTF_CLONED 0x10000 /* this is a cloned route */ #define RTF_CACHED 0x20000 /* cached by a RTF_GATEWAY entry */ #define RTF_MPATH 0x40000 /* multipath route or operation */ #define RTF_MPLS 0x100000 /* MPLS additional infos */ #define RTF_LOCAL 0x200000 /* route to a local address */ #define RTF_BROADCAST 0x400000 /* route associated to a bcast addr. 
*/ #define RTF_CONNECTED 0x800000 /* interface route */ #define RTF_BFD 0x1000000 /* Link state controlled by BFD */ /* mask of RTF flags that are allowed to be modified by RTM_CHANGE */ #define RTF_FMASK \ (RTF_LLINFO | RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_MPLS | RTF_BFD) /* Routing priorities used by the different routing protocols */ #define RTP_NONE 0 /* unset priority use sane default */ #define RTP_LOCAL 1 /* local address routes (must be the highest) */ #define RTP_CONNECTED 4 /* directly connected routes */ #define RTP_STATIC 8 /* static routes base priority */ #define RTP_EIGRP 28 /* EIGRP routes */ #define RTP_OSPF 32 /* OSPF routes */ #define RTP_ISIS 36 /* IS-IS routes */ #define RTP_RIP 40 /* RIP routes */ #define RTP_BGP 48 /* BGP routes */ #define RTP_DEFAULT 56 /* routes that have nothing set */ #define RTP_PROPOSAL_STATIC 57 #define RTP_PROPOSAL_DHCLIENT 58 #define RTP_PROPOSAL_SLAAC 59 #define RTP_PROPOSAL_UMB 60 #define RTP_PROPOSAL_PPP 61 #define RTP_PROPOSAL_SOLICIT 62 /* request reply of all RTM_PROPOSAL */ #define RTP_MAX 63 /* maximum priority */ #define RTP_ANY 64 /* any of the above */ #define RTP_MASK 0x7f #define RTP_DOWN 0x80 /* route/link is down */ /* * Routing statistics. */ struct rtstat { u_int32_t rts_badredirect; /* bogus redirect calls */ u_int32_t rts_dynamic; /* routes created by redirects */ u_int32_t rts_newgateway; /* routes modified by redirects */ u_int32_t rts_unreach; /* lookups which failed */ u_int32_t rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Routing Table Info. */ struct rt_tableinfo { u_short rti_tableid; /* routing table id */ u_short rti_domainid; /* routing domain id */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_hdrlen; /* sizeof(rt_msghdr) to skip over the header */ u_short rtm_index; /* index for associated ifp */ u_short rtm_tableid; /* routing table id */ u_char rtm_priority; /* routing priority */ u_char rtm_mpls; /* MPLS additional infos */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ u_int rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; /* overload no longer used field */ #define rtm_use rtm_rmx.rmx_pksent #define RTM_VERSION 5 /* Up the ante and ignore older versions */ #define RTM_MAXSIZE 2048 /* Maximum size of an accepted route msg */ /* values for rtm_type */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. 
*/ #define RTM_IFANNOUNCE 0xf /* iface arrival/departure */ #define RTM_DESYNC 0x10 /* route socket buffer overflow */ #define RTM_INVALIDATE 0x11 /* Invalidate cache of L2 route */ #define RTM_BFD 0x12 /* bidirectional forwarding detection */ #define RTM_PROPOSAL 0x13 /* proposal for resolvd(8) */ #define RTM_CHGADDRATTR 0x14 /* address attribute change */ #define RTM_80211INFO 0x15 /* 80211 iface change */ #define RTM_SOURCE 0x16 /* set source address */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ #define RTA_SRC 0x100 /* source sockaddr present */ #define RTA_SRCMASK 0x200 /* source netmask present */ #define RTA_LABEL 0x400 /* route label present */ #define RTA_BFD 0x800 /* bfd present */ #define RTA_DNS 0x1000 /* DNS Servers sockaddr present */ #define RTA_STATIC 0x2000 /* RFC 3442 encoded static routes present */ #define RTA_SEARCH 0x4000 /* RFC 3397 encoded search path present */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_SRC 8 /* source sockaddr present */ #define RTAX_SRCMASK 9 /* source netmask present */ #define RTAX_LABEL 10 /* route label present */ #define RTAX_BFD 11 /* bfd present */ #define RTAX_DNS 12 /* DNS Server(s) sockaddr present */ #define RTAX_STATIC 13 /* RFC 3442 encoded static routes present */ #define RTAX_SEARCH 14 /* RFC 3397 encoded search path present */ #define RTAX_MAX 15 /* size of array to allocate */ /* * setsockopt defines used for the filtering. */ #define ROUTE_MSGFILTER 1 /* bitmask to specify which types should be sent to the client. */ #define ROUTE_TABLEFILTER 2 /* change routing table the socket is listening on, RTABLE_ANY listens on all tables. */ #define ROUTE_PRIOFILTER 3 /* only pass updates with a priority higher or equal (actual value lower) to the specified priority. */ #define ROUTE_FLAGFILTER 4 /* do not pass updates for routes with flags in this bitmask. 
*/ #define ROUTE_FILTER(m) (1 << (m)) #define RTABLE_ANY 0xffffffff #define RTLABEL_LEN 32 struct sockaddr_rtlabel { u_int8_t sr_len; /* total length */ sa_family_t sr_family; /* address family */ char sr_label[RTLABEL_LEN]; }; #define RTDNS_LEN 128 struct sockaddr_rtdns { u_int8_t sr_len; /* total length */ sa_family_t sr_family; /* address family */ char sr_dns[RTDNS_LEN]; }; #ifdef _KERNEL static inline struct sockaddr * srtdnstosa(struct sockaddr_rtdns *sdns) { return ((struct sockaddr *)(sdns)); } #endif #define RTSTATIC_LEN 128 struct sockaddr_rtstatic { u_int8_t sr_len; /* total length */ sa_family_t sr_family; /* address family */ char sr_static[RTSTATIC_LEN]; }; #define RTSEARCH_LEN 128 struct sockaddr_rtsearch { u_int8_t sr_len; /* total length */ sa_family_t sr_family; /* address family */ char sr_search[RTSEARCH_LEN]; }; struct rt_addrinfo { int rti_addrs; const struct sockaddr *rti_info[RTAX_MAX]; int rti_flags; struct ifaddr *rti_ifa; struct rt_msghdr *rti_rtm; u_char rti_mpls; }; #ifdef __BSD_VISIBLE #include <netinet/in.h> /* * A route consists of a destination address and a reference * to a routing entry. These are often held by protocols * in their control blocks, e.g. inpcb. */ struct route { struct rtentry *ro_rt; u_long ro_generation; u_long ro_tableid; /* u_long because of alignment */ union { struct sockaddr ro_dstsa; struct sockaddr_in ro_dstsin; struct sockaddr_in6 ro_dstsin6; }; union { struct in_addr ro_srcin; struct in6_addr ro_srcin6; }; }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #include <sys/percpu.h> enum rtstat_counters { rts_badredirect, /* bogus redirect calls */ rts_dynamic, /* routes created by redirects */ rts_newgateway, /* routes modified by redirects */ rts_unreach, /* lookups which failed */ rts_wildcard, /* lookups satisfied by a wildcard */ rts_ncounters }; static inline void rtstat_inc(enum rtstat_counters c) { extern struct cpumem *rtcounters; counters_inc(rtcounters, c); } /* * This structure, and the prototypes for the rt_timer_{init,remove_all, * add,timer} functions all used with the kind permission of BSDI. * These allow functions to be called for routes at specific times. 
*/ struct rttimer_queue { TAILQ_HEAD(, rttimer) rtq_head; /* [T] */ LIST_ENTRY(rttimer_queue) rtq_link; /* [T] */ void (*rtq_func) /* [I] callback */ (struct rtentry *, u_int); unsigned long rtq_count; /* [T] */ int rtq_timeout; /* [T] */ }; const char *rtlabel_id2name_locked(u_int16_t); const char *rtlabel_id2name(u_int16_t, char *, size_t); u_int16_t rtlabel_name2id(const char *); struct sockaddr *rtlabel_id2sa(u_int16_t, struct sockaddr_rtlabel *); void rtlabel_unref(u_int16_t); /* * Values for additional argument to rtalloc() */ #define RT_RESOLVE 1 extern struct rtstat rtstat; extern u_long rtgeneration; struct mbuf; struct socket; struct ifnet; struct sockaddr_in6; struct if_ieee80211_data; struct bfd_config; void route_init(void); int route_cache(struct route *, const struct in_addr *, const struct in_addr *, u_int); struct rtentry *route_mpath(struct route *, const struct in_addr *, const struct in_addr *, u_int); int route6_cache(struct route *, const struct in6_addr *, const struct in6_addr *, u_int); struct rtentry *route6_mpath(struct route *, const struct in6_addr *, const struct in6_addr *, u_int); void rtm_ifchg(struct ifnet *); void rtm_ifannounce(struct ifnet *, int); void rtm_bfd(struct bfd_config *); void rtm_80211info(struct ifnet *, struct if_ieee80211_data *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); struct sockaddr *rt_plen2mask(struct rtentry *, struct sockaddr_in6 *); void rtm_send(struct rtentry *, int, int, unsigned int); void rtm_addr(int, struct ifaddr *); void rtm_miss(int, struct rt_addrinfo *, int, uint8_t, u_int, int, u_int); void rtm_proposal(struct ifnet *, struct rt_addrinfo *, int, uint8_t); int rt_setgate(struct rtentry *, const struct sockaddr *, u_int); struct rtentry *rt_getll(struct rtentry *); void rt_timer_init(void); int rt_timer_add(struct rtentry *, struct rttimer_queue *, u_int); void rt_timer_remove_all(struct rtentry *); time_t rt_timer_get_expire(const struct rtentry *); void rt_timer_queue_init(struct rttimer_queue *, int, void(*)(struct rtentry *, u_int)); void rt_timer_queue_change(struct rttimer_queue *, int); void rt_timer_queue_flush(struct rttimer_queue *); unsigned long rt_timer_queue_count(struct rttimer_queue *); void rt_timer_timer(void *); int rt_mpls_set(struct rtentry *, const struct sockaddr *, uint8_t); void rt_mpls_clear(struct rtentry *); int rtisvalid(struct rtentry *); int rt_hash(struct rtentry *, const struct sockaddr *, uint32_t *); struct rtentry *rtalloc_mpath(const struct sockaddr *, uint32_t *, u_int); struct rtentry *rtalloc(const struct sockaddr *, int, unsigned int); void rtref(struct rtentry *); void rtfree(struct rtentry *); int rt_ifa_add(struct ifaddr *, int, struct sockaddr *, unsigned int); int rt_ifa_del(struct ifaddr *, int, struct sockaddr *, unsigned int); void rt_ifa_purge(struct ifaddr *); int rt_ifa_addlocal(struct ifaddr *); int rt_ifa_dellocal(struct ifaddr *); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, struct rtentry **, unsigned int); int rtrequest(int, struct rt_addrinfo *, u_int8_t, struct rtentry **, u_int); int rtrequest_delete(struct rt_addrinfo *, u_int8_t, struct ifnet *, struct rtentry **, u_int); int rt_if_track(struct ifnet *); int rt_if_linkstate_change(struct rtentry *, void *, u_int); int rtdeletemsg(struct rtentry *, struct ifnet *, u_int); #endif /* _KERNEL */ #endif /* _NET_ROUTE_H_ */
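/*
 * Illustrative userland sketch (not part of route.h): a minimal consumer of
 * the ROUTE_MSGFILTER socket option declared above, so the routing socket
 * only delivers interface and address change messages.  This is a sketch of
 * typical route(4) usage, not code taken from this header.
 */
#include <sys/types.h>
#include <sys/socket.h>

#include <net/route.h>

#include <err.h>
#include <unistd.h>

int
main(void)
{
	char buf[RTM_MAXSIZE];
	unsigned int rtfilter;
	ssize_t n;
	int s;

	if ((s = socket(AF_ROUTE, SOCK_RAW, AF_UNSPEC)) == -1)
		err(1, "socket");

	/* only pass RTM_IFINFO, RTM_NEWADDR and RTM_DELADDR to this socket */
	rtfilter = ROUTE_FILTER(RTM_IFINFO) | ROUTE_FILTER(RTM_NEWADDR) |
	    ROUTE_FILTER(RTM_DELADDR);
	if (setsockopt(s, AF_ROUTE, ROUTE_MSGFILTER, &rtfilter,
	    sizeof(rtfilter)) == -1)
		err(1, "setsockopt");

	/* each read() returns one message starting with a struct rt_msghdr */
	if ((n = read(s, buf, sizeof(buf))) == -1)
		err(1, "read");
	close(s);
	return (0);
}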
/* $OpenBSD: uvm_page.c,v 1.177 2024/05/01 12:54:27 mpi Exp $ */ /* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * uvm_page.c: page ops. */ #include <sys/param.h> #include <sys/systm.h> #include <sys/sched.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/proc.h> #include <sys/smr.h> #include <uvm/uvm.h> /* * for object trees */ RBT_GENERATE(uvm_objtree, vm_page, objt, uvm_pagecmp); int uvm_pagecmp(const struct vm_page *a, const struct vm_page *b) { return a->offset < b->offset ? -1 : a->offset > b->offset; } /* * global vars... XXXCDC: move to uvm. structure. */ /* * physical memory config is stored in vm_physmem. */ struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */ int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ /* * Some supported CPUs in a given architecture don't support all * of the things necessary to do idle page zero'ing efficiently. * We therefore provide a way to disable it from machdep code here. */ /* * local variables */ /* * these variables record the values returned by vm_page_bootstrap, * for debugging purposes. The implementation of uvm_pageboot_alloc * and pmap_startup here also uses them internally. 
*/ static vaddr_t virtual_space_start; static vaddr_t virtual_space_end; /* * local prototypes */ static void uvm_pageinsert(struct vm_page *); static void uvm_pageremove(struct vm_page *); int uvm_page_owner_locked_p(struct vm_page *); /* * inline functions */ /* * uvm_pageinsert: insert a page in the object * * => caller must lock object * => call should have already set pg's object and offset pointers * and bumped the version counter */ static inline void uvm_pageinsert(struct vm_page *pg) { struct vm_page *dupe; KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) || rw_write_held(pg->uobject->vmobjlock)); KASSERT((pg->pg_flags & PG_TABLED) == 0); dupe = RBT_INSERT(uvm_objtree, &pg->uobject->memt, pg); /* not allowed to insert over another page */ KASSERT(dupe == NULL); atomic_setbits_int(&pg->pg_flags, PG_TABLED); pg->uobject->uo_npages++; } /* * uvm_page_remove: remove page from object * * => caller must lock object */ static inline void uvm_pageremove(struct vm_page *pg) { KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) || rw_write_held(pg->uobject->vmobjlock)); KASSERT(pg->pg_flags & PG_TABLED); RBT_REMOVE(uvm_objtree, &pg->uobject->memt, pg); atomic_clearbits_int(&pg->pg_flags, PG_TABLED); pg->uobject->uo_npages--; pg->uobject = NULL; pg->pg_version++; } /* * uvm_page_init: init the page system. called from uvm_init(). * * => we return the range of kernel virtual memory in kvm_startp/kvm_endp */ void uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) { vsize_t freepages, pagecount, n; vm_page_t pagearray, curpg; int lcv, i; paddr_t paddr, pgno; struct vm_physseg *seg; /* * init the page queues and page queue locks */ TAILQ_INIT(&uvm.page_active); TAILQ_INIT(&uvm.page_inactive); mtx_init(&uvm.pageqlock, IPL_VM); mtx_init(&uvm.fpageqlock, IPL_VM); uvm_pmr_init(); /* * allocate vm_page structures. */ /* * sanity check: * before calling this function the MD code is expected to register * some free RAM with the uvm_page_physload() function. our job * now is to allocate vm_page structures for this memory. */ if (vm_nphysseg == 0) panic("uvm_page_bootstrap: no memory pre-allocated"); /* * first calculate the number of free pages... * * note that we use start/end rather than avail_start/avail_end. * this allows us to allocate extra vm_page structures in case we * want to return some memory to the pool after booting. */ freepages = 0; for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) freepages += (seg->end - seg->start); /* * we now know we have (PAGE_SIZE * freepages) bytes of memory we can * use. for each page of memory we use we need a vm_page structure. * thus, the total number of pages we can use is the total size of * the memory divided by the PAGE_SIZE plus the size of the vm_page * structure. we add one to freepages as a fudge factor to avoid * truncation errors (since we can only allocate in terms of whole * pages). */ pagecount = (((paddr_t)freepages + 1) << PAGE_SHIFT) / (PAGE_SIZE + sizeof(struct vm_page)); pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount * sizeof(struct vm_page)); memset(pagearray, 0, pagecount * sizeof(struct vm_page)); /* init the vm_page structures and put them in the correct place. */ for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) { n = seg->end - seg->start; if (n > pagecount) { panic("uvm_page_init: lost %ld page(s) in init", (long)(n - pagecount)); /* XXXCDC: shouldn't happen? 
*/ /* n = pagecount; */ } /* set up page array pointers */ seg->pgs = pagearray; pagearray += n; pagecount -= n; seg->lastpg = seg->pgs + (n - 1); /* init and free vm_pages (we've already zeroed them) */ pgno = seg->start; paddr = ptoa(pgno); for (i = 0, curpg = seg->pgs; i < n; i++, curpg++, pgno++, paddr += PAGE_SIZE) { curpg->phys_addr = paddr; VM_MDPAGE_INIT(curpg); if (pgno >= seg->avail_start && pgno < seg->avail_end) { uvmexp.npages++; } } /* Add pages to free pool. */ uvm_pmr_freepages(&seg->pgs[seg->avail_start - seg->start], seg->avail_end - seg->avail_start); } /* * pass up the values of virtual_space_start and * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper * layers of the VM. */ *kvm_startp = round_page(virtual_space_start); *kvm_endp = trunc_page(virtual_space_end); /* init locks for kernel threads */ mtx_init(&uvm.aiodoned_lock, IPL_BIO); /* * init reserve thresholds * XXXCDC - values may need adjusting */ uvmexp.reserve_pagedaemon = 4; uvmexp.reserve_kernel = 8; uvmexp.anonminpct = 10; uvmexp.vnodeminpct = 10; uvmexp.vtextminpct = 5; uvmexp.anonmin = uvmexp.anonminpct * 256 / 100; uvmexp.vnodemin = uvmexp.vnodeminpct * 256 / 100; uvmexp.vtextmin = uvmexp.vtextminpct * 256 / 100; uvm.page_init_done = TRUE; } /* * uvm_setpagesize: set the page size * * => sets page_shift and page_mask from uvmexp.pagesize. */ void uvm_setpagesize(void) { if (uvmexp.pagesize == 0) uvmexp.pagesize = DEFAULT_PAGE_SIZE; uvmexp.pagemask = uvmexp.pagesize - 1; if ((uvmexp.pagemask & uvmexp.pagesize) != 0) panic("uvm_setpagesize: page size not a power of two"); for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) if ((1 << uvmexp.pageshift) == uvmexp.pagesize) break; } /* * uvm_pageboot_alloc: steal memory from physmem for bootstrapping */ vaddr_t uvm_pageboot_alloc(vsize_t size) { #if defined(PMAP_STEAL_MEMORY) vaddr_t addr; /* * defer bootstrap allocation to MD code (it may want to allocate * from a direct-mapped segment). pmap_steal_memory should round * off virtual_space_start/virtual_space_end. */ addr = pmap_steal_memory(size, &virtual_space_start, &virtual_space_end); return addr; #else /* !PMAP_STEAL_MEMORY */ static boolean_t initialized = FALSE; vaddr_t addr, vaddr; paddr_t paddr; /* round to page size */ size = round_page(size); /* on first call to this function, initialize ourselves. */ if (initialized == FALSE) { pmap_virtual_space(&virtual_space_start, &virtual_space_end); /* round it the way we like it */ virtual_space_start = round_page(virtual_space_start); virtual_space_end = trunc_page(virtual_space_end); initialized = TRUE; } /* allocate virtual memory for this request */ if (virtual_space_start == virtual_space_end || (virtual_space_end - virtual_space_start) < size) panic("uvm_pageboot_alloc: out of virtual space"); addr = virtual_space_start; #ifdef PMAP_GROWKERNEL /* * If the kernel pmap can't map the requested space, * then allocate more resources for it. */ if (uvm_maxkaddr < (addr + size)) { uvm_maxkaddr = pmap_growkernel(addr + size); if (uvm_maxkaddr < (addr + size)) panic("uvm_pageboot_alloc: pmap_growkernel() failed"); } #endif virtual_space_start += size; /* allocate and mapin physical pages to back new virtual pages */ for (vaddr = round_page(addr) ; vaddr < addr + size ; vaddr += PAGE_SIZE) { if (!uvm_page_physget(&paddr)) panic("uvm_pageboot_alloc: out of memory"); /* * Note this memory is no longer managed, so using * pmap_kenter is safe. 
*/ pmap_kenter_pa(vaddr, paddr, PROT_READ | PROT_WRITE); } pmap_update(pmap_kernel()); return addr; #endif /* PMAP_STEAL_MEMORY */ } #if !defined(PMAP_STEAL_MEMORY) /* * uvm_page_physget: "steal" one page from the vm_physmem structure. * * => attempt to allocate it off the end of a segment in which the "avail" * values match the start/end values. if we can't do that, then we * will advance both values (making them equal, and removing some * vm_page structures from the non-avail area). * => return false if out of memory. */ boolean_t uvm_page_physget(paddr_t *paddrp) { int lcv; struct vm_physseg *seg; /* pass 1: try allocating from a matching end */ #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \ (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) for (lcv = vm_nphysseg - 1, seg = vm_physmem + lcv; lcv >= 0; lcv--, seg--) #else for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) #endif { if (uvm.page_init_done == TRUE) panic("uvm_page_physget: called _after_ bootstrap"); /* try from front */ if (seg->avail_start == seg->start && seg->avail_start < seg->avail_end) { *paddrp = ptoa(seg->avail_start); seg->avail_start++; seg->start++; /* nothing left? nuke it */ if (seg->avail_start == seg->end) { if (vm_nphysseg == 1) panic("uvm_page_physget: out of memory!"); vm_nphysseg--; for (; lcv < vm_nphysseg; lcv++, seg++) /* structure copy */ seg[0] = seg[1]; } return TRUE; } /* try from rear */ if (seg->avail_end == seg->end && seg->avail_start < seg->avail_end) { *paddrp = ptoa(seg->avail_end - 1); seg->avail_end--; seg->end--; /* nothing left? nuke it */ if (seg->avail_end == seg->start) { if (vm_nphysseg == 1) panic("uvm_page_physget: out of memory!"); vm_nphysseg--; for (; lcv < vm_nphysseg ; lcv++, seg++) /* structure copy */ seg[0] = seg[1]; } return TRUE; } } /* pass2: forget about matching ends, just allocate something */ #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \ (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) for (lcv = vm_nphysseg - 1, seg = vm_physmem + lcv; lcv >= 0; lcv--, seg--) #else for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) #endif { /* any room in this bank? */ if (seg->avail_start >= seg->avail_end) continue; /* nope */ *paddrp = ptoa(seg->avail_start); seg->avail_start++; /* truncate! */ seg->start = seg->avail_start; /* nothing left? nuke it */ if (seg->avail_start == seg->end) { if (vm_nphysseg == 1) panic("uvm_page_physget: out of memory!"); vm_nphysseg--; for (; lcv < vm_nphysseg ; lcv++, seg++) /* structure copy */ seg[0] = seg[1]; } return TRUE; } return FALSE; /* whoops! */ } #endif /* PMAP_STEAL_MEMORY */ /* * uvm_page_physload: load physical memory into VM system * * => all args are PFs * => all pages in start/end get vm_page structures * => areas marked by avail_start/avail_end get added to the free page pool * => we are limited to VM_PHYSSEG_MAX physical memory segments */ void uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start, paddr_t avail_end, int flags) { int preload, lcv; psize_t npages; struct vm_page *pgs; struct vm_physseg *ps, *seg; #ifdef DIAGNOSTIC if (uvmexp.pagesize == 0) panic("uvm_page_physload: page size not set!"); if (start >= end) panic("uvm_page_physload: start >= end"); #endif /* do we have room? 
*/ if (vm_nphysseg == VM_PHYSSEG_MAX) { printf("uvm_page_physload: unable to load physical memory " "segment\n"); printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n", VM_PHYSSEG_MAX, (long long)start, (long long)end); printf("\tincrease VM_PHYSSEG_MAX\n"); return; } /* * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been * called yet, so malloc is not available). */ for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg; lcv++, seg++) { if (seg->pgs) break; } preload = (lcv == vm_nphysseg); /* if VM is already running, attempt to malloc() vm_page structures */ if (!preload) { /* * XXXCDC: need some sort of lockout for this case * right now it is only used by devices so it should be alright. */ paddr_t paddr; npages = end - start; /* # of pages */ pgs = km_alloc(round_page(npages * sizeof(*pgs)), &kv_any, &kp_zero, &kd_waitok); if (pgs == NULL) { printf("uvm_page_physload: can not malloc vm_page " "structs for segment\n"); printf("\tignoring 0x%lx -> 0x%lx\n", start, end); return; } /* init phys_addr and free pages, XXX uvmexp.npages */ for (lcv = 0, paddr = ptoa(start); lcv < npages; lcv++, paddr += PAGE_SIZE) { pgs[lcv].phys_addr = paddr; VM_MDPAGE_INIT(&pgs[lcv]); if (atop(paddr) >= avail_start && atop(paddr) < avail_end) { if (flags & PHYSLOAD_DEVICE) { atomic_setbits_int(&pgs[lcv].pg_flags, PG_DEV); pgs[lcv].wire_count = 1; } else { #if defined(VM_PHYSSEG_NOADD) panic("uvm_page_physload: tried to add RAM after vm_mem_init"); #endif } } } /* Add pages to free pool. */ if ((flags & PHYSLOAD_DEVICE) == 0) { uvm_pmr_freepages(&pgs[avail_start - start], avail_end - avail_start); } /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */ } else { /* gcc complains if these don't get init'd */ pgs = NULL; npages = 0; } /* now insert us in the proper place in vm_physmem[] */ #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) /* random: put it at the end (easy!) */ ps = &vm_physmem[vm_nphysseg]; #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) { int x; /* sort by address for binary search */ for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg; lcv++, seg++) if (start < seg->start) break; ps = seg; /* move back other entries, if necessary ... */ for (x = vm_nphysseg, seg = vm_physmem + x - 1; x > lcv; x--, seg--) /* structure copy */ seg[1] = seg[0]; } #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) { int x; /* sort by largest segment first */ for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg; lcv++, seg++) if ((end - start) > (seg->end - seg->start)) break; ps = &vm_physmem[lcv]; /* move back other entries, if necessary ... 
*/ for (x = vm_nphysseg, seg = vm_physmem + x - 1; x > lcv; x--, seg--) /* structure copy */ seg[1] = seg[0]; } #else panic("uvm_page_physload: unknown physseg strategy selected!"); #endif ps->start = start; ps->end = end; ps->avail_start = avail_start; ps->avail_end = avail_end; if (preload) { ps->pgs = NULL; } else { ps->pgs = pgs; ps->lastpg = pgs + npages - 1; } vm_nphysseg++; return; } #ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ void uvm_page_physdump(void); /* SHUT UP GCC */ /* call from DDB */ void uvm_page_physdump(void) { int lcv; struct vm_physseg *seg; printf("uvm_page_physdump: physical memory config [segs=%d of %d]:\n", vm_nphysseg, VM_PHYSSEG_MAX); for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) printf("0x%llx->0x%llx [0x%llx->0x%llx]\n", (long long)seg->start, (long long)seg->end, (long long)seg->avail_start, (long long)seg->avail_end); printf("STRATEGY = "); switch (VM_PHYSSEG_STRAT) { case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break; case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break; case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break; default: printf("<<UNKNOWN>>!!!!\n"); } } #endif void uvm_shutdown(void) { #ifdef UVM_SWAP_ENCRYPT uvm_swap_finicrypt_all(); #endif smr_flush(); } /* * Perform insert of a given page in the specified anon of obj. * This is basically, uvm_pagealloc, but with the page already given. */ void uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object *obj, voff_t off, struct vm_anon *anon) { int flags; KASSERT(obj == NULL || anon == NULL); KASSERT(anon == NULL || off == 0); KASSERT(off == trunc_page(off)); KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) || rw_write_held(obj->vmobjlock)); KASSERT(anon == NULL || anon->an_lock == NULL || rw_write_held(anon->an_lock)); flags = PG_BUSY | PG_FAKE; pg->offset = off; pg->uobject = obj; pg->uanon = anon; KASSERT(uvm_page_owner_locked_p(pg)); if (anon) { anon->an_page = pg; flags |= PQ_ANON; } else if (obj) uvm_pageinsert(pg); atomic_setbits_int(&pg->pg_flags, flags); #if defined(UVM_PAGE_TRKOWN) pg->owner_tag = NULL; #endif UVM_PAGE_OWN(pg, "new alloc"); } /* * uvm_pglistalloc: allocate a list of pages * * => allocated pages are placed at the tail of rlist. rlist is * assumed to be properly initialized by caller. * => returns 0 on success or errno on failure * => doesn't take into account clean non-busy pages on inactive list * that could be used(?) * => params: * size the size of the allocation, rounded to page size. * low the low address of the allowed allocation range. * high the high address of the allowed allocation range. * alignment memory must be aligned to this power-of-two boundary. * boundary no segment in the allocation may cross this * power-of-two boundary (relative to zero). * => flags: * UVM_PLA_NOWAIT fail if allocation fails * UVM_PLA_WAITOK wait for memory to become avail * UVM_PLA_ZERO return zeroed memory */ int uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, paddr_t boundary, struct pglist *rlist, int nsegs, int flags) { KASSERT((alignment & (alignment - 1)) == 0); KASSERT((boundary & (boundary - 1)) == 0); KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT)); if (size == 0) return EINVAL; size = atop(round_page(size)); /* * XXX uvm_pglistalloc is currently only used for kernel * objects. Unlike the checks in uvm_pagealloc, below, here * we are always allowed to use the kernel reserve. 
*/ flags |= UVM_PLA_USERESERVE; if ((high & PAGE_MASK) != PAGE_MASK) { printf("uvm_pglistalloc: Upper boundary 0x%lx " "not on pagemask.\n", (unsigned long)high); } /* * Our allocations are always page granularity, so our alignment * must be, too. */ if (alignment < PAGE_SIZE) alignment = PAGE_SIZE; low = atop(roundup(low, alignment)); /* * high + 1 may result in overflow, in which case high becomes 0x0, * which is the 'don't care' value. * The only requirement in that case is that low is also 0x0, or the * low<high assert will fail. */ high = atop(high + 1); alignment = atop(alignment); if (boundary < PAGE_SIZE && boundary != 0) boundary = PAGE_SIZE; boundary = atop(boundary); return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs, flags, rlist); } /* * uvm_pglistfree: free a list of pages * * => pages should already be unmapped */ void uvm_pglistfree(struct pglist *list) { uvm_pmr_freepageq(list); } /* * interface used by the buffer cache to allocate a buffer at a time. * The pages are allocated wired in DMA accessible memory */ int uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags) { struct pglist plist; struct vm_page *pg; int i, r; KASSERT(UVM_OBJ_IS_BUFCACHE(obj)); KERNEL_ASSERT_LOCKED(); TAILQ_INIT(&plist); r = uvm_pglistalloc(size, dma_constraint.ucr_low, dma_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)), flags); if (r == 0) { i = 0; while ((pg = TAILQ_FIRST(&plist)) != NULL) { pg->wire_count = 1; atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE); KASSERT((pg->pg_flags & PG_DEV) == 0); TAILQ_REMOVE(&plist, pg, pageq); uvm_pagealloc_pg(pg, obj, off + ptoa(i++), NULL); } } return r; } /* * interface used by the buffer cache to reallocate a buffer at a time. * The pages are reallocated wired outside the DMA accessible region. * */ int uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags, struct uvm_constraint_range *where) { struct pglist plist; struct vm_page *pg, *tpg; int i, r; voff_t offset; KASSERT(UVM_OBJ_IS_BUFCACHE(obj)); KERNEL_ASSERT_LOCKED(); TAILQ_INIT(&plist); if (size == 0) panic("size 0 uvm_pagerealloc"); r = uvm_pglistalloc(size, where->ucr_low, where->ucr_high, 0, 0, &plist, atop(round_page(size)), flags); if (r == 0) { i = 0; while((pg = TAILQ_FIRST(&plist)) != NULL) { offset = off + ptoa(i++); tpg = uvm_pagelookup(obj, offset); KASSERT(tpg != NULL); pg->wire_count = 1; atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE); KASSERT((pg->pg_flags & PG_DEV) == 0); TAILQ_REMOVE(&plist, pg, pageq); uvm_pagecopy(tpg, pg); KASSERT(tpg->wire_count == 1); tpg->wire_count = 0; uvm_lock_pageq(); uvm_pagefree(tpg); uvm_unlock_pageq(); uvm_pagealloc_pg(pg, obj, offset, NULL); } } return r; } /* * uvm_pagealloc: allocate vm_page from a particular free list. * * => return null if no pages free * => wake up pagedaemon if number of free pages drops below low water mark * => only one of obj or anon can be non-null * => caller must activate/deactivate page if it is not wired. 
*/ struct vm_page * uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon, int flags) { struct vm_page *pg = NULL; int pmr_flags; KASSERT(obj == NULL || anon == NULL); KASSERT(anon == NULL || off == 0); KASSERT(off == trunc_page(off)); KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) || rw_write_held(obj->vmobjlock)); KASSERT(anon == NULL || anon->an_lock == NULL || rw_write_held(anon->an_lock)); pmr_flags = UVM_PLA_NOWAIT; /* * We're allowed to use the kernel reserve if the page is * being allocated to a kernel object. */ if ((flags & UVM_PGA_USERESERVE) || (obj != NULL && UVM_OBJ_IS_KERN_OBJECT(obj))) pmr_flags |= UVM_PLA_USERESERVE; if (flags & UVM_PGA_ZERO) pmr_flags |= UVM_PLA_ZERO; pg = uvm_pmr_cache_get(pmr_flags); if (pg == NULL) return NULL; uvm_pagealloc_pg(pg, obj, off, anon); KASSERT((pg->pg_flags & PG_DEV) == 0); if (flags & UVM_PGA_ZERO) atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); else atomic_setbits_int(&pg->pg_flags, PG_CLEAN); return pg; } /* * uvm_pagerealloc: reallocate a page from one object to another */ void uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) { /* remove it from the old object */ if (pg->uobject) { uvm_pageremove(pg); } /* put it in the new object */ if (newobj) { pg->uobject = newobj; pg->offset = newoff; pg->pg_version++; uvm_pageinsert(pg); } } /* * uvm_pageclean: clean page * * => erase page's identity (i.e. remove from object) * => caller must lock page queues if `pg' is managed * => assumes all valid mappings of pg are gone */ void uvm_pageclean(struct vm_page *pg) { u_int flags_to_clear = 0; if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) && (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject))) MUTEX_ASSERT_LOCKED(&uvm.pageqlock); #ifdef DEBUG if (pg->uobject == (void *)0xdeadbeef && pg->uanon == (void *)0xdeadbeef) { panic("uvm_pagefree: freeing free page %p", pg); } #endif KASSERT((pg->pg_flags & PG_DEV) == 0); KASSERT(pg->uobject == NULL || UVM_OBJ_IS_DUMMY(pg->uobject) || rw_write_held(pg->uobject->vmobjlock)); KASSERT(pg->uobject != NULL || pg->uanon == NULL || rw_write_held(pg->uanon->an_lock)); /* * if the page was an object page (and thus "TABLED"), remove it * from the object. */ if (pg->pg_flags & PG_TABLED) uvm_pageremove(pg); /* * now remove the page from the queues */ uvm_pagedequeue(pg); /* * if the page was wired, unwire it now. */ if (pg->wire_count) { pg->wire_count = 0; uvmexp.wired--; } if (pg->uanon) { pg->uanon->an_page = NULL; pg->uanon = NULL; } /* Clean page state bits. */ flags_to_clear |= PQ_ANON|PQ_AOBJ|PQ_ENCRYPT|PG_ZERO|PG_FAKE|PG_BUSY| PG_RELEASED|PG_CLEAN|PG_CLEANCHK; atomic_clearbits_int(&pg->pg_flags, flags_to_clear); #ifdef DEBUG pg->uobject = (void *)0xdeadbeef; pg->offset = 0xdeadbeef; pg->uanon = (void *)0xdeadbeef; #endif } /* * uvm_pagefree: free page * * => erase page's identity (i.e. remove from object) * => put page on free list * => caller must lock page queues if `pg' is managed * => assumes all valid mappings of pg are gone */ void uvm_pagefree(struct vm_page *pg) { uvm_pageclean(pg); uvm_pmr_cache_put(pg); } /* * uvm_page_unbusy: unbusy an array of pages. * * => pages must either all belong to the same object, or all belong to anons. * => if pages are object-owned, object must be locked. * => if pages are anon-owned, anons must have 0 refcount. * => caller must make sure that anon-owned pages are not PG_RELEASED. 
*/ void uvm_page_unbusy(struct vm_page **pgs, int npgs) { struct vm_page *pg; int i; for (i = 0; i < npgs; i++) { pg = pgs[i]; if (pg == NULL || pg == PGO_DONTCARE) { continue; } KASSERT(uvm_page_owner_locked_p(pg)); KASSERT(pg->pg_flags & PG_BUSY); if (pg->pg_flags & PG_WANTED) { wakeup(pg); } if (pg->pg_flags & PG_RELEASED) { KASSERT(pg->uobject != NULL || (pg->uanon != NULL && pg->uanon->an_ref > 0)); atomic_clearbits_int(&pg->pg_flags, PG_RELEASED); pmap_page_protect(pg, PROT_NONE); uvm_pagefree(pg); } else { KASSERT((pg->pg_flags & PG_FAKE) == 0); atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); } } } /* * uvm_pagewait: wait for a busy page * * => page must be known PG_BUSY * => object must be locked * => object will be unlocked on return */ void uvm_pagewait(struct vm_page *pg, struct rwlock *lock, const char *wmesg) { KASSERT(rw_lock_held(lock)); KASSERT((pg->pg_flags & PG_BUSY) != 0); atomic_setbits_int(&pg->pg_flags, PG_WANTED); rwsleep_nsec(pg, lock, PVM | PNORELOCK, wmesg, INFSLP); } #if defined(UVM_PAGE_TRKOWN) /* * uvm_page_own: set or release page ownership * * => this is a debugging function that keeps track of who sets PG_BUSY * and where they do it. it can be used to track down problems * such a thread setting "PG_BUSY" and never releasing it. * => if "tag" is NULL then we are releasing page ownership */ void uvm_page_own(struct vm_page *pg, char *tag) { /* gain ownership? */ if (tag) { if (pg->owner_tag) { printf("uvm_page_own: page %p already owned " "by thread %d [%s]\n", pg, pg->owner, pg->owner_tag); panic("uvm_page_own"); } pg->owner = (curproc) ? curproc->p_tid : (pid_t) -1; pg->owner_tag = tag; return; } /* drop ownership */ if (pg->owner_tag == NULL) { printf("uvm_page_own: dropping ownership of an non-owned " "page (%p)\n", pg); panic("uvm_page_own"); } pg->owner_tag = NULL; return; } #endif /* * when VM_PHYSSEG_MAX is 1, we can simplify these functions */ #if VM_PHYSSEG_MAX > 1 /* * vm_physseg_find: find vm_physseg structure that belongs to a PA */ int vm_physseg_find(paddr_t pframe, int *offp) { struct vm_physseg *seg; #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) /* binary search for it */ int start, len, try; /* * if try is too large (thus target is less than try) we reduce * the length to trunc(len/2) [i.e. everything smaller than "try"] * * if the try is too small (thus target is greater than try) then * we set the new start to be (try + 1). this means we need to * reduce the length to (round(len/2) - 1). * * note "adjust" below which takes advantage of the fact that * (round(len/2) - 1) == trunc((len - 1) / 2) * for any value of len we may have */ for (start = 0, len = vm_nphysseg ; len != 0 ; len = len / 2) { try = start + (len / 2); /* try in the middle */ seg = vm_physmem + try; /* start past our try? */ if (pframe >= seg->start) { /* was try correct? */ if (pframe < seg->end) { if (offp) *offp = pframe - seg->start; return try; /* got it */ } start = try + 1; /* next time, start here */ len--; /* "adjust" */ } else { /* * pframe before try, just reduce length of * region, done in "for" loop */ } } return -1; #else /* linear search for it */ int lcv; for (lcv = 0, seg = vm_physmem; lcv < vm_nphysseg ; lcv++, seg++) { if (pframe >= seg->start && pframe < seg->end) { if (offp) *offp = pframe - seg->start; return lcv; /* got it */ } } return -1; #endif } /* * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages * back from an I/O mapping (ugh!). used in some MD code as well. 
*/ struct vm_page * PHYS_TO_VM_PAGE(paddr_t pa) { paddr_t pf = atop(pa); int off; int psi; psi = vm_physseg_find(pf, &off); return (psi == -1) ? NULL : &vm_physmem[psi].pgs[off]; } #endif /* VM_PHYSSEG_MAX > 1 */ /* * uvm_pagelookup: look up a page */ struct vm_page * uvm_pagelookup(struct uvm_object *obj, voff_t off) { /* XXX if stack is too much, handroll */ struct vm_page p, *pg; p.offset = off; pg = RBT_FIND(uvm_objtree, &obj->memt, &p); KASSERT(pg == NULL || obj->uo_npages != 0); KASSERT(pg == NULL || (pg->pg_flags & PG_RELEASED) == 0 || (pg->pg_flags & PG_BUSY) != 0); return (pg); } /* * uvm_pagewire: wire the page, thus removing it from the daemon's grasp * * => caller must lock page queues */ void uvm_pagewire(struct vm_page *pg) { KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); if (pg->wire_count == 0) { uvm_pagedequeue(pg); uvmexp.wired++; } pg->wire_count++; } /* * uvm_pageunwire: unwire the page. * * => activate if wire count goes to zero. * => caller must lock page queues */ void uvm_pageunwire(struct vm_page *pg) { KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); pg->wire_count--; if (pg->wire_count == 0) { uvm_pageactivate(pg); uvmexp.wired--; } } /* * uvm_pagedeactivate: deactivate page -- no pmaps have access to page * * => caller must lock page queues * => caller must check to make sure page is not wired * => object that page belongs to must be locked (so we can adjust pg->flags) */ void uvm_pagedeactivate(struct vm_page *pg) { KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); if (pg->pg_flags & PQ_ACTIVE) { TAILQ_REMOVE(&uvm.page_active, pg, pageq); atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE); uvmexp.active--; } if ((pg->pg_flags & PQ_INACTIVE) == 0) { KASSERT(pg->wire_count == 0); TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq); atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE); uvmexp.inactive++; pmap_clear_reference(pg); /* * update the "clean" bit. this isn't 100% * accurate, and doesn't have to be. we'll * re-sync it after we zap all mappings when * scanning the inactive list. */ if ((pg->pg_flags & PG_CLEAN) != 0 && pmap_is_modified(pg)) atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); } } /* * uvm_pageactivate: activate page * * => caller must lock page queues */ void uvm_pageactivate(struct vm_page *pg) { KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); uvm_pagedequeue(pg); if (pg->wire_count == 0) { TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq); atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE); uvmexp.active++; } } /* * uvm_pagedequeue: remove a page from any paging queue */ void uvm_pagedequeue(struct vm_page *pg) { if (pg->pg_flags & PQ_ACTIVE) { TAILQ_REMOVE(&uvm.page_active, pg, pageq); atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE); uvmexp.active--; } if (pg->pg_flags & PQ_INACTIVE) { TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE); uvmexp.inactive--; } } /* * uvm_pagezero: zero fill a page */ void uvm_pagezero(struct vm_page *pg) { atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); pmap_zero_page(pg); } /* * uvm_pagecopy: copy a page */ void uvm_pagecopy(struct vm_page *src, struct vm_page *dst) { atomic_clearbits_int(&dst->pg_flags, PG_CLEAN); pmap_copy_page(src, dst); } /* * uvm_page_owner_locked_p: return true if object associated with page is * locked. this is a weak check for runtime assertions only. 
*/ int uvm_page_owner_locked_p(struct vm_page *pg) { if (pg->uobject != NULL) { if (UVM_OBJ_IS_DUMMY(pg->uobject)) return 1; return rw_write_held(pg->uobject->vmobjlock); } if (pg->uanon != NULL) { return rw_write_held(pg->uanon->an_lock); } return 1; } /* * uvm_pagecount: count the number of physical pages in the address range. */ psize_t uvm_pagecount(struct uvm_constraint_range* constraint) { int lcv; psize_t sz; paddr_t low, high; paddr_t ps_low, ps_high; /* Algorithm uses page numbers. */ low = atop(constraint->ucr_low); high = atop(constraint->ucr_high); sz = 0; for (lcv = 0; lcv < vm_nphysseg; lcv++) { ps_low = MAX(low, vm_physmem[lcv].avail_start); ps_high = MIN(high, vm_physmem[lcv].avail_end); if (ps_low < ps_high) sz += ps_high - ps_low; } return sz; }
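/*
 * Illustrative sketch, not part of the file above: a stand-alone,
 * userland-style model of the clipping arithmetic that uvm_pagecount()
 * performs for each physical segment.  The "phys_seg" structure and the
 * "count_pages_in_range" name are hypothetical stand-ins for
 * vm_physseg/uvm_pagecount; only the MAX/MIN clipping of the constraint
 * range against [avail_start, avail_end) is taken from the code above.
 * All quantities are page frame numbers, i.e. what atop() returns.
 */
struct phys_seg {
	unsigned long avail_start;	/* first available page frame */
	unsigned long avail_end;	/* one past the last available frame */
};

unsigned long
count_pages_in_range(const struct phys_seg *segs, int nsegs,
    unsigned long low, unsigned long high)
{
	unsigned long sz = 0, lo, hi;
	int i;

	for (i = 0; i < nsegs; i++) {
		/* clip the constraint range against this segment */
		lo = segs[i].avail_start > low ? segs[i].avail_start : low;
		hi = segs[i].avail_end < high ? segs[i].avail_end : high;
		if (lo < hi)
			sz += hi - lo;
	}
	return sz;
}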
/* $OpenBSD: vscsi.c,v 1.63 2024/05/13 01:15:50 jsg Exp $ */ /* * Copyright (c) 2008 David Gwynne <dlg@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/queue.h> #include <sys/rwlock.h> #include <sys/pool.h> #include <sys/task.h> #include <sys/ioctl.h> #include <sys/event.h> #include <scsi/scsi_all.h> #include <scsi/scsiconf.h> #include <dev/vscsivar.h> /* * Locks used to protect struct members and global data * s sc_state_mtx */ int vscsi_match(struct device *, void *, void *); void vscsi_attach(struct device *, struct device *, void *); struct vscsi_ccb { TAILQ_ENTRY(vscsi_ccb) ccb_entry; int ccb_tag; struct scsi_xfer *ccb_xs; size_t ccb_datalen; }; TAILQ_HEAD(vscsi_ccb_list, vscsi_ccb); enum vscsi_state { VSCSI_S_CLOSED, VSCSI_S_CONFIG, VSCSI_S_RUNNING }; struct vscsi_softc { struct device sc_dev; struct scsibus_softc *sc_scsibus; struct mutex sc_state_mtx; enum vscsi_state sc_state; u_int sc_ref_count; struct pool sc_ccb_pool; struct scsi_iopool sc_iopool; struct vscsi_ccb_list sc_ccb_i2t; /* [s] */ struct vscsi_ccb_list sc_ccb_t2i; int sc_ccb_tag; struct mutex sc_poll_mtx; struct rwlock sc_ioc_lock; struct klist sc_klist; /* [s] */ }; #define DEVNAME(_s) ((_s)->sc_dev.dv_xname) #define DEV2SC(_d) ((struct vscsi_softc *)device_lookup(&vscsi_cd, minor(_d))) const struct cfattach vscsi_ca = { sizeof(struct vscsi_softc), vscsi_match, vscsi_attach }; struct cfdriver vscsi_cd = { NULL, "vscsi", DV_DULL }; void vscsi_cmd(struct scsi_xfer *); int vscsi_probe(struct scsi_link *); void vscsi_free(struct scsi_link *); const struct scsi_adapter vscsi_switch = { vscsi_cmd, NULL, vscsi_probe, vscsi_free, NULL }; int vscsi_i2t(struct vscsi_softc *, struct vscsi_ioc_i2t *); int vscsi_data(struct vscsi_softc *, struct vscsi_ioc_data *, int); int vscsi_t2i(struct vscsi_softc *, struct vscsi_ioc_t2i *); int vscsi_devevent(struct vscsi_softc *, u_long, struct vscsi_ioc_devevent *); void vscsi_devevent_task(void *); void vscsi_done(struct vscsi_softc *, struct vscsi_ccb *); void * vscsi_ccb_get(void *); void vscsi_ccb_put(void *, void *); void filt_vscsidetach(struct knote *); int filt_vscsiread(struct knote *, long); int filt_vscsimodify(struct kevent *, struct knote *); int filt_vscsiprocess(struct knote *, struct kevent *); const struct filterops vscsi_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_vscsidetach, .f_event = filt_vscsiread, .f_modify = filt_vscsimodify, .f_process = filt_vscsiprocess, }; int vscsi_match(struct device *parent, void *match, void *aux) { return (1); } void vscsi_attach(struct device *parent, struct device *self, void *aux) { struct vscsi_softc *sc = (struct vscsi_softc *)self; struct scsibus_attach_args saa; printf("\n"); mtx_init(&sc->sc_state_mtx, IPL_MPFLOOR); sc->sc_state = VSCSI_S_CLOSED; TAILQ_INIT(&sc->sc_ccb_i2t); TAILQ_INIT(&sc->sc_ccb_t2i); mtx_init(&sc->sc_poll_mtx, IPL_BIO); rw_init(&sc->sc_ioc_lock, "vscsiioc"); scsi_iopool_init(&sc->sc_iopool, sc, vscsi_ccb_get, vscsi_ccb_put); klist_init_mutex(&sc->sc_klist, &sc->sc_state_mtx); saa.saa_adapter = &vscsi_switch; saa.saa_adapter_softc = sc; saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET; saa.saa_adapter_buswidth = 256; saa.saa_luns = 8; saa.saa_openings = 16; saa.saa_pool = &sc->sc_iopool; saa.saa_quirks = saa.saa_flags = 0; saa.saa_wwpn = saa.saa_wwnn = 0; sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, &saa, scsiprint); } void vscsi_cmd(struct scsi_xfer *xs) { struct scsi_link *link = xs->sc_link; struct vscsi_softc *sc = 
link->bus->sb_adapter_softc; struct vscsi_ccb *ccb = xs->io; int polled = ISSET(xs->flags, SCSI_POLL); int running = 0; if (ISSET(xs->flags, SCSI_POLL) && ISSET(xs->flags, SCSI_NOSLEEP)) { printf("%s: POLL && NOSLEEP for 0x%02x\n", DEVNAME(sc), xs->cmd.opcode); xs->error = XS_DRIVER_STUFFUP; scsi_done(xs); return; } ccb->ccb_xs = xs; mtx_enter(&sc->sc_state_mtx); if (sc->sc_state == VSCSI_S_RUNNING) { running = 1; TAILQ_INSERT_TAIL(&sc->sc_ccb_i2t, ccb, ccb_entry); } knote_locked(&sc->sc_klist, 0); mtx_leave(&sc->sc_state_mtx); if (!running) { xs->error = XS_DRIVER_STUFFUP; scsi_done(xs); return; } if (polled) { mtx_enter(&sc->sc_poll_mtx); while (ccb->ccb_xs != NULL) msleep_nsec(ccb, &sc->sc_poll_mtx, PRIBIO, "vscsipoll", INFSLP); mtx_leave(&sc->sc_poll_mtx); scsi_done(xs); } } void vscsi_done(struct vscsi_softc *sc, struct vscsi_ccb *ccb) { struct scsi_xfer *xs = ccb->ccb_xs; if (ISSET(xs->flags, SCSI_POLL)) { mtx_enter(&sc->sc_poll_mtx); ccb->ccb_xs = NULL; wakeup(ccb); mtx_leave(&sc->sc_poll_mtx); } else scsi_done(xs); } int vscsi_probe(struct scsi_link *link) { struct vscsi_softc *sc = link->bus->sb_adapter_softc; int rv = 0; mtx_enter(&sc->sc_state_mtx); if (sc->sc_state == VSCSI_S_RUNNING) sc->sc_ref_count++; else rv = ENXIO; mtx_leave(&sc->sc_state_mtx); return (rv); } void vscsi_free(struct scsi_link *link) { struct vscsi_softc *sc = link->bus->sb_adapter_softc; mtx_enter(&sc->sc_state_mtx); sc->sc_ref_count--; if (sc->sc_state != VSCSI_S_RUNNING && sc->sc_ref_count == 0) wakeup(&sc->sc_ref_count); mtx_leave(&sc->sc_state_mtx); } int vscsiopen(dev_t dev, int flags, int mode, struct proc *p) { struct vscsi_softc *sc = DEV2SC(dev); enum vscsi_state state = VSCSI_S_RUNNING; int rv = 0; if (sc == NULL) return (ENXIO); mtx_enter(&sc->sc_state_mtx); if (sc->sc_state != VSCSI_S_CLOSED) rv = EBUSY; else sc->sc_state = VSCSI_S_CONFIG; mtx_leave(&sc->sc_state_mtx); if (rv != 0) { device_unref(&sc->sc_dev); return (rv); } pool_init(&sc->sc_ccb_pool, sizeof(struct vscsi_ccb), 0, IPL_BIO, 0, "vscsiccb", NULL); /* we need to guarantee some ccbs will be available for the iopool */ rv = pool_prime(&sc->sc_ccb_pool, 8); if (rv != 0) { pool_destroy(&sc->sc_ccb_pool); state = VSCSI_S_CLOSED; } /* commit changes */ mtx_enter(&sc->sc_state_mtx); sc->sc_state = state; mtx_leave(&sc->sc_state_mtx); device_unref(&sc->sc_dev); return (rv); } int vscsiioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { struct vscsi_softc *sc = DEV2SC(dev); int read = 0; int err = 0; if (sc == NULL) return (ENXIO); rw_enter_write(&sc->sc_ioc_lock); switch (cmd) { case VSCSI_I2T: err = vscsi_i2t(sc, (struct vscsi_ioc_i2t *)addr); break; case VSCSI_DATA_READ: read = 1; case VSCSI_DATA_WRITE: err = vscsi_data(sc, (struct vscsi_ioc_data *)addr, read); break; case VSCSI_T2I: err = vscsi_t2i(sc, (struct vscsi_ioc_t2i *)addr); break; case VSCSI_REQPROBE: case VSCSI_REQDETACH: err = vscsi_devevent(sc, cmd, (struct vscsi_ioc_devevent *)addr); break; default: err = ENOTTY; break; } rw_exit_write(&sc->sc_ioc_lock); device_unref(&sc->sc_dev); return (err); } int vscsi_i2t(struct vscsi_softc *sc, struct vscsi_ioc_i2t *i2t) { struct vscsi_ccb *ccb; struct scsi_xfer *xs; struct scsi_link *link; mtx_enter(&sc->sc_state_mtx); ccb = TAILQ_FIRST(&sc->sc_ccb_i2t); if (ccb != NULL) TAILQ_REMOVE(&sc->sc_ccb_i2t, ccb, ccb_entry); mtx_leave(&sc->sc_state_mtx); if (ccb == NULL) return (EAGAIN); xs = ccb->ccb_xs; link = xs->sc_link; i2t->tag = ccb->ccb_tag; i2t->target = link->target; i2t->lun = link->lun; memcpy(&i2t->cmd, 
&xs->cmd, xs->cmdlen); i2t->cmdlen = xs->cmdlen; i2t->datalen = xs->datalen; switch (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) { case SCSI_DATA_IN: i2t->direction = VSCSI_DIR_READ; break; case SCSI_DATA_OUT: i2t->direction = VSCSI_DIR_WRITE; break; default: i2t->direction = VSCSI_DIR_NONE; break; } TAILQ_INSERT_TAIL(&sc->sc_ccb_t2i, ccb, ccb_entry); return (0); } int vscsi_data(struct vscsi_softc *sc, struct vscsi_ioc_data *data, int read) { struct vscsi_ccb *ccb; struct scsi_xfer *xs; int xsread; u_int8_t *buf; int rv = EINVAL; TAILQ_FOREACH(ccb, &sc->sc_ccb_t2i, ccb_entry) { if (ccb->ccb_tag == data->tag) break; } if (ccb == NULL) return (EFAULT); xs = ccb->ccb_xs; if (data->datalen > xs->datalen - ccb->ccb_datalen) return (ENOMEM); switch (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) { case SCSI_DATA_IN: xsread = 1; break; case SCSI_DATA_OUT: xsread = 0; break; default: return (EINVAL); } if (read != xsread) return (EINVAL); buf = xs->data; buf += ccb->ccb_datalen; if (read) rv = copyin(data->data, buf, data->datalen); else rv = copyout(buf, data->data, data->datalen); if (rv == 0) ccb->ccb_datalen += data->datalen; return (rv); } int vscsi_t2i(struct vscsi_softc *sc, struct vscsi_ioc_t2i *t2i) { struct vscsi_ccb *ccb; struct scsi_xfer *xs; int rv = 0; TAILQ_FOREACH(ccb, &sc->sc_ccb_t2i, ccb_entry) { if (ccb->ccb_tag == t2i->tag) break; } if (ccb == NULL) return (EFAULT); TAILQ_REMOVE(&sc->sc_ccb_t2i, ccb, ccb_entry); xs = ccb->ccb_xs; xs->resid = xs->datalen - ccb->ccb_datalen; xs->status = SCSI_OK; switch (t2i->status) { case VSCSI_STAT_DONE: xs->error = XS_NOERROR; break; case VSCSI_STAT_SENSE: xs->error = XS_SENSE; memcpy(&xs->sense, &t2i->sense, sizeof(xs->sense)); break; case VSCSI_STAT_RESET: xs->error = XS_RESET; break; case VSCSI_STAT_ERR: default: xs->error = XS_DRIVER_STUFFUP; break; } vscsi_done(sc, ccb); return (rv); } struct vscsi_devevent_task { struct vscsi_softc *sc; struct task t; struct vscsi_ioc_devevent de; u_long cmd; }; int vscsi_devevent(struct vscsi_softc *sc, u_long cmd, struct vscsi_ioc_devevent *de) { struct vscsi_devevent_task *dt; dt = malloc(sizeof(*dt), M_TEMP, M_WAITOK | M_CANFAIL); if (dt == NULL) return (ENOMEM); task_set(&dt->t, vscsi_devevent_task, dt); dt->sc = sc; dt->de = *de; dt->cmd = cmd; device_ref(&sc->sc_dev); task_add(systq, &dt->t); return (0); } void vscsi_devevent_task(void *xdt) { struct vscsi_devevent_task *dt = xdt; struct vscsi_softc *sc = dt->sc; int state; mtx_enter(&sc->sc_state_mtx); state = sc->sc_state; mtx_leave(&sc->sc_state_mtx); if (state != VSCSI_S_RUNNING) goto gone; switch (dt->cmd) { case VSCSI_REQPROBE: scsi_probe(sc->sc_scsibus, dt->de.target, dt->de.lun); break; case VSCSI_REQDETACH: scsi_detach(sc->sc_scsibus, dt->de.target, dt->de.lun, DETACH_FORCE); break; #ifdef DIAGNOSTIC default: panic("unexpected vscsi_devevent cmd"); /* NOTREACHED */ #endif } gone: device_unref(&sc->sc_dev); free(dt, M_TEMP, sizeof(*dt)); } int vscsikqfilter(dev_t dev, struct knote *kn) { struct vscsi_softc *sc = DEV2SC(dev); if (sc == NULL) return (ENXIO); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vscsi_filtops; break; default: device_unref(&sc->sc_dev); return (EINVAL); } kn->kn_hook = sc; klist_insert(&sc->sc_klist, kn); /* device ref is given to the knote in the klist */ return (0); } void filt_vscsidetach(struct knote *kn) { struct vscsi_softc *sc = kn->kn_hook; klist_remove(&sc->sc_klist, kn); device_unref(&sc->sc_dev); } int filt_vscsiread(struct knote *kn, long hint) { struct vscsi_softc *sc = kn->kn_hook; return 
(!TAILQ_EMPTY(&sc->sc_ccb_i2t)); } int filt_vscsimodify(struct kevent *kev, struct knote *kn) { struct vscsi_softc *sc = kn->kn_hook; int active; mtx_enter(&sc->sc_state_mtx); active = knote_modify(kev, kn); mtx_leave(&sc->sc_state_mtx); return (active); } int filt_vscsiprocess(struct knote *kn, struct kevent *kev) { struct vscsi_softc *sc = kn->kn_hook; int active; mtx_enter(&sc->sc_state_mtx); active = knote_process(kn, kev); mtx_leave(&sc->sc_state_mtx); return (active); } int vscsiclose(dev_t dev, int flags, int mode, struct proc *p) { struct vscsi_softc *sc = DEV2SC(dev); struct vscsi_ccb *ccb; if (sc == NULL) return (ENXIO); mtx_enter(&sc->sc_state_mtx); KASSERT(sc->sc_state == VSCSI_S_RUNNING); sc->sc_state = VSCSI_S_CONFIG; mtx_leave(&sc->sc_state_mtx); scsi_activate(sc->sc_scsibus, -1, -1, DVACT_DEACTIVATE); while ((ccb = TAILQ_FIRST(&sc->sc_ccb_t2i)) != NULL) { TAILQ_REMOVE(&sc->sc_ccb_t2i, ccb, ccb_entry); ccb->ccb_xs->error = XS_RESET; vscsi_done(sc, ccb); } while ((ccb = TAILQ_FIRST(&sc->sc_ccb_i2t)) != NULL) { TAILQ_REMOVE(&sc->sc_ccb_i2t, ccb, ccb_entry); ccb->ccb_xs->error = XS_RESET; vscsi_done(sc, ccb); } scsi_req_detach(sc->sc_scsibus, -1, -1, DETACH_FORCE); mtx_enter(&sc->sc_state_mtx); while (sc->sc_ref_count > 0) { msleep_nsec(&sc->sc_ref_count, &sc->sc_state_mtx, PRIBIO, "vscsiref", INFSLP); } mtx_leave(&sc->sc_state_mtx); pool_destroy(&sc->sc_ccb_pool); mtx_enter(&sc->sc_state_mtx); sc->sc_state = VSCSI_S_CLOSED; mtx_leave(&sc->sc_state_mtx); device_unref(&sc->sc_dev); return (0); } void * vscsi_ccb_get(void *cookie) { struct vscsi_softc *sc = cookie; struct vscsi_ccb *ccb = NULL; ccb = pool_get(&sc->sc_ccb_pool, PR_NOWAIT); if (ccb != NULL) { ccb->ccb_tag = sc->sc_ccb_tag++; ccb->ccb_datalen = 0; } return (ccb); } void vscsi_ccb_put(void *cookie, void *io) { struct vscsi_softc *sc = cookie; struct vscsi_ccb *ccb = io; pool_put(&sc->sc_ccb_pool, ccb); }
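/*
 * Illustrative sketch, not part of the driver above: the shape of the
 * userland side of the i2t/t2i handshake that vscsiioctl() services.
 * The ioctl names and the vscsi_ioc_* structures and members are the
 * ones used in the driver code above (<dev/vscsivar.h>); everything
 * else -- the include list, the fixed 512-byte buffer, answering every
 * data-in command with zeroes -- is an assumption for illustration, not
 * a description of a real target such as iscsid(8).
 */
#include <sys/types.h>
#include <sys/ioctl.h>

#include <scsi/scsi_all.h>
#include <dev/vscsivar.h>

#include <string.h>

int
serve_one_command(int devfd)
{
	struct vscsi_ioc_i2t i2t;
	struct vscsi_ioc_data data;
	struct vscsi_ioc_t2i t2i;
	char buf[512];

	/* take one initiator-to-target command off the i2t queue */
	if (ioctl(devfd, VSCSI_I2T, &i2t) == -1)
		return (-1);

	/*
	 * For a data-in command the target supplies the data with
	 * VSCSI_DATA_READ; the driver copyin()s it into xs->data and
	 * accounts for it in ccb_datalen.  Anything not transferred
	 * before completion shows up as xs->resid in vscsi_t2i().
	 */
	if (i2t.direction == VSCSI_DIR_READ) {
		memset(buf, 0, sizeof(buf));
		memset(&data, 0, sizeof(data));
		data.tag = i2t.tag;
		data.data = buf;
		data.datalen = i2t.datalen;
		if (data.datalen > sizeof(buf))
			data.datalen = sizeof(buf);
		if (ioctl(devfd, VSCSI_DATA_READ, &data) == -1)
			return (-1);
	}

	/* complete the command; this lands in vscsi_t2i()/vscsi_done() */
	memset(&t2i, 0, sizeof(t2i));
	t2i.tag = i2t.tag;
	t2i.status = VSCSI_STAT_DONE;
	return (ioctl(devfd, VSCSI_T2I, &t2i));
}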
/* $OpenBSD: sys_generic.c,v 1.157 2024/04/10 10:05:26 claudio Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* * Copyright (c) 1996 Theo de Raadt * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/filedesc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/vnode.h> #include <sys/file.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/socketvar.h> #include <sys/signalvar.h> #include <sys/uio.h> #include <sys/time.h> #include <sys/malloc.h> #include <sys/poll.h> #include <sys/eventvar.h> #ifdef KTRACE #include <sys/ktrace.h> #endif #include <sys/pledge.h> #include <sys/mount.h> #include <sys/syscallargs.h> /* * Debug values: * 1 - print implementation errors, things that should not happen. * 2 - print ppoll(2) information, somewhat verbose * 3 - print pselect(2) and ppoll(2) information, very verbose */ int kqpoll_debug = 0; #define DPRINTFN(v, x...) if (kqpoll_debug > v) { \ printf("%s(%d): ", curproc->p_p->ps_comm, curproc->p_tid); \ printf(x); \ } int pselregister(struct proc *, fd_set **, fd_set **, int, int *, int *); int pselcollect(struct proc *, struct kevent *, fd_set **, int *); void ppollregister(struct proc *, struct pollfd *, int, int *, int *); int ppollcollect(struct proc *, struct kevent *, struct pollfd *, u_int); int pollout(struct pollfd *, struct pollfd *, u_int); int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, struct timespec *, const sigset_t *, register_t *); int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *, const sigset_t *, register_t *); int iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, unsigned int iovcnt, size_t *residp) { #ifdef KTRACE struct proc *p = curproc; #endif struct iovec *iov; int error, i; size_t resid = 0; if (iovcnt > UIO_SMALLIOV) { if (iovcnt > IOV_MAX) return (EINVAL); iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); } else if (iovcnt > 0) { iov = aiov; } else { return (EINVAL); } *iovp = iov; if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktriovec(p, iov, iovcnt); #endif for (i = 0; i < iovcnt; i++) { resid += iov->iov_len; /* * Writes return ssize_t because -1 is returned on error. * Therefore we must restrict the length to SSIZE_MAX to * avoid garbage return values. Note that the addition is * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. */ if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) return (EINVAL); iov++; } if (residp != NULL) *residp = resid; return (0); } void iovec_free(struct iovec *iov, unsigned int iovcnt) { if (iovcnt > UIO_SMALLIOV) free(iov, M_IOV, iovcnt * sizeof(*iov)); } /* * Read system call. 
*/ int sys_read(struct proc *p, void *v, register_t *retval) { struct sys_read_args /* { syscallarg(int) fd; syscallarg(void *) buf; syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; struct uio auio; iov.iov_base = SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); if (iov.iov_len > SSIZE_MAX) return (EINVAL); auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_resid = iov.iov_len; return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); } /* * Scatter read system call. */ int sys_readv(struct proc *p, void *v, register_t *retval) { struct sys_readv_args /* { syscallarg(int) fd; syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; struct iovec aiov[UIO_SMALLIOV], *iov = NULL; int error, iovcnt = SCARG(uap, iovcnt); struct uio auio; size_t resid; error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); if (error) goto done; auio.uio_iov = iov; auio.uio_iovcnt = iovcnt; auio.uio_resid = resid; error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); done: iovec_free(iov, iovcnt); return (error); } int dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); iovlen = uio->uio_iovcnt * sizeof(struct iovec); if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) return (EBADF); /* Checks for positioned read. */ if (flags & FO_POSITION) { struct vnode *vp = fp->f_data; if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || (vp->v_flag & VISTTY)) { error = ESPIPE; goto done; } if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } } uio->uio_rw = UIO_READ; uio->uio_segflg = UIO_USERSPACE; uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); memcpy(ktriov, uio->uio_iov, iovlen); } #endif cnt = uio->uio_resid; error = (*fp->f_ops->fo_read)(fp, uio, flags); if (error) { if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_rxfer++; fp->f_rbytes += cnt; mtx_leave(&fp->f_mtx); #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p, fd, UIO_READ, ktriov, cnt); free(ktriov, M_TEMP, iovlen); } #endif *retval = cnt; done: FRELE(fp, p); return (error); } /* * Write system call */ int sys_write(struct proc *p, void *v, register_t *retval) { struct sys_write_args /* { syscallarg(int) fd; syscallarg(const void *) buf; syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; struct uio auio; iov.iov_base = (void *)SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); if (iov.iov_len > SSIZE_MAX) return (EINVAL); auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_resid = iov.iov_len; return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); } /* * Gather write system call */ int sys_writev(struct proc *p, void *v, register_t *retval) { struct sys_writev_args /* { syscallarg(int) fd; syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; struct iovec aiov[UIO_SMALLIOV], *iov = NULL; int error, iovcnt = SCARG(uap, iovcnt); struct uio auio; size_t resid; error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); if (error) goto done; auio.uio_iov = iov; auio.uio_iovcnt = iovcnt; auio.uio_resid = resid; error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); done: iovec_free(iov, iovcnt); return 
(error); } int dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); iovlen = uio->uio_iovcnt * sizeof(struct iovec); if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) return (EBADF); /* Checks for positioned write. */ if (flags & FO_POSITION) { struct vnode *vp = fp->f_data; if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || (vp->v_flag & VISTTY)) { error = ESPIPE; goto done; } if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } } uio->uio_rw = UIO_WRITE; uio->uio_segflg = UIO_USERSPACE; uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); memcpy(ktriov, uio->uio_iov, iovlen); } #endif cnt = uio->uio_resid; error = (*fp->f_ops->fo_write)(fp, uio, flags); if (error) { if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) { KERNEL_LOCK(); ptsignal(p, SIGPIPE, STHREAD); KERNEL_UNLOCK(); } } cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_wxfer++; fp->f_wbytes += cnt; mtx_leave(&fp->f_mtx); #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); free(ktriov, M_TEMP, iovlen); } #endif *retval = cnt; done: FRELE(fp, p); return (error); } /* * Ioctl system call */ int sys_ioctl(struct proc *p, void *v, register_t *retval) { struct sys_ioctl_args /* { syscallarg(int) fd; syscallarg(u_long) com; syscallarg(void *) data; } */ *uap = v; struct file *fp; struct filedesc *fdp = p->p_fd; u_long com = SCARG(uap, com); int error = 0; u_int size = 0; caddr_t data, memp = NULL; int tmp; #define STK_PARAMS 128 long long stkbuf[STK_PARAMS / sizeof(long long)]; if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) return (EBADF); if (fp->f_type == DTYPE_SOCKET) { struct socket *so = fp->f_data; if (so->so_state & SS_DNS) { error = EINVAL; goto out; } } error = pledge_ioctl(p, com, fp); if (error) goto out; switch (com) { case FIONCLEX: case FIOCLEX: fdplock(fdp); if (com == FIONCLEX) fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; else fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; fdpunlock(fdp); goto out; } /* * Interpret high order word to find amount of data to be * copied to/from the user's address space. */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) { error = ENOTTY; goto out; } if (size > sizeof (stkbuf)) { memp = malloc(size, M_IOCTLOPS, M_WAITOK); data = memp; } else data = (caddr_t)stkbuf; if (com&IOC_IN) { if (size) { error = copyin(SCARG(uap, data), data, size); if (error) { goto out; } } else *(caddr_t *)data = SCARG(uap, data); } else if ((com&IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. 
*/ memset(data, 0, size); else if (com&IOC_VOID) *(caddr_t *)data = SCARG(uap, data); switch (com) { case FIONBIO: if ((tmp = *(int *)data) != 0) atomic_setbits_int(&fp->f_flag, FNONBLOCK); else atomic_clearbits_int(&fp->f_flag, FNONBLOCK); error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); break; case FIOASYNC: if ((tmp = *(int *)data) != 0) atomic_setbits_int(&fp->f_flag, FASYNC); else atomic_clearbits_int(&fp->f_flag, FASYNC); error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); break; default: error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); break; } /* * Copy any data to user, size was * already set and checked above. */ if (error == 0 && (com&IOC_OUT) && size) error = copyout(data, SCARG(uap, data), size); out: FRELE(fp, p); free(memp, M_IOCTLOPS, size); return (error); } /* * Select system call. */ int sys_select(struct proc *p, void *v, register_t *retval) { struct sys_select_args /* { syscallarg(int) nd; syscallarg(fd_set *) in; syscallarg(fd_set *) ou; syscallarg(fd_set *) ex; syscallarg(struct timeval *) tv; } */ *uap = v; struct timespec ts, *tsp = NULL; int error; if (SCARG(uap, tv) != NULL) { struct timeval tv; if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimeval(p, &tv); #endif if (tv.tv_sec < 0 || !timerisvalid(&tv)) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tv, &ts); tsp = &ts; } return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), SCARG(uap, ex), tsp, NULL, retval)); } int sys_pselect(struct proc *p, void *v, register_t *retval) { struct sys_pselect_args /* { syscallarg(int) nd; syscallarg(fd_set *) in; syscallarg(fd_set *) ou; syscallarg(fd_set *) ex; syscallarg(const struct timespec *) ts; syscallarg(const sigset_t *) mask; } */ *uap = v; struct timespec ts, *tsp = NULL; sigset_t ss, *ssp = NULL; int error; if (SCARG(uap, ts) != NULL) { if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) return (EINVAL); tsp = &ts; } if (SCARG(uap, mask) != NULL) { if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) return (error); ssp = &ss; } return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), SCARG(uap, ex), tsp, ssp, retval)); } int dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timespec *timeout, const sigset_t *sigmask, register_t *retval) { struct kqueue_scan_state scan; struct timespec zerots = {}; fd_mask bits[6]; fd_set *pibits[3], *pobits[3]; int error, nfiles, ncollected = 0, nevents = 0; u_int ni; if (nd < 0) return (EINVAL); nfiles = READ_ONCE(p->p_fd->fd_nfiles); if (nd > nfiles) nd = nfiles; ni = howmany(nd, NFDBITS) * sizeof(fd_mask); if (ni > sizeof(bits[0])) { caddr_t mbits; mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); pibits[0] = (fd_set *)&mbits[ni * 0]; pibits[1] = (fd_set *)&mbits[ni * 1]; pibits[2] = (fd_set *)&mbits[ni * 2]; pobits[0] = (fd_set *)&mbits[ni * 3]; pobits[1] = (fd_set *)&mbits[ni * 4]; pobits[2] = (fd_set *)&mbits[ni * 5]; } else { memset(bits, 0, sizeof(bits)); pibits[0] = (fd_set *)&bits[0]; pibits[1] = (fd_set *)&bits[1]; pibits[2] = (fd_set *)&bits[2]; pobits[0] = (fd_set *)&bits[3]; pobits[1] = (fd_set *)&bits[4]; pobits[2] = (fd_set *)&bits[5]; } kqpoll_init(nd); #define getbits(name, x) \ if (name && (error = copyin(name, pibits[x], ni))) \ goto done; getbits(in, 0); getbits(ou, 1); getbits(ex, 2); #undef getbits #ifdef KTRACE 
if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { if (in) ktrfdset(p, pibits[0], ni); if (ou) ktrfdset(p, pibits[1], ni); if (ex) ktrfdset(p, pibits[2], ni); } #endif if (sigmask) dosigsuspend(p, *sigmask &~ sigcantmask); /* Register kqueue events */ error = pselregister(p, pibits, pobits, nd, &nevents, &ncollected); if (error != 0) goto done; /* * The poll/select family of syscalls has been designed to * block when file descriptors are not available, even if * there's nothing to wait for. */ if (nevents == 0 && ncollected == 0) { uint64_t nsecs = INFSLP; if (timeout != NULL) { if (!timespecisset(timeout)) goto done; nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP)); } error = tsleep_nsec(&nowake, PSOCK | PCATCH, "kqsel", nsecs); /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; goto done; } /* Do not block if registering found pending events. */ if (ncollected > 0) timeout = &zerots; /* Collect at most `nevents' possibly waiting in kqueue_scan() */ kqueue_scan_setup(&scan, p->p_kq); while (nevents > 0) { struct kevent kev[KQ_NEVENTS]; int i, ready, count; /* Maximum number of events per iteration */ count = MIN(nitems(kev), nevents); ready = kqueue_scan(&scan, count, kev, timeout, p, &error); /* Convert back events that are ready. */ for (i = 0; i < ready && error == 0; i++) error = pselcollect(p, &kev[i], pobits, &ncollected); /* * Stop if there was an error or if we had enough * space to collect all events that were ready. */ if (error || ready < count) break; nevents -= ready; } kqueue_scan_finish(&scan); *retval = ncollected; done: #define putbits(name, x) \ if (name && (error2 = copyout(pobits[x], name, ni))) \ error = error2; if (error == 0) { int error2; putbits(in, 0); putbits(ou, 1); putbits(ex, 2); #undef putbits #ifdef KTRACE if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { if (in) ktrfdset(p, pobits[0], ni); if (ou) ktrfdset(p, pobits[1], ni); if (ex) ktrfdset(p, pobits[2], ni); } #endif } if (pibits[0] != (fd_set *)&bits[0]) free(pibits[0], M_TEMP, 6 * ni); kqpoll_done(nd); return (error); } /* * Convert fd_set into kqueue events and register them on the * per-thread queue. */ int pselregister(struct proc *p, fd_set *pibits[3], fd_set *pobits[3], int nfd, int *nregistered, int *ncollected) { static const int evf[] = { EVFILT_READ, EVFILT_WRITE, EVFILT_EXCEPT }; static const int evff[] = { 0, 0, NOTE_OOB }; int msk, i, j, fd, nevents = 0, error = 0; struct kevent kev; fd_mask bits; for (msk = 0; msk < 3; msk++) { for (i = 0; i < nfd; i += NFDBITS) { bits = pibits[msk]->fds_bits[i / NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); DPRINTFN(2, "select fd %d mask %d serial %lu\n", fd, msk, p->p_kq_serial); EV_SET(&kev, fd, evf[msk], EV_ADD|EV_ENABLE|__EV_SELECT, evff[msk], 0, (void *)(p->p_kq_serial)); error = kqueue_register(p->p_kq, &kev, 0, p); switch (error) { case 0: nevents++; /* FALLTHROUGH */ case EOPNOTSUPP:/* No underlying kqfilter */ case EINVAL: /* Unimplemented filter */ case EPERM: /* Specific to FIFO and * __EV_SELECT */ error = 0; break; case ENXIO: /* Device has been detached */ default: goto bad; } } } } *nregistered = nevents; return (0); bad: DPRINTFN(0, "select fd %u filt %d error %d\n", (int)kev.ident, kev.filter, error); return (error); } /* * Convert given kqueue event into corresponding select(2) bit. 
*/ int pselcollect(struct proc *p, struct kevent *kevp, fd_set *pobits[3], int *ncollected) { if ((unsigned long)kevp->udata != p->p_kq_serial) { panic("%s: spurious kevp %p fd %d udata 0x%lx serial 0x%lx", __func__, kevp, (int)kevp->ident, (unsigned long)kevp->udata, p->p_kq_serial); } if (kevp->flags & EV_ERROR) { DPRINTFN(2, "select fd %d filt %d error %d\n", (int)kevp->ident, kevp->filter, (int)kevp->data); return (kevp->data); } switch (kevp->filter) { case EVFILT_READ: FD_SET(kevp->ident, pobits[0]); break; case EVFILT_WRITE: FD_SET(kevp->ident, pobits[1]); break; case EVFILT_EXCEPT: FD_SET(kevp->ident, pobits[2]); break; default: KASSERT(0); } (*ncollected)++; DPRINTFN(2, "select fd %d filt %d\n", (int)kevp->ident, kevp->filter); return (0); } /* * Do a wakeup when a selectable event occurs. */ void selwakeup(struct selinfo *sip) { KERNEL_LOCK(); knote_locked(&sip->si_note, NOTE_SUBMIT); KERNEL_UNLOCK(); } /* * Only copyout the revents field. */ int pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) { int error = 0; u_int i = 0; while (!error && i++ < nfds) { error = copyout(&pl->revents, &upl->revents, sizeof(upl->revents)); pl++; upl++; } return (error); } /* * We are using the same mechanism as select only we encode/decode args * differently. */ int sys_poll(struct proc *p, void *v, register_t *retval) { struct sys_poll_args /* { syscallarg(struct pollfd *) fds; syscallarg(u_int) nfds; syscallarg(int) timeout; } */ *uap = v; struct timespec ts, *tsp = NULL; int msec = SCARG(uap, timeout); if (msec != INFTIM) { if (msec < 0) return (EINVAL); ts.tv_sec = msec / 1000; ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; tsp = &ts; } return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, retval)); } int sys_ppoll(struct proc *p, void *v, register_t *retval) { struct sys_ppoll_args /* { syscallarg(struct pollfd *) fds; syscallarg(u_int) nfds; syscallarg(const struct timespec *) ts; syscallarg(const sigset_t *) mask; } */ *uap = v; int error; struct timespec ts, *tsp = NULL; sigset_t ss, *ssp = NULL; if (SCARG(uap, ts) != NULL) { if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) return (EINVAL); tsp = &ts; } if (SCARG(uap, mask) != NULL) { if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) return (error); ssp = &ss; } return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, retval)); } int doppoll(struct proc *p, struct pollfd *fds, u_int nfds, struct timespec *timeout, const sigset_t *sigmask, register_t *retval) { struct kqueue_scan_state scan; struct timespec zerots = {}; struct pollfd pfds[4], *pl = pfds; int error, ncollected = 0, nevents = 0; size_t sz; /* Standards say no more than MAX_OPEN; this is possibly better. */ if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles)) return (EINVAL); /* optimize for the default case, of a small nfds value */ if (nfds > nitems(pfds)) { pl = mallocarray(nfds, sizeof(*pl), M_TEMP, M_WAITOK | M_CANFAIL); if (pl == NULL) return (EINVAL); } kqpoll_init(nfds); sz = nfds * sizeof(*pl); if ((error = copyin(fds, pl, sz)) != 0) goto bad; if (sigmask) dosigsuspend(p, *sigmask &~ sigcantmask); /* Register kqueue events */ ppollregister(p, pl, nfds, &nevents, &ncollected); /* * The poll/select family of syscalls has been designed to * block when file descriptors are not available, even if * there's nothing to wait for. 
*/ if (nevents == 0 && ncollected == 0) { uint64_t nsecs = INFSLP; if (timeout != NULL) { if (!timespecisset(timeout)) goto done; nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP)); } error = tsleep_nsec(&nowake, PSOCK | PCATCH, "kqpoll", nsecs); if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; goto done; } /* Do not block if registering found pending events. */ if (ncollected > 0) timeout = &zerots; /* Collect at most `nevents' possibly waiting in kqueue_scan() */ kqueue_scan_setup(&scan, p->p_kq); while (nevents > 0) { struct kevent kev[KQ_NEVENTS]; int i, ready, count; /* Maximum number of events per iteration */ count = MIN(nitems(kev), nevents); ready = kqueue_scan(&scan, count, kev, timeout, p, &error); /* Convert back events that are ready. */ for (i = 0; i < ready; i++) ncollected += ppollcollect(p, &kev[i], pl, nfds); /* * Stop if there was an error or if we had enough * place to collect all events that were ready. */ if (error || ready < count) break; nevents -= ready; } kqueue_scan_finish(&scan); *retval = ncollected; done: /* * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is * ignored (since the whole point is to see what would block). */ switch (error) { case EINTR: error = pollout(pl, fds, nfds); if (error == 0) error = EINTR; break; case EWOULDBLOCK: case 0: error = pollout(pl, fds, nfds); break; } #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrpollfd(p, pl, nfds); #endif /* KTRACE */ bad: if (pl != pfds) free(pl, M_TEMP, sz); kqpoll_done(nfds); return (error); } int ppollregister_evts(struct proc *p, struct kevent *kevp, int nkev, struct pollfd *pl, unsigned int pollid) { int i, error, nevents = 0; KASSERT(pl->revents == 0); for (i = 0; i < nkev; i++, kevp++) { again: error = kqueue_register(p->p_kq, kevp, pollid, p); switch (error) { case 0: nevents++; break; case EOPNOTSUPP:/* No underlying kqfilter */ case EINVAL: /* Unimplemented filter */ break; case EBADF: /* Bad file descriptor */ pl->revents |= POLLNVAL; break; case EPERM: /* Specific to FIFO */ KASSERT(kevp->filter == EVFILT_WRITE); if (nkev == 1) { /* * If this is the only filter make sure * POLLHUP is passed to userland. */ kevp->filter = EVFILT_EXCEPT; goto again; } break; default: DPRINTFN(0, "poll err %lu fd %d revents %02x serial" " %lu filt %d ERROR=%d\n", ((unsigned long)kevp->udata - p->p_kq_serial), pl->fd, pl->revents, p->p_kq_serial, kevp->filter, error); /* FALLTHROUGH */ case ENXIO: /* Device has been detached */ pl->revents |= POLLERR; break; } } return (nevents); } /* * Convert pollfd into kqueue events and register them on the * per-thread queue. * * At most 3 events can correspond to a single pollfd. */ void ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered, int *ncollected) { int i, nkev, nevt, forcehup; struct kevent kev[3], *kevp; for (i = 0; i < nfds; i++) { pl[i].events &= ~POLL_NOHUP; pl[i].revents = 0; if (pl[i].fd < 0) continue; /* * POLLHUP checking is implicit in the event filters. * However, the checking must be even if no events are * requested. 
*/ forcehup = ((pl[i].events & ~POLLHUP) == 0); DPRINTFN(1, "poll set %d/%d fd %d events %02x serial %lu\n", i+1, nfds, pl[i].fd, pl[i].events, p->p_kq_serial); nevt = 0; nkev = 0; kevp = kev; if (pl[i].events & (POLLIN | POLLRDNORM)) { EV_SET(kevp, pl[i].fd, EVFILT_READ, EV_ADD|EV_ENABLE|__EV_POLL, 0, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; } if (pl[i].events & (POLLOUT | POLLWRNORM)) { EV_SET(kevp, pl[i].fd, EVFILT_WRITE, EV_ADD|EV_ENABLE|__EV_POLL, 0, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; } if ((pl[i].events & (POLLPRI | POLLRDBAND)) || forcehup) { int evff = forcehup ? 0 : NOTE_OOB; EV_SET(kevp, pl[i].fd, EVFILT_EXCEPT, EV_ADD|EV_ENABLE|__EV_POLL, evff, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; } if (nkev == 0) continue; *nregistered += ppollregister_evts(p, kev, nkev, &pl[i], i); if (pl[i].revents != 0) (*ncollected)++; } DPRINTFN(1, "poll registered = %d, collected = %d\n", *nregistered, *ncollected); } /* * Convert given kqueue event into corresponding poll(2) revents bit. */ int ppollcollect(struct proc *p, struct kevent *kevp, struct pollfd *pl, u_int nfds) { static struct timeval poll_errintvl = { 5, 0 }; static struct timeval poll_lasterr; int already_seen; unsigned long i; /* Extract poll array index */ i = (unsigned long)kevp->udata - p->p_kq_serial; if (i >= nfds) { panic("%s: spurious kevp %p nfds %u udata 0x%lx serial 0x%lx", __func__, kevp, nfds, (unsigned long)kevp->udata, p->p_kq_serial); } if ((int)kevp->ident != pl[i].fd) { panic("%s: kevp %p %lu/%d mismatch fd %d!=%d serial 0x%lx", __func__, kevp, i + 1, nfds, (int)kevp->ident, pl[i].fd, p->p_kq_serial); } /* * A given descriptor may already have generated an error * against another filter during kqueue_register(). * * Make sure to set the appropriate flags but do not * increment `*retval' more than once. */ already_seen = (pl[i].revents != 0); /* POLLNVAL preempts other events. */ if ((kevp->flags & EV_ERROR) && kevp->data == EBADF) { pl[i].revents = POLLNVAL; goto done; } else if (pl[i].revents & POLLNVAL) { goto done; } switch (kevp->filter) { case EVFILT_READ: if (kevp->flags & __EV_HUP) pl[i].revents |= POLLHUP; if (pl[i].events & (POLLIN | POLLRDNORM)) pl[i].revents |= pl[i].events & (POLLIN | POLLRDNORM); break; case EVFILT_WRITE: /* POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */ if (kevp->flags & __EV_HUP) { pl[i].revents |= POLLHUP; } else if (pl[i].events & (POLLOUT | POLLWRNORM)) { pl[i].revents |= pl[i].events & (POLLOUT | POLLWRNORM); } break; case EVFILT_EXCEPT: if (kevp->flags & __EV_HUP) { if (pl[i].events != 0 && pl[i].events != POLLOUT) DPRINTFN(0, "weird events %x\n", pl[i].events); pl[i].revents |= POLLHUP; break; } if (pl[i].events & (POLLPRI | POLLRDBAND)) pl[i].revents |= pl[i].events & (POLLPRI | POLLRDBAND); break; default: KASSERT(0); } done: DPRINTFN(1, "poll get %lu/%d fd %d revents %02x serial %lu filt %d\n", i+1, nfds, pl[i].fd, pl[i].revents, (unsigned long)kevp->udata, kevp->filter); /* * Make noise about unclaimed events as they might indicate a bug * and can result in spurious-looking wakeups of poll(2). * * Live-locking within the system call should not happen because * the scan loop in doppoll() has an upper limit for the number * of events to process. 
*/ if (pl[i].revents == 0 && ratecheck(&poll_lasterr, &poll_errintvl)) { printf("%s[%d]: poll index %lu fd %d events 0x%x " "filter %d/0x%x unclaimed\n", p->p_p->ps_comm, p->p_tid, i, pl[i].fd, pl[i].events, kevp->filter, kevp->flags); } if (!already_seen && (pl[i].revents != 0)) return (1); return (0); } /* * utrace system call */ int sys_utrace(struct proc *curp, void *v, register_t *retval) { #ifdef KTRACE struct sys_utrace_args /* { syscallarg(const char *) label; syscallarg(const void *) addr; syscallarg(size_t) len; } */ *uap = v; return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len))); #else return (0); #endif }
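/*
 * Illustrative userland sketch, not part of sys_generic.c: a pipe is fed
 * with writev(2) (the gather path through sys_writev()/dofilewritev()),
 * drained with readv(2) (sys_readv()/dofilereadv()) and watched with
 * poll(2), which the code above implements on top of per-thread kqueue
 * filters.  Once the write side is closed, the read filter reports hang-up
 * and ppollcollect() maps that to POLLHUP.  Buffer sizes and names are
 * illustrative assumptions; the exact revents bits can vary with the
 * descriptor type.
 */
#include <sys/types.h>
#include <sys/uio.h>

#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char hdr[] = "hdr:", body[] = "body\n", in1[4], in2[16];
	struct iovec ov[2], iv[2];
	struct pollfd pfd;
	int fds[2];

	if (pipe(fds) == -1)
		return 1;

	/* Gather write: two user buffers, one system call. */
	ov[0].iov_base = hdr;
	ov[0].iov_len = strlen(hdr);
	ov[1].iov_base = body;
	ov[1].iov_len = strlen(body);
	if (writev(fds[1], ov, 2) == -1)
		return 1;

	/* poll() sees the data through the kqueue-backed read filter. */
	pfd.fd = fds[0];
	pfd.events = POLLIN;
	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN)) {
		/* Scatter read back into two buffers. */
		iv[0].iov_base = in1;
		iv[0].iov_len = sizeof(in1);
		iv[1].iov_base = in2;
		iv[1].iov_len = sizeof(in2);
		printf("readv: %zd bytes\n", readv(fds[0], iv, 2));
	}

	/* Closing the writer shows up as POLLHUP on the next poll(). */
	close(fds[1]);
	if (poll(&pfd, 1, 1000) > 0)
		printf("revents 0x%x%s\n", (unsigned int)pfd.revents,
		    (pfd.revents & POLLHUP) ? " (POLLHUP)" : "");

	close(fds[0]);
	return 0;
}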
/* $OpenBSD: vfs_getcwd.c,v 1.38 2022/12/05 23:18:37 deraadt Exp $ */ /* $NetBSD: vfs_getcwd.c,v 1.3.2.3 1999/07/11 10:24:09 sommerfeld Exp $ */ /* * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Bill Sommerfeld. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/filedesc.h> #include <sys/stat.h> #include <sys/lock.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/ktrace.h> #include <sys/proc.h> #include <sys/uio.h> #include <sys/malloc.h> #include <sys/dirent.h> #include <ufs/ufs/dir.h> /* only for DIRBLKSIZ */ #include <sys/syscallargs.h> /* Find parent vnode of *lvpp, return in *uvpp */ int vfs_getcwd_scandir(struct vnode **lvpp, struct vnode **uvpp, char **bpp, char *bufp, struct proc *p) { int eofflag, tries, dirbuflen = 0, len, reclen, error = 0; off_t off; struct uio uio; struct iovec iov; char *dirbuf = NULL; ino_t fileno; struct vattr va; struct vnode *uvp = NULL; struct vnode *lvp = *lvpp; struct componentname cn; tries = 0; /* * If we want the filename, get some info we need while the * current directory is still locked. */ if (bufp != NULL) { error = VOP_GETATTR(lvp, &va, p->p_ucred, p); if (error) { vput(lvp); *lvpp = NULL; *uvpp = NULL; return (error); } } cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | ISDOTDOT | RDONLY; cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_pnbuf = NULL; cn.cn_nameptr = ".."; cn.cn_namelen = 2; cn.cn_consume = 0; /* Get parent vnode using lookup of '..' */ error = VOP_LOOKUP(lvp, uvpp, &cn); if (error) { vput(lvp); *lvpp = NULL; *uvpp = NULL; return (error); } uvp = *uvpp; /* If we don't care about the pathname, we're done */ if (bufp == NULL) { error = 0; goto out; } fileno = va.va_fileid; dirbuflen = DIRBLKSIZ; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; /* XXX we need some limit for fuse, 1 MB should be enough */ if (dirbuflen > 0xfffff) { error = EINVAL; goto out; } dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; do { char *cpos; struct dirent *dp; iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = p; eofflag = 0; /* Call VOP_READDIR of parent */ error = VOP_READDIR(uvp, &uio, p->p_ucred, &eofflag); off = uio.uio_offset; /* Try again if NFS tosses its cookies */ if (error == EINVAL && tries < 3) { tries++; off = 0; continue; } else if (error) { goto out; /* Old userland getcwd() behaviour */ } cpos = dirbuf; tries = 0; /* Scan directory page looking for matching vnode */ for (len = (dirbuflen - uio.uio_resid); len > 0; len -= reclen) { dp = (struct dirent *)cpos; reclen = dp->d_reclen; /* Check for malformed directory */ if (reclen < DIRENT_RECSIZE(1) || reclen > len) { error = EINVAL; goto out; } if (dp->d_fileno == fileno) { char *bp = *bpp; if (offsetof(struct dirent, d_name) + dp->d_namlen > reclen) { error = EINVAL; goto out; } bp -= dp->d_namlen; if (bp <= bufp) { error = ERANGE; goto out; } memmove(bp, dp->d_name, dp->d_namlen); error = 0; *bpp = bp; goto out; } cpos += reclen; } } while (!eofflag); error = ENOENT; out: vrele(lvp); *lvpp = NULL; free(dirbuf, M_TEMP, dirbuflen); return (error); } /* Do a lookup in the vnode-to-name reverse */ int vfs_getcwd_getcache(struct vnode **lvpp, struct vnode **uvpp, char **bpp, char *bufp) { struct vnode *lvp, *uvp = NULL; char *obp; int error, vpid; lvp = *lvpp; obp = *bpp; /* Save original position to restore to on error */ error = cache_revlookup(lvp, uvpp, bpp, bufp); if (error) { if (error != -1) { vput(lvp); *lvpp = NULL; *uvpp = NULL; } return (error); } uvp = *uvpp; vpid = uvp->v_id; /* Release current lock before acquiring the parent lock */ VOP_UNLOCK(lvp); error = 
vget(uvp, LK_EXCLUSIVE | LK_RETRY); if (error) *uvpp = NULL; /* * Verify that vget() succeeded, and check that vnode capability * didn't change while we were waiting for the lock. */ if (error || (vpid != uvp->v_id)) { /* * Try to get our lock back. If that works, tell the caller to * try things the hard way, otherwise give up. */ if (!error) vput(uvp); *uvpp = NULL; error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); if (!error) { *bpp = obp; /* restore the buffer */ return (-1); } } vrele(lvp); *lvpp = NULL; return (error); } /* Common routine shared by sys___getcwd() and vn_isunder() and sys___realpath() */ int vfs_getcwd_common(struct vnode *lvp, struct vnode *rvp, char **bpp, char *bufp, int limit, int flags, struct proc *p) { struct filedesc *fdp = p->p_fd; struct vnode *uvp = NULL; char *bp = NULL; int error, perms = VEXEC; if (rvp == NULL) { rvp = fdp->fd_rdir; if (rvp == NULL) rvp = rootvnode; } vref(rvp); vref(lvp); error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); if (error) { vrele(lvp); lvp = NULL; goto out; } if (bufp) bp = *bpp; if (lvp == rvp) { if (bp) *(--bp) = '/'; goto out; } /* * This loop will terminate when we hit the root, VOP_READDIR() or * VOP_LOOKUP() fails, or we run out of space in the user buffer. */ do { if (lvp->v_type != VDIR) { error = ENOTDIR; goto out; } /* Check for access if caller cares */ if (flags & GETCWD_CHECK_ACCESS) { error = VOP_ACCESS(lvp, perms, p->p_ucred, p); if (error) goto out; perms = VEXEC|VREAD; } /* Step up if we're a covered vnode */ while (lvp->v_flag & VROOT) { struct vnode *tvp; if (lvp == rvp) goto out; tvp = lvp; lvp = lvp->v_mount->mnt_vnodecovered; vput(tvp); if (lvp == NULL) { error = ENOENT; goto out; } vref(lvp); error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); if (error) { vrele(lvp); lvp = NULL; goto out; } } /* Look in the name cache */ error = vfs_getcwd_getcache(&lvp, &uvp, &bp, bufp); if (error == -1) { /* If that fails, look in the directory */ error = vfs_getcwd_scandir(&lvp, &uvp, &bp, bufp, p); } if (error) goto out; #ifdef DIAGNOSTIC if (lvp != NULL) panic("getcwd: oops, forgot to null lvp"); if (bufp && (bp <= bufp)) { panic("getcwd: oops, went back too far"); } #endif if (bp) *(--bp) = '/'; lvp = uvp; uvp = NULL; limit--; } while ((lvp != rvp) && (limit > 0)); out: if (bpp) *bpp = bp; if (uvp) vput(uvp); if (lvp) vput(lvp); vrele(rvp); return (error); } /* Find pathname of a process's current directory */ int sys___getcwd(struct proc *p, void *v, register_t *retval) { struct sys___getcwd_args *uap = v; int error, len = SCARG(uap, len); char *path, *bp; if (len > MAXPATHLEN * 4) len = MAXPATHLEN * 4; else if (len < 2) return (ERANGE); path = malloc(len, M_TEMP, M_WAITOK); bp = &path[len - 1]; *bp = '\0'; /* * 5th argument here is "max number of vnodes to traverse". * Since each entry takes up at least 2 bytes in the output * buffer, limit it to N/2 vnodes for an N byte buffer. */ error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, path, len/2, GETCWD_CHECK_ACCESS, p); if (error) goto out; /* Put the result into user buffer */ error = copyoutstr(bp, SCARG(uap, buf), MAXPATHLEN, NULL); #ifdef KTRACE if (KTRPOINT(p, KTR_NAMEI)) ktrnamei(p, bp); #endif out: free(path, M_TEMP, len); return (error); }
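/*
 * Illustrative userland sketch, not part of vfs_getcwd.c: the same idea
 * vfs_getcwd_scandir() falls back to when the reverse name cache misses,
 * i.e. learn a directory's name by scanning its parent for the entry whose
 * inode number matches.  getcwd(3), via sys___getcwd(), repeats this step
 * (plus the mount-point handling in vfs_getcwd_common(), which this sketch
 * ignores) until it reaches the root.  The use of "." and ".." here is an
 * illustrative assumption.
 */
#include <sys/types.h>
#include <sys/stat.h>

#include <dirent.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct stat sb;
	struct dirent *dp;
	DIR *parent;

	if (stat(".", &sb) == -1)		/* inode of the current dir */
		return 1;
	if ((parent = opendir("..")) == NULL)
		return 1;
	while ((dp = readdir(parent)) != NULL) {
		/* Skip "." and ".." and look for our own inode number. */
		if (strcmp(dp->d_name, ".") == 0 ||
		    strcmp(dp->d_name, "..") == 0)
			continue;
		if (dp->d_fileno == sb.st_ino) {
			printf("name in parent: %s\n", dp->d_name);
			break;
		}
	}
	closedir(parent);
	return 0;
}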
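/*
 * Illustrative userland sketch, not part of the kernel sources: exercises,
 * through the libc interfaces, the per-process signal state maintained by
 * kern_sig.c below.  sigaction(2) flags map onto the ps_siginfo and
 * ps_sigintr bits kept by setsigvec(); sigprocmask(2) edits p_sigmask;
 * a blocked signal raised with kill(2) becomes visible via sigpending(2);
 * and sigsuspend(2) swaps the mask and sleeps, as dosigsuspend() arranges.
 * The choice of SIGUSR1 and the handler body are illustrative assumptions.
 */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t got;

static void
handler(int signo, siginfo_t *si, void *ctx)
{
	(void)si;
	(void)ctx;
	got = signo;
}

int
main(void)
{
	struct sigaction sa;
	sigset_t block, old, pend, waitmask;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | SA_RESTART;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	sigemptyset(&block);
	sigaddset(&block, SIGUSR1);
	sigprocmask(SIG_BLOCK, &block, &old);	/* block SIGUSR1 */

	kill(getpid(), SIGUSR1);		/* pending, not yet delivered */
	sigpending(&pend);
	printf("SIGUSR1 pending: %d\n", sigismember(&pend, SIGUSR1));

	waitmask = old;				/* wait with SIGUSR1 unblocked */
	sigsuspend(&waitmask);			/* handler runs, then EINTR */
	printf("delivered signal %d\n", (int)got);

	sigprocmask(SIG_SETMASK, &old, NULL);
	return 0;
}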
/* $OpenBSD: kern_sig.c,v 1.329 2024/05/22 09:22:55 claudio Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* * Copyright (c) 1997 Theo de Raadt. All rights reserved. * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 */ #include <sys/param.h> #include <sys/signalvar.h> #include <sys/queue.h> #include <sys/namei.h> #include <sys/vnode.h> #include <sys/event.h> #include <sys/proc.h> #include <sys/systm.h> #include <sys/acct.h> #include <sys/fcntl.h> #include <sys/filedesc.h> #include <sys/wait.h> #include <sys/ktrace.h> #include <sys/stat.h> #include <sys/malloc.h> #include <sys/pool.h> #include <sys/sched.h> #include <sys/user.h> #include <sys/syslog.h> #include <sys/ttycom.h> #include <sys/pledge.h> #include <sys/witness.h> #include <sys/exec_elf.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <uvm/uvm_extern.h> #include <machine/tcb.h> int nosuidcoredump = 1; int filt_sigattach(struct knote *kn); void filt_sigdetach(struct knote *kn); int filt_signal(struct knote *kn, long hint); const struct filterops sig_filtops = { .f_flags = 0, .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, }; /* * The array below categorizes the signals and their default actions. */ const int sigprop[NSIG] = { 0, /* unused */ SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ SA_IGNORE, /* SIGTHR */ }; #define CONTSIGMASK (sigmask(SIGCONT)) #define STOPSIGMASK (sigmask(SIGSTOP) | sigmask(SIGTSTP) | \ sigmask(SIGTTIN) | sigmask(SIGTTOU)) void setsigvec(struct proc *, int, struct sigaction *); void proc_stop(struct proc *p, int); void proc_stop_sweep(void *); void *proc_stop_si; void setsigctx(struct proc *, int, struct sigctx *); void postsig_done(struct proc *, int, sigset_t, int); void postsig(struct proc *, int, struct sigctx *); int cansignal(struct proc *, struct process *, int); struct pool sigacts_pool; /* memory pool for sigacts structures */ void sigio_del(struct sigiolst *); void sigio_unlink(struct sigio_ref *, struct sigiolst *); struct mutex sigio_lock = MUTEX_INITIALIZER(IPL_HIGH); /* * Can thread p, send the signal signum to process qr? 
*/ int cansignal(struct proc *p, struct process *qr, int signum) { struct process *pr = p->p_p; struct ucred *uc = p->p_ucred; struct ucred *quc = qr->ps_ucred; if (uc->cr_uid == 0) return (1); /* root can always signal */ if (pr == qr) return (1); /* process can always signal itself */ /* optimization: if the same creds then the tests below will pass */ if (uc == quc) return (1); if (signum == SIGCONT && qr->ps_session == pr->ps_session) return (1); /* SIGCONT in session */ /* * Using kill(), only certain signals can be sent to setugid * child processes */ if (qr->ps_flags & PS_SUGID) { switch (signum) { case 0: case SIGKILL: case SIGINT: case SIGTERM: case SIGALRM: case SIGSTOP: case SIGTTIN: case SIGTTOU: case SIGTSTP: case SIGHUP: case SIGUSR1: case SIGUSR2: if (uc->cr_ruid == quc->cr_ruid || uc->cr_uid == quc->cr_ruid) return (1); } return (0); } if (uc->cr_ruid == quc->cr_ruid || uc->cr_ruid == quc->cr_svuid || uc->cr_uid == quc->cr_ruid || uc->cr_uid == quc->cr_svuid) return (1); return (0); } /* * Initialize signal-related data structures. */ void signal_init(void) { proc_stop_si = softintr_establish(IPL_SOFTCLOCK, proc_stop_sweep, NULL); if (proc_stop_si == NULL) panic("signal_init failed to register softintr"); pool_init(&sigacts_pool, sizeof(struct sigacts), 0, IPL_NONE, PR_WAITOK, "sigapl", NULL); } /* * Initialize a new sigaltstack structure. */ void sigstkinit(struct sigaltstack *ss) { ss->ss_flags = SS_DISABLE; ss->ss_size = 0; ss->ss_sp = NULL; } /* * Create an initial sigacts structure, using the same signal state * as pr. */ struct sigacts * sigactsinit(struct process *pr) { struct sigacts *ps; ps = pool_get(&sigacts_pool, PR_WAITOK); memcpy(ps, pr->ps_sigacts, sizeof(struct sigacts)); return (ps); } /* * Release a sigacts structure. 
*/ void sigactsfree(struct sigacts *ps) { pool_put(&sigacts_pool, ps); } int sys_sigaction(struct proc *p, void *v, register_t *retval) { struct sys_sigaction_args /* { syscallarg(int) signum; syscallarg(const struct sigaction *) nsa; syscallarg(struct sigaction *) osa; } */ *uap = v; struct sigaction vec; #ifdef KTRACE struct sigaction ovec; #endif struct sigaction *sa; const struct sigaction *nsa; struct sigaction *osa; struct sigacts *ps = p->p_p->ps_sigacts; int signum; int bit, error; signum = SCARG(uap, signum); nsa = SCARG(uap, nsa); osa = SCARG(uap, osa); if (signum <= 0 || signum >= NSIG || (nsa && (signum == SIGKILL || signum == SIGSTOP))) return (EINVAL); sa = &vec; if (osa) { mtx_enter(&p->p_p->ps_mtx); sa->sa_handler = ps->ps_sigact[signum]; sa->sa_mask = ps->ps_catchmask[signum]; bit = sigmask(signum); sa->sa_flags = 0; if ((ps->ps_sigonstack & bit) != 0) sa->sa_flags |= SA_ONSTACK; if ((ps->ps_sigintr & bit) == 0) sa->sa_flags |= SA_RESTART; if ((ps->ps_sigreset & bit) != 0) sa->sa_flags |= SA_RESETHAND; if ((ps->ps_siginfo & bit) != 0) sa->sa_flags |= SA_SIGINFO; if (signum == SIGCHLD) { if ((ps->ps_sigflags & SAS_NOCLDSTOP) != 0) sa->sa_flags |= SA_NOCLDSTOP; if ((ps->ps_sigflags & SAS_NOCLDWAIT) != 0) sa->sa_flags |= SA_NOCLDWAIT; } mtx_leave(&p->p_p->ps_mtx); if ((sa->sa_mask & bit) == 0) sa->sa_flags |= SA_NODEFER; sa->sa_mask &= ~bit; error = copyout(sa, osa, sizeof (vec)); if (error) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ovec = vec; #endif } if (nsa) { error = copyin(nsa, sa, sizeof (vec)); if (error) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrsigaction(p, sa); #endif setsigvec(p, signum, sa); } #ifdef KTRACE if (osa && KTRPOINT(p, KTR_STRUCT)) ktrsigaction(p, &ovec); #endif return (0); } void setsigvec(struct proc *p, int signum, struct sigaction *sa) { struct sigacts *ps = p->p_p->ps_sigacts; int bit; bit = sigmask(signum); mtx_enter(&p->p_p->ps_mtx); ps->ps_sigact[signum] = sa->sa_handler; if ((sa->sa_flags & SA_NODEFER) == 0) sa->sa_mask |= sigmask(signum); ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask; if (signum == SIGCHLD) { if (sa->sa_flags & SA_NOCLDSTOP) atomic_setbits_int(&ps->ps_sigflags, SAS_NOCLDSTOP); else atomic_clearbits_int(&ps->ps_sigflags, SAS_NOCLDSTOP); /* * If the SA_NOCLDWAIT flag is set or the handler * is SIG_IGN we reparent the dying child to PID 1 * (init) which will reap the zombie. Because we use * init to do our dirty work we never set SAS_NOCLDWAIT * for PID 1. * XXX exit1 rework means this is unnecessary? */ if (initprocess->ps_sigacts != ps && ((sa->sa_flags & SA_NOCLDWAIT) || sa->sa_handler == SIG_IGN)) atomic_setbits_int(&ps->ps_sigflags, SAS_NOCLDWAIT); else atomic_clearbits_int(&ps->ps_sigflags, SAS_NOCLDWAIT); } if ((sa->sa_flags & SA_RESETHAND) != 0) ps->ps_sigreset |= bit; else ps->ps_sigreset &= ~bit; if ((sa->sa_flags & SA_SIGINFO) != 0) ps->ps_siginfo |= bit; else ps->ps_siginfo &= ~bit; if ((sa->sa_flags & SA_RESTART) == 0) ps->ps_sigintr |= bit; else ps->ps_sigintr &= ~bit; if ((sa->sa_flags & SA_ONSTACK) != 0) ps->ps_sigonstack |= bit; else ps->ps_sigonstack &= ~bit; /* * Set bit in ps_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to ignore. * However, don't put SIGCONT in ps_sigignore, * as we have to restart the process. 
*/ if (sa->sa_handler == SIG_IGN || (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { atomic_clearbits_int(&p->p_siglist, bit); atomic_clearbits_int(&p->p_p->ps_siglist, bit); if (signum != SIGCONT) ps->ps_sigignore |= bit; /* easier in psignal */ ps->ps_sigcatch &= ~bit; } else { ps->ps_sigignore &= ~bit; if (sa->sa_handler == SIG_DFL) ps->ps_sigcatch &= ~bit; else ps->ps_sigcatch |= bit; } mtx_leave(&p->p_p->ps_mtx); } /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void siginit(struct sigacts *ps) { int i; for (i = 0; i < NSIG; i++) if (sigprop[i] & SA_IGNORE && i != SIGCONT) ps->ps_sigignore |= sigmask(i); ps->ps_sigflags = SAS_NOCLDWAIT | SAS_NOCLDSTOP; } /* * Reset signals for an exec by the specified thread. */ void execsigs(struct proc *p) { struct sigacts *ps; int nc, mask; ps = p->p_p->ps_sigacts; mtx_enter(&p->p_p->ps_mtx); /* * Reset caught signals. Held signals remain held * through p_sigmask (unless they were caught, * and are now ignored by default). */ while (ps->ps_sigcatch) { nc = ffs((long)ps->ps_sigcatch); mask = sigmask(nc); ps->ps_sigcatch &= ~mask; if (sigprop[nc] & SA_IGNORE) { if (nc != SIGCONT) ps->ps_sigignore |= mask; atomic_clearbits_int(&p->p_siglist, mask); atomic_clearbits_int(&p->p_p->ps_siglist, mask); } ps->ps_sigact[nc] = SIG_DFL; } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ sigstkinit(&p->p_sigstk); atomic_clearbits_int(&ps->ps_sigflags, SAS_NOCLDWAIT); if (ps->ps_sigact[SIGCHLD] == SIG_IGN) ps->ps_sigact[SIGCHLD] = SIG_DFL; mtx_leave(&p->p_p->ps_mtx); } /* * Manipulate signal mask. * Note that we receive new mask, not pointer, * and return old mask as return value; * the library stub does the rest. */ int sys_sigprocmask(struct proc *p, void *v, register_t *retval) { struct sys_sigprocmask_args /* { syscallarg(int) how; syscallarg(sigset_t) mask; } */ *uap = v; int error = 0; sigset_t mask; KASSERT(p == curproc); *retval = p->p_sigmask; mask = SCARG(uap, mask) &~ sigcantmask; switch (SCARG(uap, how)) { case SIG_BLOCK: SET(p->p_sigmask, mask); break; case SIG_UNBLOCK: CLR(p->p_sigmask, mask); break; case SIG_SETMASK: p->p_sigmask = mask; break; default: error = EINVAL; break; } return (error); } int sys_sigpending(struct proc *p, void *v, register_t *retval) { *retval = p->p_siglist | p->p_p->ps_siglist; return (0); } /* * Temporarily replace calling proc's signal mask for the duration of a * system call. Original signal mask will be restored by userret(). */ void dosigsuspend(struct proc *p, sigset_t newmask) { KASSERT(p == curproc); p->p_oldmask = p->p_sigmask; p->p_sigmask = newmask; atomic_setbits_int(&p->p_flag, P_SIGSUSPEND); } /* * Suspend thread until signal, providing mask to be set * in the meantime. Note nonstandard calling convention: * libc stub passes mask, not pointer, to save a copyin. */ int sys_sigsuspend(struct proc *p, void *v, register_t *retval) { struct sys_sigsuspend_args /* { syscallarg(int) mask; } */ *uap = v; dosigsuspend(p, SCARG(uap, mask) &~ sigcantmask); while (tsleep_nsec(&nowake, PPAUSE|PCATCH, "sigsusp", INFSLP) == 0) continue; /* always return EINTR rather than ERESTART... */ return (EINTR); } int sigonstack(size_t stack) { const struct sigaltstack *ss = &curproc->p_sigstk; return (ss->ss_flags & SS_DISABLE ? 
0 : (stack - (size_t)ss->ss_sp < ss->ss_size)); } int sys_sigaltstack(struct proc *p, void *v, register_t *retval) { struct sys_sigaltstack_args /* { syscallarg(const struct sigaltstack *) nss; syscallarg(struct sigaltstack *) oss; } */ *uap = v; struct sigaltstack ss; const struct sigaltstack *nss; struct sigaltstack *oss; int onstack = sigonstack(PROC_STACK(p)); int error; nss = SCARG(uap, nss); oss = SCARG(uap, oss); if (oss != NULL) { ss = p->p_sigstk; if (onstack) ss.ss_flags |= SS_ONSTACK; if ((error = copyout(&ss, oss, sizeof(ss)))) return (error); } if (nss == NULL) return (0); error = copyin(nss, &ss, sizeof(ss)); if (error) return (error); if (onstack) return (EPERM); if (ss.ss_flags & ~SS_DISABLE) return (EINVAL); if (ss.ss_flags & SS_DISABLE) { p->p_sigstk.ss_flags = ss.ss_flags; return (0); } if (ss.ss_size < MINSIGSTKSZ) return (ENOMEM); error = uvm_map_remap_as_stack(p, (vaddr_t)ss.ss_sp, ss.ss_size); if (error) return (error); p->p_sigstk = ss; return (0); } int sys_kill(struct proc *cp, void *v, register_t *retval) { struct sys_kill_args /* { syscallarg(int) pid; syscallarg(int) signum; } */ *uap = v; struct process *pr; int pid = SCARG(uap, pid); int signum = SCARG(uap, signum); int error; int zombie = 0; if ((error = pledge_kill(cp, pid)) != 0) return (error); if (((u_int)signum) >= NSIG) return (EINVAL); if (pid > 0) { if ((pr = prfind(pid)) == NULL) { if ((pr = zombiefind(pid)) == NULL) return (ESRCH); else zombie = 1; } if (!cansignal(cp, pr, signum)) return (EPERM); /* kill single process */ if (signum && !zombie) prsignal(pr, signum); return (0); } switch (pid) { case -1: /* broadcast signal */ return (killpg1(cp, signum, 0, 1)); case 0: /* signal own process group */ return (killpg1(cp, signum, 0, 0)); default: /* negative explicit process group */ return (killpg1(cp, signum, -pid, 0)); } } int sys_thrkill(struct proc *cp, void *v, register_t *retval) { struct sys_thrkill_args /* { syscallarg(pid_t) tid; syscallarg(int) signum; syscallarg(void *) tcb; } */ *uap = v; struct proc *p; int tid = SCARG(uap, tid); int signum = SCARG(uap, signum); void *tcb; if (((u_int)signum) >= NSIG) return (EINVAL); p = tid ? tfind_user(tid, cp->p_p) : cp; if (p == NULL) return (ESRCH); /* optionally require the target thread to have the given tcb addr */ tcb = SCARG(uap, tcb); if (tcb != NULL && tcb != TCB_GET(p)) return (ESRCH); if (signum) ptsignal(p, signum, STHREAD); return (0); } /* * Common code for kill process group/broadcast kill. * cp is calling process. */ int killpg1(struct proc *cp, int signum, int pgid, int all) { struct process *pr; struct pgrp *pgrp; int nfound = 0; if (all) { /* * broadcast */ LIST_FOREACH(pr, &allprocess, ps_list) { if (pr->ps_pid <= 1 || pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) || pr == cp->p_p || !cansignal(cp, pr, signum)) continue; nfound++; if (signum) prsignal(pr, signum); } } else { if (pgid == 0) /* * zero pgid means send to my process group. */ pgrp = cp->p_p->ps_pgrp; else { pgrp = pgfind(pgid); if (pgrp == NULL) return (ESRCH); } LIST_FOREACH(pr, &pgrp->pg_members, ps_pglist) { if (pr->ps_pid <= 1 || pr->ps_flags & PS_SYSTEM || !cansignal(cp, pr, signum)) continue; nfound++; if (signum) prsignal(pr, signum); } } return (nfound ? 
0 : ESRCH); } #define CANDELIVER(uid, euid, pr) \ (euid == 0 || \ (uid) == (pr)->ps_ucred->cr_ruid || \ (uid) == (pr)->ps_ucred->cr_svuid || \ (uid) == (pr)->ps_ucred->cr_uid || \ (euid) == (pr)->ps_ucred->cr_ruid || \ (euid) == (pr)->ps_ucred->cr_svuid || \ (euid) == (pr)->ps_ucred->cr_uid) #define CANSIGIO(cr, pr) \ CANDELIVER((cr)->cr_ruid, (cr)->cr_uid, (pr)) /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. */ void pgsignal(struct pgrp *pgrp, int signum, int checkctty) { struct process *pr; if (pgrp) LIST_FOREACH(pr, &pgrp->pg_members, ps_pglist) if (checkctty == 0 || pr->ps_flags & PS_CONTROLT) prsignal(pr, signum); } /* * Send a SIGIO or SIGURG signal to a process or process group using stored * credentials rather than those of the current process. */ void pgsigio(struct sigio_ref *sir, int sig, int checkctty) { struct process *pr; struct sigio *sigio; if (sir->sir_sigio == NULL) return; KERNEL_LOCK(); mtx_enter(&sigio_lock); sigio = sir->sir_sigio; if (sigio == NULL) goto out; if (sigio->sio_pgid > 0) { if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc)) prsignal(sigio->sio_proc, sig); } else if (sigio->sio_pgid < 0) { LIST_FOREACH(pr, &sigio->sio_pgrp->pg_members, ps_pglist) { if (CANSIGIO(sigio->sio_ucred, pr) && (checkctty == 0 || (pr->ps_flags & PS_CONTROLT))) prsignal(pr, sig); } } out: mtx_leave(&sigio_lock); KERNEL_UNLOCK(); } /* * Recalculate the signal mask and reset the signal disposition after * usermode frame for delivery is formed. */ void postsig_done(struct proc *p, int signum, sigset_t catchmask, int reset) { p->p_ru.ru_nsignals++; SET(p->p_sigmask, catchmask); if (reset != 0) { sigset_t mask = sigmask(signum); struct sigacts *ps = p->p_p->ps_sigacts; mtx_enter(&p->p_p->ps_mtx); ps->ps_sigcatch &= ~mask; if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) ps->ps_sigignore |= mask; ps->ps_sigact[signum] = SIG_DFL; mtx_leave(&p->p_p->ps_mtx); } } /* * Send a signal caused by a trap to the current thread * If it will be caught immediately, deliver it with correct code. * Otherwise, post it normally. */ void trapsignal(struct proc *p, int signum, u_long trapno, int code, union sigval sigval) { struct process *pr = p->p_p; struct sigctx ctx; int mask; switch (signum) { case SIGILL: if (code == ILL_BTCFI) { pr->ps_acflag |= ABTCFI; break; } /* FALLTHROUGH */ case SIGBUS: case SIGSEGV: pr->ps_acflag |= ATRAP; break; } mask = sigmask(signum); setsigctx(p, signum, &ctx); if ((pr->ps_flags & PS_TRACED) == 0 && ctx.sig_catch != 0 && (p->p_sigmask & mask) == 0) { siginfo_t si; initsiginfo(&si, signum, trapno, code, sigval); #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) { ktrpsig(p, signum, ctx.sig_action, p->p_sigmask, code, &si); } #endif if (sendsig(ctx.sig_action, signum, p->p_sigmask, &si, ctx.sig_info, ctx.sig_onstack)) { KERNEL_LOCK(); sigexit(p, SIGILL); /* NOTREACHED */ } postsig_done(p, signum, ctx.sig_catchmask, ctx.sig_reset); } else { p->p_sisig = signum; p->p_sitrapno = trapno; /* XXX for core dump/debugger */ p->p_sicode = code; p->p_sigval = sigval; /* * If traced, stop if signal is masked, and stay stopped * until released by the debugger. If our parent process * is waiting for us, don't hang as we could deadlock. 
/*
 * Send a signal caused by a trap to the current thread.
 * If it will be caught immediately, deliver it with the correct code.
 * Otherwise, post it normally.
 */
void
trapsignal(struct proc *p, int signum, u_long trapno, int code,
    union sigval sigval)
{
	struct process *pr = p->p_p;
	struct sigctx ctx;
	int mask;

	switch (signum) {
	case SIGILL:
		if (code == ILL_BTCFI) {
			pr->ps_acflag |= ABTCFI;
			break;
		}
		/* FALLTHROUGH */
	case SIGBUS:
	case SIGSEGV:
		pr->ps_acflag |= ATRAP;
		break;
	}

	mask = sigmask(signum);
	setsigctx(p, signum, &ctx);
	if ((pr->ps_flags & PS_TRACED) == 0 && ctx.sig_catch != 0 &&
	    (p->p_sigmask & mask) == 0) {
		siginfo_t si;

		initsiginfo(&si, signum, trapno, code, sigval);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_PSIG)) {
			ktrpsig(p, signum, ctx.sig_action,
			    p->p_sigmask, code, &si);
		}
#endif
		if (sendsig(ctx.sig_action, signum, p->p_sigmask, &si,
		    ctx.sig_info, ctx.sig_onstack)) {
			KERNEL_LOCK();
			sigexit(p, SIGILL);
			/* NOTREACHED */
		}
		postsig_done(p, signum, ctx.sig_catchmask, ctx.sig_reset);
	} else {
		p->p_sisig = signum;
		p->p_sitrapno = trapno;	/* XXX for core dump/debugger */
		p->p_sicode = code;
		p->p_sigval = sigval;

		/*
		 * If traced, stop if signal is masked, and stay stopped
		 * until released by the debugger.  If our parent process
		 * is waiting for us, don't hang as we could deadlock.
		 */
		if (((pr->ps_flags & (PS_TRACED | PS_PPWAIT)) == PS_TRACED) &&
		    signum != SIGKILL && (p->p_sigmask & mask) != 0) {
			int s;

			single_thread_set(p, SINGLE_SUSPEND | SINGLE_NOWAIT);
			pr->ps_xsig = signum;

			SCHED_LOCK(s);
			proc_stop(p, 1);
			SCHED_UNLOCK(s);

			signum = pr->ps_xsig;
			single_thread_clear(p, 0);

			/*
			 * If we are no longer being traced, or the parent
			 * didn't give us a signal, skip sending the signal.
			 */
			if ((pr->ps_flags & PS_TRACED) == 0 || signum == 0)
				return;

			/* update signal info */
			p->p_sisig = signum;
			mask = sigmask(signum);
		}

		/*
		 * Signals like SIGBUS and SIGSEGV should not, when
		 * generated by the kernel, be ignorable or blockable.
		 * If one is and we're not being traced, then just kill
		 * the process.
		 * After vfs_shutdown(9), init(8) cannot receive signals
		 * because new code pages of the signal handler cannot be
		 * mapped from halted storage.  init(8) may not die or the
		 * kernel panics.  Better to loop between the signal handler
		 * and the page fault trap until the machine is halted.
		 */
		if ((pr->ps_flags & PS_TRACED) == 0 &&
		    (sigprop[signum] & SA_KILL) &&
		    ((p->p_sigmask & mask) || ctx.sig_ignore) &&
		    pr->ps_pid != 1) {
			KERNEL_LOCK();
			sigexit(p, signum);
			/* NOTREACHED */
		}
		KERNEL_LOCK();
		ptsignal(p, signum, STHREAD);
		KERNEL_UNLOCK();
	}
}

/*
 * Send the signal to the process.  If the signal has an action, the action
 * is usually performed by the target process rather than the caller; we add
 * the signal to the set of pending signals for the process.
 *
 * Exceptions:
 *   o When a stop signal is sent to a sleeping process that takes the
 *     default action, the process is stopped without awakening it.
 *   o SIGCONT restarts stopped processes (or puts them back to sleep)
 *     regardless of the signal action (e.g., blocked or ignored).
 *
 * Other ignored signals are discarded immediately.
 */
void
psignal(struct proc *p, int signum)
{
	ptsignal(p, signum, SPROCESS);
}
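/*
 * Illustrative sketch, not kernel code: the SIGCONT exception listed above.
 * Even if the stopped target blocks or ignores SIGCONT, sending it still
 * takes the process out of the stopped state; only delivery of the signal
 * itself follows the disposition.  "pid" is a hypothetical stopped child.
 *
 *	#include <signal.h>
 *
 *	kill(pid, SIGSTOP);	// child is stopped
 *	kill(pid, SIGCONT);	// child resumes even if it blocks or
 *				// ignores SIGCONT
 */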
/*
 * type = SPROCESS	process signal, can be diverted (sigwait())
 * type = STHREAD	thread signal, but should be propagated if unhandled
 * type = SPROPAGATED	propagated to this thread, so don't propagate again
 */
void
ptsignal(struct proc *p, int signum, enum signal_type type)
{
	int s, prop;
	sig_t action, altaction = SIG_DFL;
	sigset_t mask, sigmask;
	int *siglist;
	struct process *pr = p->p_p;
	struct proc *q;
	int wakeparent = 0;

	KERNEL_ASSERT_LOCKED();

#ifdef DIAGNOSTIC
	if ((u_int)signum >= NSIG || signum == 0)
		panic("psignal signal number");
#endif

	/* Ignore signal if the target process is exiting */
	if (pr->ps_flags & PS_EXITING)
		return;

	mask = sigmask(signum);
	sigmask = READ_ONCE(p->p_sigmask);

	if (type == SPROCESS) {
		sigset_t tmpmask;

		/* Accept SIGKILL to coredumping processes */
		if (pr->ps_flags & PS_COREDUMP && signum == SIGKILL) {
			atomic_setbits_int(&pr->ps_siglist, mask);
			return;
		}

		/*
		 * If the current thread can process the signal
		 * immediately (it's unblocked) then have it take it.
		 */
		q = curproc;
		tmpmask = READ_ONCE(q->p_sigmask);
		if (q->p_p == pr && (q->p_flag & P_WEXIT) == 0 &&
		    (tmpmask & mask) == 0) {
			p = q;
			sigmask = tmpmask;
		} else {
			/*
			 * A process-wide signal can be diverted to a
			 * different thread that's in sigwait() for this
			 * signal.  If there isn't such a thread, then
			 * pick a thread that doesn't have it blocked so
			 * that the stop/kill consideration isn't
			 * delayed.  Otherwise, mark it pending on the
			 * main thread.
			 */
			TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
				/* ignore exiting threads */
				if (q->p_flag & P_WEXIT)
					continue;

				/* skip threads that have the signal blocked */
				tmpmask = READ_ONCE(q->p_sigmask);
				if ((tmpmask & mask) != 0)
					continue;

				/* okay, could send to this thread */
				p = q;
				sigmask = tmpmask;

				/*
				 * sigsuspend, sigwait, ppoll/pselect, etc?
				 * Definitely go to this thread, as it's
				 * already blocked in the kernel.
				 */
				if (q->p_flag & P_SIGSUSPEND)
					break;
			}
		}
	}

	if (type != SPROPAGATED)
		knote_locked(&pr->ps_klist, NOTE_SIGNAL | signum);

	prop = sigprop[signum];

	/*
	 * If proc is traced, always give parent a chance.
	 */
	if (pr->ps_flags & PS_TRACED) {
		action = SIG_DFL;
	} else {
		sigset_t sigcatch, sigignore;

		/*
		 * If the signal is being ignored,
		 * then we forget about it immediately.
		 * (Note: we don't set SIGCONT in ps_sigignore,
		 * and if it is set to SIG_IGN,
		 * action will be SIG_DFL here.)
		 */
		mtx_enter(&pr->ps_mtx);
		sigignore = pr->ps_sigacts->ps_sigignore;
		sigcatch = pr->ps_sigacts->ps_sigcatch;
		mtx_leave(&pr->ps_mtx);

		if (sigignore & mask)
			return;
		if (sigmask & mask) {
			action = SIG_HOLD;
			if (sigcatch & mask)
				altaction = SIG_CATCH;
		} else if (sigcatch & mask) {
			action = SIG_CATCH;
		} else {
			action = SIG_DFL;

			if (prop & SA_KILL && pr->ps_nice > NZERO)
				pr->ps_nice = NZERO;

			/*
			 * If sending a tty stop signal to a member of an
			 * orphaned process group, discard the signal here if
			 * the action is default; don't stop the process below
			 * if sleeping, and don't clear any pending SIGCONT.
			 */
			if (prop & SA_TTYSTOP && pr->ps_pgrp->pg_jobc == 0)
				return;
		}
	}

	/*
	 * If delivered to process, mark as pending there.  Continue and stop
	 * signals will be propagated to all threads.  So they are always
	 * marked at thread level.
	 */
	siglist = (type == SPROCESS) ? &pr->ps_siglist : &p->p_siglist;
	if (prop & (SA_CONT | SA_STOP))
		siglist = &p->p_siglist;

	/*
	 * XXX delay processing of SA_STOP signals unless action == SIG_DFL?
	 */
	if (prop & (SA_CONT | SA_STOP) && type != SPROPAGATED)
		TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
			if (q != p)
				ptsignal(q, signum, SPROPAGATED);
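	/*
	 * From here on the scheduler lock is held and the chosen thread is
	 * handled according to its run state: a sleeping thread may be made
	 * runnable or stopped, and a stopped thread may be continued,
	 * killed, or left stopped.
	 */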
	SCHED_LOCK(s);

	switch (p->p_stat) {

	case SSLEEP:
		/*
		 * If process is sleeping uninterruptibly
		 * we can't interrupt the sleep... the signal will
		 * be noticed when the process returns through
		 * trap() or syscall().
		 */
		if ((p->p_flag & P_SINTR) == 0)
			goto out;
		/*
		 * Process is sleeping and traced... make it runnable
		 * so it can discover the signal in cursig() and stop
		 * for the parent.
		 */
		if (pr->ps_flags & PS_TRACED)
			goto run;

		/*
		 * Recheck sigmask before waking up the process;
		 * there is a chance that while sending the signal
		 * the process changed sigmask and went to sleep.
		 */
		sigmask = READ_ONCE(p->p_sigmask);
		if (sigmask & mask)
			goto out;
		else if (action == SIG_HOLD) {
			/* signal got unmasked, get proper action */
			action = altaction;

			if (action == SIG_DFL) {
				if (prop & SA_KILL && pr->ps_nice > NZERO)
					pr->ps_nice = NZERO;

				/*
				 * Discard tty stop signals sent to an
				 * orphaned process group, see above.
				 */
				if (prop & SA_TTYSTOP &&
				    pr->ps_pgrp->pg_jobc == 0) {
					mask = 0;
					prop = 0;
					goto out;
				}
			}
		}

		/*
		 * If SIGCONT is default (or ignored) and process is
		 * asleep, we are finished; the process should not
		 * be awakened.
		 */
		if ((prop & SA_CONT) && action == SIG_DFL) {
			mask = 0;
			goto out;
		}

		/*
		 * When a sleeping process receives a stop
		 * signal, process immediately if possible.
		 */
		if ((prop & SA_STOP) && action == SIG_DFL) {
			/*
			 * If a child holding parent blocked,
			 * stopping could cause deadlock.
			 */
			if (pr->ps_flags & PS_PPWAIT)
				goto out;
			mask = 0;
			pr->ps_xsig = signum;
			proc_stop(p, 0);
			goto out;
		}

		/*
		 * All other (caught or default) signals
		 * cause the process to run.
		 */
		goto runfast;
		/* NOTREACHED */

	case SSTOP:
		/*
		 * If traced process is already stopped,
		 * then no further action is necessary.
		 */
		if (pr->ps_flags & PS_TRACED)
			goto out;

		/*
		 * Kill signal always sets processes running.
		 */
		if (signum == SIGKILL) {
			atomic_clearbits_int(&p->p_flag, P_SUSPSIG);
			goto runfast;
		}

		if (prop & SA_CONT) {
			/*
			 * If SIGCONT is default (or ignored), we continue the
			 * process but don't leave the signal in p_siglist, as
			 * it has no further action.  If SIGCONT is held, we
			 * continue the process and leave the signal in
			 * p_siglist.  If the process catches SIGCONT, let it
			 * handle the signal itself.  If it isn't waiting on
			 * an event, then it goes back to run state.
			 * Otherwise, the process goes back to sleep state.
			 */
			atomic_setbits_int(&p->p_flag, P_CONTINUED);
			atomic_clearbits_int(&p->p_flag, P_SUSPSIG);
			wakeparent = 1;
			if (action == SIG_DFL)
				mask = 0;
			if (action == SIG_CATCH)
				goto runfast;
			if (p->p_wchan == NULL)
				goto run;
			atomic_clearbits_int(&p->p_flag, P_WSLEEP);
			p->p_stat = SSLEEP;
			goto out;
		}

		/*
		 * Defer further processing for signals which are held,
		 * except that stopped processes must be continued by SIGCONT.
		 */
		if (action == SIG_HOLD)
			goto out;

		if (prop & SA_STOP) {
			/*
			 * Already stopped, don't need to stop again.
			 * (If we did, the shell could get confused.)
			 */
			mask = 0;
			goto out;
		}

		/*
		 * If process is sleeping interruptibly, then simulate a
		 * wakeup so that when it is continued, it will be made
		 * runnable and can look at the signal.  But