/*	$OpenBSD: raw_ip.c,v 1.119 2019/02/04 21:40:52 bluhm Exp $	*/
/*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NRL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies, either expressed or implied, of the US Naval * Research Laboratory (NRL). */ #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/protosw.h> #include <sys/socketvar.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_mroute.h> #include <netinet/ip_var.h> #include <netinet/in_pcb.h> #include <netinet/in_var.h> #include <netinet/ip_icmp.h> #include <net/pfvar.h> #include "pf.h" struct inpcbtable rawcbtable; /* * Nominal space allocated to a raw ip socket. */ #define RIPSNDQ 8192 #define RIPRCVQ 8192 /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ void rip_init(void) { in_pcbinit(&rawcbtable, 1); } struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); int rip_input(struct mbuf **mp, int *offp, int proto, int af) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last = NULL; struct in_addr *key; struct mbuf *opts = NULL; struct counters_ref ref; uint64_t *counters; KASSERT(af == AF_INET); ripsrc.sin_addr = ip->ip_src; key = &ip->ip_dst; #if NPF > 0 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { struct pf_divert *divert; divert = pf_find_divert(m); KASSERT(divert != NULL); switch (divert->type) { case PF_DIVERT_TO: key = &divert->addr.v4; break; case PF_DIVERT_REPLY: break; default: panic("%s: unknown divert type %d, mbuf %p, divert %p", __func__, divert->type, m, divert); } } #endif NET_ASSERT_LOCKED(); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) continue; #ifdef INET6 if (inp->inp_flags & INP_IPV6) continue; #endif if (rtable_l2(inp->inp_rtableid) != rtable_l2(m->m_pkthdr.ph_rtableid)) continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) continue; if (inp->inp_laddr.s_addr && inp->inp_laddr.s_addr != key->s_addr) continue; if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last) { struct mbuf *n; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, n); if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, sintosa(&ripsrc), n, opts) == 0) { /* should notify about lost packet */ m_freem(n); m_freem(opts); } else sorwakeup(last->inp_socket); opts = NULL; } } last = inp; } if (last) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, m); if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, sintosa(&ripsrc), m, opts) == 0) { m_freem(m); m_freem(opts); } else sorwakeup(last->inp_socket); } else { if (ip->ip_p != IPPROTO_ICMP) icmp_error(m, ICMP_UNREACH, 
ICMP_UNREACH_PROTOCOL, 0, 0); else m_freem(m); counters = counters_enter(&ref, ipcounters); counters[ips_noproto]++; counters[ips_delivered]--; counters_leave(&ref, ipcounters); } return IPPROTO_DONE; } /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, struct mbuf *control) { struct ip *ip; struct inpcb *inp; int flags, error; inp = sotoinpcb(so); flags = IP_ALLOWBROADCAST; /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { m_freem(m); return (EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (!m) return (ENOBUFS); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip.ip_tos; ip->ip_off = htons(0); ip->ip_p = inp->inp_ip.ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst = satosin(dstaddr)->sin_addr; ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return (EMSGSIZE); } m = rip_chkhdr(m, inp->inp_options); if (m == NULL) return (EINVAL); ip = mtod(m, struct ip *); if (ip->ip_id == 0) ip->ip_id = htons(ip_randomid()); /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat_inc(ips_rawout); } #ifdef INET6 /* * A thought: Even though raw IP shouldn't be able to set IPv6 * multicast options, if it does, the last parameter to * ip_output should be guarded against v6/v4 problems. */ #endif /* force routing table */ m->m_pkthdr.ph_rtableid = inp->inp_rtableid; #if NPF > 0 if (inp->inp_socket->so_state & SS_ISCONNECTED && ip->ip_p != IPPROTO_ICMP) pf_mbuf_link_inpcb(m, inp); #endif error = ip_output(m, inp->inp_options, &inp->inp_route, flags, inp->inp_moptions, inp, 0); return (error); } struct mbuf * rip_chkhdr(struct mbuf *m, struct mbuf *options) { struct ip *ip; int hlen, opt, optlen, cnt; u_char *cp; if (m->m_pkthdr.len < sizeof(struct ip)) { m_freem(m); return NULL; } m = m_pullup(m, sizeof (struct ip)); if (m == NULL) return NULL; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; /* Don't allow packet length sizes that will crash. */ if (hlen < sizeof (struct ip) || ntohs(ip->ip_len) < hlen || ntohs(ip->ip_len) != m->m_pkthdr.len) { m_freem(m); return NULL; } m = m_pullup(m, hlen); if (m == NULL) return NULL; ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { m_freem(m); return NULL; } /* * Don't allow both user specified and setsockopt options. * If options are present verify them. */ if (hlen != sizeof(struct ip)) { if (options) { m_freem(m); return NULL; } else { cp = (u_char *)(ip + 1); cnt = hlen - sizeof(struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) { m_freem(m); return NULL; } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { m_freem(m); return NULL; } } } } } return m; } /* * Raw IP socket option processing. 
*/ int rip_ctloutput(int op, struct socket *so, int level, int optname, struct mbuf *m) { struct inpcb *inp = sotoinpcb(so); int error; if (level != IPPROTO_IP) return (EINVAL); switch (optname) { case IP_HDRINCL: error = 0; if (op == PRCO_SETOPT) { if (m == NULL || m->m_len < sizeof (int)) error = EINVAL; else if (*mtod(m, int *)) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; } else { m->m_len = sizeof(int); *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; } return (error); case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: #ifdef MROUTING switch (op) { case PRCO_SETOPT: error = ip_mrouter_set(so, optname, m); break; case PRCO_GETOPT: error = ip_mrouter_get(so, optname, m); break; default: error = EINVAL; break; } return (error); #else return (EOPNOTSUPP); #endif } return (ip_ctloutput(op, so, level, optname, m)); } u_long rip_sendspace = RIPSNDQ; u_long rip_recvspace = RIPRCVQ; /*ARGSUSED*/ int rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) { struct inpcb *inp; int error = 0; if (req == PRU_CONTROL) return (in_control(so, (u_long)m, (caddr_t)nam, (struct ifnet *)control)); soassertlocked(so); inp = sotoinpcb(so); if (inp == NULL) { error = EINVAL; goto release; } switch (req) { case PRU_DISCONNECT: if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; break; } soisdisconnected(so); inp->inp_faddr.s_addr = INADDR_ANY; break; case PRU_ABORT: soisdisconnected(so); if (inp == NULL) panic("rip_abort"); #ifdef MROUTING if (so == ip_mrouter[inp->inp_rtableid]) ip_mrouter_done(so); #endif in_pcbdetach(inp); break; case PRU_BIND: { struct sockaddr_in *addr; if ((error = in_nam2sin(nam, &addr))) break; if (!((so->so_options & SO_BINDANY) || addr->sin_addr.s_addr == INADDR_ANY || addr->sin_addr.s_addr == INADDR_BROADCAST || in_broadcast(addr->sin_addr, inp->inp_rtableid) || ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { error = EADDRNOTAVAIL; break; } inp->inp_laddr = addr->sin_addr; break; } case PRU_CONNECT: { struct sockaddr_in *addr; if ((error = in_nam2sin(nam, &addr))) break; inp->inp_faddr = addr->sin_addr; soisconnected(so); break; } case PRU_CONNECT2: error = EOPNOTSUPP; break; /* * Mark the connection as being incapable of further input. */ case PRU_SHUTDOWN: socantsendmore(so); break; /* * Ship a packet out. The appropriate raw output * routine handles any massaging necessary. */ case PRU_SEND: { struct sockaddr_in dst; memset(&dst, 0, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); if (so->so_state & SS_ISCONNECTED) { if (nam) { error = EISCONN; break; } dst.sin_addr = inp->inp_faddr; } else { struct sockaddr_in *addr; if (nam == NULL) { error = ENOTCONN; break; } if ((error = in_nam2sin(nam, &addr))) break; dst.sin_addr = addr->sin_addr; } #ifdef IPSEC /* XXX Find an IPsec TDB */ #endif error = rip_output(m, so, sintosa(&dst), NULL); m = NULL; break; } case PRU_SENSE: /* * stat: don't bother with a blocksize. */ break; /* * Not supported. 
*/ case PRU_LISTEN: case PRU_ACCEPT: case PRU_SENDOOB: case PRU_RCVD: case PRU_RCVOOB: error = EOPNOTSUPP; break; case PRU_SOCKADDR: in_setsockaddr(inp, nam); break; case PRU_PEERADDR: in_setpeeraddr(inp, nam); break; default: panic("rip_usrreq"); } release: if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { m_freem(control); m_freem(m); } return (error); } int rip_attach(struct socket *so, int proto) { struct inpcb *inp; int error; if (so->so_pcb) panic("rip_attach"); if ((so->so_state & SS_PRIV) == 0) return EACCES; if (proto < 0 || proto >= IPPROTO_MAX) return EPROTONOSUPPORT; if ((error = soreserve(so, rip_sendspace, rip_recvspace))) return error; NET_ASSERT_LOCKED(); if ((error = in_pcballoc(so, &rawcbtable))) return error; inp = sotoinpcb(so); inp->inp_ip.ip_p = proto; return 0; } int rip_detach(struct socket *so) { struct inpcb *inp = sotoinpcb(so); soassertlocked(so); if (inp == NULL) return (EINVAL); #ifdef MROUTING if (so == ip_mrouter[inp->inp_rtableid]) ip_mrouter_done(so); #endif in_pcbdetach(inp); return (0); }
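/*
 * Illustrative userland sketch (not part of this kernel file): sending a
 * packet through the raw IP code above with the standard sockets API.
 * rip_attach() requires a privileged process, and once IP_HDRINCL is set
 * (handled in rip_ctloutput()), rip_chkhdr() expects a complete IPv4
 * header whose ip_len, in network byte order, matches the packet length.
 * The destination address and payload below are placeholders.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char pkt[sizeof(struct ip) + 4];
	struct ip *ip = (struct ip *)pkt;
	struct sockaddr_in dst;
	int s, one = 1;

	/* Raw socket; without root, rip_attach() returns EACCES. */
	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) == -1)
		err(1, "socket");
	/* We will supply the IP header ourselves. */
	if (setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)) == -1)
		err(1, "setsockopt");

	memset(pkt, 0, sizeof(pkt));
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(struct ip) >> 2;
	ip->ip_ttl = MAXTTL;
	ip->ip_p = 253;				/* experimental protocol */
	ip->ip_len = htons(sizeof(pkt));	/* must match packet length */
	ip->ip_id = 0;				/* 0: kernel picks a random id */
	ip->ip_src.s_addr = INADDR_ANY;		/* stack may fill this in */
	inet_pton(AF_INET, "192.0.2.1", &ip->ip_dst);	/* example address */
	memcpy(pkt + sizeof(struct ip), "test", 4);

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_len = sizeof(dst);
	dst.sin_addr = ip->ip_dst;

	if (sendto(s, pkt, sizeof(pkt), 0, (struct sockaddr *)&dst,
	    sizeof(dst)) == -1)
		err(1, "sendto");
	close(s);
	return 0;
}
#endif	/* illustrative sketch only */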
/*	$OpenBSD: uvm_object.c,v 1.18 2020/11/24 13:49:09 mpi Exp $	*/

/*
 * Copyright (c) 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_object.c: operate with memory objects
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

/* We will fetch this page count per step */
#define	FETCH_PAGECOUNT	16

/*
 * uvm_objinit: initialise a uvm object.
 */
void
uvm_objinit(struct uvm_object *uobj, const struct uvm_pagerops *pgops, int refs)
{
	uobj->pgops = pgops;
	RBT_INIT(uvm_objtree, &uobj->memt);
	uobj->uo_npages = 0;
	uobj->uo_refs = refs;
}

#ifndef SMALL_KERNEL
/*
 * uvm_objwire: wire the pages of entire uobj
 *
 * => caller must pass page-aligned start and end values
 * => if the caller passes in a pageq pointer, we'll return a list of
 *    wired pages.
*/ int uvm_objwire(struct uvm_object *uobj, voff_t start, voff_t end, struct pglist *pageq) { int i, npages, left, error; struct vm_page *pgs[FETCH_PAGECOUNT]; voff_t offset = start; left = (end - start) >> PAGE_SHIFT; while (left) { npages = MIN(FETCH_PAGECOUNT, left); /* Get the pages */ memset(pgs, 0, sizeof(pgs)); error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, 0, PROT_READ | PROT_WRITE, MADV_SEQUENTIAL, PGO_ALLPAGES | PGO_SYNCIO); if (error) goto error; for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); KASSERT(!(pgs[i]->pg_flags & PG_RELEASED)); if (pgs[i]->pg_flags & PQ_AOBJ) { atomic_clearbits_int(&pgs[i]->pg_flags, PG_CLEAN); uao_dropswap(uobj, i); } } /* Wire the pages */ uvm_lock_pageq(); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); if (pageq != NULL) TAILQ_INSERT_TAIL(pageq, pgs[i], pageq); } uvm_unlock_pageq(); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); left -= npages; offset += (voff_t)npages << PAGE_SHIFT; } return 0; error: /* Unwire the pages which have been wired */ uvm_objunwire(uobj, start, offset); return error; } /* * uobj_unwirepages: unwire the pages of entire uobj * * => caller must pass page-aligned start and end values */ void uvm_objunwire(struct uvm_object *uobj, voff_t start, voff_t end) { struct vm_page *pg; off_t offset; uvm_lock_pageq(); for (offset = start; offset < end; offset += PAGE_SIZE) { pg = uvm_pagelookup(uobj, offset); KASSERT(pg != NULL); KASSERT(!(pg->pg_flags & PG_RELEASED)); uvm_pageunwire(pg); } uvm_unlock_pageq(); } #endif /* !SMALL_KERNEL */ /* * uvm_objfree: free all pages in a uvm object, used by the buffer * cache to free all pages attached to a buffer. */ void uvm_objfree(struct uvm_object *uobj) { struct vm_page *pg; struct pglist pgl; TAILQ_INIT(&pgl); /* * Extract from rb tree in offset order. The phys addresses * usually increase in that order, which is better for * uvm_pmr_freepageq. */ RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { /* * clear PG_TABLED so we don't do work to remove * this pg from the uobj we are throwing away */ atomic_clearbits_int(&pg->pg_flags, PG_TABLED); uvm_lock_pageq(); uvm_pageclean(pg); uvm_unlock_pageq(); TAILQ_INSERT_TAIL(&pgl, pg, pageq); } uvm_pmr_freepageq(&pgl); }
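/*
 * Illustrative sketch (not from the tree): how a kernel consumer might use
 * uvm_objwire()/uvm_objunwire() above to pin a page-aligned range of an
 * object's pages and collect them on a local list.  The function name and
 * the caller-supplied uobj/start/end are hypothetical.
 */
int
example_wire_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct pglist pageq;
	int error;

	KASSERT(((start | end) & PAGE_MASK) == 0);

	TAILQ_INIT(&pageq);

	/* Fault in and wire every page in [start, end). */
	error = uvm_objwire(uobj, start, end, &pageq);
	if (error)
		return (error);

	/* ... work on the wired pages collected on pageq ... */

	/* Drop the wiring again when done. */
	uvm_objunwire(uobj, start, end);
	return (0);
}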
/*	$OpenBSD: puc.c,v 1.30 2020/08/14 18:14:11 jcs Exp $	*/
/*	$NetBSD: puc.c,v 1.3 1999/02/06 06:29:54 cgd Exp $	*/

/*
 * Copyright (c) 1996, 1998, 1999
 *	Christopher G. Demetriou.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Christopher G. Demetriou
 *	for the NetBSD Project.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * PCI "universal" communication card device driver, glues com, lpt,
 * and similar ports to PCI via bridge chip often much larger than
 * the devices being glued.
 *
 * Author: Christopher G. Demetriou, May 14, 1998 (derived from NetBSD
 * sys/dev/pci/pciide.c, revision 1.6).
 *
 * These devices could be (and some times are) described as
 * communications/{serial,parallel}, etc. devices with known
 * programming interfaces, but those programming interfaces (in
 * particular the BAR assignments for devices, etc.) in fact are not
 * particularly well defined.
 *
 * After I/we have seen more of these devices, it may be possible
 * to generalize some of these bits.  In particular, devices which
 * describe themselves as communications/serial/16[45]50, and
 * communications/parallel/??? might be attached via direct
 * 'com' and 'lpt' attachments to pci.
*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/device.h> #include <sys/tty.h> #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> #include <dev/pci/pucvar.h> #include <dev/pci/pcidevs.h> #include <dev/ic/comreg.h> #include <dev/ic/comvar.h> #include "com.h" struct puc_pci_softc { struct puc_softc sc_psc; pci_chipset_tag_t pc; pci_intr_handle_t ih; }; int puc_pci_match(struct device *, void *, void *); void puc_pci_attach(struct device *, struct device *, void *); int puc_pci_detach(struct device *, int); const char *puc_pci_intr_string(struct puc_attach_args *); void *puc_pci_intr_establish(struct puc_attach_args *, int, int (*)(void *), void *, char *); int puc_pci_xr17v35x_intr(void *arg); struct cfattach puc_pci_ca = { sizeof(struct puc_pci_softc), puc_pci_match, puc_pci_attach, puc_pci_detach }; struct cfdriver puc_cd = { NULL, "puc", DV_DULL }; const char *puc_port_type_name(int); int puc_pci_match(struct device *parent, void *match, void *aux) { struct pci_attach_args *pa = aux; const struct puc_device_description *desc; pcireg_t bhlc, subsys; bhlc = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_BHLC_REG); if (PCI_HDRTYPE_TYPE(bhlc) != 0) return (0); subsys = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG); desc = puc_find_description(PCI_VENDOR(pa->pa_id), PCI_PRODUCT(pa->pa_id), PCI_VENDOR(subsys), PCI_PRODUCT(subsys)); if (desc != NULL) return (1); return (0); } const char * puc_pci_intr_string(struct puc_attach_args *paa) { struct puc_pci_softc *sc = paa->puc; return (pci_intr_string(sc->pc, sc->ih)); } void * puc_pci_intr_establish(struct puc_attach_args *paa, int type, int (*func)(void *), void *arg, char *name) { struct puc_pci_softc *sc = paa->puc; struct puc_softc *psc = &sc->sc_psc; if (psc->sc_xr17v35x) { psc->sc_ports[paa->port].real_intrhand = func; psc->sc_ports[paa->port].real_intrhand_arg = arg; if (paa->port == 0) psc->sc_ports[paa->port].intrhand = pci_intr_establish(sc->pc, sc->ih, type, puc_pci_xr17v35x_intr, sc, name); return (psc->sc_ports[paa->port].real_intrhand); } psc->sc_ports[paa->port].intrhand = pci_intr_establish(sc->pc, sc->ih, type, func, arg, name); return (psc->sc_ports[paa->port].intrhand); } void puc_pci_attach(struct device *parent, struct device *self, void *aux) { struct puc_pci_softc *psc = (struct puc_pci_softc *)self; struct puc_softc *sc = &psc->sc_psc; struct pci_attach_args *pa = aux; struct puc_attach_args paa; pcireg_t subsys; int i; subsys = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG); sc->sc_desc = puc_find_description(PCI_VENDOR(pa->pa_id), PCI_PRODUCT(pa->pa_id), PCI_VENDOR(subsys), PCI_PRODUCT(subsys)); if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_EXAR && PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_EXAR_XR17V354) sc->sc_xr17v35x = 1; puc_print_ports(sc->sc_desc); for (i = 0; i < PUC_NBARS; i++) { pcireg_t type; int bar; sc->sc_bar_mappings[i].mapped = 0; bar = PCI_MAPREG_START + 4 * i; if (!pci_mapreg_probe(pa->pa_pc, pa->pa_tag, bar, &type)) continue; sc->sc_bar_mappings[i].mapped = (pci_mapreg_map(pa, bar, type, 0, &sc->sc_bar_mappings[i].t, &sc->sc_bar_mappings[i].h, &sc->sc_bar_mappings[i].a, &sc->sc_bar_mappings[i].s, 0) == 0); if (sc->sc_bar_mappings[i].mapped) { if (type == PCI_MAPREG_MEM_TYPE_64BIT) i++; continue; } #if NCOM > 0 /* * If a port on this card is used as serial console, * mapping the associated BAR will fail because the * bus space is already mapped. In that case, we try * to re-use the already existing mapping. 
* Unfortunately this means that if a BAR is used to * support multiple ports, only the first port will * work. */ if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, bar, type, &sc->sc_bar_mappings[i].a, NULL, NULL) == 0 && pa->pa_iot == comconsiot && sc->sc_bar_mappings[i].a == comconsaddr) { sc->sc_bar_mappings[i].t = comconsiot; sc->sc_bar_mappings[i].h = comconsioh; sc->sc_bar_mappings[i].s = COM_NPORTS; sc->sc_bar_mappings[i].mapped = 1; if (type == PCI_MAPREG_MEM_TYPE_64BIT) i++; continue; } #endif printf("%s: couldn't map BAR at offset 0x%lx\n", sc->sc_dev.dv_xname, (long)bar); } /* Map interrupt. */ psc->pc = pa->pa_pc; if (pci_intr_map(pa, &psc->ih)) { printf("%s: couldn't map interrupt\n", sc->sc_dev.dv_xname); return; } paa.puc = sc; paa.intr_string = &puc_pci_intr_string; paa.intr_establish = &puc_pci_intr_establish; puc_common_attach(sc, &paa); } void puc_common_attach(struct puc_softc *sc, struct puc_attach_args *paa) { const struct puc_device_description *desc = sc->sc_desc; int i, bar; /* Configure each port. */ for (i = 0; i < PUC_MAX_PORTS; i++) { if (desc->ports[i].type == 0) /* neither com or lpt */ continue; /* make sure the base address register is mapped */ bar = PUC_PORT_BAR_INDEX(desc->ports[i].bar); if (!sc->sc_bar_mappings[bar].mapped) { printf("%s: %s port uses unmapped BAR (0x%x)\n", sc->sc_dev.dv_xname, puc_port_type_name(desc->ports[i].type), desc->ports[i].bar); continue; } /* set up to configure the child device */ paa->port = i; paa->a = sc->sc_bar_mappings[bar].a; paa->t = sc->sc_bar_mappings[bar].t; paa->type = desc->ports[i].type; if (desc->ports[i].offset >= sc->sc_bar_mappings[bar].s || bus_space_subregion(sc->sc_bar_mappings[bar].t, sc->sc_bar_mappings[bar].h, desc->ports[i].offset, sc->sc_bar_mappings[bar].s - desc->ports[i].offset, &paa->h)) { printf("%s: couldn't get subregion for port %d\n", sc->sc_dev.dv_xname, i); continue; } #if 0 if (autoconf_verbose) printf("%s: port %d: %s @ (index %d) 0x%x " "(0x%lx, 0x%lx)\n", sc->sc_dev.dv_xname, paa->port, puc_port_type_name(paa->type), bar, (int)paa->a, (long)paa->t, (long)paa->h); #endif /* and configure it */ sc->sc_ports[i].dev = config_found_sm(&sc->sc_dev, paa, puc_print, puc_submatch); } } int puc_pci_detach(struct device *self, int flags) { struct puc_pci_softc *sc = (struct puc_pci_softc *)self; struct puc_softc *psc = &sc->sc_psc; int i, rv; for (i = PUC_MAX_PORTS; i--; ) { if (psc->sc_ports[i].intrhand) pci_intr_disestablish(sc->pc, psc->sc_ports[i].intrhand); if (psc->sc_ports[i].dev) if ((rv = config_detach(psc->sc_ports[i].dev, flags))) return (rv); } for (i = PUC_NBARS; i--; ) if (psc->sc_bar_mappings[i].mapped) bus_space_unmap(psc->sc_bar_mappings[i].t, psc->sc_bar_mappings[i].h, psc->sc_bar_mappings[i].s); return (0); } int puc_print(void *aux, const char *pnp) { struct puc_attach_args *paa = aux; if (pnp) printf("%s at %s", puc_port_type_name(paa->type), pnp); printf(" port %d", paa->port); return (UNCONF); } int puc_submatch(struct device *parent, void *vcf, void *aux) { struct cfdata *cf = (struct cfdata *)vcf; struct puc_attach_args *aa = aux; if (cf->cf_loc[0] != -1 && cf->cf_loc[0] != aa->port) return 0; return ((*cf->cf_attach->ca_match)(parent, cf, aux)); } const struct puc_device_description * puc_find_description(u_int16_t vend, u_int16_t prod, u_int16_t svend, u_int16_t sprod) { int i; for (i = 0; i < puc_ndevs; i++) if ((vend & puc_devs[i].rmask[0]) == puc_devs[i].rval[0] && (prod & puc_devs[i].rmask[1]) == puc_devs[i].rval[1] && (svend & puc_devs[i].rmask[2]) == puc_devs[i].rval[2] 
&& (sprod & puc_devs[i].rmask[3]) == puc_devs[i].rval[3]) return (&puc_devs[i]); return (NULL); } const char * puc_port_type_name(int type) { if (PUC_IS_COM(type)) return "com"; if (PUC_IS_LPT(type)) return "lpt"; return (NULL); } void puc_print_ports(const struct puc_device_description *desc) { int i, ncom, nlpt; printf(": ports: "); for (i = ncom = nlpt = 0; i < PUC_MAX_PORTS; i++) { if (PUC_IS_COM(desc->ports[i].type)) ncom++; else if (PUC_IS_LPT(desc->ports[i].type)) nlpt++; } if (ncom) printf("%d com", ncom); if (nlpt) { if (ncom) printf(", "); printf("%d lpt", nlpt); } printf("\n"); } int puc_pci_xr17v35x_intr(void *arg) { struct puc_pci_softc *sc = arg; struct puc_softc *psc = &sc->sc_psc; int ports, i; ports = bus_space_read_1(psc->sc_bar_mappings[0].t, psc->sc_bar_mappings[0].h, UART_EXAR_INT0); for (i = 0; i < 8; i++) { if ((ports & (1 << i)) && psc->sc_ports[i].real_intrhand) (*(psc->sc_ports[i].real_intrhand))( psc->sc_ports[i].real_intrhand_arg); } return (1); }
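/*
 * Illustrative sketch (not part of this driver): the shape of a child
 * front-end driver matching and attaching against the puc_attach_args
 * filled in by puc_common_attach() above.  In the real tree the children
 * are com(4) and lpt(4); the "example_*" names here are hypothetical.
 */
int	example_intr(void *);

int
example_puc_match(struct device *parent, void *match, void *aux)
{
	struct puc_attach_args *paa = aux;

	/* Claim only the serial ports described by the card's table. */
	return (PUC_IS_COM(paa->type));
}

void
example_puc_attach(struct device *parent, struct device *self, void *aux)
{
	struct puc_attach_args *paa = aux;

	/*
	 * paa->t/paa->h already describe this port's slice of the BAR;
	 * interrupts are routed through the bridge via the callback that
	 * puc(4) installed (puc_pci_intr_establish() above).
	 */
	if (paa->intr_establish(paa, IPL_TTY, example_intr, self,
	    self->dv_xname) == NULL)
		printf(": couldn't establish interrupt\n");
}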
/*	$OpenBSD: vm_machdep.c,v 1.43 2018/08/21 13:10:13 bluhm Exp $	*/
/*	$NetBSD: vm_machdep.c,v 1.1 2003/04/26 18:39:33 fvdl Exp $	*/

/*-
 * Copyright (c) 1995 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <uvm/uvm_extern.h>

#include <machine/cpu.h>
#include <machine/fpu.h>

void	setguardpage(struct proc *);

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.
 */
void
cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb,
    void (*func)(void *), void *arg)
{
	struct cpu_info *ci = curcpu();
	struct pcb *pcb = &p2->p_addr->u_pcb;
	struct pcb *pcb1 = &p1->p_addr->u_pcb;
	struct trapframe *tf;
	struct switchframe *sf;

	/* Save the fpu h/w state to p1's pcb so that we can copy it. */
	if (p1 != &proc0 && (ci->ci_flags & CPUF_USERXSTATE))
		fpusave(&pcb1->pcb_savefpu);

	p2->p_md.md_flags = p1->p_md.md_flags;

#ifdef DIAGNOSTIC
	if (p1 != curproc && p1 != &proc0)
		panic("cpu_fork: curproc");
#endif

	*pcb = *pcb1;

	/*
	 * Activate the address space.
	 */
	pmap_activate(p2);

	/* Record where this process's kernel stack is */
	pcb->pcb_kstack = (u_int64_t)p2->p_addr + USPACE - 16 -
	    (arc4random() & PAGE_MASK & ~_STACKALIGNBYTES);

	/*
	 * Copy the trapframe.
*/ p2->p_md.md_regs = tf = (struct trapframe *)pcb->pcb_kstack - 1; *tf = *p1->p_md.md_regs; setguardpage(p2); /* * If specified, give the child a different stack and/or TCB */ if (stack != NULL) tf->tf_rsp = (u_int64_t)stack; if (tcb != NULL) pcb->pcb_fsbase = (u_int64_t)tcb; sf = (struct switchframe *)tf - 1; sf->sf_r12 = (u_int64_t)func; sf->sf_r13 = (u_int64_t)arg; sf->sf_rip = (u_int64_t)proc_trampoline; pcb->pcb_rsp = (u_int64_t)sf; pcb->pcb_rbp = 0; } /* * cpu_exit is called as the last action during exit. * * We clean up a little and then call sched_exit() with the old proc as an * argument. */ void cpu_exit(struct proc *p) { pmap_deactivate(p); sched_exit(p); } /* * Set a red zone in the kernel stack after the u. area. */ void setguardpage(struct proc *p) { pmap_remove(pmap_kernel(), (vaddr_t)p->p_addr + PAGE_SIZE, (vaddr_t)p->p_addr + 2 * PAGE_SIZE); pmap_update(pmap_kernel()); } /* * Map a user I/O request into kernel virtual address space. * Note: the pages are already locked by uvm_vslock(), so we * do not need to pass an access_type to pmap_enter(). */ void vmapbuf(struct buf *bp, vsize_t len) { vaddr_t faddr, taddr, off; paddr_t fpa; if ((bp->b_flags & B_PHYS) == 0) panic("vmapbuf"); faddr = trunc_page((vaddr_t)(bp->b_saveaddr = bp->b_data)); off = (vaddr_t)bp->b_data - faddr; len = round_page(off + len); taddr= uvm_km_valloc_wait(phys_map, len); bp->b_data = (caddr_t)(taddr + off); /* * The region is locked, so we expect that pmap_pte() will return * non-NULL. * XXX: unwise to expect this in a multithreaded environment. * anything can happen to a pmap between the time we lock a * region, release the pmap lock, and then relock it for * the pmap_extract(). * * no need to flush TLB since we expect nothing to be mapped * where we we just allocated (TLB will be flushed when our * mapping is removed). */ while (len) { (void) pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), faddr, &fpa); pmap_kenter_pa(taddr, fpa, PROT_READ | PROT_WRITE); faddr += PAGE_SIZE; taddr += PAGE_SIZE; len -= PAGE_SIZE; } } /* * Unmap a previously-mapped user I/O request. */ void vunmapbuf(struct buf *bp, vsize_t len) { vaddr_t addr, off; if ((bp->b_flags & B_PHYS) == 0) panic("vunmapbuf"); addr = trunc_page((vaddr_t)bp->b_data); off = (vaddr_t)bp->b_data - addr; len = round_page(off + len); pmap_kremove(addr, len); pmap_update(pmap_kernel()); uvm_km_free_wakeup(phys_map, addr, len); bp->b_data = bp->b_saveaddr; bp->b_saveaddr = 0; } void * tcb_get(struct proc *p) { return ((void *)p->p_addr->u_pcb.pcb_fsbase); } void tcb_set(struct proc *p, void *tcb) { KASSERT(p == curproc); reset_segs(); p->p_addr->u_pcb.pcb_fsbase = (u_int64_t)tcb; }
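/*
 * Illustrative sketch (not part of this file): the page-rounding arithmetic
 * that vmapbuf()/vunmapbuf() above apply to a user buffer.  The mapping
 * starts at the page containing the buffer and covers whole pages, while
 * the offset within the first page carries over to the new mapping.  The
 * address and length are hypothetical and assume 4 KB pages.
 */
void
example_vmapbuf_arith(void)
{
	vaddr_t uva = 0x7f7ffffe1234UL;		/* user buffer address */
	vsize_t len = 0x2000;			/* requested length */
	vaddr_t faddr, off;

	faddr = trunc_page(uva);		/* 0x7f7ffffe1000 */
	off = uva - faddr;			/* 0x234 */
	len = round_page(off + len);		/* 0x3000: three whole pages */

	/*
	 * vmapbuf() then maps len bytes at some kernel va "taddr" and sets
	 * bp->b_data = taddr + off, so byte offsets within the buffer stay
	 * the same even though the pages appear at a new virtual range.
	 */
	(void)faddr;
	(void)off;
	(void)len;
}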
/*	$OpenBSD: ufs_lookup.c,v 1.58 2020/10/09 08:20:46 mpi Exp $	*/
/*	$NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

extern	struct nchstats nchstats;

#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif

#define OFSFMT(ip)	((ip)->i_ump->um_maxsymlinklen == 0)

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
* If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and vput * instead of two vputs. * * Overall outline of ufs_lookup: * * check accessibility of directory * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(void *v) { struct vop_lookup_args *ap = v; struct vnode *vdp; /* vnode for directory being searched */ struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags; int nameiop = cnp->cn_nameiop; cnp->cn_flags &= ~PDIRUNLOCK; flags = cnp->cn_flags; bp = NULL; slotoffset = -1; *vpp = NULL; vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); /* * Check accessiblity of directory. */ if ((DIP(dp, mode) & IFMT) != IFDIR) return (ENOTDIR); if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) return (error); if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); /* * We now have a segment name to search for, and a directory to search. * * Before tediously performing a linear scan of the directory, * check the name cache to see if the directory/name pair * we are looking for is known already. */ if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) return (error); /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. 
*/ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = (sizeof(struct direct) - MAXNAMLEN + cnp->cn_namelen + 3) &~ 3; } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = DIP(dp, size); if (slotstatus != FOUND) { slotoffset = ufsdirhash_findfree(dp, slotneeded, &slotsize); if (slotoffset >= 0) { slotstatus = COMPACT; enduseful = ufsdirhash_enduseful(dp); if (enduseful < 0) enduseful = DIP(dp, size); } } /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + (dp->i_offset & bmask)); goto foundentry; case ENOENT: #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff >= DIP(dp, size)) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = dp->i_offset; endsearch = roundup(DIP(dp, size), DIRBLKSIZ); enduseful = 0; searchloop: while (dp->i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, dp->i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); dp->i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. 
Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(dp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = dp->i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = dp->i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && !memcmp(cnp->cn_nameptr, ep->d_name, namlen)) { #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ dp->i_ino = ep->d_ino; dp->i_reclen = ep->d_reclen; goto found; } } prevoff = dp->i_offset; dp->i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = dp->i_offset; } #ifdef UFS_DIRHASH notfound: #endif /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup(enduseful, DIRBLKSIZ); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; if (!lockparent) { VOP_UNLOCK(vdp); cnp->cn_flags |= PDIRUNLOCK; } return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(vdp, *vpp, cnp); return (ENOENT); found: if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. 
*/ if (dp->i_offset + DIRSIZ(OFSFMT(dp), ep) > DIP(dp, size)) { ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small"); DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(OFSFMT(dp), ep)); dp->i_flag |= IN_CHANGE | IN_UPDATE; } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { vref(vdp); *vpp = vdp; return (0); } error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((DIP(dp, mode) & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != DIP(dp, uid) && !vnoperm(vdp) && DIP(VTOI(tdp), uid) != cred->cr_uid) { vput(tdp); return (EPERM); } *vpp = tdp; if (!lockparent) { VOP_UNLOCK(vdp); cnp->cn_flags |= PDIRUNLOCK; } return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ if (dp->i_number == dp->i_ino) return (EISDIR); error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) { VOP_UNLOCK(vdp); cnp->cn_flags |= PDIRUNLOCK; } return (0); } /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. 
*/ pdp = vdp; if (flags & ISDOTDOT) { VOP_UNLOCK(pdp); /* race to get the inode */ cnp->cn_flags |= PDIRUNLOCK; error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) { if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY) == 0) cnp->cn_flags &= ~PDIRUNLOCK; return (error); } if (lockparent && (flags & ISLASTCN)) { if ((error = vn_lock(pdp, LK_EXCLUSIVE))) { vput(tdp); return (error); } cnp->cn_flags &= ~PDIRUNLOCK; } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { vref(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) { VOP_UNLOCK(pdp); cnp->cn_flags |= PDIRUNLOCK; } *vpp = tdp; } /* * Insert name into cache if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(struct inode *ip, doff_t offset, char *how) { struct mount *mp; mp = ITOV(ip)->v_mount; (void)printf("%s: bad dir ino %u at offset %d: %s\n", mp->mnt_stat.f_mntonname, ip->i_number, offset, how); if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) panic("bad dir"); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(struct vnode *vdp, struct direct *ep, int entryoffsetinblock) { struct inode *dp; int i; int namlen; dp = VTOI(vdp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* * Construct a new directory entry after a call to namei, using the * parameters that it left in the componentname argument cnp. The * argument ip is the inode to which the new directory entry will refer. */ void ufs_makedirentry(struct inode *ip, struct componentname *cnp, struct direct *newdirp) { #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_makedirentry: missing name"); #endif newdirp->d_ino = ip->i_number; newdirp->d_namlen = cnp->cn_namelen; memset(newdirp->d_name + (cnp->cn_namelen & ~(DIR_ROUNDUP-1)), 0, DIR_ROUNDUP); memcpy(newdirp->d_name, cnp->cn_nameptr, cnp->cn_namelen); if (OFSFMT(ip)) { newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) { u_char tmp = newdirp->d_namlen; newdirp->d_namlen = newdirp->d_type; newdirp->d_type = tmp; } # endif } else newdirp->d_type = IFTODT(DIP(ip, mode)); } /* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei. Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). 
*/ int ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { struct ucred *cr; struct proc *p; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; error = 0; cr = cnp->cn_cred; p = cnp->cn_proc; dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = B_CLRBUF; if (!DOINGSOFTDEP(dvp)) flags |= B_SYNC; if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, DIP(dp, size)); dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); memcpy(bp->b_data + blkoff, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, dp->i_offset); ufsdirhash_add(dp, dirp, dp->i_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, dp->i_offset); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, dirp->d_ino, newdirbp, 1) == 0) { bdwrite(bp); return (UFS_UPDATE(dp, 0)); } /* We have just allocated a directory block in an * indirect block. Rather than tracking when it gets * claimed by the inode, we simply do a VOP_FSYNC * now to ensure that it is there (in case the user * does a future fsync). Note that we have to unlock * the inode for the entry that we just entered, as * the VOP_FSYNC may need to lock other inodes which * can lead to deadlock if we also hold a lock on * the newly entered node. */ if ((error = VOP_BWRITE(bp))) return (error); if (tvp != NULL) VOP_UNLOCK(tvp); error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); return (error); } error = VOP_BWRITE(bp); ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp)); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > DIP(dp, size)) DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count); /* * Get the block containing the space for the new directory entry. 
*/ if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(OFSFMT(dp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the memmove() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't memmove it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(OFSFMT(dp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, dp->i_offset + ((char *)nep - dirbuf), dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else memmove(ep, nep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); #endif memcpy(ep, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (dp->i_offset & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { (void)softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); bdwrite(bp); } else { error = VOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) { if (tvp != NULL) VOP_UNLOCK(tvp); error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr); #ifdef UFS_DIRHASH if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. 
The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) { struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); if ((error = UFS_BUFATOFF(dp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); #endif if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen += dp->i_reclen; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { if (ip) { ip->i_effnlink--; softdep_change_linkcnt(ip, 0); softdep_setup_remove(bp, dp, ip, isrmdir); } if (softdep_slowdown(dvp)) { error = bwrite(bp); } else { bdwrite(bp); error = 0; } } else { if (ip) { ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; } if (DOINGASYNC(dvp) && dp->i_count != 0) { bdwrite(bp); error = 0; } else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int ufs_dirrewrite(struct inode *dp, struct inode *oip, ufsino_t newinum, int newtype, int isrmdir) { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); ep->d_ino = newinum; if (!OFSFMT(dp)) ep->d_type = newtype; oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { softdep_change_linkcnt(oip, 0); softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { DIP_ADD(oip, nlink, -1); oip->i_flag |= IN_CHANGE; if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; } else { error = VOP_BWRITE(bp); } } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. */ int ufs_dirempty(struct inode *ip, ufsino_t parentino, struct ucred *cred) { off_t off, m; struct dirtemplate dbuf; struct direct *dp = (struct direct *)&dbuf; int error, namlen; size_t count; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) m = DIP(ip, size); for (off = 0; off < m; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count, curproc); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0) continue; /* accept only "." and ".." 
*/ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ip)) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } /* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ int ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) { struct vnode *nextvp, *vp; int error, rootino, namlen; struct dirtemplate dirbuf; vp = ITOV(target); if (target->i_number == source->i_number) { error = EEXIST; goto out; } rootino = ROOTINO; error = 0; if (target->i_number == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, curproc); if (error != 0) break; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(VTOI(vp))) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; # else namlen = dirbuf.dotdot_namlen; # endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { error = ENOTDIR; break; } if (dirbuf.dotdot_ino == source->i_number) { error = EINVAL; break; } if (dirbuf.dotdot_ino == rootino) break; VOP_UNLOCK(vp); error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &nextvp); vrele(vp); if (error) { vp = NULL; break; } vp = nextvp; } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); if (vp != NULL) vput(vp); return (error); }
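The entry checks performed by ufs_dirbadentry() above (record length a multiple of 4, entry contained within its DIRBLKSIZ block, record large enough for the advertised name, name no longer than MAXNAMLEN and NUL terminated) can be exercised outside the kernel. The following is a minimal user-space sketch, not the kernel code: struct ex_direct, ex_dirsiz(), EX_DIRBLKSIZ and EX_MAXNAMLEN are simplified stand-ins for struct direct, DIRSIZ(), DIRBLKSIZ and MAXNAMLEN, and the old-format (OFSFMT) name-length/type byte swap is ignored.

/*
 * Illustrative, user-space sketch of the checks in ufs_dirbadentry().
 * The struct layout and the size macros are simplified stand-ins for
 * the kernel definitions, chosen only to keep the example self-contained.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EX_DIRBLKSIZ	512
#define EX_MAXNAMLEN	255

struct ex_direct {
	uint32_t d_ino;
	uint16_t d_reclen;
	uint8_t	 d_type;
	uint8_t	 d_namlen;
	char	 d_name[EX_MAXNAMLEN + 1];
};

/* Minimum record size needed to hold an entry with this name length. */
static uint16_t
ex_dirsiz(uint8_t namlen)
{
	/* 8 bytes of fixed header, name (plus NUL) rounded up to 4 bytes. */
	return 8 + ((namlen + 1 + 3) & ~3);
}

/* Returns 1 if the entry is bad, mirroring the rules in ufs_dirbadentry(). */
static int
ex_dirbadentry(const struct ex_direct *ep, int entryoffsetinblock)
{
	int i;

	if ((ep->d_reclen & 0x3) != 0 ||
	    ep->d_reclen > EX_DIRBLKSIZ -
	    (entryoffsetinblock & (EX_DIRBLKSIZ - 1)) ||
	    ep->d_reclen < ex_dirsiz(ep->d_namlen) ||
	    ep->d_namlen > EX_MAXNAMLEN)
		return 1;
	if (ep->d_ino == 0)		/* unused entry: only reclen matters */
		return 0;
	for (i = 0; i < ep->d_namlen; i++)
		if (ep->d_name[i] == '\0')
			return 1;	/* name shorter than advertised */
	return ep->d_name[i] != '\0';	/* must be NUL terminated */
}

int
main(void)
{
	struct ex_direct d = { .d_ino = 2, .d_reclen = 16, .d_namlen = 1 };

	strcpy(d.d_name, ".");
	printf("bad = %d\n", ex_dirbadentry(&d, 0));	/* prints "bad = 0" */
	return 0;
}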
53 53 53 53 53 53 53 53 53 /* $OpenBSD: ip6_id.c,v 1.16 2021/03/10 10:21:49 jsg Exp $ */ /* $NetBSD: ip6_id.c,v 1.7 2003/09/13 21:32:59 itojun Exp $ */ /* $KAME: ip6_id.c,v 1.8 2003/09/06 13:41:06 itojun Exp $ */ /* * Copyright (C) 2003 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright 1998 Niels Provos <provos@citi.umich.edu> * All rights reserved. * * Theo de Raadt <deraadt@openbsd.org> came up with the idea of using * such a mathematical system to generate more random (yet non-repeating) * ids to solve the resolver/named problem. But Niels designed the * actual system based on the constraints. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * seed = random (bits - 1) bit * n = prime, g0 = generator to n, * j = random so that gcd(j,n-1) == 1 * g = g0^j mod n will be a generator again. * * X[0] = random seed. 
* X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator * with a = 7^(even random) mod m, * b = random with gcd(b,m) == 1 * m = constant and a maximal period of m-1. * * The transaction id is determined by: * id[n] = seed xor (g^X[n] mod n) * * Effectivly the id is restricted to the lower (bits - 1) bits, thus * yielding two different cycles by toggling the msb on and off. * This avoids reuse issues caused by reseeding. */ #include <sys/param.h> #include <sys/kernel.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/systm.h> #include <netinet/in.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> struct randomtab { const int ru_bits; /* resulting bits */ const long ru_out; /* Time after which will be reseeded */ const u_int32_t ru_max; /* Uniq cycle, avoid blackjack prediction */ const u_int32_t ru_gen; /* Starting generator */ const u_int32_t ru_n; /* ru_n: prime, ru_n - 1: product of pfacts[] */ const u_int32_t ru_agen; /* determine ru_a as ru_agen^(2*rand) */ const u_int32_t ru_m; /* ru_m = 2^x*3^y */ const u_int32_t pfacts[4]; /* factors of ru_n */ u_int32_t ru_counter; u_int32_t ru_msb; u_int32_t ru_x; u_int32_t ru_seed, ru_seed2; u_int32_t ru_a, ru_b; u_int32_t ru_g; long ru_reseed; }; static struct randomtab randomtab_20 = { 20, /* resulting bits */ 180, /* Time after which will be reseeded */ 200000, /* Uniq cycle, avoid blackjack prediction */ 2, /* Starting generator */ 524269, /* RU_N-1 = 2^2*3^2*14563 */ 7, /* determine ru_a as RU_AGEN^(2*rand) */ 279936, /* RU_M = 2^7*3^7 - don't change */ { 2, 3, 14563, 0 }, /* factors of ru_n */ }; u_int32_t ip6id_pmod(u_int32_t, u_int32_t, u_int32_t); void ip6id_initid(struct randomtab *); u_int32_t ip6id_randomid(struct randomtab *); /* * Do a fast modular exponation, returned value will be in the range * of 0 - (mod-1) */ u_int32_t ip6id_pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod) { u_int64_t s, t, u; s = 1; t = gen; u = expo; while (u) { if (u & 1) s = (s * t) % mod; u >>= 1; t = (t * t) % mod; } return (s); } /* * Initializes the seed and chooses a suitable generator. Also toggles * the msb flag. The msb flag is used to generate two distinct * cycles of random numbers and thus avoiding reuse of ids. * * This function is called from id_randomid() when needed, an * application does not have to worry about it. */ void ip6id_initid(struct randomtab *p) { u_int32_t j, i; int noprime = 1; p->ru_x = arc4random_uniform(p->ru_m); /* (bits - 1) bits of random seed */ p->ru_seed = arc4random() & (~0U >> (32 - p->ru_bits + 1)); p->ru_seed2 = arc4random() & (~0U >> (32 - p->ru_bits + 1)); /* Determine the LCG we use */ p->ru_b = (arc4random() & (~0U >> (32 - p->ru_bits))) | 1; p->ru_a = ip6id_pmod(p->ru_agen, (arc4random() & (~0U >> (32 - p->ru_bits))) & (~1U), p->ru_m); while (p->ru_b % 3 == 0) p->ru_b += 2; j = arc4random_uniform(p->ru_n); /* * Do a fast gcd(j, RU_N - 1), so we can find a j with * gcd(j, RU_N - 1) == 1, giving a new generator for * RU_GEN^j mod RU_N */ while (noprime) { for (i = 0; p->pfacts[i] > 0; i++) if (j % p->pfacts[i] == 0) break; if (p->pfacts[i] == 0) noprime = 0; else j = (j + 1) % p->ru_n; } p->ru_g = ip6id_pmod(p->ru_gen, j, p->ru_n); p->ru_counter = 0; p->ru_reseed = getuptime() + p->ru_out; p->ru_msb = p->ru_msb ? 
0 : (1U << (p->ru_bits - 1)); } u_int32_t ip6id_randomid(struct randomtab *p) { int i, n; if (p->ru_counter >= p->ru_max || getuptime() > p->ru_reseed) ip6id_initid(p); /* Skip a random number of ids */ n = arc4random() & 0x3; if (p->ru_counter + n >= p->ru_max) ip6id_initid(p); for (i = 0; i <= n; i++) { /* Linear Congruential Generator */ p->ru_x = (u_int32_t)((u_int64_t)p->ru_a * p->ru_x + p->ru_b) % p->ru_m; } p->ru_counter += i; return (p->ru_seed ^ ip6id_pmod(p->ru_g, p->ru_seed2 + p->ru_x, p->ru_n)) | p->ru_msb; } u_int32_t ip6_randomflowlabel(void) { return ip6id_randomid(&randomtab_20) & 0xfffff; }
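For reference, the id scheme described in the comment block above can be reproduced stand-alone: an LCG steps the hidden state X, and the visible id is seed ^ (g^X mod n), so consecutive ids do not expose consecutive LCG states. The sketch below is illustrative only: the constants mirror randomtab_20, but a, b, g, the seed and the starting X are fixed demo values rather than the arc4random()-derived ones the kernel uses, and ru_seed2 and the msb toggle are omitted for brevity.

/*
 * Stand-alone sketch of the transaction-id scheme above.
 * Demo parameters only; the kernel derives a, b, g and the seeds
 * randomly and reseeds after ru_out seconds or ru_max ids.
 */
#include <stdint.h>
#include <stdio.h>

#define RU_N	524269		/* prime, RU_N - 1 = 2^2 * 3^2 * 14563 */
#define RU_M	279936		/* 2^7 * 3^7, LCG modulus */

/* Fast modular exponentiation, as in ip6id_pmod(). */
static uint32_t
pmod(uint32_t gen, uint32_t expo, uint32_t mod)
{
	uint64_t s = 1, t = gen, u = expo;

	while (u) {
		if (u & 1)
			s = (s * t) % mod;
		u >>= 1;
		t = (t * t) % mod;
	}
	return (uint32_t)s;
}

int
main(void)
{
	uint32_t a = pmod(7, 2 * 1234, RU_M);	/* a = 7^(even) mod m */
	uint32_t b = 12347;			/* odd, not divisible by 3 */
	uint32_t g = pmod(2, 5, RU_N);		/* g = g0^j mod n, gcd(j, n-1) == 1 */
	uint32_t seed = 0x4d2c1, x = 42;
	int i;

	for (i = 0; i < 4; i++) {
		/* Linear Congruential Generator step */
		x = (uint32_t)(((uint64_t)a * x + b) % RU_M);
		printf("id[%d] = 0x%05x\n", i,
		    (unsigned)(seed ^ pmod(g, x, RU_N)));
	}
	return 0;
}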
484 4 187 4 302 464 10 250 5 469 260 284 160 64 1 58 4 57 162 7 112 3 151 4 105 55 143 4 149 1569 1 170 4 157 2848 8 1708 3 2828 13 2601 208 64 2801 1 12 2824 1339 8 1054 1 1116 232 1126 146 2 80 6 8 9 6 1304 1081 289 1331 70 46 2 18 5 3 21 66 1 47 18 64 61 59 51 5 5 5 56 43 26 10 6 5 2 15 2 44 43 44 39 5 5 5 27 45 55 53 53 50 7 39 36 42 7 34 84 23 82 7 4982 4987 4998 4955 2630 1 1 1752 1982 28 24 3 101 109 120 2 1 115 118 1 109 7 3 114 114 114 107 75 2 41 25 22 42 105 5 6 105 107 5 106 7 /* $OpenBSD: sys_generic.c,v 1.135 2021/01/08 09:29:04 visa Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* * Copyright (c) 1996 Theo de Raadt * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/filedesc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/vnode.h> #include <sys/file.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/socketvar.h> #include <sys/signalvar.h> #include <sys/uio.h> #include <sys/kernel.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/malloc.h> #include <sys/poll.h> #ifdef KTRACE #include <sys/ktrace.h> #endif #include <sys/sched.h> #include <sys/pledge.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <uvm/uvm_extern.h> int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); void pollscan(struct proc *, struct pollfd *, u_int, register_t *); int pollout(struct pollfd *, struct pollfd *, u_int); int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, struct timespec *, const sigset_t *, register_t *); int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *, const sigset_t *, register_t *); void doselwakeup(struct selinfo *); int iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, unsigned int iovcnt, size_t *residp) { #ifdef KTRACE struct proc *p = curproc; #endif struct iovec *iov; int error, i; size_t resid = 0; if (iovcnt > UIO_SMALLIOV) { if (iovcnt > IOV_MAX) return (EINVAL); iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); } else if (iovcnt > 0) { iov = aiov; } else { return (EINVAL); } *iovp = iov; if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktriovec(p, iov, iovcnt); #endif for (i = 0; i < iovcnt; i++) { resid += iov->iov_len; /* * Writes return ssize_t because -1 is returned on error. * Therefore we must restrict the length to SSIZE_MAX to * avoid garbage return values. Note that the addition is * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. */ if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) return (EINVAL); iov++; } if (residp != NULL) *residp = resid; return (0); } void iovec_free(struct iovec *iov, unsigned int iovcnt) { if (iovcnt > UIO_SMALLIOV) free(iov, M_IOV, iovcnt * sizeof(*iov)); } /* * Read system call. */ int sys_read(struct proc *p, void *v, register_t *retval) { struct sys_read_args /* { syscallarg(int) fd; syscallarg(void *) buf; syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; struct uio auio; iov.iov_base = SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); if (iov.iov_len > SSIZE_MAX) return (EINVAL); auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_resid = iov.iov_len; return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); } /* * Scatter read system call. 
*/ int sys_readv(struct proc *p, void *v, register_t *retval) { struct sys_readv_args /* { syscallarg(int) fd; syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; struct iovec aiov[UIO_SMALLIOV], *iov = NULL; int error, iovcnt = SCARG(uap, iovcnt); struct uio auio; size_t resid; error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); if (error) goto done; auio.uio_iov = iov; auio.uio_iovcnt = iovcnt; auio.uio_resid = resid; error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); done: iovec_free(iov, iovcnt); return (error); } int dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); iovlen = uio->uio_iovcnt * sizeof(struct iovec); if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) return (EBADF); /* Checks for positioned read. */ if (flags & FO_POSITION) { struct vnode *vp = fp->f_data; if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || (vp->v_flag & VISTTY)) { error = ESPIPE; goto done; } if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } } uio->uio_rw = UIO_READ; uio->uio_segflg = UIO_USERSPACE; uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); memcpy(ktriov, uio->uio_iov, iovlen); } #endif cnt = uio->uio_resid; error = (*fp->f_ops->fo_read)(fp, uio, flags); if (error) { if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_rxfer++; fp->f_rbytes += cnt; mtx_leave(&fp->f_mtx); #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p, fd, UIO_READ, ktriov, cnt); free(ktriov, M_TEMP, iovlen); } #endif *retval = cnt; done: FRELE(fp, p); return (error); } /* * Write system call */ int sys_write(struct proc *p, void *v, register_t *retval) { struct sys_write_args /* { syscallarg(int) fd; syscallarg(const void *) buf; syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; struct uio auio; iov.iov_base = (void *)SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); if (iov.iov_len > SSIZE_MAX) return (EINVAL); auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_resid = iov.iov_len; return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); } /* * Gather write system call */ int sys_writev(struct proc *p, void *v, register_t *retval) { struct sys_writev_args /* { syscallarg(int) fd; syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; struct iovec aiov[UIO_SMALLIOV], *iov = NULL; int error, iovcnt = SCARG(uap, iovcnt); struct uio auio; size_t resid; error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); if (error) goto done; auio.uio_iov = iov; auio.uio_iovcnt = iovcnt; auio.uio_resid = resid; error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); done: iovec_free(iov, iovcnt); return (error); } int dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); iovlen = uio->uio_iovcnt * sizeof(struct iovec); if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) return (EBADF); /* Checks for positioned write. 
*/ if (flags & FO_POSITION) { struct vnode *vp = fp->f_data; if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || (vp->v_flag & VISTTY)) { error = ESPIPE; goto done; } if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } } uio->uio_rw = UIO_WRITE; uio->uio_segflg = UIO_USERSPACE; uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); memcpy(ktriov, uio->uio_iov, iovlen); } #endif cnt = uio->uio_resid; error = (*fp->f_ops->fo_write)(fp, uio, flags); if (error) { if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) { KERNEL_LOCK(); ptsignal(p, SIGPIPE, STHREAD); KERNEL_UNLOCK(); } } cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_wxfer++; fp->f_wbytes += cnt; mtx_leave(&fp->f_mtx); #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); free(ktriov, M_TEMP, iovlen); } #endif *retval = cnt; done: FRELE(fp, p); return (error); } /* * Ioctl system call */ int sys_ioctl(struct proc *p, void *v, register_t *retval) { struct sys_ioctl_args /* { syscallarg(int) fd; syscallarg(u_long) com; syscallarg(void *) data; } */ *uap = v; struct file *fp; struct filedesc *fdp = p->p_fd; u_long com = SCARG(uap, com); int error = 0; u_int size = 0; caddr_t data, memp = NULL; int tmp; #define STK_PARAMS 128 long long stkbuf[STK_PARAMS / sizeof(long long)]; if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) return (EBADF); if (fp->f_type == DTYPE_SOCKET) { struct socket *so = fp->f_data; if (so->so_state & SS_DNS) { error = EINVAL; goto out; } } error = pledge_ioctl(p, com, fp); if (error) goto out; switch (com) { case FIONCLEX: case FIOCLEX: fdplock(fdp); if (com == FIONCLEX) fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; else fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; fdpunlock(fdp); goto out; } /* * Interpret high order word to find amount of data to be * copied to/from the user's address space. */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) { error = ENOTTY; goto out; } if (size > sizeof (stkbuf)) { memp = malloc(size, M_IOCTLOPS, M_WAITOK); data = memp; } else data = (caddr_t)stkbuf; if (com&IOC_IN) { if (size) { error = copyin(SCARG(uap, data), data, size); if (error) { goto out; } } else *(caddr_t *)data = SCARG(uap, data); } else if ((com&IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. */ memset(data, 0, size); else if (com&IOC_VOID) *(caddr_t *)data = SCARG(uap, data); switch (com) { case FIONBIO: if ((tmp = *(int *)data) != 0) atomic_setbits_int(&fp->f_flag, FNONBLOCK); else atomic_clearbits_int(&fp->f_flag, FNONBLOCK); error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); break; case FIOASYNC: if ((tmp = *(int *)data) != 0) atomic_setbits_int(&fp->f_flag, FASYNC); else atomic_clearbits_int(&fp->f_flag, FASYNC); error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); break; default: error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); break; } /* * Copy any data to user, size was * already set and checked above. */ if (error == 0 && (com&IOC_OUT) && size) error = copyout(data, SCARG(uap, data), size); out: FRELE(fp, p); free(memp, M_IOCTLOPS, size); return (error); } int selwait, nselcoll; /* * Select system call. 
*/ int sys_select(struct proc *p, void *v, register_t *retval) { struct sys_select_args /* { syscallarg(int) nd; syscallarg(fd_set *) in; syscallarg(fd_set *) ou; syscallarg(fd_set *) ex; syscallarg(struct timeval *) tv; } */ *uap = v; struct timespec ts, *tsp = NULL; int error; if (SCARG(uap, tv) != NULL) { struct timeval tv; if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimeval(p, &tv); #endif if (tv.tv_sec < 0 || !timerisvalid(&tv)) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tv, &ts); tsp = &ts; } return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), SCARG(uap, ex), tsp, NULL, retval)); } int sys_pselect(struct proc *p, void *v, register_t *retval) { struct sys_pselect_args /* { syscallarg(int) nd; syscallarg(fd_set *) in; syscallarg(fd_set *) ou; syscallarg(fd_set *) ex; syscallarg(const struct timespec *) ts; syscallarg(const sigset_t *) mask; } */ *uap = v; struct timespec ts, *tsp = NULL; sigset_t ss, *ssp = NULL; int error; if (SCARG(uap, ts) != NULL) { if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) return (EINVAL); tsp = &ts; } if (SCARG(uap, mask) != NULL) { if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) return (error); ssp = &ss; } return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), SCARG(uap, ex), tsp, ssp, retval)); } int dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timespec *timeout, const sigset_t *sigmask, register_t *retval) { fd_mask bits[6]; fd_set *pibits[3], *pobits[3]; struct timespec elapsed, start, stop; uint64_t nsecs; int s, ncoll, error = 0; u_int ni; if (nd < 0) return (EINVAL); if (nd > p->p_fd->fd_nfiles) { /* forgiving; slightly wrong */ nd = p->p_fd->fd_nfiles; } ni = howmany(nd, NFDBITS) * sizeof(fd_mask); if (ni > sizeof(bits[0])) { caddr_t mbits; mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); pibits[0] = (fd_set *)&mbits[ni * 0]; pibits[1] = (fd_set *)&mbits[ni * 1]; pibits[2] = (fd_set *)&mbits[ni * 2]; pobits[0] = (fd_set *)&mbits[ni * 3]; pobits[1] = (fd_set *)&mbits[ni * 4]; pobits[2] = (fd_set *)&mbits[ni * 5]; } else { memset(bits, 0, sizeof(bits)); pibits[0] = (fd_set *)&bits[0]; pibits[1] = (fd_set *)&bits[1]; pibits[2] = (fd_set *)&bits[2]; pobits[0] = (fd_set *)&bits[3]; pobits[1] = (fd_set *)&bits[4]; pobits[2] = (fd_set *)&bits[5]; } #define getbits(name, x) \ if (name && (error = copyin(name, pibits[x], ni))) \ goto done; getbits(in, 0); getbits(ou, 1); getbits(ex, 2); #undef getbits #ifdef KTRACE if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { if (in) ktrfdset(p, pibits[0], ni); if (ou) ktrfdset(p, pibits[1], ni); if (ex) ktrfdset(p, pibits[2], ni); } #endif if (sigmask) dosigsuspend(p, *sigmask &~ sigcantmask); retry: ncoll = nselcoll; atomic_setbits_int(&p->p_flag, P_SELECT); error = selscan(p, pibits[0], pobits[0], nd, ni, retval); if (error || *retval) goto done; if (timeout == NULL || timespecisset(timeout)) { if (timeout != NULL) { getnanouptime(&start); nsecs = MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP); } else nsecs = INFSLP; s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } atomic_clearbits_int(&p->p_flag, P_SELECT); error = tsleep_nsec(&selwait, PSOCK | PCATCH, "select", nsecs); splx(s); if (timeout != NULL) { getnanouptime(&stop); timespecsub(&stop, &start, &elapsed); timespecsub(timeout, &elapsed, 
timeout); if (timeout->tv_sec < 0) timespecclear(timeout); } if (error == 0 || error == EWOULDBLOCK) goto retry; } done: atomic_clearbits_int(&p->p_flag, P_SELECT); /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; #define putbits(name, x) \ if (name && (error2 = copyout(pobits[x], name, ni))) \ error = error2; if (error == 0) { int error2; putbits(in, 0); putbits(ou, 1); putbits(ex, 2); #undef putbits #ifdef KTRACE if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { if (in) ktrfdset(p, pobits[0], ni); if (ou) ktrfdset(p, pobits[1], ni); if (ex) ktrfdset(p, pobits[2], ni); } #endif } if (pibits[0] != (fd_set *)&bits[0]) free(pibits[0], M_TEMP, 6 * ni); return (error); } int selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, register_t *retval) { caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; struct filedesc *fdp = p->p_fd; int msk, i, j, fd; fd_mask bits; struct file *fp; int n = 0; static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; for (msk = 0; msk < 3; msk++) { fd_set *pibits = (fd_set *)&cibits[msk*ni]; fd_set *pobits = (fd_set *)&cobits[msk*ni]; for (i = 0; i < nfd; i += NFDBITS) { bits = pibits->fds_bits[i/NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); if ((fp = fd_getfile(fdp, fd)) == NULL) return (EBADF); if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { FD_SET(fd, pobits); n++; } FRELE(fp, p); } } } *retval = n; return (0); } int seltrue(dev_t dev, int events, struct proc *p) { return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } int selfalse(dev_t dev, int events, struct proc *p) { return (0); } /* * Record a select request. */ void selrecord(struct proc *selector, struct selinfo *sip) { struct proc *p; pid_t mytid; KERNEL_ASSERT_LOCKED(); mytid = selector->p_tid; if (sip->si_seltid == mytid) return; if (sip->si_seltid && (p = tfind(sip->si_seltid)) && p->p_wchan == (caddr_t)&selwait) sip->si_flags |= SI_COLL; else sip->si_seltid = mytid; } /* * Do a wakeup when a selectable event occurs. */ void selwakeup(struct selinfo *sip) { KERNEL_LOCK(); KNOTE(&sip->si_note, NOTE_SUBMIT); doselwakeup(sip); KERNEL_UNLOCK(); } void doselwakeup(struct selinfo *sip) { struct proc *p; KERNEL_ASSERT_LOCKED(); if (sip->si_seltid == 0) return; if (sip->si_flags & SI_COLL) { nselcoll++; sip->si_flags &= ~SI_COLL; wakeup(&selwait); } p = tfind(sip->si_seltid); sip->si_seltid = 0; if (p != NULL) { if (wakeup_proc(p, &selwait)) { /* nothing else to do */ } else if (p->p_flag & P_SELECT) atomic_clearbits_int(&p->p_flag, P_SELECT); } } void pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) { struct filedesc *fdp = p->p_fd; struct file *fp; u_int i; int n = 0; for (i = 0; i < nfd; i++, pl++) { /* Check the file descriptor. */ if (pl->fd < 0) { pl->revents = 0; continue; } if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { pl->revents = POLLNVAL; n++; continue; } pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); FRELE(fp, p); if (pl->revents != 0) n++; } *retval = n; } /* * Only copyout the revents field. */ int pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) { int error = 0; u_int i = 0; while (!error && i++ < nfds) { error = copyout(&pl->revents, &upl->revents, sizeof(upl->revents)); pl++; upl++; } return (error); } /* * We are using the same mechanism as select only we encode/decode args * differently. 
*/ int sys_poll(struct proc *p, void *v, register_t *retval) { struct sys_poll_args /* { syscallarg(struct pollfd *) fds; syscallarg(u_int) nfds; syscallarg(int) timeout; } */ *uap = v; struct timespec ts, *tsp = NULL; int msec = SCARG(uap, timeout); if (msec != INFTIM) { if (msec < 0) return (EINVAL); ts.tv_sec = msec / 1000; ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; tsp = &ts; } return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, retval)); } int sys_ppoll(struct proc *p, void *v, register_t *retval) { struct sys_ppoll_args /* { syscallarg(struct pollfd *) fds; syscallarg(u_int) nfds; syscallarg(const struct timespec *) ts; syscallarg(const sigset_t *) mask; } */ *uap = v; int error; struct timespec ts, *tsp = NULL; sigset_t ss, *ssp = NULL; if (SCARG(uap, ts) != NULL) { if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) return (EINVAL); tsp = &ts; } if (SCARG(uap, mask) != NULL) { if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) return (error); ssp = &ss; } return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, retval)); } int doppoll(struct proc *p, struct pollfd *fds, u_int nfds, struct timespec *timeout, const sigset_t *sigmask, register_t *retval) { size_t sz; struct pollfd pfds[4], *pl = pfds; struct timespec elapsed, start, stop; uint64_t nsecs; int ncoll, i, s, error; /* Standards say no more than MAX_OPEN; this is possibly better. */ if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles)) return (EINVAL); /* optimize for the default case, of a small nfds value */ if (nfds > nitems(pfds)) { pl = mallocarray(nfds, sizeof(*pl), M_TEMP, M_WAITOK | M_CANFAIL); if (pl == NULL) return (EINVAL); } sz = nfds * sizeof(*pl); if ((error = copyin(fds, pl, sz)) != 0) goto bad; for (i = 0; i < nfds; i++) { pl[i].events &= ~POLL_NOHUP; pl[i].revents = 0; } if (sigmask) dosigsuspend(p, *sigmask &~ sigcantmask); retry: ncoll = nselcoll; atomic_setbits_int(&p->p_flag, P_SELECT); pollscan(p, pl, nfds, retval); if (*retval) goto done; if (timeout == NULL || timespecisset(timeout)) { if (timeout != NULL) { getnanouptime(&start); nsecs = MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP); } else nsecs = INFSLP; s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } atomic_clearbits_int(&p->p_flag, P_SELECT); error = tsleep_nsec(&selwait, PSOCK | PCATCH, "poll", nsecs); splx(s); if (timeout != NULL) { getnanouptime(&stop); timespecsub(&stop, &start, &elapsed); timespecsub(timeout, &elapsed, timeout); if (timeout->tv_sec < 0) timespecclear(timeout); } if (error == 0 || error == EWOULDBLOCK) goto retry; } done: atomic_clearbits_int(&p->p_flag, P_SELECT); /* * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is * ignored (since the whole point is to see what would block). 
*/ switch (error) { case ERESTART: error = pollout(pl, fds, nfds); if (error == 0) error = EINTR; break; case EWOULDBLOCK: case 0: error = pollout(pl, fds, nfds); break; } #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrpollfd(p, pl, nfds); #endif /* KTRACE */ bad: if (pl != pfds) free(pl, M_TEMP, sz); return (error); } /* * utrace system call */ int sys_utrace(struct proc *curp, void *v, register_t *retval) { #ifdef KTRACE struct sys_utrace_args /* { syscallarg(const char *) label; syscallarg(const void *) addr; syscallarg(size_t) len; } */ *uap = v; return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len))); #else return (0); #endif }
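The SSIZE_MAX bound enforced in iovec_copyin() above can be shown in isolation. This is a user-space sketch of the same reasoning (each iov_len and the running total are capped at SSIZE_MAX, and since SSIZE_MAX * 2 < SIZE_MAX the size_t addition cannot wrap before the check fires); check_iov_resid() is a hypothetical helper, not a kernel interface.

/*
 * User-space sketch of the total-length check done in iovec_copyin():
 * reads and writes return ssize_t, so the combined length must stay
 * at or below SSIZE_MAX to keep the return value meaningful.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <sys/uio.h>

static int
check_iov_resid(const struct iovec *iov, int iovcnt, size_t *residp)
{
	size_t resid = 0;
	int i;

	for (i = 0; i < iovcnt; i++) {
		resid += iov[i].iov_len;
		/* The addition above cannot wrap before this check fires. */
		if (iov[i].iov_len > SSIZE_MAX || resid > SSIZE_MAX)
			return EINVAL;
	}
	*residp = resid;
	return 0;
}

int
main(void)
{
	char a[8], b[8];
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = sizeof(b) },
	};
	size_t resid;

	if (check_iov_resid(iov, 2, &resid) == 0)
		printf("total resid = %zu\n", resid);	/* prints 16 */
	return 0;
}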
/* * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Christian König */ #ifndef __AMDGPU_RING_H__ #define __AMDGPU_RING_H__ #include <drm/amdgpu_drm.h> #include <drm/gpu_scheduler.h> #include <drm/drm_print.h> /* max number of rings */ #define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ #define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) #define AMDGPU_FENCE_OWNER_VM ((void *)1ul) #define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX, AMDGPU_RING_TYPE_COMPUTE, AMDGPU_RING_TYPE_SDMA, AMDGPU_RING_TYPE_UVD, AMDGPU_RING_TYPE_VCE, AMDGPU_RING_TYPE_KIQ, AMDGPU_RING_TYPE_UVD_ENC, AMDGPU_RING_TYPE_VCN_DEC, AMDGPU_RING_TYPE_VCN_ENC, AMDGPU_RING_TYPE_VCN_JPEG }; struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; struct amdgpu_cs_parser; struct amdgpu_job; /* * Fences. */ struct amdgpu_fence_driver { uint64_t gpu_addr; volatile uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; struct timeout fallback_timer; unsigned num_fences_mask; spinlock_t lock; struct dma_fence **fences; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* * Rings. 
*/ /* provided by hw blocks that expose a ring buffer for commands */ struct amdgpu_ring_funcs { enum amdgpu_ring_type type; uint32_t align_mask; u32 nop; bool support_64bit_ptrs; bool no_user_fence; unsigned vmhub; unsigned extra_dw; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx); /* constants to calculate how many DW are needed for an emit */ unsigned emit_frame_size; unsigned emit_ib_size; /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size); /* testing functions */ int (*test_ring)(struct amdgpu_ring *ring); int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); unsigned (*init_cond_exec)(struct amdgpu_ring *ring); void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); /* note usage for clock and power gating */ void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); }; struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct drm_gpu_scheduler sched; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; unsigned rptr_offs; u64 wptr; u64 wptr_old; unsigned ring_size; unsigned max_dw; int count_dw; uint64_t gpu_addr; uint64_t ptr_mask; uint32_t buf_mask; u32 idx; u32 me; u32 pipe; u32 queue; struct amdgpu_bo *mqd_obj; uint64_t mqd_gpu_addr; void *mqd_ptr; uint64_t eop_gpu_addr; u32 doorbell_index; bool use_doorbell; bool use_pollmem; unsigned wptr_offs; unsigned fence_offs; uint64_t current_ctx; char name[16]; u32 trail_seq; unsigned trail_fence_offs; u64 trail_fence_gpu_addr; volatile u32 *trail_fence_cpu_addr; unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; struct dma_fence *vmid_wait; bool has_compute_vm_bug; atomic_t 
num_jobs[DRM_SCHED_PRIORITY_MAX]; struct rwlock priority_mutex; /* protected by priority_mutex */ int priority; bool has_high_prio; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif }; #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) #define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags))) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, uint32_t reg0, uint32_t val0, uint32_t reg1, uint32_t val1); bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, struct dma_fence *fence); static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, bool cond_exec) { *ring->cond_exe_cpu_addr = cond_exec; } static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; while (i <= ring->buf_mask) ring->ring[i++] = ring->funcs->nop; } static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) { if (ring->count_dw <= 0) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); ring->ring[ring->wptr++ & ring->buf_mask] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; } static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) { unsigned occupied, chunk1, chunk2; void *dst; if 
(unlikely(ring->count_dw < count_dw)) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); occupied = ring->wptr & ring->buf_mask; dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; chunk2 = count_dw - chunk1; chunk1 <<= 2; chunk2 <<= 2; if (chunk1) memcpy(dst, src, chunk1); if (chunk2) { src += chunk1; dst = (void *)ring->ring; memcpy(dst, src, chunk2); } ring->wptr += count_dw; ring->wptr &= ring->ptr_mask; ring->count_dw -= count_dw; } int amdgpu_ring_test_helper(struct amdgpu_ring *ring); int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring); #endif
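The split copy in amdgpu_ring_write_multiple() is easier to see against a plain array. The sketch below is a simplified stand-in: ring[], wptr, RING_DW and ring_write_multiple() are demo names, the buffer is ordinary host memory rather than a GPU-visible ring object, and the count_dw accounting and ptr_mask handling are left out.

/*
 * Self-contained sketch of the two-chunk copy used by
 * amdgpu_ring_write_multiple(): when a multi-dword write would run
 * past the end of the ring buffer, it is split at the wrap point.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_DW		8		/* ring size in dwords, power of two */
#define BUF_MASK	(RING_DW - 1)

static uint32_t ring[RING_DW];
static uint64_t wptr;

static void
ring_write_multiple(const uint32_t *src, unsigned count_dw)
{
	unsigned occupied = wptr & BUF_MASK;
	unsigned chunk1 = RING_DW - occupied;	/* dwords before the wrap */
	unsigned chunk2;

	if (chunk1 > count_dw)
		chunk1 = count_dw;
	chunk2 = count_dw - chunk1;

	/* First chunk up to the end of the buffer, remainder from index 0. */
	memcpy(&ring[occupied], src, chunk1 * sizeof(uint32_t));
	if (chunk2)
		memcpy(ring, src + chunk1, chunk2 * sizeof(uint32_t));

	wptr = (wptr + count_dw) & BUF_MASK;
}

int
main(void)
{
	uint32_t pkt[5] = { 1, 2, 3, 4, 5 };
	unsigned i;

	wptr = 6;			/* two dwords left before the wrap */
	ring_write_multiple(pkt, 5);	/* 1,2 land at the end; 3,4,5 wrap */
	for (i = 0; i < RING_DW; i++)
		printf("ring[%u] = %u\n", i, ring[i]);
	return 0;
}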
206 192 14 3 12 11 2 124 48 5 32 7 1 40 40 36 25 36 48 48 4 2 1 12 6 12 2 2 2 41 17 25 17 25 36 3 2 16 10 6 10 1 2 2 187 187 271 272 50 3 4 8 20 4 13 8 29 254 255 178 8 /* $OpenBSD: in6_src.c,v 1.85 2021/03/10 10:21:49 jsg Exp $ */ /* $KAME: in6_src.c,v 1.36 2001/02/06 04:08:17 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/ioctl.h> #include <sys/errno.h> #include <sys/time.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #include <netinet6/nd6.h> int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, u_int); /* * Return an IPv6 address, which is the most appropriate for a given * destination and pcb. We need the additional opt parameter because * the values set at pcb level can be overridden via cmsg. */ int in6_pcbselsrc(struct in6_addr **in6src, struct sockaddr_in6 *dstsock, struct inpcb *inp, struct ip6_pktopts *opts) { struct ip6_moptions *mopts = inp->inp_moptions6; struct route_in6 *ro = &inp->inp_route6; struct in6_addr *laddr = &inp->inp_laddr6; u_int rtableid = inp->inp_rtableid; struct ifnet *ifp = NULL; struct sockaddr *ip6_source = NULL; struct in6_addr *dst; struct in6_ifaddr *ia6 = NULL; struct in6_pktinfo *pi = NULL; int error; dst = &dstsock->sin6_addr; /* * If the source address is explicitly specified by the caller, * check if the requested source address is indeed a unicast address * assigned to the node, and can be used as the packet's source * address. If everything is okay, use the address as source. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { struct sockaddr_in6 sa6; /* get the outgoing interface */ error = in6_selectif(dstsock, opts, mopts, ro, &ifp, rtableid); if (error) return (error); bzero(&sa6, sizeof(sa6)); sa6.sin6_family = AF_INET6; sa6.sin6_len = sizeof(sa6); sa6.sin6_addr = pi->ipi6_addr; if (ifp && IN6_IS_SCOPE_EMBED(&sa6.sin6_addr)) sa6.sin6_addr.s6_addr16[1] = htons(ifp->if_index); if_put(ifp); /* put reference from in6_selectif */ ia6 = ifatoia6(ifa_ifwithaddr(sin6tosa(&sa6), rtableid)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED))) return (EADDRNOTAVAIL); pi->ipi6_addr = sa6.sin6_addr; /* XXX: this overrides pi */ *in6src = &pi->ipi6_addr; return (0); } /* * If the source address is not specified but the socket(if any) * is already bound, use the bound address. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) { *in6src = laddr; return (0); } /* * If the caller doesn't specify the source address but * the outgoing interface, use an address associated with * the interface. */ if (pi && pi->ipi6_ifindex) { ifp = if_get(pi->ipi6_ifindex); if (ifp == NULL) return (ENXIO); /* XXX: better error? */ ia6 = in6_ifawithscope(ifp, dst, rtableid); if_put(ifp); if (ia6 == NULL) return (EADDRNOTAVAIL); *in6src = &ia6->ia_addr.sin6_addr; return (0); } error = in6_selectsrc(in6src, dstsock, mopts, rtableid); if (error != EADDRNOTAVAIL) return (error); /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. 
*/ if (!rtisvalid(ro->ro_rt) || (ro->ro_tableid != rtableid) || !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) { rtfree(ro->ro_rt); ro->ro_rt = NULL; } if (ro->ro_rt == NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); ro->ro_tableid = rtableid; sa6 = &ro->ro_dst; sa6->sin6_family = AF_INET6; sa6->sin6_len = sizeof(struct sockaddr_in6); sa6->sin6_addr = *dst; sa6->sin6_scope_id = dstsock->sin6_scope_id; ro->ro_rt = rtalloc(sin6tosa(&ro->ro_dst), RT_RESOLVE, ro->ro_tableid); } /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using * the address. But we don't know why it does so. * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. */ if (ro->ro_rt) { ifp = if_get(ro->ro_rt->rt_ifidx); if (ifp != NULL) { ia6 = in6_ifawithscope(ifp, dst, rtableid); if_put(ifp); } if (ia6 == NULL) /* xxx scope error ?*/ ia6 = ifatoia6(ro->ro_rt->rt_ifa); } /* * Use preferred source address if : * - destination is not onlink * - preferred source address is set * - output interface is UP */ if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) && !(ro->ro_rt->rt_flags & RTF_HOST)) { ip6_source = rtable_getsource(rtableid, AF_INET6); if (ip6_source != NULL) { struct ifaddr *ifa; if ((ifa = ifa_ifwithaddr(ip6_source, rtableid)) != NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) { *in6src = &satosin6(ip6_source)->sin6_addr; return (0); } } } if (ia6 == NULL) return (EHOSTUNREACH); /* no route */ *in6src = &ia6->ia_addr.sin6_addr; return (0); } /* * Return an IPv6 address, which is the most appropriate for a given * destination and multicast options. * If necessary, this function lookups the routing table and returns * an entry to the caller for later use. */ int in6_selectsrc(struct in6_addr **in6src, struct sockaddr_in6 *dstsock, struct ip6_moptions *mopts, unsigned int rtableid) { struct ifnet *ifp = NULL; struct in6_addr *dst; struct in6_ifaddr *ia6 = NULL; dst = &dstsock->sin6_addr; /* * If the destination address is a link-local unicast address or * a link/interface-local multicast address, and if the outgoing * interface is specified by the sin6_scope_id filed, use an address * associated with the interface. * XXX: We're now trying to define more specific semantics of * sin6_scope_id field, so this part will be rewritten in * the near future. */ if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MC_LINKLOCAL(dst) || IN6_IS_ADDR_MC_INTFACELOCAL(dst)) && dstsock->sin6_scope_id) { ifp = if_get(dstsock->sin6_scope_id); if (ifp == NULL) return (ENXIO); /* XXX: better error? */ ia6 = in6_ifawithscope(ifp, dst, rtableid); if_put(ifp); if (ia6 == NULL) return (EADDRNOTAVAIL); *in6src = &ia6->ia_addr.sin6_addr; return (0); } /* * If the destination address is a multicast address and * the outgoing interface for the address is specified * by the caller, use an address associated with the interface. * Even if the outgoing interface is not specified, we also * choose a loopback interface as the outgoing interface. */ if (IN6_IS_ADDR_MULTICAST(dst)) { ifp = mopts ? 
if_get(mopts->im6o_ifidx) : NULL; if (!ifp && dstsock->sin6_scope_id) ifp = if_get(htons(dstsock->sin6_scope_id)); if (ifp) { ia6 = in6_ifawithscope(ifp, dst, rtableid); if_put(ifp); if (ia6 == NULL) return (EADDRNOTAVAIL); *in6src = &ia6->ia_addr.sin6_addr; return (0); } } return (EADDRNOTAVAIL); } struct rtentry * in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct route_in6 *ro, unsigned int rtableid) { struct in6_addr *dst; dst = &dstsock->sin6_addr; /* * Use a cached route if it exists and is valid, else try to allocate * a new one. */ if (ro) { if (rtisvalid(ro->ro_rt)) KASSERT(sin6tosa(&ro->ro_dst)->sa_family == AF_INET6); if (!rtisvalid(ro->ro_rt) || !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) { rtfree(ro->ro_rt); ro->ro_rt = NULL; } if (ro->ro_rt == NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); ro->ro_tableid = rtableid; sa6 = &ro->ro_dst; *sa6 = *dstsock; sa6->sin6_scope_id = 0; ro->ro_tableid = rtableid; ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst), NULL, ro->ro_tableid); } /* * Check if the outgoing interface conflicts with * the interface specified by ipi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) */ if (opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { if (ro->ro_rt != NULL && !ISSET(ro->ro_rt->rt_flags, RTF_LOCAL) && ro->ro_rt->rt_ifidx != opts->ip6po_pktinfo->ipi6_ifindex) { return (NULL); } } return (ro->ro_rt); } return (NULL); } int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, u_int rtableid) { struct rtentry *rt = NULL; struct in6_pktinfo *pi = NULL; /* If the caller specify the outgoing interface explicitly, use it. */ if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { *retifp = if_get(pi->ipi6_ifindex); if (*retifp != NULL) return (0); } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. */ if (IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) && mopts != NULL && (*retifp = if_get(mopts->im6o_ifidx)) != NULL) return (0); rt = in6_selectroute(dstsock, opts, ro, rtableid); if (rt == NULL) return (EHOSTUNREACH); /* * do not use a rejected or black hole route. * XXX: this check should be done in the L2 output routine. * However, if we skipped this check here, we'd see the following * scenario: * - install a rejected route for a scoped address prefix * (like fe80::/10) * - send a packet to a destination that matches the scoped prefix, * with ambiguity about the scope zone. * - pick the outgoing interface from the route, and disambiguate the * scope zone with the interface. * - ip6_output() would try to get another route with the "new" * destination, which may be valid. * - we'd see no error on output. * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); if (rt != NULL) *retifp = if_get(rt->rt_ifidx); return (0); } int in6_selecthlim(struct inpcb *in6p) { if (in6p && in6p->inp_hops >= 0) return (in6p->inp_hops); return (ip6_defhlim); } /* * generate kernel-internal form (scopeid embedded into s6_addr16[1]). 
* If the address scope of is link-local, embed the interface index in the * address. The routine determines our precedence * between advanced API scope/interface specification and basic API * specification. * * this function should be nuked in the future, when we get rid of * embedded scopeid thing. * * XXX actually, it is over-specification to return ifp against sin6_scope_id. * there can be multiple interfaces that belong to a particular scope zone * (in specification, we have 1:N mapping between a scope zone and interfaces). * we may want to change the function to return something other than ifp. */ int in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6, struct inpcb *in6p) { struct ifnet *ifp = NULL; u_int32_t scopeid; *in6 = sin6->sin6_addr; scopeid = sin6->sin6_scope_id; /* * don't try to read sin6->sin6_addr beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ if (IN6_IS_SCOPE_EMBED(in6)) { struct in6_pktinfo *pi; /* * KAME assumption: link id == interface id */ if (in6p && in6p->inp_outputopts6 && (pi = in6p->inp_outputopts6->ip6po_pktinfo) && pi->ipi6_ifindex) { ifp = if_get(pi->ipi6_ifindex); if (ifp == NULL) return ENXIO; /* XXX EINVAL? */ in6->s6_addr16[1] = htons(pi->ipi6_ifindex); } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && in6p->inp_moptions6 && (ifp = if_get(in6p->inp_moptions6->im6o_ifidx))) { in6->s6_addr16[1] = htons(ifp->if_index); } else if (scopeid) { ifp = if_get(scopeid); if (ifp == NULL) return ENXIO; /* XXX EINVAL? */ /*XXX assignment to 16bit from 32bit variable */ in6->s6_addr16[1] = htons(scopeid & 0xffff); } if_put(ifp); } return 0; } /* * generate standard sockaddr_in6 from embedded form. * touches sin6_addr and sin6_scope_id only. * * this function should be nuked in the future, when we get rid of * embedded scopeid thing. */ void in6_recoverscope(struct sockaddr_in6 *sin6, const struct in6_addr *in6) { u_int32_t scopeid; sin6->sin6_addr = *in6; /* * don't try to read *in6 beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ sin6->sin6_scope_id = 0; if (IN6_IS_SCOPE_EMBED(in6)) { /* * KAME assumption: link id == interface id */ scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]); if (scopeid) { sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = scopeid; } } } /* * just clear the embedded scope identifier. */ void in6_clearscope(struct in6_addr *addr) { if (IN6_IS_SCOPE_EMBED(addr)) addr->s6_addr16[1] = 0; }
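The scope-handling helpers above store the outgoing interface index in the second 16-bit word of a link-local address (the KAME "embedded scope id" form) and strip it again before the address is handed back out. Below is a minimal userland sketch of that round trip; the names embed_scope, recover_scope and the fe80::1 / interface-index-3 example are illustrative assumptions, not part of the sources above.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/* embed an interface index into a link-local address, KAME style */
static void
embed_scope(struct in6_addr *in6, uint16_t ifindex)
{
	uint16_t embedded = htons(ifindex);

	if (IN6_IS_ADDR_LINKLOCAL(in6) || IN6_IS_ADDR_MC_LINKLOCAL(in6))
		memcpy(&in6->s6_addr[2], &embedded, sizeof(embedded));
}

/* recover the interface index and clear the embedded form again */
static uint16_t
recover_scope(struct in6_addr *in6)
{
	uint16_t embedded = 0;

	if (IN6_IS_ADDR_LINKLOCAL(in6) || IN6_IS_ADDR_MC_LINKLOCAL(in6)) {
		memcpy(&embedded, &in6->s6_addr[2], sizeof(embedded));
		memset(&in6->s6_addr[2], 0, sizeof(embedded));
	}
	return ntohs(embedded);
}

int
main(void)
{
	struct in6_addr a;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "fe80::1", &a);
	embed_scope(&a, 3);
	inet_ntop(AF_INET6, &a, buf, sizeof(buf));
	printf("embedded form:   %s\n", buf);		/* fe80:3::1 */
	printf("recovered scope: %u\n", recover_scope(&a));
	inet_ntop(AF_INET6, &a, buf, sizeof(buf));
	printf("cleared again:   %s\n", buf);		/* fe80::1 */
	return 0;
}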
15 /* $OpenBSD: malo.c,v 1.121 2020/07/10 13:26:37 patrick Exp $ */ /* * Copyright (c) 2006 Claudio Jeker <claudio@openbsd.org> * Copyright (c) 2006 Marcus Glocker <mglocker@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "bpfilter.h" #include <sys/param.h> #include <sys/device.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/systm.h> #include <sys/endian.h> #include <machine/bus.h> #include <machine/intr.h> #include <net/if.h> #include <net/if_media.h> #if NBPFILTER > 0 #include <net/bpf.h> #endif #include <netinet/in.h> #include <netinet/if_ether.h> #include <net80211/ieee80211_var.h> #include <net80211/ieee80211_radiotap.h> #include <dev/ic/malo.h> #ifdef MALO_DEBUG int malo_d = 1; #define DPRINTF(l, x...) do { if ((l) <= malo_d) printf(x); } while (0) #else #define DPRINTF(l, x...) #endif /* internal structures and defines */ struct malo_node { struct ieee80211_node ni; }; struct malo_rx_data { bus_dmamap_t map; struct mbuf *m; }; struct malo_tx_data { bus_dmamap_t map; struct mbuf *m; uint32_t softstat; struct ieee80211_node *ni; }; /* RX descriptor used by HW */ struct malo_rx_desc { uint8_t rxctrl; uint8_t rssi; uint8_t status; uint8_t channel; uint16_t len; uint8_t reserved1; /* actually unused */ uint8_t datarate; uint32_t physdata; /* DMA address of data */ uint32_t physnext; /* DMA address of next control block */ uint16_t qosctrl; uint16_t reserved2; } __packed; /* TX descriptor used by HW */ struct malo_tx_desc { uint32_t status; uint8_t datarate; uint8_t txpriority; uint16_t qosctrl; uint32_t physdata; /* DMA address of data */ uint16_t len; uint8_t destaddr[6]; uint32_t physnext; /* DMA address of next control block */ uint32_t reserved1; /* SAP packet info ??? 
*/ uint32_t reserved2; } __packed; #define MALO_RX_RING_COUNT 256 #define MALO_TX_RING_COUNT 256 #define MALO_MAX_SCATTER 8 /* XXX unknown, wild guess */ #define MALO_CMD_TIMEOUT 50 /* MALO_CMD_TIMEOUT * 100us */ /* * Firmware commands */ #define MALO_CMD_GET_HW_SPEC 0x0003 #define MALO_CMD_SET_RADIO 0x001c #define MALO_CMD_SET_AID 0x010d #define MALO_CMD_SET_TXPOWER 0x001e #define MALO_CMD_SET_ANTENNA 0x0020 #define MALO_CMD_SET_PRESCAN 0x0107 #define MALO_CMD_SET_POSTSCAN 0x0108 #define MALO_CMD_SET_RATE 0x0110 #define MALO_CMD_SET_CHANNEL 0x010a #define MALO_CMD_SET_RTS 0x0113 #define MALO_CMD_SET_SLOT 0x0114 #define MALO_CMD_RESPONSE 0x8000 #define MALO_CMD_RESULT_OK 0x0000 /* everything is fine */ #define MALO_CMD_RESULT_ERROR 0x0001 /* general error */ #define MALO_CMD_RESULT_NOSUPPORT 0x0002 /* command not valid */ #define MALO_CMD_RESULT_PENDING 0x0003 /* will be processed */ #define MALO_CMD_RESULT_BUSY 0x0004 /* command ignored */ #define MALO_CMD_RESULT_PARTIALDATA 0x0005 /* buffer too small */ struct malo_cmdheader { uint16_t cmd; uint16_t size; /* size of the command, incl. header */ uint16_t seqnum; /* seems not to matter that much */ uint16_t result; /* set to 0 on request */ /* following the data payload, up to 256 bytes */ }; struct malo_hw_spec { uint16_t HwVersion; uint16_t NumOfWCB; uint16_t NumOfMCastAdr; uint8_t PermanentAddress[6]; uint16_t RegionCode; uint16_t NumberOfAntenna; uint32_t FWReleaseNumber; uint32_t WcbBase0; uint32_t RxPdWrPtr; uint32_t RxPdRdPtr; uint32_t CookiePtr; uint32_t WcbBase1; uint32_t WcbBase2; uint32_t WcbBase3; } __packed; struct malo_cmd_radio { uint16_t action; uint16_t preamble_mode; uint16_t enable; } __packed; struct malo_cmd_aid { uint16_t associd; uint8_t macaddr[6]; uint32_t gprotection; uint8_t aprates[14]; } __packed; struct malo_cmd_txpower { uint16_t action; uint16_t supportpowerlvl; uint16_t currentpowerlvl; uint16_t reserved; uint16_t powerlvllist[8]; } __packed; struct malo_cmd_antenna { uint16_t action; uint16_t mode; } __packed; struct malo_cmd_postscan { uint32_t isibss; uint8_t bssid[6]; } __packed; struct malo_cmd_channel { uint16_t action; uint8_t channel; } __packed; struct malo_cmd_rate { uint8_t dataratetype; uint8_t rateindex; uint8_t aprates[14]; } __packed; struct malo_cmd_rts { uint16_t action; uint32_t threshold; } __packed; struct malo_cmd_slot { uint16_t action; uint8_t slot; } __packed; #define malo_mem_write4(sc, off, x) \ bus_space_write_4((sc)->sc_mem1_bt, (sc)->sc_mem1_bh, (off), (x)) #define malo_mem_write2(sc, off, x) \ bus_space_write_2((sc)->sc_mem1_bt, (sc)->sc_mem1_bh, (off), (x)) #define malo_mem_write1(sc, off, x) \ bus_space_write_1((sc)->sc_mem1_bt, (sc)->sc_mem1_bh, (off), (x)) #define malo_mem_read4(sc, off) \ bus_space_read_4((sc)->sc_mem1_bt, (sc)->sc_mem1_bh, (off)) #define malo_mem_read1(sc, off) \ bus_space_read_1((sc)->sc_mem1_bt, (sc)->sc_mem1_bh, (off)) #define malo_ctl_write4(sc, off, x) \ bus_space_write_4((sc)->sc_mem2_bt, (sc)->sc_mem2_bh, (off), (x)) #define malo_ctl_read4(sc, off) \ bus_space_read_4((sc)->sc_mem2_bt, (sc)->sc_mem2_bh, (off)) #define malo_ctl_read1(sc, off) \ bus_space_read_1((sc)->sc_mem2_bt, (sc)->sc_mem2_bh, (off)) #define malo_ctl_barrier(sc, t) \ bus_space_barrier((sc)->sc_mem2_bt, (sc)->sc_mem2_bh, 0x0c00, 0xff, (t)) struct cfdriver malo_cd = { NULL, "malo", DV_IFNET }; int malo_alloc_cmd(struct malo_softc *sc); void malo_free_cmd(struct malo_softc *sc); void malo_send_cmd(struct malo_softc *sc, bus_addr_t addr); int malo_send_cmd_dma(struct malo_softc *sc, 
bus_addr_t addr); int malo_alloc_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring, int count); void malo_reset_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring); void malo_free_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring); int malo_alloc_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring, int count); void malo_reset_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring); void malo_free_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring); int malo_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); void malo_start(struct ifnet *ifp); void malo_watchdog(struct ifnet *ifp); int malo_newstate(struct ieee80211com *ic, enum ieee80211_state nstate, int arg); void malo_newassoc(struct ieee80211com *ic, struct ieee80211_node *ni, int isnew); struct ieee80211_node * malo_node_alloc(struct ieee80211com *ic); int malo_media_change(struct ifnet *ifp); void malo_media_status(struct ifnet *ifp, struct ifmediareq *imr); int malo_chip2rate(int chip_rate); int malo_fix2rate(int fix_rate); void malo_next_scan(void *arg); void malo_tx_intr(struct malo_softc *sc); int malo_tx_mgt(struct malo_softc *sc, struct mbuf *m0, struct ieee80211_node *ni); int malo_tx_data(struct malo_softc *sc, struct mbuf *m0, struct ieee80211_node *ni); void malo_tx_setup_desc(struct malo_softc *sc, struct malo_tx_desc *desc, int len, int rate, const bus_dma_segment_t *segs, int nsegs); void malo_rx_intr(struct malo_softc *sc); int malo_load_bootimg(struct malo_softc *sc); int malo_load_firmware(struct malo_softc *sc); int malo_set_slot(struct malo_softc *sc); void malo_update_slot(struct ieee80211com *ic); #ifdef MALO_DEBUG void malo_hexdump(void *buf, int len); #endif static char * malo_cmd_string(uint16_t cmd); static char * malo_cmd_string_result(uint16_t result); int malo_cmd_get_spec(struct malo_softc *sc); int malo_cmd_set_prescan(struct malo_softc *sc); int malo_cmd_set_postscan(struct malo_softc *sc, uint8_t *macaddr, uint8_t ibsson); int malo_cmd_set_channel(struct malo_softc *sc, uint8_t channel); int malo_cmd_set_antenna(struct malo_softc *sc, uint16_t antenna_type); int malo_cmd_set_radio(struct malo_softc *sc, uint16_t mode, uint16_t preamble); int malo_cmd_set_aid(struct malo_softc *sc, uint8_t *bssid, uint16_t associd); int malo_cmd_set_txpower(struct malo_softc *sc, unsigned int powerlevel); int malo_cmd_set_rts(struct malo_softc *sc, uint32_t threshold); int malo_cmd_set_slot(struct malo_softc *sc, uint8_t slot); int malo_cmd_set_rate(struct malo_softc *sc, uint8_t rate); void malo_cmd_response(struct malo_softc *sc); int malo_intr(void *arg) { struct malo_softc *sc = arg; uint32_t status; status = malo_ctl_read4(sc, 0x0c30); if (status == 0xffffffff || status == 0) /* not for us */ return (0); if (status & 0x1) malo_tx_intr(sc); if (status & 0x2) malo_rx_intr(sc); if (status & 0x4) { /* XXX cmd done interrupt handling doesn't work yet */ DPRINTF(1, "%s: got cmd done interrupt\n", sc->sc_dev.dv_xname); //malo_cmd_response(sc); } if (status & ~0x7) DPRINTF(1, "%s: unknown interrupt %x\n", sc->sc_dev.dv_xname, status); /* just ack the interrupt */ malo_ctl_write4(sc, 0x0c30, 0); return (1); } int malo_attach(struct malo_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &sc->sc_ic.ic_if; int i; /* initialize channel scanning timer */ timeout_set(&sc->sc_scan_to, malo_next_scan, sc); /* allocate DMA structures */ malo_alloc_cmd(sc); malo_alloc_rx_ring(sc, &sc->sc_rxring, MALO_RX_RING_COUNT); malo_alloc_tx_ring(sc, &sc->sc_txring, MALO_TX_RING_COUNT); /* 
setup interface */ ifp->if_softc = sc; ifp->if_ioctl = malo_ioctl; ifp->if_start = malo_start; ifp->if_watchdog = malo_watchdog; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; strlcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ); ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN); /* set supported rates */ ic->ic_sup_rates[IEEE80211_MODE_11B] = ieee80211_std_rateset_11b; ic->ic_sup_rates[IEEE80211_MODE_11G] = ieee80211_std_rateset_11g; sc->sc_last_txrate = -1; /* set channels */ for (i = 1; i <= 14; i++) { ic->ic_channels[i].ic_freq = ieee80211_ieee2mhz(i, IEEE80211_CHAN_2GHZ); ic->ic_channels[i].ic_flags = IEEE80211_CHAN_PUREG | IEEE80211_CHAN_B | IEEE80211_CHAN_G; } /* set the rest */ ic->ic_caps = IEEE80211_C_IBSS | IEEE80211_C_MONITOR | IEEE80211_C_SHPREAMBLE | IEEE80211_C_SHSLOT | IEEE80211_C_WEP | IEEE80211_C_RSN; ic->ic_opmode = IEEE80211_M_STA; ic->ic_state = IEEE80211_S_INIT; ic->ic_max_rssi = 75; for (i = 0; i < 6; i++) ic->ic_myaddr[i] = malo_ctl_read1(sc, 0xa528 + i); /* show our mac address */ printf(", address %s\n", ether_sprintf(ic->ic_myaddr)); /* attach interface */ if_attach(ifp); ieee80211_ifattach(ifp); /* post attach vector functions */ sc->sc_newstate = ic->ic_newstate; ic->ic_newstate = malo_newstate; ic->ic_newassoc = malo_newassoc; ic->ic_node_alloc = malo_node_alloc; ic->ic_updateslot = malo_update_slot; ieee80211_media_init(ifp, malo_media_change, malo_media_status); #if NBPFILTER > 0 bpfattach(&sc->sc_drvbpf, ifp, DLT_IEEE802_11_RADIO, sizeof(struct ieee80211_frame) + 64); sc->sc_rxtap_len = sizeof(sc->sc_rxtapu); sc->sc_rxtap.wr_ihdr.it_len = htole16(sc->sc_rxtap_len); sc->sc_rxtap.wr_ihdr.it_present = htole32(MALO_RX_RADIOTAP_PRESENT); sc->sc_txtap_len = sizeof(sc->sc_txtapu); sc->sc_txtap.wt_ihdr.it_len = htole16(sc->sc_txtap_len); sc->sc_txtap.wt_ihdr.it_present = htole32(MALO_TX_RADIOTAP_PRESENT); #endif return (0); } int malo_detach(void *arg) { struct malo_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; /* remove channel scanning timer */ timeout_del(&sc->sc_scan_to); malo_stop(sc); ieee80211_ifdetach(ifp); if_detach(ifp); malo_free_cmd(sc); malo_free_rx_ring(sc, &sc->sc_rxring); malo_free_tx_ring(sc, &sc->sc_txring); return (0); } int malo_alloc_cmd(struct malo_softc *sc) { int error, nsegs; error = bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0, BUS_DMA_ALLOCNOW, &sc->sc_cmd_dmam); if (error != 0) { printf("%s: can not create DMA tag\n", sc->sc_dev.dv_xname); return (-1); } error = bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 0, &sc->sc_cmd_dmas, 1, &nsegs, BUS_DMA_WAITOK); if (error != 0) { printf("%s: error alloc dma memory\n", sc->sc_dev.dv_xname); return (-1); } error = bus_dmamem_map(sc->sc_dmat, &sc->sc_cmd_dmas, nsegs, PAGE_SIZE, (caddr_t *)&sc->sc_cmd_mem, BUS_DMA_WAITOK); if (error != 0) { printf("%s: error map dma memory\n", sc->sc_dev.dv_xname); return (-1); } error = bus_dmamap_load(sc->sc_dmat, sc->sc_cmd_dmam, sc->sc_cmd_mem, PAGE_SIZE, NULL, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: error load dma memory\n", sc->sc_dev.dv_xname); bus_dmamem_free(sc->sc_dmat, &sc->sc_cmd_dmas, nsegs); return (-1); } sc->sc_cookie = sc->sc_cmd_mem; *sc->sc_cookie = htole32(0xaa55aa55); sc->sc_cmd_mem = (caddr_t)sc->sc_cmd_mem + sizeof(uint32_t); sc->sc_cookie_dmaaddr = sc->sc_cmd_dmam->dm_segs[0].ds_addr; sc->sc_cmd_dmaaddr = sc->sc_cmd_dmam->dm_segs[0].ds_addr + sizeof(uint32_t); return (0); } void malo_free_cmd(struct malo_softc *sc) { bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, 
PAGE_SIZE, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, sc->sc_cmd_dmam); bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_cookie, PAGE_SIZE); bus_dmamem_free(sc->sc_dmat, &sc->sc_cmd_dmas, 1); } void malo_send_cmd(struct malo_softc *sc, bus_addr_t addr) { malo_ctl_write4(sc, 0x0c10, (uint32_t)addr); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); malo_ctl_write4(sc, 0x0c18, 2); /* CPU_TRANSFER_CMD */ malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); } int malo_send_cmd_dma(struct malo_softc *sc, bus_addr_t addr) { int i; struct malo_cmdheader *hdr = sc->sc_cmd_mem; malo_ctl_write4(sc, 0x0c10, (uint32_t)addr); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); malo_ctl_write4(sc, 0x0c18, 2); /* CPU_TRANSFER_CMD */ malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); for (i = 0; i < MALO_CMD_TIMEOUT; i++) { delay(100); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD); if (hdr->cmd & htole16(0x8000)) break; } if (i == MALO_CMD_TIMEOUT) { printf("%s: timeout while waiting for cmd response!\n", sc->sc_dev.dv_xname); return (ETIMEDOUT); } malo_cmd_response(sc); return (0); } int malo_alloc_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring, int count) { struct malo_rx_desc *desc; struct malo_rx_data *data; int i, nsegs, error; ring->count = count; ring->cur = ring->next = 0; error = bus_dmamap_create(sc->sc_dmat, count * sizeof(struct malo_rx_desc), 1, count * sizeof(struct malo_rx_desc), 0, BUS_DMA_NOWAIT, &ring->map); if (error != 0) { printf("%s: could not create desc DMA map\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamem_alloc(sc->sc_dmat, count * sizeof(struct malo_rx_desc), PAGE_SIZE, 0, &ring->seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO); if (error != 0) { printf("%s: could not allocate DMA memory\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamem_map(sc->sc_dmat, &ring->seg, nsegs, count * sizeof(struct malo_rx_desc), (caddr_t *)&ring->desc, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: can't map desc DMA memory\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamap_load(sc->sc_dmat, ring->map, ring->desc, count * sizeof(struct malo_rx_desc), NULL, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: could not load desc DMA map\n", sc->sc_dev.dv_xname); goto fail; } ring->physaddr = ring->map->dm_segs->ds_addr; ring->data = mallocarray(count, sizeof (struct malo_rx_data), M_DEVBUF, M_NOWAIT); if (ring->data == NULL) { printf("%s: could not allocate soft data\n", sc->sc_dev.dv_xname); error = ENOMEM; goto fail; } /* * Pre-allocate Rx buffers and populate Rx ring. 
*/ bzero(ring->data, count * sizeof (struct malo_rx_data)); for (i = 0; i < count; i++) { desc = &ring->desc[i]; data = &ring->data[i]; error = bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0, BUS_DMA_NOWAIT, &data->map); if (error != 0) { printf("%s: could not create DMA map\n", sc->sc_dev.dv_xname); goto fail; } MGETHDR(data->m, M_DONTWAIT, MT_DATA); if (data->m == NULL) { printf("%s: could not allocate rx mbuf\n", sc->sc_dev.dv_xname); error = ENOMEM; goto fail; } MCLGET(data->m, M_DONTWAIT); if (!(data->m->m_flags & M_EXT)) { printf("%s: could not allocate rx mbuf cluster\n", sc->sc_dev.dv_xname); error = ENOMEM; goto fail; } error = bus_dmamap_load(sc->sc_dmat, data->map, mtod(data->m, void *), MCLBYTES, NULL, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: could not load rx buf DMA map", sc->sc_dev.dv_xname); goto fail; } desc->status = 1; desc->physdata = htole32(data->map->dm_segs->ds_addr); desc->physnext = htole32(ring->physaddr + (i + 1) % count * sizeof(struct malo_rx_desc)); } bus_dmamap_sync(sc->sc_dmat, ring->map, 0, ring->map->dm_mapsize, BUS_DMASYNC_PREWRITE); return (0); fail: malo_free_rx_ring(sc, ring); return (error); } void malo_reset_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring) { int i; for (i = 0; i < ring->count; i++) ring->desc[i].status = 0; bus_dmamap_sync(sc->sc_dmat, ring->map, 0, ring->map->dm_mapsize, BUS_DMASYNC_PREWRITE); ring->cur = ring->next = 0; } void malo_free_rx_ring(struct malo_softc *sc, struct malo_rx_ring *ring) { struct malo_rx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(sc->sc_dmat, ring->map, 0, ring->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, ring->map); bus_dmamem_unmap(sc->sc_dmat, (caddr_t)ring->desc, ring->count * sizeof(struct malo_rx_desc)); bus_dmamem_free(sc->sc_dmat, &ring->seg, 1); } if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, data->map); m_freem(data->m); } if (data->map != NULL) bus_dmamap_destroy(sc->sc_dmat, data->map); } free(ring->data, M_DEVBUF, 0); } } int malo_alloc_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring, int count) { int i, nsegs, error; ring->count = count; ring->queued = 0; ring->cur = ring->next = ring->stat = 0; error = bus_dmamap_create(sc->sc_dmat, count * sizeof(struct malo_tx_desc), 1, count * sizeof(struct malo_tx_desc), 0, BUS_DMA_NOWAIT, &ring->map); if (error != 0) { printf("%s: could not create desc DMA map\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamem_alloc(sc->sc_dmat, count * sizeof(struct malo_tx_desc), PAGE_SIZE, 0, &ring->seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO); if (error != 0) { printf("%s: could not allocate DMA memory\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamem_map(sc->sc_dmat, &ring->seg, nsegs, count * sizeof(struct malo_tx_desc), (caddr_t *)&ring->desc, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: can't map desc DMA memory\n", sc->sc_dev.dv_xname); goto fail; } error = bus_dmamap_load(sc->sc_dmat, ring->map, ring->desc, count * sizeof(struct malo_tx_desc), NULL, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: could not load desc DMA map\n", sc->sc_dev.dv_xname); goto fail; } ring->physaddr = ring->map->dm_segs->ds_addr; ring->data = mallocarray(count, sizeof(struct malo_tx_data), M_DEVBUF, M_NOWAIT); if (ring->data == NULL) { printf("%s: could not allocate soft data\n", sc->sc_dev.dv_xname); error = 
ENOMEM; goto fail; } memset(ring->data, 0, count * sizeof(struct malo_tx_data)); for (i = 0; i < count; i++) { error = bus_dmamap_create(sc->sc_dmat, MCLBYTES, MALO_MAX_SCATTER, MCLBYTES, 0, BUS_DMA_NOWAIT, &ring->data[i].map); if (error != 0) { printf("%s: could not create DMA map\n", sc->sc_dev.dv_xname); goto fail; } ring->desc[i].physnext = htole32(ring->physaddr + (i + 1) % count * sizeof(struct malo_tx_desc)); } return (0); fail: malo_free_tx_ring(sc, ring); return (error); } void malo_reset_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring) { struct malo_tx_desc *desc; struct malo_tx_data *data; int i; for (i = 0; i < ring->count; i++) { desc = &ring->desc[i]; data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, data->map); m_freem(data->m); data->m = NULL; } /* * The node has already been freed at that point so don't call * ieee80211_release_node() here. */ data->ni = NULL; desc->status = 0; } bus_dmamap_sync(sc->sc_dmat, ring->map, 0, ring->map->dm_mapsize, BUS_DMASYNC_PREWRITE); ring->queued = 0; ring->cur = ring->next = ring->stat = 0; } void malo_free_tx_ring(struct malo_softc *sc, struct malo_tx_ring *ring) { struct malo_tx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(sc->sc_dmat, ring->map, 0, ring->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, ring->map); bus_dmamem_unmap(sc->sc_dmat, (caddr_t)ring->desc, ring->count * sizeof(struct malo_tx_desc)); bus_dmamem_free(sc->sc_dmat, &ring->seg, 1); } if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, data->map); m_freem(data->m); } /* * The node has already been freed at that point so * don't call ieee80211_release_node() here. 
*/ data->ni = NULL; if (data->map != NULL) bus_dmamap_destroy(sc->sc_dmat, data->map); } free(ring->data, M_DEVBUF, 0); } } int malo_init(struct ifnet *ifp) { struct malo_softc *sc = ifp->if_softc; struct ieee80211com *ic = &sc->sc_ic; uint8_t chan; int error; DPRINTF(1, "%s: %s\n", ifp->if_xname, __func__); /* if interface already runs stop it first */ if (ifp->if_flags & IFF_RUNNING) malo_stop(sc); /* power on cardbus socket */ if (sc->sc_enable) sc->sc_enable(sc); /* disable interrupts */ malo_ctl_read4(sc, 0x0c30); malo_ctl_write4(sc, 0x0c30, 0); malo_ctl_write4(sc, 0x0c34, 0); malo_ctl_write4(sc, 0x0c3c, 0); /* load firmware */ if ((error = malo_load_bootimg(sc))) goto fail; if ((error = malo_load_firmware(sc))) goto fail; /* enable interrupts */ malo_ctl_write4(sc, 0x0c34, 0x1f); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); malo_ctl_write4(sc, 0x0c3c, 0x1f); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); if ((error = malo_cmd_get_spec(sc))) goto fail; /* select default channel */ ic->ic_bss->ni_chan = ic->ic_ibss_chan; chan = ieee80211_chan2ieee(ic, ic->ic_bss->ni_chan); /* initialize hardware */ if ((error = malo_cmd_set_channel(sc, chan))) { printf("%s: setting channel failed!\n", sc->sc_dev.dv_xname); goto fail; } if ((error = malo_cmd_set_antenna(sc, 1))) { printf("%s: setting RX antenna failed!\n", sc->sc_dev.dv_xname); goto fail; } if ((error = malo_cmd_set_antenna(sc, 2))) { printf("%s: setting TX antenna failed!\n", sc->sc_dev.dv_xname); goto fail; } if ((error = malo_cmd_set_radio(sc, 1, 5))) { printf("%s: turn radio on failed!\n", sc->sc_dev.dv_xname); goto fail; } if ((error = malo_cmd_set_txpower(sc, 100))) { printf("%s: setting TX power failed!\n", sc->sc_dev.dv_xname); goto fail; } if ((error = malo_cmd_set_rts(sc, IEEE80211_RTS_MAX))) { printf("%s: setting RTS failed!\n", sc->sc_dev.dv_xname); goto fail; } ifp->if_flags |= IFF_RUNNING; if (ic->ic_opmode != IEEE80211_M_MONITOR) /* start background scanning */ ieee80211_new_state(ic, IEEE80211_S_SCAN, -1); else /* in monitor mode change directly into run state */ ieee80211_new_state(ic, IEEE80211_S_RUN, -1); return (0); fail: /* reset adapter */ DPRINTF(1, "%s: malo_init failed, resetting card\n", sc->sc_dev.dv_xname); malo_stop(sc); return (error); } int malo_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct malo_softc *sc = ifp->if_softc; struct ieee80211com *ic = &sc->sc_ic; int s, error = 0; uint8_t chan; s = splnet(); switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; /* FALLTHROUGH */ case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_flags & IFF_RUNNING) == 0) malo_init(ifp); } else { if (ifp->if_flags & IFF_RUNNING) malo_stop(sc); } break; case SIOCS80211CHANNEL: /* allow fast channel switching in monitor mode */ error = ieee80211_ioctl(ifp, cmd, data); if (error == ENETRESET && ic->ic_opmode == IEEE80211_M_MONITOR) { if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) { ic->ic_bss->ni_chan = ic->ic_ibss_chan; chan = ieee80211_chan2ieee(ic, ic->ic_bss->ni_chan); malo_cmd_set_channel(sc, chan); } error = 0; } break; default: error = ieee80211_ioctl(ifp, cmd, data); break; } if (error == ENETRESET) { if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) malo_init(ifp); error = 0; } splx(s); return (error); } void malo_start(struct ifnet *ifp) { struct malo_softc *sc = ifp->if_softc; struct ieee80211com *ic = &sc->sc_ic; struct mbuf *m0; struct ieee80211_node *ni; DPRINTF(2, "%s: %s\n", sc->sc_dev.dv_xname, __func__); if (!(ifp->if_flags & 
IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; for (;;) { if (sc->sc_txring.queued >= MALO_TX_RING_COUNT - 1) { ifq_set_oactive(&ifp->if_snd); break; } m0 = mq_dequeue(&ic->ic_mgtq); if (m0 != NULL) { ni = m0->m_pkthdr.ph_cookie; #if NBPFILTER > 0 if (ic->ic_rawbpf != NULL) bpf_mtap(ic->ic_rawbpf, m0, BPF_DIRECTION_OUT); #endif if (malo_tx_mgt(sc, m0, ni) != 0) break; } else { if (ic->ic_state != IEEE80211_S_RUN) break; m0 = ifq_dequeue(&ifp->if_snd); if (m0 == NULL) break; #if NBPFILTER > 0 if (ifp->if_bpf != NULL) bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT); #endif m0 = ieee80211_encap(ifp, m0, &ni); if (m0 == NULL) continue; #if NBPFILTER > 0 if (ic->ic_rawbpf != NULL) bpf_mtap(ic->ic_rawbpf, m0, BPF_DIRECTION_OUT); #endif if (malo_tx_data(sc, m0, ni) != 0) { if (ni != NULL) ieee80211_release_node(ic, ni); ifp->if_oerrors++; break; } } } } void malo_stop(struct malo_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; DPRINTF(1, "%s: %s\n", ifp->if_xname, __func__); /* reset adapter */ if (ifp->if_flags & IFF_RUNNING) malo_ctl_write4(sc, 0x0c18, (1 << 15)); /* device is not running anymore */ ifp->if_flags &= ~IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); /* change back to initial state */ ieee80211_new_state(ic, IEEE80211_S_INIT, -1); /* reset RX / TX rings */ malo_reset_tx_ring(sc, &sc->sc_txring); malo_reset_rx_ring(sc, &sc->sc_rxring); /* set initial rate */ sc->sc_last_txrate = -1; /* power off cardbus socket */ if (sc->sc_disable) sc->sc_disable(sc); } void malo_watchdog(struct ifnet *ifp) { } int malo_newstate(struct ieee80211com *ic, enum ieee80211_state nstate, int arg) { struct malo_softc *sc = ic->ic_if.if_softc; enum ieee80211_state ostate; uint8_t chan; int rate; DPRINTF(2, "%s: %s\n", sc->sc_dev.dv_xname, __func__); ostate = ic->ic_state; timeout_del(&sc->sc_scan_to); switch (nstate) { case IEEE80211_S_INIT: break; case IEEE80211_S_SCAN: if (ostate == IEEE80211_S_INIT) { if (malo_cmd_set_prescan(sc) != 0) DPRINTF(1, "%s: can't set prescan\n", sc->sc_dev.dv_xname); } else { chan = ieee80211_chan2ieee(ic, ic->ic_bss->ni_chan); malo_cmd_set_channel(sc, chan); } timeout_add_msec(&sc->sc_scan_to, 500); break; case IEEE80211_S_AUTH: DPRINTF(1, "%s: newstate AUTH\n", sc->sc_dev.dv_xname); malo_cmd_set_postscan(sc, ic->ic_myaddr, 1); chan = ieee80211_chan2ieee(ic, ic->ic_bss->ni_chan); malo_cmd_set_channel(sc, chan); break; case IEEE80211_S_ASSOC: DPRINTF(1, "%s: newstate ASSOC\n", sc->sc_dev.dv_xname); if (ic->ic_flags & IEEE80211_F_SHPREAMBLE) malo_cmd_set_radio(sc, 1, 3); /* short preamble */ else malo_cmd_set_radio(sc, 1, 1); /* long preamble */ malo_cmd_set_aid(sc, ic->ic_bss->ni_bssid, ic->ic_bss->ni_associd); if (ic->ic_fixed_rate == -1) /* automatic rate adaption */ malo_cmd_set_rate(sc, 0); else { /* fixed rate */ rate = malo_fix2rate(ic->ic_fixed_rate); malo_cmd_set_rate(sc, rate); } malo_set_slot(sc); break; case IEEE80211_S_RUN: DPRINTF(1, "%s: newstate RUN\n", sc->sc_dev.dv_xname); break; default: break; } return (sc->sc_newstate(ic, nstate, arg)); } void malo_newassoc(struct ieee80211com *ic, struct ieee80211_node *ni, int isnew) { } struct ieee80211_node * malo_node_alloc(struct ieee80211com *ic) { struct malo_node *wn; wn = malloc(sizeof(*wn), M_DEVBUF, M_NOWAIT | M_ZERO); if (wn == NULL) return (NULL); return ((struct ieee80211_node *)wn); } int malo_media_change(struct ifnet *ifp) { int error; DPRINTF(1, "%s: %s\n", ifp->if_xname, __func__); error = ieee80211_media_change(ifp); if (error != ENETRESET) return (error); if 
((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) malo_init(ifp); return (0); } void malo_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct malo_softc *sc = ifp->if_softc; struct ieee80211com *ic = &sc->sc_ic; imr->ifm_status = IFM_AVALID; imr->ifm_active = IFM_IEEE80211; if (ic->ic_state == IEEE80211_S_RUN) imr->ifm_status |= IFM_ACTIVE; /* report last TX rate used by chip */ imr->ifm_active |= ieee80211_rate2media(ic, sc->sc_last_txrate, ic->ic_curmode); switch (ic->ic_opmode) { case IEEE80211_M_STA: break; #ifndef IEEE80211_STA_ONLY case IEEE80211_M_IBSS: imr->ifm_active |= IFM_IEEE80211_ADHOC; break; case IEEE80211_M_AHDEMO: break; case IEEE80211_M_HOSTAP: break; #endif case IEEE80211_M_MONITOR: imr->ifm_active |= IFM_IEEE80211_MONITOR; break; default: break; } switch (ic->ic_curmode) { case IEEE80211_MODE_11B: imr->ifm_active |= IFM_IEEE80211_11B; break; case IEEE80211_MODE_11G: imr->ifm_active |= IFM_IEEE80211_11G; break; } } int malo_chip2rate(int chip_rate) { switch (chip_rate) { /* CCK rates */ case 0: return (2); case 1: return (4); case 2: return (11); case 3: return (22); /* OFDM rates */ case 4: return (0); /* reserved */ case 5: return (12); case 6: return (18); case 7: return (24); case 8: return (36); case 9: return (48); case 10: return (72); case 11: return (96); case 12: return (108); /* no rate select yet or unknown rate */ default: return (-1); } } int malo_fix2rate(int fix_rate) { switch (fix_rate) { /* CCK rates */ case 0: return (2); case 1: return (4); case 2: return (11); case 3: return (22); /* OFDM rates */ case 4: return (12); case 5: return (18); case 6: return (24); case 7: return (36); case 8: return (48); case 9: return (72); case 10: return (96); case 11: return (108); /* unknown rate: should not happen */ default: return (0); } } void malo_next_scan(void *arg) { struct malo_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; int s; DPRINTF(1, "%s: %s\n", ifp->if_xname, __func__); s = splnet(); if (ic->ic_state == IEEE80211_S_SCAN) ieee80211_next_scan(ifp); splx(s); } void malo_tx_intr(struct malo_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; struct malo_tx_desc *desc; struct malo_tx_data *data; struct malo_node *rn; int stat; DPRINTF(2, "%s: %s\n", sc->sc_dev.dv_xname, __func__); stat = sc->sc_txring.stat; for (;;) { desc = &sc->sc_txring.desc[sc->sc_txring.stat]; data = &sc->sc_txring.data[sc->sc_txring.stat]; rn = (struct malo_node *)data->ni; /* check if TX descriptor is not owned by FW anymore */ if ((letoh32(desc->status) & 0x80000000) || !(letoh32(data->softstat) & 0x80)) break; /* if no frame has been sent, ignore */ if (rn == NULL) goto next; /* check TX state */ switch (letoh32(desc->status) & 0x1) { case 0x1: DPRINTF(2, "%s: data frame was sent successfully\n", sc->sc_dev.dv_xname); break; default: DPRINTF(1, "%s: data frame sending error\n", sc->sc_dev.dv_xname); ifp->if_oerrors++; break; } /* save last used TX rate */ sc->sc_last_txrate = malo_chip2rate(desc->datarate); /* cleanup TX data and TX descriptor */ bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, data->map); m_freem(data->m); ieee80211_release_node(ic, data->ni); data->m = NULL; data->ni = NULL; data->softstat &= htole32(~0x80); desc->status = 0; desc->len = 0; DPRINTF(2, "%s: tx done idx=%d\n", sc->sc_dev.dv_xname, sc->sc_txring.stat); sc->sc_txring.queued--; next: if (++sc->sc_txring.stat >= 
sc->sc_txring.count) sc->sc_txring.stat = 0; if (sc->sc_txring.stat == stat) break; } sc->sc_tx_timer = 0; ifq_clr_oactive(&ifp->if_snd); malo_start(ifp); } int malo_tx_mgt(struct malo_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; struct malo_tx_desc *desc; struct malo_tx_data *data; struct ieee80211_frame *wh; int error; DPRINTF(2, "%s: %s\n", sc->sc_dev.dv_xname, __func__); desc = &sc->sc_txring.desc[sc->sc_txring.cur]; data = &sc->sc_txring.data[sc->sc_txring.cur]; if (m0->m_len < sizeof(struct ieee80211_frame)) { m0 = m_pullup(m0, sizeof(struct ieee80211_frame)); if (m0 == NULL) { ifp->if_ierrors++; return (ENOBUFS); } } wh = mtod(m0, struct ieee80211_frame *); #if NBPFILTER > 0 if (sc->sc_drvbpf != NULL) { struct mbuf mb; struct malo_tx_radiotap_hdr *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = sc->sc_last_txrate; tap->wt_chan_freq = htole16(ic->ic_bss->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ic->ic_bss->ni_chan->ic_flags); mb.m_data = (caddr_t)tap; mb.m_len = sc->sc_txtap_len; mb.m_next = m0; mb.m_nextpkt = NULL; mb.m_type = 0; mb.m_flags = 0; bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_OUT); } #endif /* * inject FW specific fields into the 802.11 frame * * 2 bytes FW len (inject) * 24 bytes 802.11 frame header * 6 bytes addr4 (inject) * n bytes 802.11 frame body */ if (m_leadingspace(m0) < 8) { if (m_trailingspace(m0) < 8) panic("%s: not enough space for mbuf dance", sc->sc_dev.dv_xname); bcopy(m0->m_data, m0->m_data + 8, m0->m_len); m0->m_data += 8; } /* move frame header */ bcopy(m0->m_data, m0->m_data - 6, sizeof(*wh)); m0->m_data -= 8; m0->m_len += 8; m0->m_pkthdr.len += 8; *mtod(m0, uint16_t *) = htole16(m0->m_len - 32); /* FW len */ error = bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m0, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: can't map mbuf (error %d)\n", sc->sc_dev.dv_xname, error); m_freem(m0); return (error); } data->m = m0; data->ni = ni; data->softstat |= htole32(0x80); malo_tx_setup_desc(sc, desc, m0->m_pkthdr.len, 0, data->map->dm_segs, data->map->dm_nsegs); bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, sc->sc_txring.map, sc->sc_txring.cur * sizeof(struct malo_tx_desc), sizeof(struct malo_tx_desc), BUS_DMASYNC_PREWRITE); DPRINTF(2, "%s: sending mgmt frame, pktlen=%u, idx=%u\n", sc->sc_dev.dv_xname, m0->m_pkthdr.len, sc->sc_txring.cur); sc->sc_txring.queued++; sc->sc_txring.cur = (sc->sc_txring.cur + 1) % MALO_TX_RING_COUNT; /* kick mgmt TX */ malo_ctl_write4(sc, 0x0c18, 1); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); return (0); } int malo_tx_data(struct malo_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; struct malo_tx_desc *desc; struct malo_tx_data *data; struct ieee80211_frame *wh; struct ieee80211_key *k; struct mbuf *mnew; int error; DPRINTF(2, "%s: %s\n", sc->sc_dev.dv_xname, __func__); desc = &sc->sc_txring.desc[sc->sc_txring.cur]; data = &sc->sc_txring.data[sc->sc_txring.cur]; if (m0->m_len < sizeof(struct ieee80211_frame)) { m0 = m_pullup(m0, sizeof(struct ieee80211_frame)); if (m0 == NULL) { ifp->if_ierrors++; return (ENOBUFS); } } wh = mtod(m0, struct ieee80211_frame *); if (wh->i_fc[1] & IEEE80211_FC1_WEP) { k = ieee80211_get_txkey(ic, wh, ni); if ((m0 = ieee80211_encrypt(ic, m0, k)) == NULL) return (ENOBUFS); /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct 
ieee80211_frame *); } #if NBPFILTER > 0 if (sc->sc_drvbpf != NULL) { struct mbuf mb; struct malo_tx_radiotap_hdr *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = sc->sc_last_txrate; tap->wt_chan_freq = htole16(ic->ic_bss->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ic->ic_bss->ni_chan->ic_flags); mb.m_data = (caddr_t)tap; mb.m_len = sc->sc_txtap_len; mb.m_next = m0; mb.m_nextpkt = NULL; mb.m_type = 0; mb.m_flags = 0; bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_OUT); } #endif /* * inject FW specific fields into the 802.11 frame * * 2 bytes FW len (inject) * 24 bytes 802.11 frame header * 6 bytes addr4 (inject) * n bytes 802.11 frame body * * For now copy all into a new mcluster. */ MGETHDR(mnew, M_DONTWAIT, MT_DATA); if (mnew == NULL) return (ENOBUFS); MCLGET(mnew, M_DONTWAIT); if (!(mnew->m_flags & M_EXT)) { m_free(mnew); return (ENOBUFS); } *mtod(mnew, uint16_t *) = htole16(m0->m_pkthdr.len - 24); /* FW len */ bcopy(wh, mtod(mnew, caddr_t) + 2, sizeof(*wh)); bzero(mtod(mnew, caddr_t) + 26, 6); m_copydata(m0, sizeof(*wh), m0->m_pkthdr.len - sizeof(*wh), mtod(mnew, caddr_t) + 32); mnew->m_pkthdr.len = mnew->m_len = m0->m_pkthdr.len + 8; m_freem(m0); m0 = mnew; error = bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m0, BUS_DMA_NOWAIT); if (error != 0) { printf("%s: can't map mbuf (error %d)\n", sc->sc_dev.dv_xname, error); m_freem(m0); return (error); } data->m = m0; data->ni = ni; data->softstat |= htole32(0x80); malo_tx_setup_desc(sc, desc, m0->m_pkthdr.len, 1, data->map->dm_segs, data->map->dm_nsegs); bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, sc->sc_txring.map, sc->sc_txring.cur * sizeof(struct malo_tx_desc), sizeof(struct malo_tx_desc), BUS_DMASYNC_PREWRITE); DPRINTF(2, "%s: sending data frame, pktlen=%u, idx=%u\n", sc->sc_dev.dv_xname, m0->m_pkthdr.len, sc->sc_txring.cur); sc->sc_txring.queued++; sc->sc_txring.cur = (sc->sc_txring.cur + 1) % MALO_TX_RING_COUNT; /* kick data TX */ malo_ctl_write4(sc, 0x0c18, 1); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE); return (0); } void malo_tx_setup_desc(struct malo_softc *sc, struct malo_tx_desc *desc, int len, int rate, const bus_dma_segment_t *segs, int nsegs) { desc->len = htole16(segs[0].ds_len); desc->datarate = rate; /* 0 = mgmt frame, 1 = data frame */ desc->physdata = htole32(segs[0].ds_addr); desc->status = htole32(0x00000001 | 0x80000000); } void malo_rx_intr(struct malo_softc *sc) { struct mbuf_list ml = MBUF_LIST_INITIALIZER(); struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; struct malo_rx_desc *desc; struct malo_rx_data *data; struct ieee80211_frame *wh; struct ieee80211_rxinfo rxi; struct ieee80211_node *ni; struct mbuf *mnew, *m; uint32_t rxRdPtr, rxWrPtr; int error, i; rxRdPtr = malo_mem_read4(sc, sc->sc_RxPdRdPtr); rxWrPtr = malo_mem_read4(sc, sc->sc_RxPdWrPtr); for (i = 0; i < MALO_RX_RING_COUNT && rxRdPtr != rxWrPtr; i++) { desc = &sc->sc_rxring.desc[sc->sc_rxring.cur]; data = &sc->sc_rxring.data[sc->sc_rxring.cur]; bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring.map, sc->sc_rxring.cur * sizeof(struct malo_rx_desc), sizeof(struct malo_rx_desc), BUS_DMASYNC_POSTREAD); DPRINTF(3, "%s: rx intr idx=%d, rxctrl=0x%02x, rssi=%d, " "status=0x%02x, channel=%d, len=%d, res1=%02x, rate=%d, " "physdata=0x%04x, physnext=0x%04x, qosctrl=%02x, res2=%d\n", sc->sc_dev.dv_xname, sc->sc_rxring.cur, desc->rxctrl, desc->rssi, desc->status, desc->channel, letoh16(desc->len), desc->reserved1, desc->datarate, letoh32(desc->physdata), 
letoh32(desc->physnext), desc->qosctrl, desc->reserved2); if ((desc->rxctrl & 0x80) == 0) break; MGETHDR(mnew, M_DONTWAIT, MT_DATA); if (mnew == NULL) { ifp->if_ierrors++; goto skip; } MCLGET(mnew, M_DONTWAIT); if (!(mnew->m_flags & M_EXT)) { m_freem(mnew); ifp->if_ierrors++; goto skip; } bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, data->map); error = bus_dmamap_load(sc->sc_dmat, data->map, mtod(mnew, void *), MCLBYTES, NULL, BUS_DMA_NOWAIT); if (error != 0) { m_freem(mnew); error = bus_dmamap_load(sc->sc_dmat, data->map, mtod(data->m, void *), MCLBYTES, NULL, BUS_DMA_NOWAIT); if (error != 0) { panic("%s: could not load old rx mbuf", sc->sc_dev.dv_xname); } ifp->if_ierrors++; goto skip; } /* * New mbuf mbuf successfully loaded */ m = data->m; data->m = mnew; desc->physdata = htole32(data->map->dm_segs->ds_addr); /* finalize mbuf */ m->m_pkthdr.len = m->m_len = letoh16(desc->len); /* * cut out FW specific fields from the 802.11 frame * * 2 bytes FW len (cut out) * 24 bytes 802.11 frame header * 6 bytes addr4 (cut out) * n bytes 802.11 frame data */ bcopy(m->m_data, m->m_data + 6, 26); m_adj(m, 8); #if NBPFILTER > 0 if (sc->sc_drvbpf != NULL) { struct mbuf mb; struct malo_rx_radiotap_hdr *tap = &sc->sc_rxtap; tap->wr_flags = 0; tap->wr_chan_freq = htole16(ic->ic_bss->ni_chan->ic_freq); tap->wr_chan_flags = htole16(ic->ic_bss->ni_chan->ic_flags); tap->wr_rssi = desc->rssi; tap->wr_max_rssi = ic->ic_max_rssi; mb.m_data = (caddr_t)tap; mb.m_len = sc->sc_rxtap_len; mb.m_next = m; mb.m_nextpkt = NULL; mb.m_type = 0; mb.m_flags = 0; bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_IN); } #endif wh = mtod(m, struct ieee80211_frame *); ni = ieee80211_find_rxnode(ic, wh); /* send the frame to the 802.11 layer */ rxi.rxi_flags = 0; rxi.rxi_rssi = desc->rssi; rxi.rxi_tstamp = 0; /* unused */ ieee80211_inputm(ifp, m, ni, &rxi, &ml); /* node is no longer needed */ ieee80211_release_node(ic, ni); skip: desc->rxctrl = 0; rxRdPtr = letoh32(desc->physnext); bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring.map, sc->sc_rxring.cur * sizeof(struct malo_rx_desc), sizeof(struct malo_rx_desc), BUS_DMASYNC_PREWRITE); sc->sc_rxring.cur = (sc->sc_rxring.cur + 1) % MALO_RX_RING_COUNT; } if_input(ifp, &ml); malo_mem_write4(sc, sc->sc_RxPdRdPtr, rxRdPtr); } int malo_load_bootimg(struct malo_softc *sc) { char *name = "malo8335-h"; uint8_t *ucode; size_t usize; int error, i; /* load boot firmware */ if ((error = loadfirmware(name, &ucode, &usize)) != 0) { printf("%s: error %d, could not read firmware %s\n", sc->sc_dev.dv_xname, error, name); return (EIO); } /* * It seems we are putting this code directly onto the stack of * the ARM cpu. I don't know why we need to instruct the DMA * engine to move the code. This is a big riddle without docu. */ DPRINTF(1, "%s: loading boot firmware\n", sc->sc_dev.dv_xname); malo_mem_write2(sc, 0xbef8, 0x001); malo_mem_write2(sc, 0xbefa, usize); malo_mem_write4(sc, 0xbefc, 0); bus_space_write_region_1(sc->sc_mem1_bt, sc->sc_mem1_bh, 0xbf00, ucode, usize); /* * we loaded the firmware into card memory now tell the CPU * to fetch the code and execute it. The memory mapped via the * first bar is internaly mapped to 0xc0000000. 
*/ malo_send_cmd(sc, 0xc000bef8); /* wait for the device to go into FW loading mode */ for (i = 0; i < 10; i++) { delay(50); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_READ); if (malo_ctl_read4(sc, 0x0c14) == 0x5) break; } if (i == 10) { printf("%s: timeout at boot firmware load!\n", sc->sc_dev.dv_xname); free(ucode, M_DEVBUF, usize); return (ETIMEDOUT); } free(ucode, M_DEVBUF, usize); /* tell the card we're done and... */ malo_mem_write2(sc, 0xbef8, 0x001); malo_mem_write2(sc, 0xbefa, 0); malo_mem_write4(sc, 0xbefc, 0); malo_send_cmd(sc, 0xc000bef8); DPRINTF(1, "%s: boot firmware loaded\n", sc->sc_dev.dv_xname); return (0); } int malo_load_firmware(struct malo_softc *sc) { struct malo_cmdheader *hdr; char *name = "malo8335-m"; void *data; uint8_t *ucode; size_t size, count, bsize; int i, sn, error; /* load real firmware now */ if ((error = loadfirmware(name, &ucode, &size)) != 0) { printf("%s: error %d, could not read firmware %s\n", sc->sc_dev.dv_xname, error, name); return (EIO); } DPRINTF(1, "%s: uploading firmware\n", sc->sc_dev.dv_xname); hdr = sc->sc_cmd_mem; data = hdr + 1; sn = 1; for (count = 0; count < size; count += bsize) { bsize = MIN(256, size - count); hdr->cmd = htole16(0x0001); hdr->size = htole16(bsize); hdr->seqnum = htole16(sn++); hdr->result = 0; bcopy(ucode + count, data, bsize); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE); malo_send_cmd(sc, sc->sc_cmd_dmaaddr); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_POSTWRITE); delay(500); } free(ucode, M_DEVBUF, size); DPRINTF(1, "%s: firmware upload finished\n", sc->sc_dev.dv_xname); /* * send a command with size 0 to tell that the firmware has been * uploaded */ hdr->cmd = htole16(0x0001); hdr->size = 0; hdr->seqnum = htole16(sn++); hdr->result = 0; bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE); malo_send_cmd(sc, sc->sc_cmd_dmaaddr); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_POSTWRITE); delay(100); DPRINTF(1, "%s: loading firmware\n", sc->sc_dev.dv_xname); /* wait until firmware has been loaded */ for (i = 0; i < 200; i++) { malo_ctl_write4(sc, 0x0c10, 0x5a); delay(500); malo_ctl_barrier(sc, BUS_SPACE_BARRIER_WRITE | BUS_SPACE_BARRIER_READ); if (malo_ctl_read4(sc, 0x0c14) == 0xf0f1f2f4) break; } if (i == 200) { printf("%s: timeout at firmware load!\n", sc->sc_dev.dv_xname); return (ETIMEDOUT); } DPRINTF(1, "%s: firmware loaded\n", sc->sc_dev.dv_xname); return (0); } int malo_set_slot(struct malo_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; if (ic->ic_flags & IEEE80211_F_SHSLOT) { /* set short slot */ if (malo_cmd_set_slot(sc, 1)) { printf("%s: setting short slot failed\n", sc->sc_dev.dv_xname); return (ENXIO); } } else { /* set long slot */ if (malo_cmd_set_slot(sc, 0)) { printf("%s: setting long slot failed\n", sc->sc_dev.dv_xname); return (ENXIO); } } return (0); } void malo_update_slot(struct ieee80211com *ic) { struct malo_softc *sc = ic->ic_if.if_softc; malo_set_slot(sc); #ifndef IEEE80211_STA_ONLY if (ic->ic_opmode == IEEE80211_M_HOSTAP) { /* TODO */ } #endif } #ifdef MALO_DEBUG void malo_hexdump(void *buf, int len) { u_char b[16]; int i, j, l; for (i = 0; i < len; i += l) { printf("%4i:", i); l = min(sizeof(b), len - i); bcopy(buf + i, b, l); for (j = 0; j < sizeof(b); j++) { if (j % 2 == 0) printf(" "); if (j % 8 == 0) printf(" "); if (j < l) printf("%02x", (int)b[j]); else printf(" "); } printf(" |"); for (j = 0; j < l; j++) { if (b[j] >= 0x20 && b[j] <= 0x7e) printf("%c", b[j]); else 
printf("."); } printf("|\n"); } } #endif static char * malo_cmd_string(uint16_t cmd) { int i; static char cmd_buf[16]; static const struct { uint16_t cmd_code; char *cmd_string; } cmds[] = { { MALO_CMD_GET_HW_SPEC, "GetHwSpecifications" }, { MALO_CMD_SET_RADIO, "SetRadio" }, { MALO_CMD_SET_AID, "SetAid" }, { MALO_CMD_SET_TXPOWER, "SetTxPower" }, { MALO_CMD_SET_ANTENNA, "SetAntenna" }, { MALO_CMD_SET_PRESCAN, "SetPrescan" }, { MALO_CMD_SET_POSTSCAN, "SetPostscan" }, { MALO_CMD_SET_RATE, "SetRate" }, { MALO_CMD_SET_CHANNEL, "SetChannel" }, { MALO_CMD_SET_RTS, "SetRTS" }, { MALO_CMD_SET_SLOT, "SetSlot" }, }; for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++) if ((letoh16(cmd) & 0x7fff) == cmds[i].cmd_code) return (cmds[i].cmd_string); snprintf(cmd_buf, sizeof(cmd_buf), "unknown %#x", cmd); return (cmd_buf); } static char * malo_cmd_string_result(uint16_t result) { int i; static const struct { uint16_t result_code; char *result_string; } results[] = { { MALO_CMD_RESULT_OK, "OK" }, { MALO_CMD_RESULT_ERROR, "general error" }, { MALO_CMD_RESULT_NOSUPPORT, "not supported" }, { MALO_CMD_RESULT_PENDING, "pending" }, { MALO_CMD_RESULT_BUSY, "ignored" }, { MALO_CMD_RESULT_PARTIALDATA, "incomplete" }, }; for (i = 0; i < sizeof(results) / sizeof(results[0]); i++) if (letoh16(result) == results[i].result_code) return (results[i].result_string); return ("unknown"); } int malo_cmd_get_spec(struct malo_softc *sc) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_hw_spec *spec; hdr->cmd = htole16(MALO_CMD_GET_HW_SPEC); hdr->size = htole16(sizeof(*hdr) + sizeof(*spec)); hdr->seqnum = htole16(42); /* the one and only */ hdr->result = 0; spec = (struct malo_hw_spec *)(hdr + 1); bzero(spec, sizeof(*spec)); memset(spec->PermanentAddress, 0xff, ETHER_ADDR_LEN); spec->CookiePtr = htole32(sc->sc_cookie_dmaaddr); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); if (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr) != 0) return (ETIMEDOUT); /* get the data from the buffer */ DPRINTF(1, "%s: get_hw_spec: V%x R%x, #WCB %d, #Mcast %d, Regcode %d, " "#Ant %d\n", sc->sc_dev.dv_xname, htole16(spec->HwVersion), htole32(spec->FWReleaseNumber), htole16(spec->NumOfWCB), htole16(spec->NumOfMCastAdr), htole16(spec->RegionCode), htole16(spec->NumberOfAntenna)); /* tell the DMA engine where our rings are */ malo_mem_write4(sc, letoh32(spec->RxPdRdPtr) & 0xffff, sc->sc_rxring.physaddr); malo_mem_write4(sc, letoh32(spec->RxPdWrPtr) & 0xffff, sc->sc_rxring.physaddr); malo_mem_write4(sc, letoh32(spec->WcbBase0) & 0xffff, sc->sc_txring.physaddr); /* save DMA RX pointers for later use */ sc->sc_RxPdRdPtr = letoh32(spec->RxPdRdPtr) & 0xffff; sc->sc_RxPdWrPtr = letoh32(spec->RxPdWrPtr) & 0xffff; return (0); } int malo_cmd_set_prescan(struct malo_softc *sc) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; hdr->cmd = htole16(MALO_CMD_SET_PRESCAN); hdr->size = htole16(sizeof(*hdr)); hdr->seqnum = 1; hdr->result = 0; bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_postscan(struct malo_softc *sc, uint8_t *macaddr, uint8_t ibsson) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_postscan *body; hdr->cmd = htole16(MALO_CMD_SET_POSTSCAN); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_postscan *)(hdr + 1); bzero(body, sizeof(*body)); memcpy(&body->bssid, macaddr, ETHER_ADDR_LEN); body->isibss 
= htole32(ibsson); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_channel(struct malo_softc *sc, uint8_t channel) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_channel *body; hdr->cmd = htole16(MALO_CMD_SET_CHANNEL); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_channel *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(1); body->channel = channel; bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_antenna(struct malo_softc *sc, uint16_t antenna) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_antenna *body; hdr->cmd = htole16(MALO_CMD_SET_ANTENNA); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_antenna *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(antenna); if (antenna == 1) body->mode = htole16(0xffff); else body->mode = htole16(2); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_radio(struct malo_softc *sc, uint16_t enable, uint16_t preamble_mode) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_radio *body; hdr->cmd = htole16(MALO_CMD_SET_RADIO); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_radio *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(1); body->preamble_mode = htole16(preamble_mode); body->enable = htole16(enable); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_aid(struct malo_softc *sc, uint8_t *bssid, uint16_t associd) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_aid *body; hdr->cmd = htole16(MALO_CMD_SET_AID); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_aid *)(hdr + 1); bzero(body, sizeof(*body)); body->associd = htole16(associd); memcpy(&body->macaddr[0], bssid, IEEE80211_ADDR_LEN); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_txpower(struct malo_softc *sc, unsigned int powerlevel) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_txpower *body; hdr->cmd = htole16(MALO_CMD_SET_TXPOWER); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_txpower *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(1); if (powerlevel < 30) body->supportpowerlvl = htole16(5); /* LOW */ else if (powerlevel >= 30 && powerlevel < 60) body->supportpowerlvl = htole16(10); /* MEDIUM */ else body->supportpowerlvl = htole16(15); /* HIGH */ bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_rts(struct malo_softc *sc, uint32_t threshold) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_rts *body; hdr->cmd = htole16(MALO_CMD_SET_RTS); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 
1; hdr->result = 0; body = (struct malo_cmd_rts *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(1); body->threshold = htole32(threshold); bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_slot(struct malo_softc *sc, uint8_t slot) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_slot *body; hdr->cmd = htole16(MALO_CMD_SET_SLOT); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_slot *)(hdr + 1); bzero(body, sizeof(*body)); body->action = htole16(1); body->slot = slot; bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } int malo_cmd_set_rate(struct malo_softc *sc, uint8_t rate) { struct ieee80211com *ic = &sc->sc_ic; struct malo_cmdheader *hdr = sc->sc_cmd_mem; struct malo_cmd_rate *body; int i; hdr->cmd = htole16(MALO_CMD_SET_RATE); hdr->size = htole16(sizeof(*hdr) + sizeof(*body)); hdr->seqnum = 1; hdr->result = 0; body = (struct malo_cmd_rate *)(hdr + 1); bzero(body, sizeof(*body)); #ifndef IEEE80211_STA_ONLY if (ic->ic_opmode == IEEE80211_M_HOSTAP) { /* TODO */ } else #endif { body->aprates[0] = 2; body->aprates[1] = 4; body->aprates[2] = 11; body->aprates[3] = 22; if (ic->ic_curmode == IEEE80211_MODE_11G) { body->aprates[4] = 0; body->aprates[5] = 12; body->aprates[6] = 18; body->aprates[7] = 24; body->aprates[8] = 36; body->aprates[9] = 48; body->aprates[10] = 72; body->aprates[11] = 96; body->aprates[12] = 108; } } if (rate != 0) { /* fixed rate */ for (i = 0; i < 13; i++) { if (body->aprates[i] == rate) { body->rateindex = i; body->dataratetype = 1; break; } } } bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr)); } void malo_cmd_response(struct malo_softc *sc) { struct malo_cmdheader *hdr = sc->sc_cmd_mem; if (letoh16(hdr->result) != MALO_CMD_RESULT_OK) { printf("%s: firmware cmd %s failed with %s\n", sc->sc_dev.dv_xname, malo_cmd_string(hdr->cmd), malo_cmd_string_result(hdr->result)); } #ifdef MALO_DEBUG printf("%s: cmd answer for %s=%s\n", sc->sc_dev.dv_xname, malo_cmd_string(hdr->cmd), malo_cmd_string_result(hdr->result)); if (malo_d > 2) malo_hexdump(hdr, letoh16(hdr->size)); #endif }
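/*
 * A minimal sketch of the command pattern shared by the malo_cmd_set_*
 * functions above: fill the malo_cmdheader at the start of the shared DMA
 * command buffer, zero the command body that immediately follows it, sync
 * the DMA map and hand the buffer's physical address to the firmware.  The
 * helper names malo_cmd_prepare() and malo_cmd_send() are hypothetical; the
 * sketch only assumes the structures and softc fields already used above.
 */
static void *
malo_cmd_prepare(struct malo_softc *sc, uint16_t cmd, size_t bodylen)
{
	struct malo_cmdheader *hdr = sc->sc_cmd_mem;

	hdr->cmd = htole16(cmd);
	hdr->size = htole16(sizeof(*hdr) + bodylen);
	hdr->seqnum = 1;
	hdr->result = 0;

	/* the command body starts right after the header */
	bzero(hdr + 1, bodylen);
	return (hdr + 1);
}

static int
malo_cmd_send(struct malo_softc *sc)
{
	bus_dmamap_sync(sc->sc_dmat, sc->sc_cmd_dmam, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	return (malo_send_cmd_dma(sc, sc->sc_cmd_dmaaddr));
}

/*
 * With those helpers, malo_cmd_set_slot() above would reduce to:
 *
 *	struct malo_cmd_slot *body;
 *
 *	body = malo_cmd_prepare(sc, MALO_CMD_SET_SLOT, sizeof(*body));
 *	body->action = htole16(1);
 *	body->slot = slot;
 *	return (malo_cmd_send(sc));
 */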
/* * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org * * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. (Rik) Faith <faith@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include <sys/param.h> #include <sys/fcntl.h> #include <sys/poll.h> #include <sys/specdev.h> #include <sys/vnode.h> #include <machine/bus.h> #ifdef __HAVE_ACPI #include <dev/acpi/acpidev.h> #include <dev/acpi/acpivar.h> #include <dev/acpi/dsdt.h> #endif #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/slab.h> #include <linux/srcu.h> #include <drm/drm_client.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_mode_object.h> #include <drm/drm_print.h> #include <drm/drm_gem.h> #include <drm/drm_agpsupport.h> #include <drm/drm_irq.h> #include "drm_crtc_internal.h" #include "drm_internal.h" #include "drm_legacy.h" MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl"); MODULE_DESCRIPTION("DRM shared core routines"); MODULE_LICENSE("GPL and additional rights"); static DEFINE_SPINLOCK(drm_minor_lock); static struct idr drm_minors_idr; /* * If the drm core fails to init for whatever reason, * we should prevent any drivers from registering with it. * It's best to check this at drm_dev_init(), as some drivers * prefer to embed struct drm_device into their own device * structure and call drm_dev_init() themselves. */ static bool drm_core_init_complete = false; static struct dentry *drm_debugfs_root; #ifdef notyet DEFINE_STATIC_SRCU(drm_unplug_srcu); #endif /* * Some functions are only called once on init regardless of how many times * drm attaches. 
In linux this is handled via module_init()/module_exit() */ int drm_refcnt; struct drm_softc { struct device sc_dev; struct drm_device *sc_drm; int sc_allocated; }; struct drm_attach_args { struct drm_device *drm; struct drm_driver *driver; char *busid; bus_dma_tag_t dmat; bus_space_tag_t bst; size_t busid_len; int is_agp; struct pci_attach_args *pa; int primary; }; void drm_linux_init(void); void drm_linux_exit(void); int drm_linux_acpi_notify(struct aml_node *, int, void *); int drm_dequeue_event(struct drm_device *, struct drm_file *, size_t, struct drm_pending_event **); int drmprint(void *, const char *); int drmsubmatch(struct device *, void *, void *); const struct pci_device_id * drm_find_description(int, int, const struct pci_device_id *); int drm_file_cmp(struct drm_file *, struct drm_file *); SPLAY_PROTOTYPE(drm_file_tree, drm_file, link, drm_file_cmp); #define DRMDEVCF_PRIMARY 0 #define drmdevcf_primary cf_loc[DRMDEVCF_PRIMARY] /* spec'd as primary? */ #define DRMDEVCF_PRIMARY_UNK -1 /* * DRM Minors * A DRM device can provide several char-dev interfaces on the DRM-Major. Each * of them is represented by a drm_minor object. Depending on the capabilities * of the device-driver, different interfaces are registered. * * Minors can be accessed via dev->$minor_name. This pointer is either * NULL or a valid drm_minor pointer and stays valid as long as the device is * valid. This means, DRM minors have the same life-time as the underlying * device. However, this doesn't mean that the minor is active. Minors are * registered and unregistered dynamically according to device-state. */ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, unsigned int type) { switch (type) { case DRM_MINOR_PRIMARY: return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; default: BUG(); } } static int drm_minor_alloc(struct drm_device *dev, unsigned int type) { struct drm_minor *minor; unsigned long flags; int r; minor = kzalloc(sizeof(*minor), GFP_KERNEL); if (!minor) return -ENOMEM; minor->type = type; minor->dev = dev; idr_preload(GFP_KERNEL); spin_lock_irqsave(&drm_minor_lock, flags); r = idr_alloc(&drm_minors_idr, NULL, 64 * type, 64 * (type + 1), GFP_NOWAIT); spin_unlock_irqrestore(&drm_minor_lock, flags); idr_preload_end(); if (r < 0) goto err_free; minor->index = r; #ifdef __linux__ minor->kdev = drm_sysfs_minor_alloc(minor); if (IS_ERR(minor->kdev)) { r = PTR_ERR(minor->kdev); goto err_index; } #endif *drm_minor_get_slot(dev, type) = minor; return 0; #ifdef __linux__ err_index: spin_lock_irqsave(&drm_minor_lock, flags); idr_remove(&drm_minors_idr, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); #endif err_free: kfree(minor); return r; } static void drm_minor_free(struct drm_device *dev, unsigned int type) { struct drm_minor **slot, *minor; unsigned long flags; slot = drm_minor_get_slot(dev, type); minor = *slot; if (!minor) return; #ifdef __linux__ put_device(minor->kdev); #endif spin_lock_irqsave(&drm_minor_lock, flags); idr_remove(&drm_minors_idr, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); kfree(minor); *slot = NULL; } static int drm_minor_register(struct drm_device *dev, unsigned int type) { struct drm_minor *minor; unsigned long flags; #ifdef __linux__ int ret; #endif DRM_DEBUG("\n"); minor = *drm_minor_get_slot(dev, type); if (!minor) return 0; #ifdef __linux__ ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root); if (ret) { DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); goto err_debugfs; } ret = 
device_add(minor->kdev); if (ret) goto err_debugfs; #else drm_debugfs_root = NULL; #endif /* replace NULL with @minor so lookups will succeed from now on */ spin_lock_irqsave(&drm_minor_lock, flags); idr_replace(&drm_minors_idr, minor, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); DRM_DEBUG("new minor registered %d\n", minor->index); return 0; #ifdef __linux__ err_debugfs: drm_debugfs_cleanup(minor); return ret; #endif } static void drm_minor_unregister(struct drm_device *dev, unsigned int type) { struct drm_minor *minor; unsigned long flags; minor = *drm_minor_get_slot(dev, type); #ifdef __linux__ if (!minor || !device_is_registered(minor->kdev)) #else if (!minor) #endif return; /* replace @minor with NULL so lookups will fail from now on */ spin_lock_irqsave(&drm_minor_lock, flags); idr_replace(&drm_minors_idr, NULL, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); #ifdef __linux__ device_del(minor->kdev); #endif dev_set_drvdata(minor->kdev, NULL); /* safety belt */ drm_debugfs_cleanup(minor); } /* * Looks up the given minor-ID and returns the respective DRM-minor object. The * reference-count of the underlying device is increased so you must release this * object with drm_minor_release(). * * As long as you hold this minor, it is guaranteed that the object and the * minor->dev pointer will stay valid! However, the device may get unplugged and * unregistered while you hold the minor. */ struct drm_minor *drm_minor_acquire(unsigned int minor_id) { struct drm_minor *minor; unsigned long flags; spin_lock_irqsave(&drm_minor_lock, flags); minor = idr_find(&drm_minors_idr, minor_id); if (minor) drm_dev_get(minor->dev); spin_unlock_irqrestore(&drm_minor_lock, flags); if (!minor) { return ERR_PTR(-ENODEV); } else if (drm_dev_is_unplugged(minor->dev)) { drm_dev_put(minor->dev); return ERR_PTR(-ENODEV); } return minor; } void drm_minor_release(struct drm_minor *minor) { drm_dev_put(minor->dev); } /** * DOC: driver instance overview * * A device instance for a drm driver is represented by &struct drm_device. This * is initialized with drm_dev_init(), usually from bus-specific ->probe() * callbacks implemented by the driver. The driver then needs to initialize all * the various subsystems for the drm device like memory management, vblank * handling, modesetting support and initial output configuration plus obviously * initialize all the corresponding hardware bits. Finally when everything is up * and running and ready for userspace the device instance can be published * using drm_dev_register(). * * There is also deprecated support for initializing device instances using * bus-specific helpers and the &drm_driver.load callback. But due to * backwards-compatibility needs the device instance has to be published too * early, which requires unpretty global locking to make safe and is therefore * only supported for existing drivers not yet converted to the new scheme. * * When cleaning up a device instance everything needs to be done in reverse: * First unpublish the device instance with drm_dev_unregister(). Then clean up * any other resources allocated at device initialization and drop the driver's * reference to &drm_device using drm_dev_put(). * * Note that the lifetime rules for &drm_device instances still have a lot of * historical baggage. Hence use the reference counting provided by * drm_dev_get() and drm_dev_put() only carefully. * * Display driver example * ~~~~~~~~~~~~~~~~~~~~~~ * * The following example shows a typical structure of a DRM display driver. 
* The example focuses on the probe() function and the other functions that are * almost always present and serve as a demonstration of devm_drm_dev_init() * usage with its accompanying drm_driver->release callback. * * .. code-block:: c * * struct driver_device { * struct drm_device drm; * void *userspace_facing; * struct clk *pclk; * }; * * static void driver_drm_release(struct drm_device *drm) * { * struct driver_device *priv = container_of(...); * * drm_mode_config_cleanup(drm); * drm_dev_fini(drm); * kfree(priv->userspace_facing); * kfree(priv); * } * * static struct drm_driver driver_drm_driver = { * [...] * .release = driver_drm_release, * }; * * static int driver_probe(struct platform_device *pdev) * { * struct driver_device *priv; * struct drm_device *drm; * int ret; * * // devm_kzalloc() can't be used here because the drm_device * // lifetime can exceed the device lifetime if driver unbind * // happens when userspace still has open file descriptors. * priv = kzalloc(sizeof(*priv), GFP_KERNEL); * if (!priv) * return -ENOMEM; * * drm = &priv->drm; * * ret = devm_drm_dev_init(&pdev->dev, drm, &driver_drm_driver); * if (ret) { * kfree(drm); * return ret; * } * * drm_mode_config_init(drm); * * priv->userspace_facing = kzalloc(..., GFP_KERNEL); * if (!priv->userspace_facing) * return -ENOMEM; * * priv->pclk = devm_clk_get(dev, "PCLK"); * if (IS_ERR(priv->pclk)) * return PTR_ERR(priv->pclk); * * // Further setup, display pipeline etc * * platform_set_drvdata(pdev, drm); * * drm_mode_config_reset(drm); * * ret = drm_dev_register(drm); * if (ret) * return ret; * * drm_fbdev_generic_setup(drm, 32); * * return 0; * } * * // This function is called before the devm_ resources are released * static int driver_remove(struct platform_device *pdev) * { * struct drm_device *drm = platform_get_drvdata(pdev); * * drm_dev_unregister(drm); * drm_atomic_helper_shutdown(drm); * * return 0; * } * * // This function is called on kernel restart and shutdown * static void driver_shutdown(struct platform_device *pdev) * { * drm_atomic_helper_shutdown(platform_get_drvdata(pdev)); * } * * static int __maybe_unused driver_pm_suspend(struct device *dev) * { * return drm_mode_config_helper_suspend(dev_get_drvdata(dev)); * } * * static int __maybe_unused driver_pm_resume(struct device *dev) * { * drm_mode_config_helper_resume(dev_get_drvdata(dev)); * * return 0; * } * * static const struct dev_pm_ops driver_pm_ops = { * SET_SYSTEM_SLEEP_PM_OPS(driver_pm_suspend, driver_pm_resume) * }; * * static struct platform_driver driver_driver = { * .driver = { * [...] * .pm = &driver_pm_ops, * }, * .probe = driver_probe, * .remove = driver_remove, * .shutdown = driver_shutdown, * }; * module_platform_driver(driver_driver); * * Drivers that want to support device unplugging (USB, DT overlay unload) should * use drm_dev_unplug() instead of drm_dev_unregister(). The driver must protect * regions that are accessing device resources to prevent use after they're * released. This is done using drm_dev_enter() and drm_dev_exit(). There is one * shortcoming however: drm_dev_unplug() marks the drm_device as unplugged before * drm_atomic_helper_shutdown() is called. This means that if the disable code * paths are protected, they will not run on regular driver module unload, * possibly leaving the hardware enabled. */ /** * drm_put_dev - Unregister and release a DRM device * @dev: DRM device * * Called at module unload time or when a PCI device is unplugged. * * Cleans up the DRM device, calling drm_lastclose(). 
* * Note: Use of this function is deprecated. It will eventually go away * completely. Please use drm_dev_unregister() and drm_dev_put() explicitly * instead to make sure that the device isn't userspace accessible any more * while teardown is in progress, ensuring that userspace can't access an * inconsistent state. */ void drm_put_dev(struct drm_device *dev) { DRM_DEBUG("\n"); if (!dev) { DRM_ERROR("cleanup called no dev\n"); return; } drm_dev_unregister(dev); drm_dev_put(dev); } EXPORT_SYMBOL(drm_put_dev); /** * drm_dev_enter - Enter device critical section * @dev: DRM device * @idx: Pointer to index that will be passed to the matching drm_dev_exit() * * This function marks and protects the beginning of a section that should not * be entered after the device has been unplugged. The section end is marked * with drm_dev_exit(). Calls to this function can be nested. * * Returns: * True if it is OK to enter the section, false otherwise. */ bool drm_dev_enter(struct drm_device *dev, int *idx) { #ifdef notyet *idx = srcu_read_lock(&drm_unplug_srcu); if (dev->unplugged) { srcu_read_unlock(&drm_unplug_srcu, *idx); return false; } #endif return true; } EXPORT_SYMBOL(drm_dev_enter); /** * drm_dev_exit - Exit device critical section * @idx: index returned from drm_dev_enter() * * This function marks the end of a section that should not be entered after * the device has been unplugged. */ void drm_dev_exit(int idx) { #ifdef notyet srcu_read_unlock(&drm_unplug_srcu, idx); #endif } EXPORT_SYMBOL(drm_dev_exit); /** * drm_dev_unplug - unplug a DRM device * @dev: DRM device * * This unplugs a hotpluggable DRM device, which makes it inaccessible to * userspace operations. Entry-points can use drm_dev_enter() and * drm_dev_exit() to protect device resources in a race free manner. This * essentially unregisters the device like drm_dev_unregister(), but can be * called while there are still open users of @dev. */ void drm_dev_unplug(struct drm_device *dev) { STUB(); #ifdef notyet /* * After synchronizing any critical read section is guaranteed to see * the new value of ->unplugged, and any critical section which might * still have seen the old value of ->unplugged is guaranteed to have * finished. */ dev->unplugged = true; synchronize_srcu(&drm_unplug_srcu); drm_dev_unregister(dev); #endif } EXPORT_SYMBOL(drm_dev_unplug); #ifdef __linux__ /* * DRM internal mount * We want to be able to allocate our own "struct address_space" to control * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not allow * stand-alone address_space objects, so we need an underlying inode. As there * is no way to allocate an independent inode easily, we need a fake internal * VFS mount-point. * * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free() * frees it again. You are allowed to use iget() and iput() to get references to * the inode. But each drm_fs_inode_new() call must be paired with exactly one * drm_fs_inode_free() call (which does not have to be the last iput()). * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it * between multiple inode-users. You could, technically, call * iget() + drm_fs_inode_free() directly after alloc and sometime later do an * iput(), but this way you'd end up with a new vfsmount for each inode. */ static int drm_fs_cnt; static struct vfsmount *drm_fs_mnt; static int drm_fs_init_fs_context(struct fs_context *fc) { return init_pseudo(fc, 0x010203ff) ? 
0 : -ENOMEM; } static struct file_system_type drm_fs_type = { .name = "drm", .owner = THIS_MODULE, .init_fs_context = drm_fs_init_fs_context, .kill_sb = kill_anon_super, }; static struct inode *drm_fs_inode_new(void) { struct inode *inode; int r; r = simple_pin_fs(&drm_fs_type, &drm_fs_mnt, &drm_fs_cnt); if (r < 0) { DRM_ERROR("Cannot mount pseudo fs: %d\n", r); return ERR_PTR(r); } inode = alloc_anon_inode(drm_fs_mnt->mnt_sb); if (IS_ERR(inode)) simple_release_fs(&drm_fs_mnt, &drm_fs_cnt); return inode; } static void drm_fs_inode_free(struct inode *inode) { if (inode) { iput(inode); simple_release_fs(&drm_fs_mnt, &drm_fs_cnt); } } #endif /* __linux__ */ /** * DOC: component helper usage recommendations * * DRM drivers that drive hardware where a logical device consists of a pile of * independent hardware blocks are recommended to use the :ref:`component helper * library<component>`. For consistency and better options for code reuse the * following guidelines apply: * * - The entire device initialization procedure should be run from the * &component_master_ops.master_bind callback, starting with drm_dev_init(), * then binding all components with component_bind_all() and finishing with * drm_dev_register(). * * - The opaque pointer passed to all components through component_bind_all() * should point at &struct drm_device of the device instance, not some driver * specific private structure. * * - The component helper fills the niche where further standardization of * interfaces is not practical. When there already is, or will be, a * standardized interface like &drm_bridge or &drm_panel, providing its own * functions to find such components at driver load time, like * drm_of_find_panel_or_bridge(), then the component helper should not be * used. */ /** * drm_dev_init - Initialise new DRM device * @dev: DRM device * @driver: DRM driver * @parent: Parent device object * * Initialize a new DRM device. No device registration is done. * Call drm_dev_register() to advertice the device to user space and register it * with other core subsystems. This should be done last in the device * initialization sequence to make sure userspace can't access an inconsistent * state. * * The initial ref-count of the object is 1. Use drm_dev_get() and * drm_dev_put() to take and drop further ref-counts. * * It is recommended that drivers embed &struct drm_device into their own device * structure. * * Drivers that do not want to allocate their own device struct * embedding &struct drm_device can call drm_dev_alloc() instead. For drivers * that do embed &struct drm_device it must be placed first in the overall * structure, and the overall structure must be allocated using kmalloc(): The * drm core's release function unconditionally calls kfree() on the @dev pointer * when the final reference is released. To override this behaviour, and so * allow embedding of the drm_device inside the driver's device struct at an * arbitrary offset, you must supply a &drm_driver.release callback and control * the finalization explicitly. * * RETURNS: * 0 on success, or error code on failure. 
*/ int drm_dev_init(struct drm_device *dev, struct drm_driver *driver, struct device *parent) { int ret; if (!drm_core_init_complete) { DRM_ERROR("DRM core is not initialized\n"); return -ENODEV; } if (WARN_ON(!parent)) return -EINVAL; kref_init(&dev->ref); #ifdef __linux__ dev->dev = get_device(parent); #endif dev->driver = driver; /* no per-device feature limits by default */ dev->driver_features = ~0u; drm_legacy_init_members(dev); #ifdef notyet INIT_LIST_HEAD(&dev->filelist); #else SPLAY_INIT(&dev->files); #endif INIT_LIST_HEAD(&dev->filelist_internal); INIT_LIST_HEAD(&dev->clientlist); INIT_LIST_HEAD(&dev->vblank_event_list); mtx_init(&dev->event_lock, IPL_TTY); mtx_init(&dev->event_lock, IPL_TTY); rw_init(&dev->struct_mutex, "drmdevlk"); rw_init(&dev->filelist_mutex, "drmflist"); rw_init(&dev->clientlist_mutex, "drmclist"); rw_init(&dev->master_mutex, "drmmast"); #ifdef __linux__ dev->anon_inode = drm_fs_inode_new(); if (IS_ERR(dev->anon_inode)) { ret = PTR_ERR(dev->anon_inode); DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret); goto err_free; } #endif if (drm_core_check_feature(dev, DRIVER_RENDER)) { ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; } ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; ret = drm_legacy_create_map_hash(dev); if (ret) goto err_minors; drm_legacy_ctxbitmap_init(dev); if (drm_core_check_feature(dev, DRIVER_GEM)) { ret = drm_gem_init(dev); if (ret) { DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n"); goto err_ctxbitmap; } } ret = drm_dev_set_unique(dev, dev_name(parent)); if (ret) goto err_setunique; return 0; err_setunique: if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_destroy(dev); err_ctxbitmap: drm_legacy_ctxbitmap_cleanup(dev); drm_legacy_remove_map_hash(dev); err_minors: drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); #ifdef __linux__ drm_fs_inode_free(dev->anon_inode); err_free: put_device(dev->dev); #endif mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); mutex_destroy(&dev->struct_mutex); drm_legacy_destroy_members(dev); return ret; } EXPORT_SYMBOL(drm_dev_init); #ifdef notyet static void devm_drm_dev_init_release(void *data) { drm_dev_put(data); } #endif /** * devm_drm_dev_init - Resource managed drm_dev_init() * @parent: Parent device object * @dev: DRM device * @driver: DRM driver * * Managed drm_dev_init(). The DRM device initialized with this function is * automatically put on driver detach using drm_dev_put(). You must supply a * &drm_driver.release callback to control the finalization explicitly. * * RETURNS: * 0 on success, or error code on failure. */ int devm_drm_dev_init(struct device *parent, struct drm_device *dev, struct drm_driver *driver) { STUB(); return -ENOSYS; #ifdef notyet int ret; if (WARN_ON(!driver->release)) return -EINVAL; ret = drm_dev_init(dev, driver, parent); if (ret) return ret; ret = devm_add_action(parent, devm_drm_dev_init_release, dev); if (ret) devm_drm_dev_init_release(dev); return ret; #endif } EXPORT_SYMBOL(devm_drm_dev_init); /** * drm_dev_fini - Finalize a dead DRM device * @dev: DRM device * * Finalize a dead DRM device. This is the converse to drm_dev_init() and * frees up all data allocated by it. All driver private data should be * finalized first. Note that this function does not free the @dev, that is * left to the caller. 
* * The ref-count of @dev must be zero, and drm_dev_fini() should only be called * from a &drm_driver.release callback. */ void drm_dev_fini(struct drm_device *dev) { drm_vblank_cleanup(dev); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_destroy(dev); drm_legacy_ctxbitmap_cleanup(dev); drm_legacy_remove_map_hash(dev); #ifdef __linux__ drm_fs_inode_free(dev->anon_inode); #endif drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); #ifdef __linux__ put_device(dev->dev); #endif mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); mutex_destroy(&dev->struct_mutex); drm_legacy_destroy_members(dev); kfree(dev->unique); } EXPORT_SYMBOL(drm_dev_fini); /** * drm_dev_alloc - Allocate new DRM device * @driver: DRM driver to allocate device for * @parent: Parent device object * * Allocate and initialize a new DRM device. No device registration is done. * Call drm_dev_register() to advertice the device to user space and register it * with other core subsystems. This should be done last in the device * initialization sequence to make sure userspace can't access an inconsistent * state. * * The initial ref-count of the object is 1. Use drm_dev_get() and * drm_dev_put() to take and drop further ref-counts. * * Note that for purely virtual devices @parent can be NULL. * * Drivers that wish to subclass or embed &struct drm_device into their * own struct should look at using drm_dev_init() instead. * * RETURNS: * Pointer to new DRM device, or ERR_PTR on failure. */ struct drm_device *drm_dev_alloc(struct drm_driver *driver, struct device *parent) { struct drm_device *dev; int ret; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); ret = drm_dev_init(dev, driver, parent); if (ret) { kfree(dev); return ERR_PTR(ret); } return dev; } EXPORT_SYMBOL(drm_dev_alloc); static void drm_dev_release(struct kref *ref) { struct drm_device *dev = container_of(ref, struct drm_device, ref); if (dev->driver->release) { dev->driver->release(dev); } else { drm_dev_fini(dev); kfree(dev); } } /** * drm_dev_get - Take reference of a DRM device * @dev: device to take reference of or NULL * * This increases the ref-count of @dev by one. You *must* already own a * reference when calling this. Use drm_dev_put() to drop this reference * again. * * This function never fails. However, this function does not provide *any* * guarantee whether the device is alive or running. It only provides a * reference to the object and the memory associated with it. */ void drm_dev_get(struct drm_device *dev) { if (dev) kref_get(&dev->ref); } EXPORT_SYMBOL(drm_dev_get); /** * drm_dev_put - Drop reference of a DRM device * @dev: device to drop reference of or NULL * * This decreases the ref-count of @dev by one. The device is destroyed if the * ref-count drops to zero. */ void drm_dev_put(struct drm_device *dev) { if (dev) kref_put(&dev->ref, drm_dev_release); } EXPORT_SYMBOL(drm_dev_put); static int create_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; char *name; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return 0; minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY); if (!minor) return 0; /* * Some existing userspace out there uses the existing of the controlD* * sysfs files to figure out whether it's a modeset driver. It only does * readdir, hence a symlink is sufficient (and the least confusing * option). Otherwise controlD* is entirely unused. 
* * Old controlD chardev have been allocated in the range * 64-127. */ name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return -ENOMEM; ret = sysfs_create_link(minor->kdev->kobj.parent, &minor->kdev->kobj, name); kfree(name); return ret; } static void remove_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; char *name; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY); if (!minor) return; name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return; sysfs_remove_link(minor->kdev->kobj.parent, name); kfree(name); } /** * drm_dev_register - Register DRM device * @dev: Device to register * @flags: Flags passed to the driver's .load() function * * Register the DRM device @dev with the system, advertise device to user-space * and start normal device operation. @dev must be initialized via drm_dev_init() * previously. * * Never call this twice on any device! * * NOTE: To ensure backward compatibility with existing drivers method this * function calls the &drm_driver.load method after registering the device * nodes, creating race conditions. Usage of the &drm_driver.load methods is * therefore deprecated, drivers must perform all initialization before calling * drm_dev_register(). * * RETURNS: * 0 on success, negative error code on failure. */ int drm_dev_register(struct drm_device *dev, unsigned long flags) { struct drm_driver *driver = dev->driver; int ret; if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; ret = drm_minor_register(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; ret = create_compat_control_link(dev); if (ret) goto err_minors; dev->registered = true; if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) goto err_minors; } if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_register_all(dev); ret = 0; DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, dev->dev ? dev_name(dev->dev) : "virtual device", dev->primary->index); goto out_unlock; err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); out_unlock: if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return ret; } EXPORT_SYMBOL(drm_dev_register); /** * drm_dev_unregister - Unregister DRM device * @dev: Device to unregister * * Unregister the DRM device from the system. This does the reverse of * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_put() to drop their final reference. * * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), * which can be called while there are still open users of @dev. * * This should be called first in the device teardown code to make sure * userspace can't access the device instance any more. 
*/ void drm_dev_unregister(struct drm_device *dev) { if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_lastclose(dev); dev->registered = false; drm_client_dev_unregister(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_unregister_all(dev); if (dev->driver->unload) dev->driver->unload(dev); #if IS_ENABLED(CONFIG_AGP) if (dev->agp) drm_agp_takedown(dev); #endif drm_legacy_rmmaps(dev); remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); } EXPORT_SYMBOL(drm_dev_unregister); /** * drm_dev_set_unique - Set the unique name of a DRM device * @dev: device of which to set the unique name * @name: unique name * * Sets the unique name of a DRM device using the specified string. This is * already done by drm_dev_init(), drivers should only override the default * unique name for backwards compatibility reasons. * * Return: 0 on success or a negative error code on failure. */ int drm_dev_set_unique(struct drm_device *dev, const char *name) { kfree(dev->unique); dev->unique = kstrdup(name, GFP_KERNEL); return dev->unique ? 0 : -ENOMEM; } EXPORT_SYMBOL(drm_dev_set_unique); /* * DRM Core * The DRM core module initializes all global DRM objects and makes them * available to drivers. Once setup, drivers can probe their respective * devices. * Currently, core management includes: * - The "DRM-Global" key/value database * - Global ID management for connectors * - DRM major number allocation * - DRM minor management * - DRM sysfs class * - DRM debugfs root * * Furthermore, the DRM core provides dynamic char-dev lookups. For each * interface registered on a DRM device, you can request minor numbers from DRM * core. DRM core takes care of major-number management and char-dev * registration. A stub ->open() callback forwards any open() requests to the * registered minor. 
*/ #ifdef __linux__ static int drm_stub_open(struct inode *inode, struct file *filp) { const struct file_operations *new_fops; struct drm_minor *minor; int err; DRM_DEBUG("\n"); minor = drm_minor_acquire(iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); new_fops = fops_get(minor->dev->driver->fops); if (!new_fops) { err = -ENODEV; goto out; } replace_fops(filp, new_fops); if (filp->f_op->open) err = filp->f_op->open(inode, filp); else err = 0; out: drm_minor_release(minor); return err; } static const struct file_operations drm_stub_fops = { .owner = THIS_MODULE, .open = drm_stub_open, .llseek = noop_llseek, }; #endif /* __linux__ */ static void drm_core_exit(void) { #ifdef __linux__ unregister_chrdev(DRM_MAJOR, "drm"); debugfs_remove(drm_debugfs_root); drm_sysfs_destroy(); #endif idr_destroy(&drm_minors_idr); drm_connector_ida_destroy(); } static int __init drm_core_init(void) { #ifdef __linux__ int ret; #endif drm_connector_ida_init(); idr_init(&drm_minors_idr); #ifdef __linux__ ret = drm_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create DRM class: %d\n", ret); goto error; } drm_debugfs_root = debugfs_create_dir("dri", NULL); ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); if (ret < 0) goto error; #endif drm_core_init_complete = true; DRM_DEBUG("Initialized\n"); return 0; #ifdef __linux__ error: drm_core_exit(); return ret; #endif } #ifdef __linux__ module_init(drm_core_init); module_exit(drm_core_exit); #endif void drm_attach_platform(struct drm_driver *driver, bus_space_tag_t iot, bus_dma_tag_t dmat, struct device *dev, struct drm_device *drm) { struct drm_attach_args arg; memset(&arg, 0, sizeof(arg)); arg.driver = driver; arg.bst = iot; arg.dmat = dmat; arg.drm = drm; arg.busid = dev->dv_xname; arg.busid_len = strlen(dev->dv_xname) + 1; config_found_sm(dev, &arg, drmprint, drmsubmatch); } struct drm_device * drm_attach_pci(struct drm_driver *driver, struct pci_attach_args *pa, int is_agp, int primary, struct device *dev, struct drm_device *drm) { struct drm_attach_args arg; struct drm_softc *sc; arg.drm = drm; arg.driver = driver; arg.dmat = pa->pa_dmat; arg.bst = pa->pa_memt; arg.is_agp = is_agp; arg.primary = primary; arg.pa = pa; arg.busid_len = 20; arg.busid = malloc(arg.busid_len + 1, M_DRM, M_NOWAIT); if (arg.busid == NULL) { printf("%s: no memory for drm\n", dev->dv_xname); return (NULL); } snprintf(arg.busid, arg.busid_len, "pci:%04x:%02x:%02x.%1x", pa->pa_domain, pa->pa_bus, pa->pa_device, pa->pa_function); sc = (struct drm_softc *)config_found_sm(dev, &arg, drmprint, drmsubmatch); if (sc == NULL) return NULL; return sc->sc_drm; } int drmprint(void *aux, const char *pnp) { if (pnp != NULL) printf("drm at %s", pnp); return (UNCONF); } int drmsubmatch(struct device *parent, void *match, void *aux) { extern struct cfdriver drm_cd; struct cfdata *cf = match; /* only allow drm to attach */ if (cf->cf_driver == &drm_cd) return ((*cf->cf_attach->ca_match)(parent, match, aux)); return (0); } int drm_pciprobe(struct pci_attach_args *pa, const struct pci_device_id *idlist) { const struct pci_device_id *id_entry; id_entry = drm_find_description(PCI_VENDOR(pa->pa_id), PCI_PRODUCT(pa->pa_id), idlist); if (id_entry != NULL) return 1; return 0; } int drm_probe(struct device *parent, void *match, void *aux) { struct cfdata *cf = match; struct drm_attach_args *da = aux; if (cf->drmdevcf_primary != DRMDEVCF_PRIMARY_UNK) { /* * If primary-ness of device specified, either match * exactly (at high priority), or fail. 
*/ if (cf->drmdevcf_primary != 0 && da->primary != 0) return (10); else return (0); } /* If primary-ness unspecified, it wins. */ return (1); } void drm_attach(struct device *parent, struct device *self, void *aux) { struct drm_softc *sc = (struct drm_softc *)self; struct drm_attach_args *da = aux; struct drm_device *dev = da->drm; int ret; if (drm_refcnt == 0) { drm_linux_init(); drm_core_init(); } drm_refcnt++; if (dev == NULL) { dev = malloc(sizeof(struct drm_device), M_DRM, M_WAITOK | M_ZERO); sc->sc_allocated = 1; } sc->sc_drm = dev; dev->dev = self; dev->dev_private = parent; dev->driver = da->driver; /* no per-device feature limits by default */ dev->driver_features = ~0u; dev->dmat = da->dmat; dev->bst = da->bst; dev->unique = da->busid; if (da->pa) { struct pci_attach_args *pa = da->pa; pcireg_t subsys; subsys = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG); dev->pdev = &dev->_pdev; dev->pdev->vendor = PCI_VENDOR(pa->pa_id); dev->pdev->device = PCI_PRODUCT(pa->pa_id); dev->pdev->subsystem_vendor = PCI_VENDOR(subsys); dev->pdev->subsystem_device = PCI_PRODUCT(subsys); dev->pdev->revision = PCI_REVISION(pa->pa_class); dev->pdev->devfn = PCI_DEVFN(pa->pa_device, pa->pa_function); dev->pdev->bus = &dev->pdev->_bus; dev->pdev->bus->pc = pa->pa_pc; dev->pdev->bus->number = pa->pa_bus; dev->pdev->bus->domain_nr = pa->pa_domain; dev->pdev->bus->bridgetag = pa->pa_bridgetag; if (pa->pa_bridgetag != NULL) { dev->pdev->bus->self = malloc(sizeof(struct pci_dev), M_DRM, M_WAITOK | M_ZERO); dev->pdev->bus->self->pc = pa->pa_pc; dev->pdev->bus->self->tag = *pa->pa_bridgetag; } dev->pdev->pc = pa->pa_pc; dev->pdev->tag = pa->pa_tag; dev->pdev->pci = (struct pci_softc *)parent->dv_parent; #ifdef CONFIG_ACPI dev->pdev->dev.node = acpi_find_pci(pa->pa_pc, pa->pa_tag); aml_register_notify(dev->pdev->dev.node, NULL, drm_linux_acpi_notify, NULL, ACPIDEV_NOPOLL); #endif } mtx_init(&dev->quiesce_mtx, IPL_NONE); mtx_init(&dev->event_lock, IPL_TTY); rw_init(&dev->struct_mutex, "drmdevlk"); rw_init(&dev->filelist_mutex, "drmflist"); rw_init(&dev->clientlist_mutex, "drmclist"); rw_init(&dev->master_mutex, "drmmast"); SPLAY_INIT(&dev->files); INIT_LIST_HEAD(&dev->filelist_internal); INIT_LIST_HEAD(&dev->clientlist); INIT_LIST_HEAD(&dev->vblank_event_list); if (drm_core_check_feature(dev, DRIVER_RENDER)) { ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); if (ret) goto error; } ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); if (ret) goto error; if (drm_core_check_feature(dev, DRIVER_USE_AGP)) { #if IS_ENABLED(CONFIG_AGP) if (da->is_agp) dev->agp = drm_agp_init(); #endif if (dev->agp != NULL) { if (drm_mtrr_add(dev->agp->info.ai_aperture_base, dev->agp->info.ai_aperture_size, DRM_MTRR_WC) == 0) dev->agp->mtrr = 1; } } if (dev->driver->gem_size > 0) { KASSERT(dev->driver->gem_size >= sizeof(struct drm_gem_object)); /* XXX unique name */ pool_init(&dev->objpl, dev->driver->gem_size, 0, IPL_NONE, 0, "drmobjpl", NULL); } if (drm_core_check_feature(dev, DRIVER_GEM)) { ret = drm_gem_init(dev); if (ret) { DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n"); goto error; } } printf("\n"); return; error: drm_lastclose(dev); dev->dev_private = NULL; } int drm_detach(struct device *self, int flags) { struct drm_softc *sc = (struct drm_softc *)self; struct drm_device *dev = sc->sc_drm; drm_refcnt--; if (drm_refcnt == 0) { drm_core_exit(); drm_linux_exit(); } drm_lastclose(dev); if (drm_core_check_feature(dev, DRIVER_GEM)) { drm_gem_destroy(dev); if (dev->driver->gem_size > 0) pool_destroy(&dev->objpl); } 
drm_vblank_cleanup(dev); if (dev->agp && dev->agp->mtrr) { int retcode; retcode = drm_mtrr_del(0, dev->agp->info.ai_aperture_base, dev->agp->info.ai_aperture_size, DRM_MTRR_WC); DRM_DEBUG("mtrr_del = %d", retcode); } free(dev->agp, M_DRM, 0); if (dev->pdev && dev->pdev->bus) free(dev->pdev->bus->self, M_DRM, sizeof(struct pci_dev)); if (sc->sc_allocated) free(dev, M_DRM, sizeof(struct drm_device)); return 0; } void drm_quiesce(struct drm_device *dev) { mtx_enter(&dev->quiesce_mtx); dev->quiesce = 1; while (dev->quiesce_count > 0) { msleep_nsec(&dev->quiesce_count, &dev->quiesce_mtx, PZERO, "drmqui", INFSLP); } mtx_leave(&dev->quiesce_mtx); } void drm_wakeup(struct drm_device *dev) { mtx_enter(&dev->quiesce_mtx); dev->quiesce = 0; wakeup(&dev->quiesce); mtx_leave(&dev->quiesce_mtx); } int drm_activate(struct device *self, int act) { struct drm_softc *sc = (struct drm_softc *)self; struct drm_device *dev = sc->sc_drm; switch (act) { case DVACT_QUIESCE: drm_quiesce(dev); break; case DVACT_WAKEUP: drm_wakeup(dev); break; } return (0); } struct cfattach drm_ca = { sizeof(struct drm_softc), drm_probe, drm_attach, drm_detach, drm_activate }; struct cfdriver drm_cd = { 0, "drm", DV_DULL }; const struct pci_device_id * drm_find_description(int vendor, int device, const struct pci_device_id *idlist) { int i = 0; for (i = 0; idlist[i].vendor != 0; i++) { if ((idlist[i].vendor == vendor) && (idlist[i].device == device) && (idlist[i].subvendor == PCI_ANY_ID) && (idlist[i].subdevice == PCI_ANY_ID)) return &idlist[i]; } return NULL; } int drm_file_cmp(struct drm_file *f1, struct drm_file *f2) { return (f1->fminor < f2->fminor ? -1 : f1->fminor > f2->fminor); } SPLAY_GENERATE(drm_file_tree, drm_file, link, drm_file_cmp); struct drm_file * drm_find_file_by_minor(struct drm_device *dev, int minor) { struct drm_file key; key.fminor = minor; return (SPLAY_FIND(drm_file_tree, &dev->files, &key)); } struct drm_device * drm_get_device_from_kdev(dev_t kdev) { int unit = minor(kdev) & ((1 << CLONE_SHIFT) - 1); /* control */ if (unit >= 64 && unit < 128) unit -= 64; /* render */ if (unit >= 128) unit -= 128; struct drm_softc *sc; if (unit < drm_cd.cd_ndevs) { sc = (struct drm_softc *)drm_cd.cd_devs[unit]; if (sc) return sc->sc_drm; } return NULL; } void filt_drmdetach(struct knote *kn) { struct drm_device *dev = kn->kn_hook; int s; s = spltty(); klist_remove_locked(&dev->note, kn); splx(s); } int filt_drmkms(struct knote *kn, long hint) { if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } void filt_drmreaddetach(struct knote *kn) { struct drm_file *file_priv = kn->kn_hook; int s; s = spltty(); klist_remove_locked(&file_priv->rsel.si_note, kn); splx(s); } int filt_drmread(struct knote *kn, long hint) { struct drm_file *file_priv = kn->kn_hook; int val = 0; if ((hint & NOTE_SUBMIT) == 0) mtx_enter(&file_priv->minor->dev->event_lock); val = !list_empty(&file_priv->event_list); if ((hint & NOTE_SUBMIT) == 0) mtx_leave(&file_priv->minor->dev->event_lock); return (val); } const struct filterops drm_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_drmdetach, .f_event = filt_drmkms, }; const struct filterops drmread_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_drmreaddetach, .f_event = filt_drmread, }; int drmkqfilter(dev_t kdev, struct knote *kn) { struct drm_device *dev = NULL; struct drm_file *file_priv = NULL; int s; dev = drm_get_device_from_kdev(kdev); if (dev == NULL || dev->dev_private == NULL) return (ENXIO); switch (kn->kn_filter) { 
case EVFILT_READ: mutex_lock(&dev->struct_mutex); file_priv = drm_find_file_by_minor(dev, minor(kdev)); mutex_unlock(&dev->struct_mutex); if (file_priv == NULL) return (ENXIO); kn->kn_fop = &drmread_filtops; kn->kn_hook = file_priv; s = spltty(); klist_insert_locked(&file_priv->rsel.si_note, kn); splx(s); break; case EVFILT_DEVICE: kn->kn_fop = &drm_filtops; kn->kn_hook = dev; s = spltty(); klist_insert_locked(&dev->note, kn); splx(s); break; default: return (EINVAL); } return (0); } int drmopen(dev_t kdev, int flags, int fmt, struct proc *p) { struct drm_device *dev = NULL; struct drm_file *file_priv; struct drm_minor *dm; int ret = 0; int dminor, realminor, minor_type; int need_setup = 0; dev = drm_get_device_from_kdev(kdev); if (dev == NULL || dev->dev_private == NULL) return (ENXIO); DRM_DEBUG("open_count = %d\n", atomic_read(&dev->open_count)); if (flags & O_EXCL) return (EBUSY); /* No exclusive opens */ if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); if (!atomic_fetch_inc(&dev->open_count)) need_setup = 1; dminor = minor(kdev); realminor = dminor & ((1 << CLONE_SHIFT) - 1); if (realminor < 64) minor_type = DRM_MINOR_PRIMARY; else if (realminor >= 64 && realminor < 128) minor_type = DRM_MINOR_CONTROL; else minor_type = DRM_MINOR_RENDER; dm = *drm_minor_get_slot(dev, minor_type); dm->index = minor(kdev); file_priv = drm_file_alloc(dm); if (IS_ERR(file_priv)) { ret = ENOMEM; goto err; } /* first opener automatically becomes master */ if (drm_is_primary_client(file_priv)) { ret = drm_master_open(file_priv); if (ret != 0) goto out_file_free; } file_priv->filp = (void *)file_priv; file_priv->fminor = minor(kdev); mutex_lock(&dev->filelist_mutex); SPLAY_INSERT(drm_file_tree, &dev->files, file_priv); mutex_unlock(&dev->filelist_mutex); if (need_setup) { ret = drm_legacy_setup(dev); if (ret) goto out_file_free; } if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return 0; out_file_free: drm_file_free(file_priv); err: atomic_dec(&dev->open_count); if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return (ret); } int drmclose(dev_t kdev, int flags, int fmt, struct proc *p) { struct drm_device *dev = drm_get_device_from_kdev(kdev); struct drm_file *file_priv; int retcode = 0; if (dev == NULL) return (ENXIO); if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); DRM_DEBUG("open_count = %d\n", atomic_read(&dev->open_count)); mutex_lock(&dev->filelist_mutex); file_priv = drm_find_file_by_minor(dev, minor(kdev)); if (file_priv == NULL) { DRM_ERROR("can't find authenticator\n"); retcode = EINVAL; mutex_unlock(&dev->filelist_mutex); goto done; } SPLAY_REMOVE(drm_file_tree, &dev->files, file_priv); mutex_unlock(&dev->filelist_mutex); drm_file_free(file_priv); done: if (atomic_dec_and_test(&dev->open_count)) drm_lastclose(dev); if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return (retcode); } int drmread(dev_t kdev, struct uio *uio, int ioflag) { struct drm_device *dev = drm_get_device_from_kdev(kdev); struct drm_file *file_priv; struct drm_pending_event *ev; int error = 0; if (dev == NULL) return (ENXIO); mutex_lock(&dev->filelist_mutex); file_priv = drm_find_file_by_minor(dev, minor(kdev)); mutex_unlock(&dev->filelist_mutex); if (file_priv == NULL) return (ENXIO); /* * The semantics are a little weird here. We will wait until we * have events to process, but as soon as we have events we will * only deliver as many as we have. 
* Note that events are atomic, if the read buffer will not fit in * a whole event, we won't read any of it out. */ mtx_enter(&dev->event_lock); while (error == 0 && list_empty(&file_priv->event_list)) { if (ioflag & IO_NDELAY) { mtx_leave(&dev->event_lock); return (EAGAIN); } error = msleep_nsec(&file_priv->event_wait, &dev->event_lock, PWAIT | PCATCH, "drmread", INFSLP); } if (error) { mtx_leave(&dev->event_lock); return (error); } while (drm_dequeue_event(dev, file_priv, uio->uio_resid, &ev)) { MUTEX_ASSERT_UNLOCKED(&dev->event_lock); /* XXX we always destroy the event on error. */ error = uiomove(ev->event, ev->event->length, uio); kfree(ev); if (error) break; mtx_enter(&dev->event_lock); } MUTEX_ASSERT_UNLOCKED(&dev->event_lock); return (error); } /* * Dequeue an event from the file priv in question, returning 1 if an * event was found. We take the resid from the read as a parameter because * we will only dequeue an event if the read buffer has space to fit the * entire thing. * * We are called locked, but we will *unlock* the queue on return so that * we may sleep to copyout the event. */ int drm_dequeue_event(struct drm_device *dev, struct drm_file *file_priv, size_t resid, struct drm_pending_event **out) { struct drm_pending_event *e = NULL; int gotone = 0; MUTEX_ASSERT_LOCKED(&dev->event_lock); *out = NULL; if (list_empty(&file_priv->event_list)) goto out; e = list_first_entry(&file_priv->event_list, struct drm_pending_event, link); if (e->event->length > resid) goto out; file_priv->event_space += e->event->length; list_del(&e->link); *out = e; gotone = 1; out: mtx_leave(&dev->event_lock); return (gotone); } int drmpoll(dev_t kdev, int events, struct proc *p) { struct drm_device *dev = drm_get_device_from_kdev(kdev); struct drm_file *file_priv; int revents = 0; if (dev == NULL) return (POLLERR); mutex_lock(&dev->filelist_mutex); file_priv = drm_find_file_by_minor(dev, minor(kdev)); mutex_unlock(&dev->filelist_mutex); if (file_priv == NULL) return (POLLERR); mtx_enter(&dev->event_lock); if (events & (POLLIN | POLLRDNORM)) { if (!list_empty(&file_priv->event_list)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &file_priv->rsel); } mtx_leave(&dev->event_lock); return (revents); } paddr_t drmmmap(dev_t kdev, off_t offset, int prot) { return -1; } struct drm_dmamem * drm_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t alignment, int nsegments, bus_size_t maxsegsz, int mapflags, int loadflags) { struct drm_dmamem *mem; size_t strsize; /* * segs is the last member of the struct since we modify the size * to allow extra segments if more than one are allowed. 
*/ strsize = sizeof(*mem) + (sizeof(bus_dma_segment_t) * (nsegments - 1)); mem = malloc(strsize, M_DRM, M_NOWAIT | M_ZERO); if (mem == NULL) return (NULL); mem->size = size; if (bus_dmamap_create(dmat, size, nsegments, maxsegsz, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &mem->map) != 0) goto strfree; if (bus_dmamem_alloc(dmat, size, alignment, 0, mem->segs, nsegments, &mem->nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0) goto destroy; if (bus_dmamem_map(dmat, mem->segs, mem->nsegs, size, &mem->kva, BUS_DMA_NOWAIT | mapflags) != 0) goto free; if (bus_dmamap_load(dmat, mem->map, mem->kva, size, NULL, BUS_DMA_NOWAIT | loadflags) != 0) goto unmap; return (mem); unmap: bus_dmamem_unmap(dmat, mem->kva, size); free: bus_dmamem_free(dmat, mem->segs, mem->nsegs); destroy: bus_dmamap_destroy(dmat, mem->map); strfree: free(mem, M_DRM, 0); return (NULL); } void drm_dmamem_free(bus_dma_tag_t dmat, struct drm_dmamem *mem) { if (mem == NULL) return; bus_dmamap_unload(dmat, mem->map); bus_dmamem_unmap(dmat, mem->kva, mem->size); bus_dmamem_free(dmat, mem->segs, mem->nsegs); bus_dmamap_destroy(dmat, mem->map); free(mem, M_DRM, 0); } struct drm_dma_handle * drm_pci_alloc(struct drm_device *dev, size_t size, size_t align) { struct drm_dma_handle *dmah; dmah = malloc(sizeof(*dmah), M_DRM, M_WAITOK); dmah->mem = drm_dmamem_alloc(dev->dmat, size, align, 1, size, BUS_DMA_NOCACHE, 0); if (dmah->mem == NULL) { free(dmah, M_DRM, sizeof(*dmah)); return NULL; } dmah->busaddr = dmah->mem->segs[0].ds_addr; dmah->size = dmah->mem->size; dmah->vaddr = dmah->mem->kva; return (dmah); } void drm_pci_free(struct drm_device *dev, struct drm_dma_handle *dmah) { if (dmah == NULL) return; drm_dmamem_free(dev->dmat, dmah->mem); free(dmah, M_DRM, sizeof(*dmah)); } /* * Compute order. Can be made faster. */ int drm_order(unsigned long size) { int order; unsigned long tmp; for (order = 0, tmp = size; tmp >>= 1; ++order) ; if (size & ~(1 << order)) ++order; return order; } int drm_getpciinfo(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_pciinfo *info = data; if (dev->pdev == NULL) return -ENOTTY; info->domain = dev->pdev->bus->domain_nr; info->bus = dev->pdev->bus->number; info->dev = PCI_SLOT(dev->pdev->devfn); info->func = PCI_FUNC(dev->pdev->devfn); info->vendor_id = dev->pdev->vendor; info->device_id = dev->pdev->device; info->subvendor_id = dev->pdev->subsystem_vendor; info->subdevice_id = dev->pdev->subsystem_device; info->revision_id = 0; return 0; }
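/*
 * drm_order() above notes that it "can be made faster".  A minimal sketch of
 * one way to do so with a count-leading-zeros intrinsic: for size > 1,
 * ceil(log2(size)) == bits - clz(size - 1).  The helper name drm_order_clz()
 * is hypothetical and assumes a GCC/Clang-style __builtin_clzl(); NBBY comes
 * from <sys/param.h>.
 */
int
drm_order_clz(unsigned long size)
{
	if (size <= 1)
		return (0);

	return ((int)(sizeof(size) * NBBY) - __builtin_clzl(size - 1));
}

/*
 * Example: for a 4096-byte request both drm_order(4096) and
 * drm_order_clz(4096) return 12, and both return 0 for size 0 or 1.
 */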
/* $OpenBSD: route.c,v 1.398 2021/03/10 10:21:48 jsg Exp $ */ /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.2 (Berkeley) 11/15/93 */ /* * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 * * NRL grants permission for redistribution and use in source and binary * forms, with or without modification, of the software and documentation * created at NRL provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgements: * This product includes software developed by the University of * California, Berkeley and its contributors. * This product includes software developed at the Information * Technology Division, US Naval Research Laboratory. * 4. Neither the name of the NRL nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies, either expressed or implied, of the US Naval * Research Laboratory (NRL). */ #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/timeout.h> #include <sys/domain.h> #include <sys/protosw.h> #include <sys/ioctl.h> #include <sys/kernel.h> #include <sys/queue.h> #include <sys/pool.h> #include <sys/atomic.h> #include <net/if.h> #include <net/if_var.h> #include <net/if_dl.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/ip_var.h> #include <netinet/in_var.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_var.h> #endif #ifdef MPLS #include <netmpls/mpls.h> #endif #ifdef BFD #include <net/bfd.h> #endif #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) /* Give some jitter to hash, to avoid synchronization between routers. 
*/ static uint32_t rt_hashjitter; extern unsigned int rtmap_limit; struct cpumem * rtcounters; int rttrash; /* routes not in table but not freed */ int ifatrash; /* ifas not in ifp list but not free */ struct pool rtentry_pool; /* pool for rtentry structures */ struct pool rttimer_pool; /* pool for rttimer structures */ void rt_timer_init(void); int rt_setgwroute(struct rtentry *, u_int); void rt_putgwroute(struct rtentry *); int rtflushclone1(struct rtentry *, void *, u_int); int rtflushclone(struct rtentry *, unsigned int); int rt_ifa_purge_walker(struct rtentry *, void *, unsigned int); struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int); int rt_clone(struct rtentry **, struct sockaddr *, unsigned int); struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *); static int rt_copysa(struct sockaddr *, struct sockaddr *, struct sockaddr **); #ifdef DDB void db_print_sa(struct sockaddr *); void db_print_ifa(struct ifaddr *); int db_show_rtentry(struct rtentry *, void *, unsigned int); #endif #define LABELID_MAX 50000 struct rt_label { TAILQ_ENTRY(rt_label) rtl_entry; char rtl_name[RTLABEL_LEN]; u_int16_t rtl_id; int rtl_ref; }; TAILQ_HEAD(rt_labels, rt_label) rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels); void route_init(void) { rtcounters = counters_alloc(rts_ncounters); pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_SOFTNET, 0, "rtentry", NULL); while (rt_hashjitter == 0) rt_hashjitter = arc4random(); #ifdef BFD bfdinit(); #endif } /* * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. */ int rtisvalid(struct rtentry *rt) { if (rt == NULL) return (0); if (!ISSET(rt->rt_flags, RTF_UP)) return (0); if (ISSET(rt->rt_flags, RTF_GATEWAY)) { KASSERT(rt->rt_gwroute != NULL); KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY)); if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP)) return (0); } return (1); } /* * Do the actual lookup for rtalloc(9), do not use directly! * * Return the best matching entry for the destination ``dst''. * * "RT_RESOLVE" means that a corresponding L2 entry should * be added to the routing table and resolved (via ARP or * NDP), if it does not exist. */ struct rtentry * rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid) { struct rtentry *rt = NULL; rt = rtable_match(tableid, dst, src); if (rt == NULL) { rtstat_inc(rts_unreach); return (NULL); } if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE)) rt_clone(&rt, dst, tableid); rt->rt_use++; return (rt); } int rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid) { struct rt_addrinfo info; struct rtentry *rt = *rtp; int error = 0; memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = dst; /* * The priority of cloned route should be different * to avoid conflict with /32 cloning routes. * * It should also be higher to let the ARP layer find * cloned routes instead of the cloning one. 
*/ KERNEL_LOCK(); error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt, rtableid); KERNEL_UNLOCK(); if (error) { rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid); } else { /* Inform listeners of the new route */ rtm_send(rt, RTM_ADD, 0, rtableid); rtfree(*rtp); *rtp = rt; } return (error); } /* * Originated from bridge_hash() in if_bridge.c */ #define mix(a, b, c) do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (0) int rt_hash(struct rtentry *rt, struct sockaddr *dst, uint32_t *src) { uint32_t a, b, c; if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH)) return (-1); a = b = 0x9e3779b9; c = rt_hashjitter; switch (dst->sa_family) { case AF_INET: { struct sockaddr_in *sin; if (!ipmultipath) return (-1); sin = satosin(dst); a += sin->sin_addr.s_addr; b += src[0]; mix(a, b, c); break; } #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; if (!ip6_multipath) return (-1); sin6 = satosin6(dst); a += sin6->sin6_addr.s6_addr32[0]; b += sin6->sin6_addr.s6_addr32[2]; c += src[0]; mix(a, b, c); a += sin6->sin6_addr.s6_addr32[1]; b += sin6->sin6_addr.s6_addr32[3]; c += src[1]; mix(a, b, c); a += sin6->sin6_addr.s6_addr32[2]; b += sin6->sin6_addr.s6_addr32[1]; c += src[2]; mix(a, b, c); a += sin6->sin6_addr.s6_addr32[3]; b += sin6->sin6_addr.s6_addr32[0]; c += src[3]; mix(a, b, c); break; } #endif /* INET6 */ } return (c & 0xffff); } /* * Allocate a route, potentially using multipath to select the peer. */ struct rtentry * rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid) { return (rt_match(dst, src, RT_RESOLVE, rtableid)); } /* * Look in the routing table for the best matching entry for * ``dst''. * * If a route with a gateway is found and its next hop is no * longer valid, try to cache it. */ struct rtentry * rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid) { return (rt_match(dst, NULL, flags, rtableid)); } /* * Cache the route entry corresponding to a reachable next hop in * the gateway entry ``rt''. */ int rt_setgwroute(struct rtentry *rt, u_int rtableid) { struct rtentry *prt, *nhrt; unsigned int rdomain = rtable_l2(rtableid); int error; NET_ASSERT_LOCKED(); KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY)); /* If we cannot find a valid next hop bail. */ nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain); if (nhrt == NULL) return (ENOENT); /* Next hop entry must be on the same interface. */ if (nhrt->rt_ifidx != rt->rt_ifidx) { struct sockaddr_in6 sa_mask; if (!ISSET(nhrt->rt_flags, RTF_LLINFO) || !ISSET(nhrt->rt_flags, RTF_CLONED)) { rtfree(nhrt); return (EHOSTUNREACH); } /* * We found a L2 entry, so we might have multiple * RTF_CLONING routes for the same subnet. Query * the first route of the multipath chain and iterate * until we find the correct one. */ prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent), rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY); rtfree(nhrt); while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx) prt = rtable_iterate(prt); /* We found nothing or a non-cloning MPATH route. 
*/ if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) { rtfree(prt); return (EHOSTUNREACH); } error = rt_clone(&prt, rt->rt_gateway, rdomain); if (error) { rtfree(prt); return (error); } nhrt = prt; } /* * Next hop must be reachable, this also prevents rtentry * loops for example when rt->rt_gwroute points to rt. */ if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) { rtfree(nhrt); return (ENETUNREACH); } /* Next hop is valid so remove possible old cache. */ rt_putgwroute(rt); KASSERT(rt->rt_gwroute == NULL); /* * If the MTU of next hop is 0, this will reset the MTU of the * route to run PMTUD again from scratch. */ if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu)) rt->rt_mtu = nhrt->rt_mtu; /* * To avoid reference counting problems when writing link-layer * addresses in an outgoing packet, we ensure that the lifetime * of a cached entry is greater than the bigger lifetime of the * gateway entries it is pointed by. */ nhrt->rt_flags |= RTF_CACHED; nhrt->rt_cachecnt++; rt->rt_gwroute = nhrt; return (0); } /* * Invalidate the cached route entry of the gateway entry ``rt''. */ void rt_putgwroute(struct rtentry *rt) { struct rtentry *nhrt = rt->rt_gwroute; NET_ASSERT_LOCKED(); if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL) return; KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED)); KASSERT(nhrt->rt_cachecnt > 0); --nhrt->rt_cachecnt; if (nhrt->rt_cachecnt == 0) nhrt->rt_flags &= ~RTF_CACHED; rtfree(rt->rt_gwroute); rt->rt_gwroute = NULL; } void rtref(struct rtentry *rt) { atomic_inc_int(&rt->rt_refcnt); } void rtfree(struct rtentry *rt) { int refcnt; if (rt == NULL) return; refcnt = (int)atomic_dec_int_nv(&rt->rt_refcnt); if (refcnt <= 0) { KASSERT(!ISSET(rt->rt_flags, RTF_UP)); KASSERT(!RT_ROOT(rt)); atomic_dec_int(&rttrash); if (refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); return; } KERNEL_LOCK(); rt_timer_remove_all(rt); ifafree(rt->rt_ifa); rtlabel_unref(rt->rt_labelid); #ifdef MPLS rt_mpls_clear(rt); #endif free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len)); free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len); KERNEL_UNLOCK(); pool_put(&rtentry_pool, rt); } } void ifafree(struct ifaddr *ifa) { if (ifa == NULL) panic("ifafree"); if (ifa->ifa_refcnt == 0) { ifatrash--; free(ifa, M_IFADDR, 0); } else ifa->ifa_refcnt--; } /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. */ void rtredirect(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain) { struct rtentry *rt; int error = 0; enum rtstat_counters stat = rts_ncounters; struct rt_addrinfo info; struct ifaddr *ifa; unsigned int ifidx = 0; int flags = RTF_GATEWAY|RTF_HOST; uint8_t prio = RTP_NONE; NET_ASSERT_LOCKED(); /* verify the gateway is directly reachable */ rt = rtalloc(gateway, 0, rdomain); if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) { rtfree(rt); error = ENETUNREACH; goto out; } ifidx = rt->rt_ifidx; ifa = rt->rt_ifa; rtfree(rt); rt = NULL; rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY); /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. 
*/ #define equal(a1, a2) \ ((a1)->sa_len == (a2)->sa_len && \ bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; else if (ifa_ifwithaddr(gateway, rdomain) != NULL || (gateway->sa_family == AF_INET && in_broadcast(satosin(gateway)->sin_addr, rdomain))) error = EHOSTUNREACH; if (error) goto done; /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if (rt == NULL) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (ISSET(rt->rt_flags, RTF_GATEWAY)) { if (!ISSET(rt->rt_flags, RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: rtfree(rt); flags |= RTF_DYNAMIC; bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_ifa = ifa; info.rti_flags = flags; rt = NULL; error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt, rdomain); if (error == 0) { flags = rt->rt_flags; prio = rt->rt_priority; } stat = rts_dynamic; } else { /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; prio = rt->rt_priority; stat = rts_newgateway; rt_setgate(rt, gateway, rdomain); } } else error = EHOSTUNREACH; done: if (rt) { if (rtp && !error) *rtp = rt; else rtfree(rt); } out: if (error) rtstat_inc(rts_badredirect); else if (stat != rts_ncounters) rtstat_inc(stat); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_AUTHOR] = src; rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain); } /* * Delete a route and generate a message */ int rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid) { int error; struct rt_addrinfo info; struct sockaddr_rtlabel sa_rl; struct sockaddr_in6 sa_mask; KASSERT(rt->rt_ifidx == ifp->if_index); /* * Request the new route so that the entry is not actually * deleted. That will allow the information being reported to * be accurate (and consistent with route_output()). */ memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if (!ISSET(rt->rt_flags, RTF_HOST)) info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid); rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority, rt->rt_ifidx, error, tableid); if (error == 0) rtfree(rt); return (error); } static inline int rtequal(struct rtentry *a, struct rtentry *b) { if (a == b) return 1; if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 && rt_plen(a) == rt_plen(b)) return 1; else return 0; } int rtflushclone1(struct rtentry *rt, void *arg, u_int id) { struct rtentry *cloningrt = arg; struct ifnet *ifp; if (!ISSET(rt->rt_flags, RTF_CLONED)) return 0; /* Cached routes must stay alive as long as their parent is alive.
*/ if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt)) return 0; if (!rtequal(rt->rt_parent, cloningrt)) return 0; /* * This happens when an interface with a RTF_CLONING route is * being detached. In this case it's safe to bail because all * the routes are being purged by rt_ifa_purge(). */ ifp = if_get(rt->rt_ifidx); if (ifp == NULL) return 0; if_put(ifp); return EEXIST; } int rtflushclone(struct rtentry *parent, unsigned int rtableid) { struct rtentry *rt = NULL; struct ifnet *ifp; int error; #ifdef DIAGNOSTIC if (!parent || (parent->rt_flags & RTF_CLONING) == 0) panic("rtflushclone: called with a non-cloning route"); #endif do { error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt, rtflushclone1, parent); if (rt != NULL && error == EEXIST) { ifp = if_get(rt->rt_ifidx); if (ifp == NULL) { error = EAGAIN; } else { error = rtdeletemsg(rt, ifp, rtableid); if (error == 0) error = EAGAIN; if_put(ifp); } } rtfree(rt); rt = NULL; } while (error == EAGAIN); return error; } int rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp, struct rtentry **ret_nrt, u_int tableid) { struct rtentry *rt; int error; NET_ASSERT_LOCKED(); if (!rtable_exists(tableid)) return (EAFNOSUPPORT); rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio); if (rt == NULL) return (ESRCH); /* Make sure that's the route the caller want to delete. */ if (ifp != NULL && ifp->if_index != rt->rt_ifidx) { rtfree(rt); return (ESRCH); } #ifdef BFD if (ISSET(rt->rt_flags, RTF_BFD)) bfdclear(rt); #endif error = rtable_delete(tableid, info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rt); if (error != 0) { rtfree(rt); return (ESRCH); } /* Release next hop cache before flushing cloned entries. */ rt_putgwroute(rt); /* Clean up any cloned children. 
*/ if (ISSET(rt->rt_flags, RTF_CLONING)) rtflushclone(rt, tableid); rtfree(rt->rt_parent); rt->rt_parent = NULL; rt->rt_flags &= ~RTF_UP; KASSERT(ifp->if_index == rt->rt_ifidx); ifp->if_rtrequest(ifp, RTM_DELETE, rt); atomic_inc_int(&rttrash); if (ret_nrt != NULL) *ret_nrt = rt; else rtfree(rt); return (0); } int rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio, struct rtentry **ret_nrt, u_int tableid) { struct ifnet *ifp; struct rtentry *rt, *crt; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_rtlabel *sa_rl, sa_rl2; struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; int error; NET_ASSERT_LOCKED(); if (!rtable_exists(tableid)) return (EAFNOSUPPORT); if (info->rti_flags & RTF_HOST) info->rti_info[RTAX_NETMASK] = NULL; switch (req) { case RTM_DELETE: return (EINVAL); case RTM_RESOLVE: if (ret_nrt == NULL || (rt = *ret_nrt) == NULL) return (EINVAL); if ((rt->rt_flags & RTF_CLONING) == 0) return (EINVAL); KASSERT(rt->rt_ifa->ifa_ifp != NULL); info->rti_ifa = rt->rt_ifa; info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST); info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC); info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl); info->rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl2); /* FALLTHROUGH */ case RTM_ADD: if (info->rti_ifa == NULL) return (EINVAL); ifa = info->rti_ifa; ifp = ifa->ifa_ifp; if (prio == 0) prio = ifp->if_priority + RTP_STATIC; error = rt_copysa(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], &ndst); if (error) return (error); rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO); if (rt == NULL) { free(ndst, M_RTABLE, ndst->sa_len); return (ENOBUFS); } rt->rt_refcnt = 1; rt->rt_flags = info->rti_flags | RTF_UP; rt->rt_priority = prio; /* init routing priority */ LIST_INIT(&rt->rt_timer); /* Check the link state if the table supports it. */ if (rtable_mpath_capable(tableid, ndst->sa_family) && !ISSET(rt->rt_flags, RTF_LOCAL) && (!LINK_STATE_IS_UP(ifp->if_link_state) || !ISSET(ifp->if_flags, IFF_UP))) { rt->rt_flags &= ~RTF_UP; rt->rt_priority |= RTP_DOWN; } if (info->rti_info[RTAX_LABEL] != NULL) { sa_rl = (struct sockaddr_rtlabel *) info->rti_info[RTAX_LABEL]; rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label); } #ifdef MPLS /* We have to allocate additional space for MPLS infos */ if (info->rti_flags & RTF_MPLS && (info->rti_info[RTAX_SRC] != NULL || info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) { error = rt_mpls_set(rt, info->rti_info[RTAX_SRC], info->rti_mpls); if (error) { free(ndst, M_RTABLE, ndst->sa_len); pool_put(&rtentry_pool, rt); return (error); } } else rt_mpls_clear(rt); #endif ifa->ifa_refcnt++; rt->rt_ifa = ifa; rt->rt_ifidx = ifp->if_index; /* * Copy metrics and a back pointer from the cloned * route's parent. */ if (ISSET(rt->rt_flags, RTF_CLONED)) { rtref(*ret_nrt); rt->rt_parent = *ret_nrt; rt->rt_rmx = (*ret_nrt)->rt_rmx; } /* * We must set rt->rt_gateway before adding ``rt'' to * the routing table because the radix MPATH code use * it to (re)order routes. 
*/ if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY], tableid))) { ifafree(ifa); rtfree(rt->rt_parent); rt_putgwroute(rt); free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len)); free(ndst, M_RTABLE, ndst->sa_len); pool_put(&rtentry_pool, rt); return (error); } error = rtable_insert(tableid, ndst, info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], rt->rt_priority, rt); if (error != 0 && (crt = rtable_match(tableid, ndst, NULL)) != NULL) { /* overwrite cloned route */ if (ISSET(crt->rt_flags, RTF_CLONED) && !ISSET(crt->rt_flags, RTF_CACHED)) { struct ifnet *cifp; cifp = if_get(crt->rt_ifidx); KASSERT(cifp != NULL); rtdeletemsg(crt, cifp, tableid); if_put(cifp); error = rtable_insert(tableid, ndst, info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], rt->rt_priority, rt); } rtfree(crt); } if (error != 0) { ifafree(ifa); rtfree(rt->rt_parent); rt_putgwroute(rt); free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len)); free(ndst, M_RTABLE, ndst->sa_len); pool_put(&rtentry_pool, rt); return (EEXIST); } ifp->if_rtrequest(ifp, req, rt); if_group_routechange(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK]); if (ret_nrt != NULL) *ret_nrt = rt; else rtfree(rt); break; } return (0); } int rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid) { int glen = ROUNDUP(gate->sa_len); struct sockaddr *sa; if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) { sa = malloc(glen, M_RTABLE, M_NOWAIT); if (sa == NULL) return (ENOBUFS); if (rt->rt_gateway != NULL) { free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len)); } rt->rt_gateway = sa; } memmove(rt->rt_gateway, gate, glen); if (ISSET(rt->rt_flags, RTF_GATEWAY)) return (rt_setgwroute(rt, rtableid)); return (0); } /* * Return the route entry containing the next hop link-layer * address corresponding to ``rt''. */ struct rtentry * rt_getll(struct rtentry *rt) { if (ISSET(rt->rt_flags, RTF_GATEWAY)) { KASSERT(rt->rt_gwroute != NULL); return (rt->rt_gwroute); } return (rt); } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero(cp2, cplim2 - cp2); } /* * allocate new sockaddr structure based on the user supplied src and mask * that is useable for the routing table. 
*/ static int rt_copysa(struct sockaddr *src, struct sockaddr *mask, struct sockaddr **dst) { static const u_char maskarray[] = { 0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; struct sockaddr *ndst; struct domain *dp; u_char *csrc, *cdst; int i, plen; for (i = 0; (dp = domains[i]) != NULL; i++) { if (dp->dom_rtoffset == 0) continue; if (src->sa_family == dp->dom_family) break; } if (dp == NULL) return (EAFNOSUPPORT); if (src->sa_len < dp->dom_sasize) return (EINVAL); plen = rtable_satoplen(src->sa_family, mask); if (plen == -1) return (EINVAL); ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO); if (ndst == NULL) return (ENOBUFS); ndst->sa_family = src->sa_family; ndst->sa_len = dp->dom_sasize; csrc = (u_char *)src + dp->dom_rtoffset; cdst = (u_char *)ndst + dp->dom_rtoffset; memcpy(cdst, csrc, plen / 8); if (plen % 8 != 0) cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8]; *dst = ndst; return (0); } int rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst, unsigned int rdomain) { struct ifnet *ifp = ifa->ifa_ifp; struct rtentry *rt; struct sockaddr_rtlabel sa_rl; struct rt_addrinfo info; uint8_t prio = ifp->if_priority + RTP_STATIC; int error; KASSERT(rdomain == rtable_l2(rdomain)); memset(&info, 0, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; if (flags & RTF_LLINFO) info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl); else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl); #ifdef MPLS if ((flags & RTF_MPLS) == RTF_MPLS) info.rti_mpls = MPLS_OP_POP; #endif /* MPLS */ if ((flags & RTF_HOST) == 0) info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; if (flags & (RTF_LOCAL|RTF_BROADCAST)) prio = RTP_LOCAL; if (flags & RTF_CONNECTED) prio = ifp->if_priority + RTP_CONNECTED; error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain); if (error == 0) { /* * A local route is created for every address configured * on an interface, so use this information to notify * userland that a new address has been added. */ if (flags & RTF_LOCAL) rtm_addr(RTM_NEWADDR, ifa); rtm_send(rt, RTM_ADD, 0, rdomain); rtfree(rt); } return (error); } int rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst, unsigned int rdomain) { struct ifnet *ifp = ifa->ifa_ifp; struct rtentry *rt; struct mbuf *m = NULL; struct sockaddr *deldst; struct rt_addrinfo info; struct sockaddr_rtlabel sa_rl; uint8_t prio = ifp->if_priority + RTP_STATIC; int error; KASSERT(rdomain == rtable_l2(rdomain)); if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { m = m_get(M_DONTWAIT, MT_SONAME); if (m == NULL) return (ENOBUFS); deldst = mtod(m, struct sockaddr *); rt_maskedcopy(dst, deldst, ifa->ifa_netmask); dst = deldst; } memset(&info, 0, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; if ((flags & RTF_LLINFO) == 0) info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl); if ((flags & RTF_HOST) == 0) info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; if (flags & (RTF_LOCAL|RTF_BROADCAST)) prio = RTP_LOCAL; if (flags & RTF_CONNECTED) prio = ifp->if_priority + RTP_CONNECTED; rtable_clearsource(rdomain, ifa->ifa_addr); error = rtrequest_delete(&info, prio, ifp, &rt, rdomain); if (error == 0) { rtm_send(rt, RTM_DELETE, 0, rdomain); if (flags & RTF_LOCAL) rtm_addr(RTM_DELADDR, ifa); rtfree(rt); } m_free(m); return (error); } /* * Add ifa's address as a local rtentry. 
*/ int rt_ifa_addlocal(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct rtentry *rt; u_int flags = RTF_HOST|RTF_LOCAL; int error = 0; /* * If the configured address correspond to the magical "any" * address do not add a local route entry because that might * corrupt the routing tree which uses this value for the * default routes. */ switch (ifa->ifa_addr->sa_family) { case AF_INET: if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY) return (0); break; #ifdef INET6 case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr, &in6addr_any)) return (0); break; #endif default: break; } if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT))) flags |= RTF_LLINFO; /* If there is no local entry, allocate one. */ rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain); if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) { error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr, ifp->if_rdomain); } rtfree(rt); return (error); } /* * Remove local rtentry of ifa's address if it exists. */ int rt_ifa_dellocal(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct rtentry *rt; u_int flags = RTF_HOST|RTF_LOCAL; int error = 0; /* * We do not add local routes for such address, so do not bother * removing them. */ switch (ifa->ifa_addr->sa_family) { case AF_INET: if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY) return (0); break; #ifdef INET6 case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr, &in6addr_any)) return (0); break; #endif default: break; } if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT))) flags |= RTF_LLINFO; /* * Before deleting, check if a corresponding local host * route surely exists. With this check, we can avoid to * delete an interface direct route whose destination is same * as the address being removed. This can happen when removing * a subnet-router anycast address on an interface attached * to a shared medium. */ rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain); if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) { error = rt_ifa_del(ifa, flags, ifa->ifa_addr, ifp->if_rdomain); } rtfree(rt); return (error); } /* * Remove all addresses attached to ``ifa''. */ void rt_ifa_purge(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct rtentry *rt = NULL; unsigned int rtableid; int error, af = ifa->ifa_addr->sa_family; KASSERT(ifp != NULL); for (rtableid = 0; rtableid < rtmap_limit; rtableid++) { /* skip rtables that are not in the rdomain of the ifp */ if (rtable_l2(rtableid) != ifp->if_rdomain) continue; do { error = rtable_walk(rtableid, af, &rt, rt_ifa_purge_walker, ifa); if (rt != NULL && error == EEXIST) { error = rtdeletemsg(rt, ifp, rtableid); if (error == 0) error = EAGAIN; } rtfree(rt); rt = NULL; } while (error == EAGAIN); if (error == EAFNOSUPPORT) error = 0; if (error) break; } } int rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid) { struct ifaddr *ifa = vifa; if (rt->rt_ifa == ifa) return EEXIST; return 0; } /* * Route timer routines. These routes allow functions to be called * for various routes at any time. This is useful in supporting * path MTU discovery and redirect route deletion. * * This is similar to some BSDI internal functions, but it provides * for multiple queues for efficiency's sake... 
*/ LIST_HEAD(, rttimer_queue) rttimer_queue_head; static int rt_init_done = 0; #define RTTIMER_CALLOUT(r) { \ if (r->rtt_func != NULL) { \ (*r->rtt_func)(r->rtt_rt, r); \ } else { \ struct ifnet *ifp; \ \ ifp = if_get(r->rtt_rt->rt_ifidx); \ if (ifp != NULL) \ rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid); \ if_put(ifp); \ } \ } /* * Some subtle order problems with domain initialization mean that * we cannot count on this being run from rt_init before various * protocol initializations are done. Therefore, we make sure * that this is run when the first queue is added... */ void rt_timer_init(void) { static struct timeout rt_timer_timeout; if (rt_init_done) panic("rt_timer_init: already initialized"); pool_init(&rttimer_pool, sizeof(struct rttimer), 0, IPL_SOFTNET, 0, "rttmr", NULL); LIST_INIT(&rttimer_queue_head); timeout_set_proc(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout); timeout_add_sec(&rt_timer_timeout, 1); rt_init_done = 1; } struct rttimer_queue * rt_timer_queue_create(u_int timeout) { struct rttimer_queue *rtq; if (rt_init_done == 0) rt_timer_init(); if ((rtq = malloc(sizeof(*rtq), M_RTABLE, M_NOWAIT|M_ZERO)) == NULL) return (NULL); rtq->rtq_timeout = timeout; rtq->rtq_count = 0; TAILQ_INIT(&rtq->rtq_head); LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); return (rtq); } void rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) { rtq->rtq_timeout = timeout; } void rt_timer_queue_destroy(struct rttimer_queue *rtq) { struct rttimer *r; NET_ASSERT_LOCKED(); while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); RTTIMER_CALLOUT(r); pool_put(&rttimer_pool, r); if (rtq->rtq_count > 0) rtq->rtq_count--; else printf("rt_timer_queue_destroy: rtq_count reached 0\n"); } LIST_REMOVE(rtq, rtq_link); free(rtq, M_RTABLE, sizeof(*rtq)); } unsigned long rt_timer_queue_count(struct rttimer_queue *rtq) { return (rtq->rtq_count); } void rt_timer_remove_all(struct rtentry *rt) { struct rttimer *r; while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); if (r->rtt_queue->rtq_count > 0) r->rtt_queue->rtq_count--; else printf("rt_timer_remove_all: rtq_count reached 0\n"); pool_put(&rttimer_pool, r); } } int rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *, struct rttimer *), struct rttimer_queue *queue, u_int rtableid) { struct rttimer *r; long current_time; current_time = getuptime(); rt->rt_expire = getuptime() + queue->rtq_timeout; /* * If there's already a timer with this action, destroy it before * we add a new one. */ LIST_FOREACH(r, &rt->rt_timer, rtt_link) { if (r->rtt_func == func) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); if (r->rtt_queue->rtq_count > 0) r->rtt_queue->rtq_count--; else printf("rt_timer_add: rtq_count reached 0\n"); pool_put(&rttimer_pool, r); break; /* only one per list, so we can quit... 
*/ } } r = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO); if (r == NULL) return (ENOBUFS); r->rtt_rt = rt; r->rtt_time = current_time; r->rtt_func = func; r->rtt_queue = queue; r->rtt_tableid = rtableid; LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); r->rtt_queue->rtq_count++; return (0); } void rt_timer_timer(void *arg) { struct timeout *to = (struct timeout *)arg; struct rttimer_queue *rtq; struct rttimer *r; long current_time; current_time = getuptime(); NET_LOCK(); LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && (r->rtt_time + rtq->rtq_timeout) < current_time) { LIST_REMOVE(r, rtt_link); TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); RTTIMER_CALLOUT(r); pool_put(&rttimer_pool, r); if (rtq->rtq_count > 0) rtq->rtq_count--; else printf("rt_timer_timer: rtq_count reached 0\n"); } } NET_UNLOCK(); timeout_add_sec(to, 1); } #ifdef MPLS int rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op) { struct sockaddr_mpls *psa_mpls = (struct sockaddr_mpls *)src; struct rt_mpls *rt_mpls; if (psa_mpls == NULL && op != MPLS_OP_POP) return (EOPNOTSUPP); if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls)) return (EINVAL); if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS) return (EAFNOSUPPORT); rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO); if (rt->rt_llinfo == NULL) return (ENOMEM); rt_mpls = (struct rt_mpls *)rt->rt_llinfo; if (psa_mpls != NULL) rt_mpls->mpls_label = psa_mpls->smpls_label; rt_mpls->mpls_operation = op; /* XXX: set experimental bits */ rt->rt_flags |= RTF_MPLS; return (0); } void rt_mpls_clear(struct rtentry *rt) { if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) { free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls)); rt->rt_llinfo = NULL; } rt->rt_flags &= ~RTF_MPLS; } #endif u_int16_t rtlabel_name2id(char *name) { struct rt_label *label, *p; u_int16_t new_id = 1; if (!name[0]) return (0); TAILQ_FOREACH(label, &rt_labels, rtl_entry) if (strcmp(name, label->rtl_name) == 0) { label->rtl_ref++; return (label->rtl_id); } /* * to avoid fragmentation, we do a linear search from the beginning * and take the first free slot we find. if there is none or the list * is empty, append a new entry at the end. 
*/ TAILQ_FOREACH(p, &rt_labels, rtl_entry) { if (p->rtl_id != new_id) break; new_id = p->rtl_id + 1; } if (new_id > LABELID_MAX) return (0); label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO); if (label == NULL) return (0); strlcpy(label->rtl_name, name, sizeof(label->rtl_name)); label->rtl_id = new_id; label->rtl_ref++; if (p != NULL) /* insert new entry before p */ TAILQ_INSERT_BEFORE(p, label, rtl_entry); else /* either list empty or no free slot in between */ TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry); return (label->rtl_id); } const char * rtlabel_id2name(u_int16_t id) { struct rt_label *label; TAILQ_FOREACH(label, &rt_labels, rtl_entry) if (label->rtl_id == id) return (label->rtl_name); return (NULL); } struct sockaddr * rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl) { const char *label; if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL) return (NULL); bzero(sa_rl, sizeof(*sa_rl)); sa_rl->sr_len = sizeof(*sa_rl); sa_rl->sr_family = AF_UNSPEC; strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label)); return ((struct sockaddr *)sa_rl); } void rtlabel_unref(u_int16_t id) { struct rt_label *p, *next; if (id == 0) return; TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) { if (id == p->rtl_id) { if (--p->rtl_ref == 0) { TAILQ_REMOVE(&rt_labels, p, rtl_entry); free(p, M_RTABLE, sizeof(*p)); } break; } } } int rt_if_track(struct ifnet *ifp) { unsigned int rtableid; struct rtentry *rt = NULL; int i, error = 0; for (rtableid = 0; rtableid < rtmap_limit; rtableid++) { /* skip rtables that are not in the rdomain of the ifp */ if (rtable_l2(rtableid) != ifp->if_rdomain) continue; for (i = 1; i <= AF_MAX; i++) { if (!rtable_mpath_capable(rtableid, i)) continue; do { error = rtable_walk(rtableid, i, &rt, rt_if_linkstate_change, ifp); if (rt != NULL && error == EEXIST) { error = rtdeletemsg(rt, ifp, rtableid); if (error == 0) error = EAGAIN; } rtfree(rt); rt = NULL; } while (error == EAGAIN); if (error == EAFNOSUPPORT) error = 0; if (error) break; } } return (error); } int rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id) { struct ifnet *ifp = arg; struct sockaddr_in6 sa_mask; int error; if (rt->rt_ifidx != ifp->if_index) return (0); /* Local routes are always usable. */ if (rt->rt_flags & RTF_LOCAL) { rt->rt_flags |= RTF_UP; return (0); } if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) { if (ISSET(rt->rt_flags, RTF_UP)) return (0); /* bring route up */ rt->rt_flags |= RTF_UP; error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt), rt->rt_priority & RTP_MASK, rt); } else { /* * Remove redirected and cloned routes (mainly ARP) * from down interfaces so we have a chance to get * new routes from a better source. 
*/ if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) && !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) { return (EEXIST); } if (!ISSET(rt->rt_flags, RTF_UP)) return (0); /* take route down */ rt->rt_flags &= ~RTF_UP; error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt), rt->rt_priority | RTP_DOWN, rt); } if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask)); return (error); } struct sockaddr * rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask) { struct sockaddr_in *sin = (struct sockaddr_in *)sa_mask; #ifdef INET6 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa_mask; #endif KASSERT(plen >= 0 || plen == -1); if (plen == -1) return (NULL); memset(sa_mask, 0, sizeof(*sa_mask)); switch (af) { case AF_INET: sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); in_prefixlen2mask(&sin->sin_addr, plen); break; #ifdef INET6 case AF_INET6: sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); in6_prefixlen2mask(&sin6->sin6_addr, plen); break; #endif /* INET6 */ default: return (NULL); } return ((struct sockaddr *)sa_mask); } struct sockaddr * rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask) { return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask)); } #ifdef DDB #include <machine/db_machdep.h> #include <ddb/db_output.h> void db_print_sa(struct sockaddr *sa) { int len; u_char *p; if (sa == NULL) { db_printf("[NULL]"); return; } p = (u_char *)sa; len = sa->sa_len; db_printf("["); while (len > 0) { db_printf("%d", *p); p++; len--; if (len) db_printf(","); } db_printf("]\n"); } void db_print_ifa(struct ifaddr *ifa) { if (ifa == NULL) return; db_printf(" ifa_addr="); db_print_sa(ifa->ifa_addr); db_printf(" ifa_dsta="); db_print_sa(ifa->ifa_dstaddr); db_printf(" ifa_mask="); db_print_sa(ifa->ifa_netmask); db_printf(" flags=0x%x, refcnt=%d, metric=%d\n", ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric); } /* * Function to pass to rtalble_walk(). * Return non-zero error to abort walk. */ int db_show_rtentry(struct rtentry *rt, void *w, unsigned int id) { db_printf("rtentry=%p", rt); db_printf(" flags=0x%x refcnt=%d use=%llu expire=%lld rtableid=%u\n", rt->rt_flags, rt->rt_refcnt, rt->rt_use, rt->rt_expire, id); db_printf(" key="); db_print_sa(rt_key(rt)); db_printf(" plen=%d", rt_plen(rt)); db_printf(" gw="); db_print_sa(rt->rt_gateway); db_printf(" ifidx=%u ", rt->rt_ifidx); db_printf(" ifa=%p\n", rt->rt_ifa); db_print_ifa(rt->rt_ifa); db_printf(" gwroute=%p llinfo=%p\n", rt->rt_gwroute, rt->rt_llinfo); return (0); } /* * Function to print all the route trees. * Use this from ddb: "call db_show_arptab" */ int db_show_arptab(void) { db_printf("Route tree for AF_INET\n"); rtable_walk(0, AF_INET, NULL, db_show_rtentry, NULL); return (0); } #endif /* DDB */
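/*
 * Illustrative usage sketch (not part of the original sources): the routing
 * API above follows a lookup/validate/release pattern.  rtalloc_mpath()
 * returns a referenced entry (resolving cloning routes on demand),
 * rtisvalid() checks that it is still usable, rt_getll() yields the entry
 * carrying the next-hop link-layer information, and rtfree() drops the
 * reference.  The function below is hypothetical and kept under #if 0; it
 * only demonstrates the calling convention of this file.
 */
#if 0
static int
example_output_lookup(struct sockaddr *dst, unsigned int rtableid)
{
	struct rtentry *rt;
	struct ifnet *ifp;

	/* Look up (and, for cloning routes, resolve) the destination. */
	rt = rtalloc_mpath(dst, NULL, rtableid);
	if (!rtisvalid(rt)) {
		rtfree(rt);
		return (EHOSTUNREACH);
	}

	/* The output interface is referenced through rt_ifidx. */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL) {
		rtfree(rt);
		return (EHOSTDOWN);
	}

	/* rt_getll() yields the entry holding the next-hop L2 information. */
	(void)rt_getll(rt);

	if_put(ifp);
	rtfree(rt);	/* drop the reference taken by rtalloc_mpath() */
	return (0);
}
#endif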
/* $OpenBSD: ufs_quota.c,v 1.47 2020/06/24 22:03:45 cheloha Exp $ */ /* $NetBSD: ufs_quota.c,v 1.8 1996/02/09 22:36:09 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_quota.c 8.5 (Berkeley) 8/19/94 */ #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/malloc.h> #include <sys/fcntl.h> #include <sys/proc.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/ktrace.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> #include <sys/queue.h> #include <crypto/siphash.h> /* * The following structure records disk usage for a user or group on a * filesystem. There is one allocated for each quota that exists on any * filesystem for the current user or group. A cache is kept of recently * used entries. */ struct dquot { LIST_ENTRY(dquot) dq_hash; /* hash list */ TAILQ_ENTRY(dquot) dq_freelist; /* free list */ u_int16_t dq_flags; /* flags, see below */ u_int16_t dq_type; /* quota type of this dquot */ u_int32_t dq_cnt; /* count of active references */ u_int32_t dq_id; /* identifier this applies to */ struct vnode *dq_vp; /* file backing this quota */ struct ucred *dq_cred; /* credentials for writing file */ struct dqblk dq_dqb; /* actual usage & quotas */ }; /* * Flag values. */ #define DQ_LOCK 0x01 /* this quota locked (no MODS) */ #define DQ_WANT 0x02 /* wakeup on unlock */ #define DQ_MOD 0x04 /* this quota modified since read */ #define DQ_FAKE 0x08 /* no limits here, just usage */ #define DQ_BLKS 0x10 /* has been warned about blk limit */ #define DQ_INODS 0x20 /* has been warned about inode limit */ /* * Shorthand notation.
*/ #define dq_bhardlimit dq_dqb.dqb_bhardlimit #define dq_bsoftlimit dq_dqb.dqb_bsoftlimit #define dq_curblocks dq_dqb.dqb_curblocks #define dq_ihardlimit dq_dqb.dqb_ihardlimit #define dq_isoftlimit dq_dqb.dqb_isoftlimit #define dq_curinodes dq_dqb.dqb_curinodes #define dq_btime dq_dqb.dqb_btime #define dq_itime dq_dqb.dqb_itime /* * If the system has never checked for a quota for this file, then it is * set to NODQUOT. Once a write attempt is made the inode pointer is set * to reference a dquot structure. */ #define NODQUOT NULL void dqref(struct dquot *); void dqrele(struct vnode *, struct dquot *); int dqsync(struct vnode *, struct dquot *); #ifdef DIAGNOSTIC void chkdquot(struct inode *); #endif int getquota(struct mount *, u_long, int, caddr_t); int quotaon(struct proc *, struct mount *, int, caddr_t); int setquota(struct mount *, u_long, int, caddr_t); int setuse(struct mount *, u_long, int, caddr_t); int chkdqchg(struct inode *, long, struct ucred *, int); int chkiqchg(struct inode *, long, struct ucred *, int); int dqget(struct vnode *, u_long, struct ufsmount *, int, struct dquot **); int quotaon_vnode(struct vnode *, void *); int quotaoff_vnode(struct vnode *, void *); int qsync_vnode(struct vnode *, void *); /* * Quota name to error message mapping. */ static char *quotatypes[] = INITQFNAMES; /* * Obtain a reference to a dquot. */ void dqref(struct dquot *dq) { dq->dq_cnt++; } /* * Set up the quotas for an inode. * * This routine completely defines the semantics of quotas. * If other criterion want to be used to establish quotas, the * MAXQUOTAS value in quotas.h should be increased, and the * additional dquots set up here. */ int getinoquota(struct inode *ip) { struct ufsmount *ump; struct vnode *vp = ITOV(ip); int error; ump = ip->i_ump; /* * Set up the user quota based on file uid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[USRQUOTA] == NODQUOT && (error = dqget(vp, DIP(ip, uid), ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && error != EINVAL) return (error); /* * Set up the group quota based on file gid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[GRPQUOTA] == NODQUOT && (error = dqget(vp, DIP(ip, gid), ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && error != EINVAL) return (error); return (0); } /* * Update disk usage, and take corrective action. 
*/ int ufs_quota_alloc_blocks2(struct inode *ip, daddr_t change, struct ucred *cred, enum ufs_quota_flags flags) { struct dquot *dq; int i; int error; #ifdef DIAGNOSTIC chkdquot(ip); #endif if (change == 0) return (0); if ((flags & UFS_QUOTA_FORCE) == 0 && (cred != NOCRED && cred->cr_uid != 0)) { for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; if ((error = chkdqchg(ip, change, cred, i)) != 0) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "chkdq", INFSLP); } dq->dq_curblocks += change; dq->dq_flags |= DQ_MOD; } return (0); } int ufs_quota_free_blocks2(struct inode *ip, daddr_t change, struct ucred *cred, enum ufs_quota_flags flags) { struct dquot *dq; int i; #ifdef DIAGNOSTIC if (!VOP_ISLOCKED(ITOV(ip))) panic ("ufs_quota_free_blocks2: vnode is not locked"); #endif if (change == 0) return (0); for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "chkdq", INFSLP); } if (dq->dq_curblocks >= change) dq->dq_curblocks -= change; else dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ int chkdqchg(struct inode *ip, long change, struct ucred *cred, int type) { struct dquot *dq = ip->i_dquot[type]; long ncurblocks = dq->dq_curblocks + change; /* * If user would exceed their hard limit, disallow space allocation. */ if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && DIP(ip, uid) == cred->cr_uid) { uprintf("\n%s: write failed, %s disk limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow space * allocation. Reset time limit as they cross their soft limit. */ if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { if (dq->dq_curblocks < dq->dq_bsoftlimit) { dq->dq_btime = gettime() + ip->i_ump->um_btime[type]; if (DIP(ip, uid) == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded"); return (0); } if (gettime() > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && DIP(ip, uid) == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded for too long"); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } } return (0); } /* * Check the inode limit, applying corrective action. 
*/ int ufs_quota_alloc_inode2(struct inode *ip, struct ucred *cred, enum ufs_quota_flags flags) { struct dquot *dq; int i; int error; #ifdef DIAGNOSTIC chkdquot(ip); #endif if ((flags & UFS_QUOTA_FORCE) == 0 && cred->cr_uid != 0) { for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; if ((error = chkiqchg(ip, 1, cred, i)) != 0) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "chkiq", INFSLP); } dq->dq_curinodes++; dq->dq_flags |= DQ_MOD; } return (0); } int ufs_quota_free_inode2(struct inode *ip, struct ucred *cred, enum ufs_quota_flags flags) { struct dquot *dq; int i; #ifdef DIAGNOSTIC if (!VOP_ISLOCKED(ITOV(ip))) panic ("ufs_quota_free_blocks2: vnode is not locked"); #endif for (i = 0; i < MAXQUOTAS; i++) { if (flags & (1 << i)) continue; if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "chkiq", INFSLP); } if (dq->dq_curinodes > 0) dq->dq_curinodes--; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ int chkiqchg(struct inode *ip, long change, struct ucred *cred, int type) { struct dquot *dq = ip->i_dquot[type]; long ncurinodes = dq->dq_curinodes + change; /* * If user would exceed their hard limit, disallow inode allocation. */ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && DIP(ip, uid) == cred->cr_uid) { uprintf("\n%s: write failed, %s inode limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow inode * allocation. Reset time limit as they cross their soft limit. */ if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { if (dq->dq_curinodes < dq->dq_isoftlimit) { dq->dq_itime = gettime() + ip->i_ump->um_itime[type]; if (DIP(ip, uid) == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded"); return (0); } if (gettime() > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && DIP(ip, uid) == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded for too long"); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } } return (0); } #ifdef DIAGNOSTIC /* * On filesystems with quotas enabled, it is an error for a file to change * size and not to have a dquot structure associated with it. */ void chkdquot(struct inode *ip) { struct ufsmount *ump = ip->i_ump; int i; struct vnode *vp = ITOV(ip); if (!VOP_ISLOCKED(vp)) panic ("chkdquot: vnode is not locked"); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP || (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) continue; if (ip->i_dquot[i] == NODQUOT) { vprint("chkdquot: missing dquot", ITOV(ip)); panic("missing dquot"); } } } #endif /* * Code to process quotactl commands. */ int quotaon_vnode(struct vnode *vp, void *arg) { int error; if (vp->v_type == VNON || vp->v_writecount == 0) return (0); if (vget(vp, LK_EXCLUSIVE)) { return (0); } error = getinoquota(VTOI(vp)); vput(vp); return (error); } /* * Q_QUOTAON - set up a quota file for a particular file system. 
*/ int quotaon(struct proc *p, struct mount *mp, int type, caddr_t fname) { struct ufsmount *ump = VFSTOUFS(mp); struct vnode *vp, **vpp; struct dquot *dq; int error; struct nameidata nd; #ifdef DIAGNOSTIC if (!vfs_isbusy(mp)) panic ("quotaon: mount point not busy"); #endif vpp = &ump->um_quotas[type]; NDINIT(&nd, 0, 0, UIO_USERSPACE, fname, p); if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) return (error); vp = nd.ni_vp; VOP_UNLOCK(vp); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EACCES); } /* * Update the vnode and ucred for quota file updates */ if (*vpp != vp) { quotaoff(p, mp, type); *vpp = vp; crhold(p->p_ucred); ump->um_cred[type] = p->p_ucred; } else { struct ucred *ocred = ump->um_cred[type]; (void) vn_close(vp, FREAD|FWRITE, ocred, p); if (ocred != p->p_ucred) { crhold(p->p_ucred); ump->um_cred[type] = p->p_ucred; crfree(ocred); } } ump->um_qflags[type] |= QTF_OPENING; mp->mnt_flag |= MNT_QUOTA; vp->v_flag |= VSYSTEM; /* * Set up the time limits for this quota. */ ump->um_btime[type] = MAX_DQ_TIME; ump->um_itime[type] = MAX_IQ_TIME; if (dqget(NULLVP, 0, ump, type, &dq) == 0) { if (dq->dq_btime > 0) ump->um_btime[type] = dq->dq_btime; if (dq->dq_itime > 0) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ error = vfs_mount_foreach_vnode(mp, quotaon_vnode, NULL); ump->um_qflags[type] &= ~QTF_OPENING; if (error) quotaoff(p, mp, type); return (error); } struct quotaoff_arg { struct proc *p; int type; }; int quotaoff_vnode(struct vnode *vp, void *arg) { struct quotaoff_arg *qa = (struct quotaoff_arg *)arg; struct inode *ip; struct dquot *dq; if (vp->v_type == VNON) return (0); if (vget(vp, LK_EXCLUSIVE)) return (0); ip = VTOI(vp); dq = ip->i_dquot[qa->type]; ip->i_dquot[qa->type] = NODQUOT; dqrele(vp, dq); vput(vp); return (0); } /* * Q_QUOTAOFF - turn off disk quotas for a filesystem. */ int quotaoff(struct proc *p, struct mount *mp, int type) { struct vnode *qvp; struct ufsmount *ump = VFSTOUFS(mp); struct quotaoff_arg qa; int error; #ifdef DIAGNOSTIC if (!vfs_isbusy(mp)) panic ("quotaoff: mount point not busy"); #endif if ((qvp = ump->um_quotas[type]) == NULLVP) return (0); ump->um_qflags[type] |= QTF_CLOSING; /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ qa.p = p; qa.type = type; vfs_mount_foreach_vnode(mp, quotaoff_vnode, &qa); error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); ump->um_quotas[type] = NULLVP; crfree(ump->um_cred[type]); ump->um_cred[type] = NOCRED; ump->um_qflags[type] &= ~QTF_CLOSING; for (type = 0; type < MAXQUOTAS; type++) if (ump->um_quotas[type] != NULLVP) break; if (type == MAXQUOTAS) mp->mnt_flag &= ~MNT_QUOTA; return (error); } /* * Q_GETQUOTA - return current values in a dqblk structure. */ int getquota(struct mount *mp, u_long id, int type, caddr_t addr) { struct dquot *dq; int error; if ((error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq)) != 0) return (error); error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); #ifdef KTRACE if (error == 0) { struct proc *p = curproc; if (KTRPOINT(p, KTR_STRUCT)) ktrquota(p, &dq->dq_dqb); } #endif dqrele(NULLVP, dq); return (error); } /* * Q_SETQUOTA - assign an entire dqblk structure. 
*/ int setquota(struct mount *mp, u_long id, int type, caddr_t addr) { struct dquot *dq; struct dquot *ndq; struct ufsmount *ump = VFSTOUFS(mp); struct dqblk newlim; int error; error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)); if (error) return (error); #ifdef KTRACE { struct proc *p = curproc; if (KTRPOINT(p, KTR_STRUCT)) ktrquota(p, &newlim); } #endif if ((error = dqget(NULLVP, id, ump, type, &ndq)) != 0) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "setquota", INFSLP); } /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were * under it, but now have a soft limit and are over it. */ newlim.dqb_curblocks = dq->dq_curblocks; newlim.dqb_curinodes = dq->dq_curinodes; if (dq->dq_id != 0) { newlim.dqb_btime = dq->dq_btime; newlim.dqb_itime = dq->dq_itime; } if (newlim.dqb_bsoftlimit && dq->dq_curblocks >= newlim.dqb_bsoftlimit && (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) newlim.dqb_btime = gettime() + ump->um_btime[type]; if (newlim.dqb_isoftlimit && dq->dq_curinodes >= newlim.dqb_isoftlimit && (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) newlim.dqb_itime = gettime() + ump->um_itime[type]; dq->dq_dqb = newlim; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } /* * Q_SETUSE - set current inode and block usage. */ int setuse(struct mount *mp, u_long id, int type, caddr_t addr) { struct dquot *dq; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *ndq; struct dqblk usage; int error; error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)); if (error) return (error); #ifdef KTRACE { struct proc *p = curproc; if (KTRPOINT(p, KTR_STRUCT)) ktrquota(p, &usage); } #endif if ((error = dqget(NULLVP, id, ump, type, &ndq)) != 0) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+1, "setuse", INFSLP); } /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. */ if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && usage.dqb_curblocks >= dq->dq_bsoftlimit) dq->dq_btime = gettime() + ump->um_btime[type]; if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && usage.dqb_curinodes >= dq->dq_isoftlimit) dq->dq_itime = gettime() + ump->um_itime[type]; dq->dq_curblocks = usage.dqb_curblocks; dq->dq_curinodes = usage.dqb_curinodes; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } int qsync_vnode(struct vnode *vp, void *arg) { int i; struct dquot *dq; if (vp->v_type == VNON) return (0); if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT)) return (0); for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) dqsync(vp, dq); } vput(vp); return (0); } /* * Q_SYNC - sync quota files to disk. */ int qsync(struct mount *mp) { struct ufsmount *ump = VFSTOUFS(mp); int i; /* * Check if the mount point has any quotas. * If not, simply return. 
*/ for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; if (i == MAXQUOTAS) return (0); /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. */ vfs_mount_foreach_vnode(mp, qsync_vnode, NULL); return (0); } /* * Code pertaining to management of the in-core dquot data structures. */ LIST_HEAD(dqhash, dquot) *dqhashtbl; SIPHASH_KEY dqhashkey; u_long dqhash; /* * Dquot free list. */ #define DQUOTINC 5 /* minimum free dquots desired */ TAILQ_HEAD(dqfreelist, dquot) dqfreelist; long numdquot, desireddquot = DQUOTINC; /* * Initialize the quota system. */ void ufs_quota_init(void) { dqhashtbl = hashinit(initialvnodes, M_DQUOT, M_WAITOK, &dqhash); arc4random_buf(&dqhashkey, sizeof(dqhashkey)); TAILQ_INIT(&dqfreelist); } /* * Obtain a dquot structure for the specified identifier and quota file * reading the information from the file if necessary. */ int dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type, struct dquot **dqp) { SIPHASH_CTX ctx; struct dquot *dq; struct dqhash *dqh; struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; dqvp = ump->um_quotas[type]; if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { *dqp = NODQUOT; return (EINVAL); } /* * Check the cache first. */ SipHash24_Init(&ctx, &dqhashkey); SipHash24_Update(&ctx, &dqvp, sizeof(dqvp)); SipHash24_Update(&ctx, &id, sizeof(id)); dqh = &dqhashtbl[SipHash24_End(&ctx) & dqhash]; LIST_FOREACH(dq, dqh, dq_hash) { if (dq->dq_id != id || dq->dq_vp != dqvp) continue; /* * Cache hit with no references. Take * the structure off the free list. */ if (dq->dq_cnt == 0) TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); dqref(dq); *dqp = dq; return (0); } /* * Not in cache, allocate a new one. */ if (TAILQ_FIRST(&dqfreelist) == NODQUOT && numdquot < MAXQUOTAS * initialvnodes) desireddquot += DQUOTINC; if (numdquot < desireddquot) { dq = malloc(sizeof *dq, M_DQUOT, M_WAITOK | M_ZERO); numdquot++; } else { if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) { tablefull("dquot"); *dqp = NODQUOT; return (EUSERS); } if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) panic("free dquot isn't"); TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); LIST_REMOVE(dq, dq_hash); crfree(dq->dq_cred); dq->dq_cred = NOCRED; } /* * Initialize the contents of the dquot structure. */ if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY); LIST_INSERT_HEAD(dqh, dq, dq_hash); dqref(dq); dq->dq_flags = DQ_LOCK; dq->dq_id = id; dq->dq_vp = dqvp; dq->dq_type = type; crhold(ump->um_cred[type]); dq->dq_cred = ump->um_cred[type]; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = NULL; error = VOP_READ(dqvp, &auio, 0, dq->dq_cred); if (auio.uio_resid == sizeof(struct dqblk) && error == 0) memset(&dq->dq_dqb, 0, sizeof(struct dqblk)); if (vp != dqvp) VOP_UNLOCK(dqvp); if (dq->dq_flags & DQ_WANT) wakeup(dq); dq->dq_flags = 0; /* * I/O error in reading quota file, release * quota structure and reflect problem to caller. */ if (error) { LIST_REMOVE(dq, dq_hash); dqrele(vp, dq); *dqp = NODQUOT; return (error); } /* * Check for no limit to enforce. * Initialize time values if necessary. 
*/ if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; if (dq->dq_id != 0) { if (dq->dq_btime == 0) dq->dq_btime = gettime() + ump->um_btime[type]; if (dq->dq_itime == 0) dq->dq_itime = gettime() + ump->um_itime[type]; } *dqp = dq; return (0); } /* * Release a reference to a dquot. */ void dqrele(struct vnode *vp, struct dquot *dq) { if (dq == NODQUOT) return; if (dq->dq_cnt > 1) { dq->dq_cnt--; return; } if (dq->dq_flags & DQ_MOD) (void) dqsync(vp, dq); if (--dq->dq_cnt > 0) return; TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); } /* * Update the disk quota in the quota file. */ int dqsync(struct vnode *vp, struct dquot *dq) { struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; if (dq == NODQUOT) panic("dqsync: dquot"); if ((dq->dq_flags & DQ_MOD) == 0) return (0); if ((dqvp = dq->dq_vp) == NULLVP) panic("dqsync: file"); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY); while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; tsleep_nsec(dq, PINOD+2, "dqsync", INFSLP); if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) VOP_UNLOCK(dqvp); return (0); } } dq->dq_flags |= DQ_LOCK; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = NULL; error = VOP_WRITE(dqvp, &auio, 0, dq->dq_cred); if (auio.uio_resid && error == 0) error = EIO; if (dq->dq_flags & DQ_WANT) wakeup(dq); dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) VOP_UNLOCK(dqvp); return (error); } int ufs_quota_delete(struct inode *ip) { struct vnode *vp = ITOV(ip); int i; for (i = 0; i < MAXQUOTAS; i++) { if (ip->i_dquot[i] != NODQUOT) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } return (0); } /* * Do operations associated with quotas */ int ufs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { int cmd, type, error; if (uid == -1) uid = p->p_ucred->cr_ruid; cmd = cmds >> SUBCMDSHIFT; switch (cmd) { case Q_SYNC: break; case Q_GETQUOTA: if (uid == p->p_ucred->cr_ruid) break; /* FALLTHROUGH */ default: if ((error = suser(p)) != 0) return (error); } type = cmds & SUBCMDMASK; if ((u_int)type >= MAXQUOTAS) return (EINVAL); if (vfs_busy(mp, VB_READ|VB_NOWAIT)) return (0); switch (cmd) { case Q_QUOTAON: error = quotaon(p, mp, type, arg); break; case Q_QUOTAOFF: error = quotaoff(p, mp, type); break; case Q_SETQUOTA: error = setquota(mp, uid, type, arg) ; break; case Q_SETUSE: error = setuse(mp, uid, type, arg); break; case Q_GETQUOTA: error = getquota(mp, uid, type, arg); break; case Q_SYNC: error = qsync(mp); break; default: error = EINVAL; break; } vfs_unbusy(mp); return (error); }
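/*
 * Illustrative sketch (not part of this file): how the Q_GETQUOTA case of
 * ufs_quotactl() above is reached from userland.  cmds packs the command in
 * the high bits (SUBCMDSHIFT) and the quota type in the low bits
 * (SUBCMDMASK); the QCMD() macro from <ufs/ufs/quota.h> does that packing.
 * The headers and the quotactl(2) prototype follow the usual BSD interface
 * and are assumptions of this example, not code from this file.
 */
#if 0
#include <sys/types.h>
#include <ufs/ufs/quota.h>
#include <stdio.h>
#include <unistd.h>

int
print_user_quota(const char *fs, uid_t uid)
{
	struct dqblk dq;

	/* decoded above as cmd = Q_GETQUOTA, type = USRQUOTA */
	if (quotactl(fs, QCMD(Q_GETQUOTA, USRQUOTA), uid, (char *)&dq) == -1)
		return (-1);
	printf("blocks %u/%u (soft/hard), inodes %u/%u (soft/hard)\n",
	    dq.dqb_bsoftlimit, dq.dqb_bhardlimit,
	    dq.dqb_isoftlimit, dq.dqb_ihardlimit);
	return (0);
}
#endif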
3347 211 5 3352 3353 5 3 3347 3379 3379 3092 2906 3 1 2841 2370 3373 /* $OpenBSD: in4_cksum.c,v 1.10 2014/09/08 06:24:13 jsg Exp $ */ /* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */ /* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */ /* * Copyright (C) 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ #include <sys/param.h> #include <sys/mbuf.h> #include <sys/systm.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> /* * Checksum routine for Internet Protocol family headers (Portable Version). * This is only for IPv4 pseudo header checksum. * No need to clear non-pseudo-header fields in IPv4 header. * len is for actual payload size, and does not include IPv4 header and * skipped header chain (off + len should be equal to the whole packet). * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) { u_int16_t *w; int sum = 0; int mlen = 0; int byte_swapped = 0; union { struct ipovly ipov; u_int16_t w[10]; } u; union { u_int8_t c[2]; u_int16_t s; } s_util; union { u_int16_t s[2]; u_int32_t l; } l_util; if (nxt != 0) { /* pseudo header */ if (off < sizeof(struct ipovly)) panic("in4_cksum: offset too short"); if (m->m_len < sizeof(struct ip)) panic("in4_cksum: bad mbuf chain"); bzero(&u.ipov, sizeof(u.ipov)); u.ipov.ih_len = htons(len); u.ipov.ih_pr = nxt; u.ipov.ih_src = mtod(m, struct ip *)->ip_src; u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; w = u.w; /* assumes sizeof(ipov) == 20 */ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; } /* skip unnecessary part */ while (m && off > 0) { if (m->m_len > off) break; off -= m->m_len; m = m->m_next; } for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = (u_int16_t *)(mtod(m, caddr_t) + off); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. * * s_util.c[0] is already saved when scanning previous * mbuf. */ s_util.c[1] = *(u_int8_t *)w; sum += s_util.s; w = (u_int16_t *)((u_int8_t *)w + 1); mlen = m->m_len - off - 1; len--; } else mlen = m->m_len - off; off = 0; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. */ if ((1 & (long) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_int8_t *)w; w = (u_int16_t *)((int8_t *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. */ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) continue; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(u_int8_t *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(u_int8_t *)w; } if (len) printf("cksum4: out of data\n"); if (mlen == -1) { /* The last mbuf has odd # of bytes. Follow the standard (the odd byte may be shifted left by 8 bits or not as determined by endian-ness of the machine) */ s_util.c[1] = 0; sum += s_util.s; } REDUCE; return (~sum & 0xffff); }
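/*
 * Illustrative sketch (not part of this file): the same one's-complement
 * arithmetic as in4_cksum() above, but over a flat, even-length buffer, to
 * show what the ADDCARRY/REDUCE macros accomplish without the mbuf-walking
 * and pseudo-header machinery.
 */
#if 0
static uint16_t
flat_cksum(const uint16_t *w, int nwords)
{
	uint32_t sum = 0;

	while (nwords-- > 0)
		sum += *w++;			/* may exceed 16 bits */
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries (REDUCE) */
	sum += (sum >> 16);			/* end-around carry (ADDCARRY) */
	return (~sum & 0xffff);
}
#endif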
412 413 1 17 19 1 20 2 31 3 30 22 333 2 3 19 117 250 170 2 192 359 3 104 249 22 101 180 51 235 235 203 2 191 13 80 90 143 51 23 28 48 2 23 28 46 6 116 74 41 2 2 35 71 2617 2619 1593 1028 4 1469 10 10 1074 1078 2 6 1068 102 3654 4 3664 4 154 142 12 154 89 10 1 1 2 1 3 2 3 4 6 /* $OpenBSD: vfs_vnops.c,v 1.114 2020/04/08 08:07:51 mpi Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.5 (Berkeley) 12/8/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/fcntl.h> #include <sys/file.h> #include <sys/stat.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/mount.h> #include <sys/namei.h> #include <sys/lock.h> #include <sys/vnode.h> #include <sys/ioctl.h> #include <sys/tty.h> #include <sys/cdio.h> #include <sys/poll.h> #include <sys/filedesc.h> #include <sys/specdev.h> #include <sys/unistd.h> int vn_read(struct file *, struct uio *, int); int vn_write(struct file *, struct uio *, int); int vn_poll(struct file *, int, struct proc *); int vn_kqfilter(struct file *, struct knote *); int vn_closefile(struct file *, struct proc *); int vn_seek(struct file *, off_t *, int, struct proc *); const struct fileops vnops = { .fo_read = vn_read, .fo_write = vn_write, .fo_ioctl = vn_ioctl, .fo_poll = vn_poll, .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, .fo_seek = vn_seek, }; /* * Common code for vnode open operations. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 
*/ int vn_open(struct nameidata *ndp, int fmode, int cmode) { struct vnode *vp; struct proc *p = ndp->ni_cnd.cn_proc; struct ucred *cred = p->p_ucred; struct vattr va; struct cloneinfo *cip; int error; /* * The only valid flag to pass in here from NDINIT is * KERNELPATH, This function will override the nameiop based * on the fmode and cmode flags, So validate that our caller * has not set other flags or operations in the nameidata * structure. */ KASSERT(ndp->ni_cnd.cn_flags == 0 || ndp->ni_cnd.cn_flags == KERNELPATH); KASSERT(ndp->ni_cnd.cn_nameiop == 0); if ((fmode & (FREAD|FWRITE)) == 0) return (EINVAL); if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC) return (EINVAL); if (fmode & O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; if ((error = namei(ndp)) != 0) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(&va); va.va_type = VREG; va.va_mode = cmode; if (fmode & O_EXCL) va.va_vaflags |= VA_EXCLUSIVE; error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &va); vput(ndp->ni_dvp); if (error) return (error); fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags |= ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF; if ((error = namei(ndp)) != 0) return (error); vp = ndp->ni_vp; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } if (vp->v_type == VLNK) { error = ELOOP; goto bad; } if ((fmode & O_DIRECTORY) && vp->v_type != VDIR) { error = ENOTDIR; goto bad; } if ((fmode & O_CREAT) == 0) { if (fmode & FREAD) { if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0) goto bad; } if (fmode & FWRITE) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } if ((error = vn_writechk(vp)) != 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0) goto bad; } } if ((fmode & O_TRUNC) && vp->v_type == VREG) { VATTR_NULL(&va); va.va_size = 0; if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0) goto bad; } if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) goto bad; if (vp->v_flag & VCLONED) { cip = (struct cloneinfo *)vp->v_data; vp->v_flag &= ~VCLONED; ndp->ni_vp = cip->ci_vp; /* return cloned vnode */ vp->v_data = cip->ci_data; /* restore v_data */ VOP_UNLOCK(vp); /* keep a reference */ vp = ndp->ni_vp; /* for the increment below */ free(cip, M_TEMP, sizeof(*cip)); } if (fmode & FWRITE) vp->v_writecount++; return (0); bad: vput(vp); return (error); } /* * Check for write permissions on the specified vnode. * Prototype text segments cannot be written. */ int vn_writechk(struct vnode *vp) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); case VNON: case VCHR: case VSOCK: case VFIFO: case VBAD: case VBLK: break; } } /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. */ if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp)) return (ETXTBSY); return (0); } /* * Check whether a write operation would exceed the file size rlimit * for the process, if one should be applied for this operation. 
* If a partial write should take place, the uio is adjusted and the * amount by which the request would have exceeded the limit is returned * via the 'overrun' argument. */ int vn_fsizechk(struct vnode *vp, struct uio *uio, int ioflag, ssize_t *overrun) { struct proc *p = uio->uio_procp; *overrun = 0; if (vp->v_type == VREG && p != NULL && !(ioflag & IO_NOLIMIT)) { rlim_t limit = lim_cur_proc(p, RLIMIT_FSIZE); /* if already at or over the limit, send the signal and fail */ if (uio->uio_offset >= limit) { psignal(p, SIGXFSZ); return (EFBIG); } /* otherwise, clamp the write to stay under the limit */ if (uio->uio_resid > limit - uio->uio_offset) { *overrun = uio->uio_resid - (limit - uio->uio_offset); uio->uio_resid = limit - uio->uio_offset; } } return (0); } /* * Mark a vnode as being the text image of a running process. */ void vn_marktext(struct vnode *vp) { vp->v_flag |= VTEXT; } /* * Vnode close call */ int vn_close(struct vnode *vp, int flags, struct ucred *cred, struct proc *p) { int error; if (flags & FWRITE) vp->v_writecount--; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_CLOSE(vp, flags, cred, p); vput(vp); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. */ int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, size_t *aresid, struct proc *p) { struct uio auio; struct iovec aiov; int error; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_procp = p; if ((ioflg & IO_NODELOCKED) == 0) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else { error = VOP_WRITE(vp, &auio, ioflg, cred); } if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp); if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; return (error); } /* * File table vnode read routine. */ int vn_read(struct file *fp, struct uio *uio, int fflags) { struct vnode *vp = fp->f_data; struct ucred *cred = fp->f_cred; size_t count = uio->uio_resid; off_t offset; int error; KERNEL_LOCK(); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if ((fflags & FO_POSITION) == 0) offset = uio->uio_offset = fp->f_offset; else offset = uio->uio_offset; /* no wrap around of offsets except on character devices */ if (vp->v_type != VCHR && count > LLONG_MAX - offset) { error = EINVAL; goto done; } if (vp->v_type == VDIR) { error = EISDIR; goto done; } error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred); if ((fflags & FO_POSITION) == 0) { mtx_enter(&fp->f_mtx); fp->f_offset += count - uio->uio_resid; mtx_leave(&fp->f_mtx); } done: VOP_UNLOCK(vp); KERNEL_UNLOCK(); return (error); } /* * File table vnode write routine. 
*/ int vn_write(struct file *fp, struct uio *uio, int fflags) { struct vnode *vp = fp->f_data; struct ucred *cred = fp->f_cred; int error, ioflag = IO_UNIT; size_t count; KERNEL_LOCK(); /* note: pwrite/pwritev are unaffected by O_APPEND */ if (vp->v_type == VREG && (fp->f_flag & O_APPEND) && (fflags & FO_POSITION) == 0) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if ((fp->f_flag & FFSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if ((fflags & FO_POSITION) == 0) uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); if ((fflags & FO_POSITION) == 0) { mtx_enter(&fp->f_mtx); if (ioflag & IO_APPEND) fp->f_offset = uio->uio_offset; else fp->f_offset += count - uio->uio_resid; mtx_leave(&fp->f_mtx); } VOP_UNLOCK(vp); KERNEL_UNLOCK(); return (error); } /* * File table wrapper for vn_stat */ int vn_statfile(struct file *fp, struct stat *sb, struct proc *p) { struct vnode *vp = fp->f_data; return vn_stat(vp, sb, p); } /* * vnode stat routine. */ int vn_stat(struct vnode *vp, struct stat *sb, struct proc *p) { struct vattr va; int error; mode_t mode; error = VOP_GETATTR(vp, &va, p->p_ucred, p); if (error) return (error); /* * Copy from vattr table */ memset(sb, 0, sizeof(*sb)); sb->st_dev = va.va_fsid; sb->st_ino = va.va_fileid; mode = va.va_mode; switch (vp->v_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); } sb->st_mode = mode; sb->st_nlink = va.va_nlink; sb->st_uid = va.va_uid; sb->st_gid = va.va_gid; sb->st_rdev = va.va_rdev; sb->st_size = va.va_size; sb->st_atim.tv_sec = va.va_atime.tv_sec; sb->st_atim.tv_nsec = va.va_atime.tv_nsec; sb->st_mtim.tv_sec = va.va_mtime.tv_sec; sb->st_mtim.tv_nsec = va.va_mtime.tv_nsec; sb->st_ctim.tv_sec = va.va_ctime.tv_sec; sb->st_ctim.tv_nsec = va.va_ctime.tv_nsec; sb->st_blksize = va.va_blocksize; sb->st_flags = va.va_flags; sb->st_gen = va.va_gen; sb->st_blocks = va.va_bytes / S_BLKSIZE; return (0); } /* * File table vnode ioctl routine. */ int vn_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) { struct vnode *vp = fp->f_data; struct vattr vattr; int error = ENOTTY; KERNEL_LOCK(); switch (vp->v_type) { case VREG: case VDIR: if (com == FIONREAD) { error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); if (error) break; *(int *)data = vattr.va_size - foffset(fp); } else if (com == FIONBIO || com == FIOASYNC) /* XXX */ error = 0; /* XXX */ break; case VFIFO: case VCHR: case VBLK: error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); if (error == 0 && com == TIOCSCTTY) { struct session *s = p->p_p->ps_session; struct vnode *ovp = s->s_ttyvp; s->s_ttyvp = vp; vref(vp); if (ovp) vrele(ovp); } break; default: break; } KERNEL_UNLOCK(); return (error); } /* * File table vnode poll routine. */ int vn_poll(struct file *fp, int events, struct proc *p) { return (VOP_POLL(fp->f_data, fp->f_flag, events, p)); } /* * Check that the vnode is still valid, and if so * acquire requested lock. 
*/ int vn_lock(struct vnode *vp, int flags) { int error; do { if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; tsleep_nsec(vp, PINOD, "vn_lock", INFSLP); error = ENOENT; } else { vp->v_lockcount++; error = VOP_LOCK(vp, flags); vp->v_lockcount--; if (error == 0) { if ((vp->v_flag & VXLOCK) == 0) return (0); /* * The vnode was exclusively locked while * acquiring the requested lock. Release it and * try again. */ error = ENOENT; VOP_UNLOCK(vp); if (vp->v_lockcount == 0) wakeup_one(&vp->v_lockcount); } } } while (flags & LK_RETRY); return (error); } /* * File table vnode close routine. */ int vn_closefile(struct file *fp, struct proc *p) { struct vnode *vp = fp->f_data; struct flock lf; int error; KERNEL_LOCK(); if ((fp->f_iflags & FIF_HASLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); } error = vn_close(vp, fp->f_flag, fp->f_cred, p); KERNEL_UNLOCK(); return (error); } int vn_kqfilter(struct file *fp, struct knote *kn) { return (VOP_KQFILTER(fp->f_data, fp->f_flag, kn)); } int vn_seek(struct file *fp, off_t *offset, int whence, struct proc *p) { struct ucred *cred = p->p_ucred; struct vnode *vp = fp->f_data; struct vattr vattr; off_t newoff; int error = 0; int special; if (vp->v_type == VFIFO) return (ESPIPE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type == VCHR) special = 1; else special = 0; switch (whence) { case SEEK_CUR: newoff = fp->f_offset + *offset; break; case SEEK_END: error = VOP_GETATTR(vp, &vattr, cred, p); if (error) goto out; newoff = *offset + (off_t)vattr.va_size; break; case SEEK_SET: newoff = *offset; break; default: error = EINVAL; goto out; } if (!special && newoff < 0) { error = EINVAL; goto out; } mtx_enter(&fp->f_mtx); fp->f_offset = newoff; mtx_leave(&fp->f_mtx); *offset = newoff; out: VOP_UNLOCK(vp); return (error); } /* * Common code for vnode access operations. */ /* Check if a directory can be found inside another in the hierarchy */ int vn_isunder(struct vnode *lvp, struct vnode *rvp, struct proc *p) { int error; error = vfs_getcwd_common(lvp, rvp, NULL, NULL, MAXPATHLEN/2, 0, p); if (!error) return (1); return (0); }
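/*
 * Illustrative sketch (not part of this file): the usual pattern for kernel
 * code reading a file through the routines above -- NDINIT() with no
 * nameiop (vn_open() sets it), vn_open(), vn_rdwr() building the uio
 * internally, then vn_close().  Error handling is trimmed to the minimum;
 * the calls mirror the quotaon() usage elsewhere in this tree.
 */
#if 0
static int
read_file_head(struct proc *p, char *path, char *buf, int len)
{
	struct nameidata nd;
	struct vnode *vp;
	size_t resid;
	int error;

	NDINIT(&nd, 0, 0, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD, 0)) != 0)
		return (error);
	vp = nd.ni_vp;
	/* vn_open() returns the vnode locked; vn_rdwr() takes the lock itself */
	VOP_UNLOCK(vp);
	error = vn_rdwr(UIO_READ, vp, buf, len, (off_t)0, UIO_SYSSPACE,
	    0, p->p_ucred, &resid, p);
	vn_close(vp, FREAD, p->p_ucred, p);
	return (error);
}
#endif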
2 /* $OpenBSD: ntfs_vfsops.c,v 1.64 2020/02/27 09:10:31 mpi Exp $ */ /* $NetBSD: ntfs_vfsops.c,v 1.7 2003/04/24 07:50:19 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Id: ntfs_vfsops.c,v 1.7 1999/05/31 11:28:30 phk Exp */ #include <sys/param.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/proc.h> #include <sys/kernel.h> #include <sys/vnode.h> #include <sys/lock.h> #include <sys/mount.h> #include <sys/buf.h> #include <sys/disk.h> #include <sys/fcntl.h> #include <sys/malloc.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/specdev.h> /*#define NTFS_DEBUG 1*/ #include <ntfs/ntfs.h> #include <ntfs/ntfs_inode.h> #include <ntfs/ntfs_subr.h> #include <ntfs/ntfs_vfsops.h> #include <ntfs/ntfs_ihash.h> int ntfs_mount(struct mount *, const char *, void *, struct nameidata *, struct proc *); int ntfs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int ntfs_root(struct mount *, struct vnode **); int ntfs_start(struct mount *, int, struct proc *); int ntfs_statfs(struct mount *, struct statfs *, struct proc *); int ntfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int ntfs_unmount(struct mount *, int, struct proc *); int ntfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp); int ntfs_mountfs(struct vnode *, struct mount *, struct ntfs_args *, struct proc *); int ntfs_vptofh(struct vnode *, struct fid *); int ntfs_init(struct vfsconf *); int ntfs_fhtovp(struct mount *, struct fid *, struct vnode **); int ntfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **); int ntfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); /* * Verify a remote client has export rights and return these rights via. * exflagsp and credanonp. */ int ntfs_checkexp(struct mount *mp, struct mbuf *nam, int *exflagsp, struct ucred **credanonp) { struct netcred *np; struct ntfsmount *ntm = VFSTONTFS(mp); /* * Get the export permission structure for this <mp, client> tuple. 
*/ np = vfs_export_lookup(mp, &ntm->ntm_export, nam); if (np == NULL) return (EACCES); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int ntfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p) { return (EINVAL); } int ntfs_init(struct vfsconf *vcp) { return 0; } int ntfs_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p) { int err = 0; struct vnode *devvp; struct ntfs_args *args = data; char fname[MNAMELEN]; char fspec[MNAMELEN]; ntfs_nthashinit(); /* *** * Mounting non-root file system or updating a file system *** */ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { /* if not updating name...*/ if (args && args->fspec == NULL) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ struct ntfsmount *ntm = VFSTONTFS(mp); err = vfs_export(mp, &ntm->ntm_export, &args->export_info); goto success; } printf("ntfs_mount(): MNT_UPDATE not supported\n"); err = EINVAL; goto error_1; } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ err = copyinstr(args->fspec, fspec, sizeof(fspec), NULL); if (err) goto error_1; if (disk_map(fspec, fname, sizeof(fname), DM_OPENBLCK) == -1) bcopy(fspec, fname, sizeof(fname)); NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { #if 0 /* ******************** * UPDATE ******************** */ if (devvp != ntmp->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { err = set_statfs_info(NULL, UIO_USERSPACE, args->fspec, UIO_USERSPACE, mp, p); } #endif } else { /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ /* Save "last mounted on" info for mount point (NULL pad)*/ bzero(mp->mnt_stat.f_mntonname, MNAMELEN); strlcpy(mp->mnt_stat.f_mntonname, path, MNAMELEN); bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN); bzero(mp->mnt_stat.f_mntfromspec, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN); bcopy(args, &mp->mnt_stat.mount_info.ntfs_args, sizeof(*args)); if ( !err) { err = ntfs_mountfs(devvp, mp, args, p); } } if (err) { goto error_2; } /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return(err); } /* * Common code for mount and mountroot */ int ntfs_mountfs(struct vnode *devvp, struct mount *mp, struct ntfs_args *argsp, struct proc *p) { struct buf *bp; struct ntfsmount *ntmp = NULL; dev_t dev = devvp->v_rdev; int error, ncount, i; struct vnode *vp; /* * Disallow multiple mounts of the same device. 
* Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, INFSLP); VOP_UNLOCK(devvp); if (error) return (error); error = VOP_OPEN(devvp, FREAD, FSCRED, p); if (error) return (error); bp = NULL; error = bread(devvp, BBLOCK, BBSIZE, &bp); if (error) goto out; ntmp = malloc(sizeof *ntmp, M_NTFSMNT, M_WAITOK | M_ZERO); bcopy(bp->b_data, &ntmp->ntm_bootfile, sizeof(struct bootfile)); brelse(bp); bp = NULL; if (strncmp(ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) { error = EINVAL; DPRINTF("ntfs_mountfs: invalid boot block\n"); goto out; } { int8_t cpr = ntmp->ntm_mftrecsz; if( cpr > 0 ) ntmp->ntm_bpmftrec = ntmp->ntm_spc * cpr; else ntmp->ntm_bpmftrec = (1 << (-cpr)) / ntmp->ntm_bps; } DPRINTF("ntfs_mountfs(): bps: %u, spc: %u, media: %x, " "mftrecsz: %u (%u sects)\n", ntmp->ntm_bps, ntmp->ntm_spc, ntmp->ntm_bootfile.bf_media, ntmp->ntm_mftrecsz, ntmp->ntm_bpmftrec); DPRINTF("ntfs_mountfs(): mftcn: 0x%llx|0x%llx\n", ntmp->ntm_mftcn, ntmp->ntm_mftmirrcn); ntmp->ntm_mountp = mp; ntmp->ntm_dev = dev; ntmp->ntm_devvp = devvp; ntmp->ntm_uid = argsp->uid; ntmp->ntm_gid = argsp->gid; ntmp->ntm_mode = argsp->mode; ntmp->ntm_flag = argsp->flag; mp->mnt_data = ntmp; TAILQ_INIT(&ntmp->ntm_ntnodeq); /* set file name encode/decode hooks XXX utf-8 only for now */ ntmp->ntm_wget = ntfs_utf8_wget; ntmp->ntm_wput = ntfs_utf8_wput; ntmp->ntm_wcmp = ntfs_utf8_wcmp; DPRINTF("ntfs_mountfs(): case-%s,%s uid: %d, gid: %d, mode: %o\n", (ntmp->ntm_flag & NTFS_MFLAG_CASEINS) ? "insens." : "sens.", (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES) ? " allnames," : "", ntmp->ntm_uid, ntmp->ntm_gid, ntmp->ntm_mode); /* * We read in some system nodes to do not allow * reclaim them and to have everytime access to them. */ { int pi[3] = { NTFS_MFTINO, NTFS_ROOTINO, NTFS_BITMAPINO }; for (i=0; i<3; i++) { error = VFS_VGET(mp, pi[i], &(ntmp->ntm_sysvn[pi[i]])); if(error) goto out1; ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM; vref(ntmp->ntm_sysvn[pi[i]]); vput(ntmp->ntm_sysvn[pi[i]]); } } /* read the Unicode lowercase --> uppercase translation table, * if necessary */ if ((error = ntfs_toupper_use(mp, ntmp, p))) goto out1; /* * Scan $BitMap and count free clusters */ error = ntfs_calccfree(ntmp, &ntmp->ntm_cfree); if(error) goto out1; /* * Read and translate to internal format attribute * definition file. 
*/ { int num,j; struct attrdef ad; /* Open $AttrDef */ error = VFS_VGET(mp, NTFS_ATTRDEFINO, &vp ); if(error) goto out1; /* Count valid entries */ for(num = 0; ; num++) { error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, num * sizeof(ad), sizeof(ad), &ad, NULL); if (error) goto out1; if (ad.ad_name[0] == 0) break; } /* Alloc memory for attribute definitions */ ntmp->ntm_ad = mallocarray(num, sizeof(struct ntvattrdef), M_NTFSMNT, M_WAITOK); ntmp->ntm_adnum = num; /* Read them and translate */ for(i = 0; i < num; i++){ error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, i * sizeof(ad), sizeof(ad), &ad, NULL); if (error) goto out1; j = 0; do { ntmp->ntm_ad[i].ad_name[j] = ad.ad_name[j]; } while(ad.ad_name[j++]); ntmp->ntm_ad[i].ad_namelen = j - 1; ntmp->ntm_ad[i].ad_type = ad.ad_type; } vput(vp); } mp->mnt_stat.f_fsid.val[0] = dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_namemax = NTFS_MAXFILENAME; mp->mnt_flag |= MNT_LOCAL; devvp->v_specmountpoint = mp; return (0); out1: for (i = 0; i < NTFS_SYSNODESNUM; i++) if (ntmp->ntm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); if (vflush(mp,NULLVP,0)) DPRINTF("ntfs_mountfs: vflush failed\n"); out: if (devvp->v_specinfo) devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (ntmp != NULL) { if (ntmp->ntm_ad != NULL) free(ntmp->ntm_ad, M_NTFSMNT, 0); free(ntmp, M_NTFSMNT, 0); mp->mnt_data = NULL; } /* lock the device vnode before calling VOP_CLOSE() */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); VOP_UNLOCK(devvp); return (error); } int ntfs_start(struct mount *mp, int flags, struct proc *p) { return (0); } int ntfs_unmount(struct mount *mp, int mntflags, struct proc *p) { struct ntfsmount *ntmp; int error, flags, i; DPRINTF("ntfs_unmount: unmounting...\n"); ntmp = VFSTONTFS(mp); flags = 0; if(mntflags & MNT_FORCE) flags |= FORCECLOSE; DPRINTF("ntfs_unmount: vflushing...\n"); error = vflush(mp,NULLVP,flags | SKIPSYSTEM); if (error) { DPRINTF("ntfs_unmount: vflush failed: %d\n", error); return (error); } /* Check if system vnodes are still referenced */ for(i=0;i<NTFS_SYSNODESNUM;i++) { if(((mntflags & MNT_FORCE) == 0) && (ntmp->ntm_sysvn[i] && ntmp->ntm_sysvn[i]->v_usecount > 1)) return (EBUSY); } /* Dereference all system vnodes */ for(i=0;i<NTFS_SYSNODESNUM;i++) if(ntmp->ntm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); /* vflush system vnodes */ error = vflush(mp,NULLVP,flags); if (error) { /* XXX should this be panic() ? */ printf("ntfs_unmount: vflush failed(sysnodes): %d\n",error); } /* Check if the type of device node isn't VBAD before * touching v_specinfo. If the device vnode is revoked, the * field is NULL and touching it causes null pointer derefercence. 
*/ if (ntmp->ntm_devvp->v_type != VBAD) ntmp->ntm_devvp->v_specmountpoint = NULL; /* lock the device vnode before calling VOP_CLOSE() */ vn_lock(ntmp->ntm_devvp, LK_EXCLUSIVE | LK_RETRY); vinvalbuf(ntmp->ntm_devvp, V_SAVE, NOCRED, p, 0, INFSLP); (void)VOP_CLOSE(ntmp->ntm_devvp, FREAD, NOCRED, p); vput(ntmp->ntm_devvp); /* free the toupper table, if this has been last mounted ntfs volume */ ntfs_toupper_unuse(p); DPRINTF("ntfs_unmount: freeing memory...\n"); free(ntmp->ntm_ad, M_NTFSMNT, 0); free(ntmp, M_NTFSMNT, 0); mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; return (0); } int ntfs_root(struct mount *mp, struct vnode **vpp) { struct vnode *nvp; int error = 0; DPRINTF("ntfs_root(): sysvn: %p\n", VFSTONTFS(mp)->ntm_sysvn[NTFS_ROOTINO]); error = VFS_VGET(mp, (ino_t)NTFS_ROOTINO, &nvp); if(error) { printf("ntfs_root: VFS_VGET failed: %d\n",error); return (error); } *vpp = nvp; return (0); } /* * Do operations associated with quotas, not supported */ int ntfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { return EOPNOTSUPP; } int ntfs_calccfree(struct ntfsmount *ntmp, cn_t *cfreep) { struct vnode *vp; u_int8_t *tmp; int j, error; cn_t cfree = 0; uint64_t bmsize, offset; size_t chunksize, i; vp = ntmp->ntm_sysvn[NTFS_BITMAPINO]; bmsize = VTOF(vp)->f_size; if (bmsize > 1024 * 1024) chunksize = 1024 * 1024; else chunksize = bmsize; tmp = malloc(chunksize, M_TEMP, M_WAITOK); for (offset = 0; offset < bmsize; offset += chunksize) { if (chunksize > bmsize - offset) chunksize = bmsize - offset; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, offset, chunksize, tmp, NULL); if (error) goto out; for (i = 0; i < chunksize; i++) for (j = 0; j < 8; j++) if (~tmp[i] & (1 << j)) cfree++; } *cfreep = cfree; out: free(tmp, M_TEMP, 0); return(error); } int ntfs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p) { struct ntfsmount *ntmp = VFSTONTFS(mp); u_int64_t mftallocated; DPRINTF("ntfs_statfs():\n"); mftallocated = VTOF(ntmp->ntm_sysvn[NTFS_MFTINO])->f_allocated; sbp->f_bsize = ntmp->ntm_bps; sbp->f_iosize = ntmp->ntm_bps * ntmp->ntm_spc; sbp->f_blocks = ntmp->ntm_bootfile.bf_spv; sbp->f_bfree = sbp->f_bavail = ntfs_cntobn(ntmp->ntm_cfree); sbp->f_ffree = sbp->f_favail = sbp->f_bfree / ntmp->ntm_bpmftrec; sbp->f_files = mftallocated / ntfs_bntob(ntmp->ntm_bpmftrec) + sbp->f_ffree; copy_statfs_info(sbp, mp); return (0); } int ntfs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { /*DPRINTF("ntfs_sync():\n");*/ return (0); } int ntfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) { struct ntfid *ntfhp = (struct ntfid *)fhp; int error; DDPRINTF("ntfs_fhtovp(): %s: %u\n", mp->mnt_stat.f_mntonname, ntfhp->ntfid_ino); error = ntfs_vgetex(mp, ntfhp->ntfid_ino, ntfhp->ntfid_attr, NULL, LK_EXCLUSIVE | LK_RETRY, 0, vpp); /* XXX */ if (error != 0) { *vpp = NULLVP; return (error); } /* XXX as unlink/rmdir/mkdir/creat are not currently possible * with NTFS, we don't need to check anything else for now */ return (0); } int ntfs_vptofh(struct vnode *vp, struct fid *fhp) { struct ntnode *ntp; struct ntfid *ntfhp; struct fnode *fn; DDPRINTF("ntfs_fhtovp(): %s: %p\n", vp->v_mount->mnt_stat.f_mntonname, vp); fn = VTOF(vp); ntp = VTONT(vp); ntfhp = (struct ntfid *)fhp; ntfhp->ntfid_len = sizeof(struct ntfid); ntfhp->ntfid_ino = ntp->i_number; ntfhp->ntfid_attr = fn->f_attrtype; #ifdef notyet ntfhp->ntfid_gen = ntp->i_gen; #endif return (0); } int ntfs_vgetex(struct mount *mp, ntfsino_t ino, u_int32_t attrtype, char 
*attrname, u_long lkflags, u_long flags, struct vnode **vpp) { int error; struct ntfsmount *ntmp; struct ntnode *ip; struct fnode *fp; struct vnode *vp; enum vtype f_type; DPRINTF("ntfs_vgetex: ino: %u, attr: 0x%x:%s, lkf: 0x%lx, f: 0x%lx\n", ino, attrtype, attrname ? attrname : "", lkflags, flags); ntmp = VFSTONTFS(mp); *vpp = NULL; /* Get ntnode */ error = ntfs_ntlookup(ntmp, ino, &ip); if (error) { printf("ntfs_vget: ntfs_ntget failed\n"); return (error); } /* It may be not initialized fully, so force load it */ if (!(flags & VG_DONTLOADIN) && !(ip->i_flag & IN_LOADED)) { error = ntfs_loadntnode(ntmp, ip); if(error) { printf("ntfs_vget: CAN'T LOAD ATTRIBUTES FOR INO: %d\n", ip->i_number); ntfs_ntput(ip); return (error); } } error = ntfs_fget(ntmp, ip, attrtype, attrname, &fp); if (error) { printf("ntfs_vget: ntfs_fget failed\n"); ntfs_ntput(ip); return (error); } if (!(flags & VG_DONTVALIDFN) && !(fp->f_flag & FN_VALID)) { if ((ip->i_frflag & NTFS_FRFLAG_DIR) && (fp->f_attrtype == NTFS_A_DATA && fp->f_attrname == NULL)) { f_type = VDIR; } else if (flags & VG_EXT) { f_type = VNON; fp->f_size = fp->f_allocated = 0; } else { f_type = VREG; error = ntfs_filesize(ntmp, fp, &fp->f_size, &fp->f_allocated); if (error) { ntfs_ntput(ip); return (error); } } fp->f_flag |= FN_VALID; } /* * We may be calling vget() now. To avoid potential deadlock, we need * to release ntnode lock, since due to locking order vnode * lock has to be acquired first. * ntfs_fget() bumped ntnode usecount, so ntnode won't be recycled * prematurely. */ ntfs_ntput(ip); if (FTOV(fp)) { /* vget() returns error if the vnode has been recycled */ if (vget(FTOV(fp), lkflags) == 0) { *vpp = FTOV(fp); return (0); } } error = getnewvnode(VT_NTFS, ntmp->ntm_mountp, &ntfs_vops, &vp); if(error) { ntfs_frele(fp); ntfs_ntput(ip); return (error); } DPRINTF("ntfs_vget: vnode: %p for ntnode: %u\n", vp, ino); fp->f_vp = vp; vp->v_data = fp; vp->v_type = f_type; if (ino == NTFS_ROOTINO) vp->v_flag |= VROOT; if (lkflags & LK_TYPE_MASK) { error = vn_lock(vp, lkflags); if (error) { vput(vp); return (error); } } *vpp = vp; return (0); } int ntfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) { if (ino > (ntfsino_t)-1) panic("ntfs_vget: alien ino_t %llu", (unsigned long long)ino); return ntfs_vgetex(mp, ino, NTFS_A_DATA, NULL, LK_EXCLUSIVE | LK_RETRY, 0, vpp); /* XXX */ } const struct vfsops ntfs_vfsops = { .vfs_mount = ntfs_mount, .vfs_start = ntfs_start, .vfs_unmount = ntfs_unmount, .vfs_root = ntfs_root, .vfs_quotactl = ntfs_quotactl, .vfs_statfs = ntfs_statfs, .vfs_sync = ntfs_sync, .vfs_vget = ntfs_vget, .vfs_fhtovp = ntfs_fhtovp, .vfs_vptofh = ntfs_vptofh, .vfs_init = ntfs_init, .vfs_sysctl = ntfs_sysctl, .vfs_checkexp = ntfs_checkexp, };
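/*
 * Illustrative sketch (not part of this file): how ntfs_mount() above is
 * typically reached from userland via mount(2).  The struct ntfs_args
 * fields (fspec, uid, gid, mode, flag) are the ones consumed by
 * ntfs_mountfs(); the header location (<ntfs/ntfsmount.h>) and the
 * MOUNT_NTFS type string are assumptions of this example.
 */
#if 0
#include <sys/types.h>
#include <sys/mount.h>
#include <string.h>
#include <ntfs/ntfsmount.h>	/* struct ntfs_args (assumed location) */

int
mount_ntfs_readonly(char *dev, const char *dir)
{
	struct ntfs_args args;

	memset(&args, 0, sizeof(args));
	args.fspec = dev;	/* block device, e.g. "/dev/sd0i" */
	args.uid = 0;
	args.gid = 0;
	args.mode = 0555;
	args.flag = 0;		/* or NTFS_MFLAG_CASEINS for case folding */
	return (mount(MOUNT_NTFS, dir, MNT_RDONLY, &args));
}
#endif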
/* Public domain. */ #ifndef _DRM_MIPI_DSI_H_ #define _DRM_MIPI_DSI_H_ #include <sys/types.h> #include <linux/errno.h> #include <linux/types.h> struct mipi_dsi_host; struct mipi_dsi_device; struct mipi_dsi_msg; struct drm_dsc_picture_parameter_set; struct mipi_dsi_host_ops { int (*attach)(struct mipi_dsi_host *, struct mipi_dsi_device *); int (*detach)(struct mipi_dsi_host *, struct mipi_dsi_device *); ssize_t (*transfer)(struct mipi_dsi_host *, const struct mipi_dsi_msg *); }; struct mipi_dsi_host { const struct mipi_dsi_host_ops *ops; }; struct mipi_dsi_device { struct mipi_dsi_host *host; uint32_t channel; uint32_t mode_flags; #define MIPI_DSI_MODE_LPM (1 << 0) }; struct mipi_dsi_msg { uint8_t type; uint8_t channel; uint16_t flags; #define MIPI_DSI_MSG_USE_LPM (1 << 0) const void *tx_buf; size_t tx_len; uint8_t *rx_buf; size_t rx_len; }; struct mipi_dsi_packet { size_t size; size_t payload_length; uint8_t header[4]; const uint8_t *payload; }; enum mipi_dsi_dcs_tear_mode { MIPI_DSI_DCS_TEAR_MODE_UNUSED }; enum mipi_dsi_pixel_format { MIPI_DSI_FMT_RGB888, MIPI_DSI_FMT_RGB666, MIPI_DSI_FMT_RGB666_PACKED, MIPI_DSI_FMT_RGB565, }; int mipi_dsi_attach(struct mipi_dsi_device *); int mipi_dsi_create_packet(struct mipi_dsi_packet *, const struct mipi_dsi_msg *); ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *, const void *, size_t); ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *, const void *, size_t); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *, u8, void *, size_t); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *, u8, const void *, size_t); int mipi_dsi_dcs_nop(struct mipi_dsi_device *); int mipi_dsi_set_maximum_return_packet_size(struct mipi_dsi_device *, u16); bool mipi_dsi_packet_format_is_long(u8); ssize_t mipi_dsi_compression_mode(struct mipi_dsi_device *, bool); ssize_t mipi_dsi_picture_parameter_set(struct mipi_dsi_device *, const struct drm_dsc_picture_parameter_set *); static inline int mipi_dsi_pixel_format_to_bpp(enum mipi_dsi_pixel_format fmt) { switch (fmt) { case MIPI_DSI_FMT_RGB888: case MIPI_DSI_FMT_RGB666: return 24; case MIPI_DSI_FMT_RGB666_PACKED: return 18; case MIPI_DSI_FMT_RGB565: return 16; } return -EINVAL; } #endif
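/*
 * Illustrative sketch (not part of this header): a panel driver using the
 * declarations above.  The DCS opcodes are standard MIPI DCS values written
 * as raw bytes here (0x11 exit sleep mode, 0x29 set display on); a real
 * driver would use named constants from its DCS header.
 */
#if 0
static int
panel_enable(struct mipi_dsi_device *dsi)
{
	int bpp = mipi_dsi_pixel_format_to_bpp(MIPI_DSI_FMT_RGB888); /* 24 */
	ssize_t r;

	r = mipi_dsi_dcs_write(dsi, 0x11, NULL, 0);	/* exit sleep mode */
	if (r < 0)
		return ((int)r);
	r = mipi_dsi_dcs_write(dsi, 0x29, NULL, 0);	/* set display on */
	if (r < 0)
		return ((int)r);
	return (bpp > 0 ? 0 : -EINVAL);
}
#endif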
3613 3628 1 3620 3621 3627 3646 48 3601 3579 3 3623 /* $OpenBSD: in_cksum.c,v 1.9 2019/04/22 22:47:49 bluhm Exp $ */ /* $NetBSD: in_cksum.c,v 1.11 1996/04/08 19:55:37 jonathan Exp $ */ /* * Copyright (c) 1988, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ #include <sys/param.h> #include <sys/mbuf.h> #include <sys/systm.h> /* * Checksum routine for Internet Protocol family headers (Portable Version). * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} int in_cksum(struct mbuf *m, int len) { uint16_t *w; int sum = 0; int mlen = 0; int byte_swapped = 0; union { uint8_t c[2]; uint16_t s; } s_util; union { uint16_t s[2]; uint32_t l; } l_util; for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, uint16_t *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. * * s_util.c[0] is already saved when scanning previous * mbuf. */ s_util.c[1] = *(uint8_t *)w; sum += s_util.s; w = (uint16_t *)((uint8_t *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. */ if ((1 & (long) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(uint8_t *)w; w = (uint16_t *)((uint8_t *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. 
*/ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) continue; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(uint8_t *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(uint8_t *)w; } if (len) panic("%s: out of data, len %d", __func__, len); if (mlen == -1) { /* The last mbuf has odd # of bytes. Follow the standard (the odd byte may be shifted left by 8 bits or not as determined by endian-ness of the machine) */ s_util.c[1] = 0; sum += s_util.s; } REDUCE; return (~sum & 0xffff); }
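/*
 * Illustrative sketch (not part of this file): the traditional calling
 * pattern for in_cksum() on an IPv4 header -- zero the checksum field,
 * sum the header, store the result; on receive, summing the header with
 * the stored checksum left in place must yield 0.  This mirrors the
 * classic BSD IPv4 output/input usage and is only a usage example.
 */
#if 0
#include <netinet/in.h>
#include <netinet/ip.h>

static void
ip_set_cksum(struct mbuf *m, struct ip *ip, int hlen)
{
	ip->ip_sum = 0;
	ip->ip_sum = in_cksum(m, hlen);
}

static int
ip_cksum_ok(struct mbuf *m, int hlen)
{
	return (in_cksum(m, hlen) == 0);
}
#endif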
2 1 1 2 1 1 1 932 913 /* $OpenBSD: pf_syncookies.c,v 1.7 2018/09/10 15:54:28 henning Exp $ */ /* Copyright (c) 2016,2017 Henning Brauer <henning@openbsd.org> * Copyright (c) 2016 Alexandr Nedvedicky <sashan@openbsd.org> * * syncookie parts based on FreeBSD sys/netinet/tcp_syncache.c * * Copyright (c) 2001 McAfee, Inc. * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * This software was developed for the FreeBSD Project by Jonathan Lemon * and McAfee Research, the Security Research Division of McAfee, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. [2001 McAfee, Inc.] * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * when we're under synflood, we use syncookies to prevent state table * exhaustion. Trigger for the synflood mode is the number of half-open * connections in the state table. 
* We leave synflood mode when the number of half-open states - including * in-flight syncookies - drops far enough again */ /* * syncookie enabled Initial Sequence Number: * 24 bit MAC * 3 bit WSCALE index * 3 bit MSS index * 1 bit SACK permitted * 1 bit odd/even secret * * References: * RFC4987 TCP SYN Flooding Attacks and Common Mitigations * http://cr.yp.to/syncookies.html (overview) * http://cr.yp.to/syncookies/archive (details) */ #include "pflog.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/filio.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/kernel.h> #include <sys/time.h> #include <sys/pool.h> #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/syslog.h> #include <net/if.h> #include <net/if_var.h> #include <net/if_types.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/tcp.h> #include <netinet/tcp_seq.h> #include <netinet/udp.h> #include <netinet/ip_icmp.h> #include <netinet/in_pcb.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_fsm.h> #include <netinet/udp_var.h> #include <netinet/icmp_var.h> #include <netinet/ip_divert.h> #include <net/pfvar.h> #include <net/pfvar_priv.h> #if NPFLOG > 0 #include <net/if_pflog.h> #endif /* NPFLOG > 0 */ union pf_syncookie { uint8_t cookie; struct { uint8_t oddeven:1, sack_ok:1, wscale_idx:3, mss_idx:3; } flags; }; #define PF_SYNCOOKIE_SECRET_SIZE SIPHASH_KEY_LENGTH #define PF_SYNCOOKIE_SECRET_LIFETIME 15 /* seconds */ static struct { struct timeout keytimeout; volatile uint oddeven; SIPHASH_KEY key[2]; uint32_t hiwat; /* absolute; # of states */ uint32_t lowat; } pf_syncookie_status; void pf_syncookie_rotate(void *); void pf_syncookie_newkey(void); uint32_t pf_syncookie_mac(struct pf_pdesc *, union pf_syncookie, uint32_t); uint32_t pf_syncookie_generate(struct pf_pdesc *, uint16_t); void pf_syncookies_init(void) { timeout_set(&pf_syncookie_status.keytimeout, pf_syncookie_rotate, NULL); pf_syncookie_status.hiwat = PFSTATE_HIWAT * PF_SYNCOOKIES_HIWATPCT/100; pf_syncookie_status.lowat = PFSTATE_HIWAT * PF_SYNCOOKIES_LOWATPCT/100; pf_syncookies_setmode(PF_SYNCOOKIES_NEVER); } int pf_syncookies_setmode(u_int8_t mode) { if (mode > PF_SYNCOOKIES_MODE_MAX) return (EINVAL); if (pf_status.syncookies_mode == mode) return (0); pf_status.syncookies_mode = mode; if (pf_status.syncookies_mode == PF_SYNCOOKIES_ALWAYS) { pf_syncookie_newkey(); pf_status.syncookies_active = 1; } return (0); } int pf_syncookies_setwats(u_int32_t hiwat, u_int32_t lowat) { if (lowat > hiwat) return (EINVAL); pf_syncookie_status.hiwat = hiwat; pf_syncookie_status.lowat = lowat; return (0); } int pf_syncookies_getwats(struct pfioc_synflwats *wats) { wats->hiwat = pf_syncookie_status.hiwat; wats->lowat = pf_syncookie_status.lowat; return (0); } int pf_synflood_check(struct pf_pdesc *pd) { KASSERT (pd->proto == IPPROTO_TCP); if (pd->m && (pd->m->m_pkthdr.pf.tag & PF_TAG_SYNCOOKIE_RECREATED)) return (0); if (pf_status.syncookies_mode != PF_SYNCOOKIES_ADAPTIVE) return (pf_status.syncookies_mode); if (!pf_status.syncookies_active && pf_status.states_halfopen > pf_syncookie_status.hiwat) { pf_syncookie_newkey(); pf_status.syncookies_active = 1; DPFPRINTF(LOG_WARNING, "synflood detected, enabling syncookies"); pf_status.lcounters[LCNT_SYNFLOODS]++; } return (pf_status.syncookies_active); } void pf_syncookie_send(struct pf_pdesc *pd) { uint16_t mss; uint32_t iss; mss = max(tcp_mssdflt, pf_get_mss(pd)); iss = 
pf_syncookie_generate(pd, mss); pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport, iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss, 0, 1, 0, pd->rdomain); pf_status.syncookies_inflight[pf_syncookie_status.oddeven]++; pf_status.lcounters[LCNT_SYNCOOKIES_SENT]++; } uint8_t pf_syncookie_validate(struct pf_pdesc *pd) { uint32_t hash, ack, seq; union pf_syncookie cookie; KASSERT(pd->proto == IPPROTO_TCP); seq = ntohl(pd->hdr.tcp.th_seq) - 1; ack = ntohl(pd->hdr.tcp.th_ack) - 1; cookie.cookie = (ack & 0xff) ^ (ack >> 24); /* we don't know oddeven before setting the cookie (union) */ if (pf_status.syncookies_inflight[cookie.flags.oddeven] == 0) return (0); hash = pf_syncookie_mac(pd, cookie, seq); if ((ack & ~0xff) != (hash & ~0xff)) return (0); pf_status.syncookies_inflight[cookie.flags.oddeven]--; pf_status.lcounters[LCNT_SYNCOOKIES_VALID]++; return (1); } /* * all following functions private */ void pf_syncookie_rotate(void *arg) { /* do we want to disable syncookies? */ if (pf_status.syncookies_active && ((pf_status.syncookies_mode == PF_SYNCOOKIES_ADAPTIVE && pf_status.states_halfopen + pf_status.syncookies_inflight[0] + pf_status.syncookies_inflight[1] < pf_syncookie_status.lowat) || pf_status.syncookies_mode == PF_SYNCOOKIES_NEVER)) { pf_status.syncookies_active = 0; DPFPRINTF(LOG_WARNING, "syncookies disabled"); } /* nothing in flight any more? delete keys and return */ if (!pf_status.syncookies_active && pf_status.syncookies_inflight[0] == 0 && pf_status.syncookies_inflight[1] == 0) { memset(&pf_syncookie_status.key[0], 0, PF_SYNCOOKIE_SECRET_SIZE); memset(&pf_syncookie_status.key[1], 0, PF_SYNCOOKIE_SECRET_SIZE); return; } /* new key, including timeout */ pf_syncookie_newkey(); } void pf_syncookie_newkey(void) { pf_syncookie_status.oddeven = (pf_syncookie_status.oddeven + 1) & 0x1; pf_status.syncookies_inflight[pf_syncookie_status.oddeven] = 0; arc4random_buf(&pf_syncookie_status.key[pf_syncookie_status.oddeven], PF_SYNCOOKIE_SECRET_SIZE); timeout_add_sec(&pf_syncookie_status.keytimeout, PF_SYNCOOKIE_SECRET_LIFETIME); } /* * Distribution and probability of certain MSS values. Those in between are * rounded down to the next lower one. * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011] * .2% .3% 5% 7% 7% 20% 15% 45% */ static int pf_syncookie_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 }; /* * Distribution and probability of certain WSCALE values. * The absence of the WSCALE option is encoded with index zero. 
* [WSCALE values histograms, Allman, 2012] * X 10 10 35 5 6 14 10% by host * X 11 4 5 5 18 49 3% by connections */ static int pf_syncookie_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 }; uint32_t pf_syncookie_mac(struct pf_pdesc *pd, union pf_syncookie cookie, uint32_t seq) { SIPHASH_CTX ctx; uint32_t siphash[2]; KASSERT(pd->proto == IPPROTO_TCP); SipHash24_Init(&ctx, &pf_syncookie_status.key[cookie.flags.oddeven]); switch (pd->af) { case AF_INET: SipHash24_Update(&ctx, pd->src, sizeof(pd->src->v4)); SipHash24_Update(&ctx, pd->dst, sizeof(pd->dst->v4)); break; case AF_INET6: SipHash24_Update(&ctx, pd->src, sizeof(pd->src->v6)); SipHash24_Update(&ctx, pd->dst, sizeof(pd->dst->v6)); break; default: panic("unknown address family"); } SipHash24_Update(&ctx, pd->sport, sizeof(*pd->sport)); SipHash24_Update(&ctx, pd->dport, sizeof(*pd->dport)); SipHash24_Update(&ctx, &seq, sizeof(seq)); SipHash24_Update(&ctx, &cookie, sizeof(cookie)); SipHash24_Final((uint8_t *)&siphash, &ctx); return (siphash[0] ^ siphash[1]); } uint32_t pf_syncookie_generate(struct pf_pdesc *pd, uint16_t mss) { uint8_t i, wscale; uint32_t iss, hash; union pf_syncookie cookie; cookie.cookie = 0; /* map MSS */ for (i = nitems(pf_syncookie_msstab) - 1; pf_syncookie_msstab[i] > mss && i > 0; i--) /* nada */; cookie.flags.mss_idx = i; /* map WSCALE */ wscale = pf_get_wscale(pd); for (i = nitems(pf_syncookie_wstab) - 1; pf_syncookie_wstab[i] > wscale && i > 0; i--) /* nada */; cookie.flags.wscale_idx = i; cookie.flags.sack_ok = 0; /* XXX */ cookie.flags.oddeven = pf_syncookie_status.oddeven; hash = pf_syncookie_mac(pd, cookie, ntohl(pd->hdr.tcp.th_seq)); /* * Put the flags into the hash and XOR them to get better ISS number * variance. This doesn't enhance the cryptographic strength and is * done to prevent the 8 cookie bits from showing up directly on the * wire. */ iss = hash & ~0xff; iss |= cookie.cookie ^ (hash >> 24); return (iss); } struct mbuf * pf_syncookie_recreate_syn(struct pf_pdesc *pd) { uint8_t wscale; uint16_t mss; uint32_t ack, seq; union pf_syncookie cookie; seq = ntohl(pd->hdr.tcp.th_seq) - 1; ack = ntohl(pd->hdr.tcp.th_ack) - 1; cookie.cookie = (ack & 0xff) ^ (ack >> 24); if (cookie.flags.mss_idx >= nitems(pf_syncookie_msstab) || cookie.flags.wscale_idx >= nitems(pf_syncookie_wstab)) return (NULL); mss = pf_syncookie_msstab[cookie.flags.mss_idx]; wscale = pf_syncookie_wstab[cookie.flags.wscale_idx]; return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, *pd->dport, seq, 0, TH_SYN, wscale, mss, pd->ttl, 0, PF_TAG_SYNCOOKIE_RECREATED, cookie.flags.sack_ok, pd->rdomain)); }
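/*
 * Illustrative sketch (not part of the OpenBSD sources above): a tiny
 * standalone userland program showing how the 8 syncookie flag bits
 * survive the round trip through the ISN, using the same expressions as
 * pf_syncookie_generate() and pf_syncookie_validate().  The fixed "hash"
 * value is only a stand-in for pf_syncookie_mac()/SipHash, and the union
 * simply mirrors pf_syncookie; only the bit packing is demonstrated here.
 */
#include <stdint.h>
#include <stdio.h>

union pf_syncookie_demo {
	uint8_t		cookie;
	struct {
		uint8_t	oddeven:1,
			sack_ok:1,
			wscale_idx:3,
			mss_idx:3;
	} flags;
};

int
main(void)
{
	union pf_syncookie_demo	in, out;
	uint32_t		hash = 0x9a3f17c4;	/* stand-in for the SipHash MAC */
	uint32_t		iss, ack;

	in.cookie = 0;
	in.flags.mss_idx = 7;		/* MSS 1460, cf. pf_syncookie_msstab[] */
	in.flags.wscale_idx = 3;	/* wscale 2, cf. pf_syncookie_wstab[] */
	in.flags.sack_ok = 0;
	in.flags.oddeven = 1;

	/* encode, as in pf_syncookie_generate(): keep the top 24 MAC bits and
	 * fold the cookie byte, XORed with the MAC's top byte, into the low byte */
	iss = hash & ~0xff;
	iss |= in.cookie ^ (hash >> 24);

	/* the peer's ACK echoes iss + 1; pf_syncookie_validate() subtracts 1 */
	ack = iss;

	/* decode, as in pf_syncookie_validate() */
	out.cookie = (ack & 0xff) ^ (ack >> 24);

	printf("cookie in 0x%02x, out 0x%02x, mac bits %s\n",
	    in.cookie, out.cookie,
	    (ack & ~0xff) == (hash & ~0xff) ? "match" : "mismatch");
	return (0);
}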
/* $OpenBSD: kern_physio.c,v 1.47 2020/02/20 16:26:01 krw Exp $ */ /* $NetBSD: kern_physio.c,v 1.28 1997/05/19 10:43:28 pk Exp $ */ /*- * Copyright (c) 1994 Christopher G. Demetriou * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/buf.h> #include <sys/pool.h> #include <uvm/uvm_extern.h> /* * The routines implemented in this file are described in: * Leffler, et al.: The Design and Implementation of the 4.3BSD * UNIX Operating System (Addison Welley, 1989) * on pages 231-233. */ /* * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly * from the raw device to user buffers, and bypasses the buffer cache. * * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. */ int physio(void (*strategy)(struct buf *), dev_t dev, int flags, void (*minphys)(struct buf *), struct uio *uio) { struct iovec *iovp; struct proc *p = curproc; long done, todo; int error, i, s; struct buf *bp; if ((uio->uio_offset % DEV_BSIZE) != 0) return (EINVAL); error = 0; flags &= B_READ | B_WRITE; /* Create a buffer. */ s = splbio(); bp = pool_get(&bufpool, PR_WAITOK | PR_ZERO); /* [set up the fixed part of the buffer for a transfer] */ bp->b_vnbufs.le_next = NOLIST; bp->b_dev = dev; bp->b_error = 0; bp->b_proc = p; bp->b_flags = B_BUSY; LIST_INIT(&bp->b_dep); splx(s); /* * [while there are data to transfer and no I/O error] * Note that I/O errors are handled with a 'goto' at the bottom * of the 'while' loop. */ for (i = 0; i < uio->uio_iovcnt; i++) { iovp = &uio->uio_iov[i]; while (iovp->iov_len > 0) { void *map = NULL; /* * [mark the buffer busy for physical I/O] * (i.e.
set B_PHYS (because it's an I/O to user * memory), and B_RAW, because B_RAW is to be * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ CLR(bp->b_flags, B_DONE | B_ERROR); bp->b_flags |= (B_BUSY | B_PHYS | B_RAW | flags); /* [set up the buffer for a maximum-sized transfer] */ bp->b_blkno = btodb(uio->uio_offset); /* * Because iov_len is size_t (unsigned) but b_bcount is * long (signed), an overflow is possible. Therefore * limit b_bcount to LONG_MAX before calling the provided * minphys. */ if (iovp->iov_len > LONG_MAX) bp->b_bcount = LONG_MAX; else bp->b_bcount = iovp->iov_len; /* * [call minphys to bound the transfer size] * and remember the amount of data to transfer, * for later comparison. */ (*minphys)(bp); todo = bp->b_bcount; KASSERTMSG(todo >= 0, "minphys broken"); /* * [lock the part of the user address space involved * in the transfer] * Beware vmapbuf(); it clobbers b_data and * saves it in b_saveaddr. However, vunmapbuf() * restores it. */ error = uvm_vslock_device(p, iovp->iov_base, todo, (flags & B_READ) ? PROT_READ | PROT_WRITE : PROT_READ, &map); if (error) goto done; if (map) { bp->b_data = map; } else { bp->b_data = iovp->iov_base; vmapbuf(bp, todo); } /* [call strategy to start the transfer] */ (*strategy)(bp); /* * Note that the raise/wait/lower/get error * steps below would be done by biowait(), but * we want to unlock the address space before * we lower the priority. * * [raise the priority level to splbio] */ s = splbio(); /* [wait for the transfer to complete] */ while ((bp->b_flags & B_DONE) == 0) tsleep_nsec(bp, PRIBIO + 1, "physio", INFSLP); /* Mark it busy again, so nobody else will use it. */ bp->b_flags |= B_BUSY; /* [lower the priority level] */ splx(s); /* * [unlock the part of the address space previously * locked] */ if (!map) vunmapbuf(bp, todo); uvm_vsunlock_device(p, iovp->iov_base, todo, map); /* remember error value (save a splbio/splx pair) */ if (bp->b_flags & B_ERROR) error = (bp->b_error ? bp->b_error : EIO); /* * [deduct the transfer size from the total number * of data to transfer] */ KASSERTMSG(bp->b_resid <= LONG_MAX, "strategy broken"); done = bp->b_bcount - bp->b_resid; KASSERTMSG(done >= 0, "strategy broken"); KASSERTMSG(done <= todo, "strategy broken"); iovp->iov_len -= done; iovp->iov_base = (caddr_t)iovp->iov_base + done; uio->uio_offset += done; uio->uio_resid -= done; /* * Now, check for an error. * Also, handle weird end-of-disk semantics. */ if (error || done < todo) goto done; } } done: /* * [clean up the state of the buffer] */ s = splbio(); /* XXXCDC: is this necessary? */ if (bp->b_vp) brelvp(bp); splx(s); pool_put(&bufpool, bp); return (error); } /* * Leffler, et al., says on p. 231: * "The minphys() routine is called by physio() to adjust the * size of each I/O transfer before the latter is passed to * the strategy routine..." * * so, just adjust the buffer's count accounting to MAXPHYS here, * and return the new count; */ void minphys(struct buf *bp) { if (bp->b_bcount > MAXPHYS) bp->b_bcount = MAXPHYS; }
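/*
 * Illustrative sketch, not part of the file above: the usual way a raw
 * character device wires its read/write entry points up to physio().
 * The names foo_read(), foo_write() and foostrategy() are hypothetical;
 * real drivers follow the same pattern with their own strategy routine and
 * either the generic minphys() above or a tighter driver-specific bound.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/uio.h>

void	foostrategy(struct buf *);	/* hypothetical driver strategy routine */

int
foo_read(dev_t dev, struct uio *uio, int ioflag)
{
	/* move data straight between the device and the user's buffers */
	return (physio(foostrategy, dev, B_READ, minphys, uio));
}

int
foo_write(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(foostrategy, dev, B_WRITE, minphys, uio));
}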
/* $OpenBSD: exec_elf.c,v 1.160 2021/03/10 10:21:47 jsg Exp $ */ /* * Copyright (c) 1996 Per Fogelstrom * All rights reserved. * * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * Copyright (c) 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/pool.h> #include <sys/mount.h> #include <sys/namei.h> #include <sys/vnode.h> #include <sys/core.h> #include <sys/syslog.h> #include <sys/exec.h> #include <sys/exec_elf.h> #include <sys/fcntl.h> #include <sys/ptrace.h> #include <sys/syscall.h> #include <sys/signalvar.h> #include <sys/stat.h> #include <sys/pledge.h> #include <sys/mman.h> #include <uvm/uvm_extern.h> #include <machine/reg.h> #include <machine/exec.h> int elf_load_file(struct proc *, char *, struct exec_package *, struct elf_args *); int elf_check_header(Elf_Ehdr *); int elf_read_from(struct proc *, struct vnode *, u_long, void *, int); void elf_load_psection(struct exec_vmcmd_set *, struct vnode *, Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int); int coredump_elf(struct proc *, void *); void *elf_copyargs(struct exec_package *, struct ps_strings *, void *, void *); int exec_elf_fixup(struct proc *, struct exec_package *); int elf_os_pt_note_name(Elf_Note *); int elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *); extern char sigcode[], esigcode[], sigcoderet[]; #ifdef SYSCALL_DEBUG extern char *syscallnames[]; #endif /* round up and down to page boundaries. */ #define ELF_ROUND(a, b) (((a) + (b) - 1) & ~((b) - 1)) #define ELF_TRUNC(a, b) ((a) & ~((b) - 1)) /* * We limit the number of program headers to 32, this should * be a reasonable limit for ELF, the most we have seen so far is 12 */ #define ELF_MAX_VALID_PHDR 32 /* * How many entries are in the AuxInfo array we pass to the process? */ #define ELF_AUX_ENTRIES 9 /* * This is the OpenBSD ELF emul */ struct emul emul_elf = { "native", NULL, SYS_syscall, SYS_MAXSYSCALL, sysent, #ifdef SYSCALL_DEBUG syscallnames, #else NULL, #endif (sizeof(AuxInfo) * ELF_AUX_ENTRIES / sizeof(char *)), elf_copyargs, setregs, exec_elf_fixup, coredump_elf, sigcode, esigcode, sigcoderet }; #define ELF_NOTE_NAME_OPENBSD 0x01 struct elf_note_name { char *name; int id; } elf_note_names[] = { { "OpenBSD", ELF_NOTE_NAME_OPENBSD }, }; #define ELFROUNDSIZE sizeof(Elf_Word) #define elfround(x) roundup((x), ELFROUNDSIZE) /* * Copy arguments onto the stack in the normal way, but add some * space for extra information in case of dynamic binding. */ void * elf_copyargs(struct exec_package *pack, struct ps_strings *arginfo, void *stack, void *argp) { stack = copyargs(pack, arginfo, stack, argp); if (!stack) return (NULL); /* * Push space for extra arguments on the stack needed by * dynamically linked binaries. */ if (pack->ep_emul_arg != NULL) { pack->ep_emul_argp = stack; stack = (char *)stack + ELF_AUX_ENTRIES * sizeof (AuxInfo); } return (stack); } /* * Check header for validity; return 0 for ok, ENOEXEC if error */ int elf_check_header(Elf_Ehdr *ehdr) { /* * We need to check magic, class size, endianness, and version before * we look at the rest of the Elf_Ehdr structure. These few elements * are represented in a machine independent fashion. */ if (!IS_ELF(*ehdr) || ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || ehdr->e_ident[EI_VERSION] != ELF_TARG_VER) return (ENOEXEC); /* Now check the machine dependent header */ if (ehdr->e_machine != ELF_TARG_MACH || ehdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* Don't allow an insane amount of sections. 
*/ if (ehdr->e_phnum > ELF_MAX_VALID_PHDR) return (ENOEXEC); return (0); } /* * Load a psection at the appropriate address */ void elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp, Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags) { u_long msize, lsize, psize, rm, rf; long diff, offset, bdiff; Elf_Addr base; /* * If the user specified an address, then we load there. */ if (*addr != ELF_NO_ADDR) { if (ph->p_align > 1) { *addr = ELF_TRUNC(*addr, ph->p_align); diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align); /* page align vaddr */ base = *addr + trunc_page(ph->p_vaddr) - ELF_TRUNC(ph->p_vaddr, ph->p_align); } else { diff = 0; base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr; } } else { *addr = ph->p_vaddr; if (ph->p_align > 1) *addr = ELF_TRUNC(*addr, ph->p_align); base = trunc_page(ph->p_vaddr); diff = ph->p_vaddr - *addr; } bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr); /* * Enforce W^X and map W|X segments without X permission * initially. The dynamic linker will make these read-only * and add back X permission after relocation processing. * Static executables with W|X segments will probably crash. */ *prot |= (ph->p_flags & PF_R) ? PROT_READ : 0; *prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0; if ((ph->p_flags & PF_W) == 0) *prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0; msize = ph->p_memsz + diff; offset = ph->p_offset - bdiff; lsize = ph->p_filesz + bdiff; psize = round_page(lsize); /* * Because the pagedvn pager can't handle zero fill of the last * data page if it's not page aligned we map the last page readvn. */ if (ph->p_flags & PF_W) { psize = trunc_page(lsize); if (psize > 0) NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset, *prot, flags); if (psize != lsize) { NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize, base + psize, vp, offset + psize, *prot, flags); } } else { NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset, *prot, flags); } /* * Check if we need to extend the size of the segment */ rm = round_page(*addr + ph->p_memsz + diff); rf = round_page(*addr + ph->p_filesz + diff); if (rm != rf) { NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0, *prot, flags); } *size = msize; } /* * Read from vnode into buffer at offset. */ int elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf, int size) { int error; size_t resid; if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE, 0, p->p_ucred, &resid, p)) != 0) return error; /* * See if we got all of it */ if (resid != 0) return (ENOEXEC); return (0); } /* * Load a file (interpreter/library) pointed to by path [stolen from * coff_load_shlib()]. Made slightly generic so it might be used externally. 
*/ int elf_load_file(struct proc *p, char *path, struct exec_package *epp, struct elf_args *ap) { int error, i; struct nameidata nd; Elf_Ehdr eh; Elf_Phdr *ph = NULL; u_long phsize = 0; Elf_Addr addr; struct vnode *vp; Elf_Phdr *base_ph = NULL; struct interp_ld_sec { Elf_Addr vaddr; u_long memsz; } loadmap[ELF_MAX_VALID_PHDR]; int nload, idx = 0; Elf_Addr pos; int file_align; int loop; size_t randomizequota = ELF_RANDOMIZE_LIMIT; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p); nd.ni_pledge = PLEDGE_RPATH; nd.ni_unveil = UNVEIL_READ; if ((error = namei(&nd)) != 0) { return (error); } vp = nd.ni_vp; if (vp->v_type != VREG) { error = EACCES; goto bad; } if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0) goto bad; if (vp->v_mount->mnt_flag & MNT_NOEXEC) { error = EACCES; goto bad; } if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0) goto bad1; if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0) goto bad1; if (elf_check_header(&eh) || eh.e_type != ET_DYN) { error = ENOEXEC; goto bad1; } ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK); phsize = eh.e_phnum * sizeof(Elf_Phdr); if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0) goto bad1; for (i = 0; i < eh.e_phnum; i++) { if (ph[i].p_type == PT_LOAD) { if (ph[i].p_filesz > ph[i].p_memsz || ph[i].p_memsz == 0) { error = EINVAL; goto bad1; } loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr); loadmap[idx].memsz = round_page (ph[i].p_vaddr + ph[i].p_memsz - loadmap[idx].vaddr); file_align = ph[i].p_align; idx++; } } nload = idx; /* * Load the interpreter where a non-fixed mmap(NULL, ...) * would (i.e. something safely out of the way). */ pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); pos = ELF_ROUND(pos, file_align); loop = 0; for (i = 0; i < nload;/**/) { vaddr_t addr; struct uvm_object *uobj; off_t uoff; size_t size; #ifdef this_needs_fixing if (i == 0) { uobj = &vp->v_uvm.u_obj; /* need to fix uoff */ } else { #endif uobj = NULL; uoff = 0; #ifdef this_needs_fixing } #endif addr = trunc_page(pos + loadmap[i].vaddr); size = round_page(addr + loadmap[i].memsz) - addr; /* CRAP - map_findspace does not avoid daddr+BRKSIZ */ if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) && (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size, (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) { if (loop == 0) { loop = 1; i = 0; pos = 0; continue; } error = ENOMEM; goto bad1; } if (addr != pos + loadmap[i].vaddr) { /* base changed. 
*/ pos = addr - trunc_page(loadmap[i].vaddr); pos = ELF_ROUND(pos,file_align); i = 0; continue; } i++; } /* * Load all the necessary sections */ for (i = 0; i < eh.e_phnum; i++) { Elf_Addr size = 0; int prot = 0; int flags; switch (ph[i].p_type) { case PT_LOAD: if (base_ph == NULL) { flags = VMCMD_BASE; addr = pos; base_ph = &ph[i]; } else { flags = VMCMD_RELATIVE; addr = ph[i].p_vaddr - base_ph->p_vaddr; } elf_load_psection(&epp->ep_vmcmds, nd.ni_vp, &ph[i], &addr, &size, &prot, flags | VMCMD_SYSCALL); /* If entry is within this section it must be text */ if (eh.e_entry >= ph[i].p_vaddr && eh.e_entry < (ph[i].p_vaddr + size)) { epp->ep_entry = addr + eh.e_entry - ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align); if (flags == VMCMD_RELATIVE) epp->ep_entry += pos; ap->arg_interp = pos; } addr += size; break; case PT_DYNAMIC: case PT_PHDR: case PT_NOTE: break; case PT_OPENBSD_RANDOMIZE: if (ph[i].p_memsz > randomizequota) { error = ENOMEM; goto bad1; } randomizequota -= ph[i].p_memsz; NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize, ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0); break; default: break; } } vn_marktext(nd.ni_vp); bad1: VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p); bad: free(ph, M_TEMP, phsize); vput(nd.ni_vp); return (error); } /* * Prepare an Elf binary's exec package * * First, set of the various offsets/lengths in the exec package. * * Then, mark the text image busy (so it can be demand paged) or error out if * this is not possible. Finally, set up vmcmds for the text, data, bss, and * stack segments. */ int exec_elf_makecmds(struct proc *p, struct exec_package *epp) { Elf_Ehdr *eh = epp->ep_hdr; Elf_Phdr *ph, *pp, *base_ph = NULL; Elf_Addr phdr = 0, exe_base = 0; int error, i, has_phdr = 0, names = 0; char *interp = NULL; u_long phsize; size_t randomizequota = ELF_RANDOMIZE_LIMIT; if (epp->ep_hdrvalid < sizeof(Elf_Ehdr)) return (ENOEXEC); if (elf_check_header(eh) || (eh->e_type != ET_EXEC && eh->e_type != ET_DYN)) return (ENOEXEC); /* * check if vnode is in open for writing, because we want to demand- * page out of it. if it is, don't do it, for various reasons. */ if (epp->ep_vp->v_writecount != 0) { #ifdef DIAGNOSTIC if (epp->ep_vp->v_flag & VTEXT) panic("exec: a VTEXT vnode has writecount != 0"); #endif return (ETXTBSY); } /* * Allocate space to hold all the program headers, and read them * from the file */ ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK); phsize = eh->e_phnum * sizeof(Elf_Phdr); if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph, phsize)) != 0) goto bad; epp->ep_tsize = ELF_NO_ADDR; epp->ep_dsize = ELF_NO_ADDR; for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) { if (pp->p_type == PT_INTERP && !interp) { if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN) goto bad; interp = pool_get(&namei_pool, PR_WAITOK); if ((error = elf_read_from(p, epp->ep_vp, pp->p_offset, interp, pp->p_filesz)) != 0) { goto bad; } if (interp[pp->p_filesz - 1] != '\0') goto bad; } else if (pp->p_type == PT_LOAD) { if (pp->p_filesz > pp->p_memsz || pp->p_memsz == 0) { error = EINVAL; goto bad; } if (base_ph == NULL) base_ph = pp; } else if (pp->p_type == PT_PHDR) { has_phdr = 1; } } if (eh->e_type == ET_DYN) { /* need phdr and load sections for PIE */ if (!has_phdr || base_ph == NULL) { error = EINVAL; goto bad; } /* randomize exe_base for PIE */ exe_base = uvm_map_pie(base_ph->p_align); } /* * OK, we want a slightly different twist of the * standard emulation package for "real" elf. */ epp->ep_emul = &emul_elf; /* * Verify this is an OpenBSD executable. 
If it's marked that way * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment. */ if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0) goto bad; if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD) names |= ELF_NOTE_NAME_OPENBSD; /* * Load all the necessary sections */ for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) { Elf_Addr addr, size = 0; int prot = 0; int flags = 0; switch (pp->p_type) { case PT_LOAD: if (exe_base != 0) { if (pp == base_ph) { flags = VMCMD_BASE; addr = exe_base; } else { flags = VMCMD_RELATIVE; addr = pp->p_vaddr - base_ph->p_vaddr; } } else addr = ELF_NO_ADDR; /* Permit system calls in specific main-programs */ if (interp == NULL) { /* statics. Also block the ld.so syscall-grant */ flags |= VMCMD_SYSCALL; p->p_vmspace->vm_map.flags |= VM_MAP_SYSCALL_ONCE; } /* * Calculates size of text and data segments * by starting at first and going to end of last. * 'rwx' sections are treated as data. * this is correct for BSS_PLT, but may not be * for DATA_PLT, is fine for TEXT_PLT. */ elf_load_psection(&epp->ep_vmcmds, epp->ep_vp, pp, &addr, &size, &prot, flags); /* * Update exe_base in case alignment was off. * For PIE, addr is relative to exe_base so * adjust it (non PIE exe_base is 0 so no change). */ if (flags == VMCMD_BASE) exe_base = addr; else addr += exe_base; /* * Decide whether it's text or data by looking * at the protection of the section */ if (prot & PROT_WRITE) { /* data section */ if (epp->ep_dsize == ELF_NO_ADDR) { epp->ep_daddr = addr; epp->ep_dsize = size; } else { if (addr < epp->ep_daddr) { epp->ep_dsize = epp->ep_dsize + epp->ep_daddr - addr; epp->ep_daddr = addr; } else epp->ep_dsize = addr+size - epp->ep_daddr; } } else if (prot & PROT_EXEC) { /* text section */ if (epp->ep_tsize == ELF_NO_ADDR) { epp->ep_taddr = addr; epp->ep_tsize = size; } else { if (addr < epp->ep_taddr) { epp->ep_tsize = epp->ep_tsize + epp->ep_taddr - addr; epp->ep_taddr = addr; } else epp->ep_tsize = addr+size - epp->ep_taddr; } } break; case PT_SHLIB: error = ENOEXEC; goto bad; case PT_INTERP: /* Already did this one */ case PT_DYNAMIC: case PT_NOTE: break; case PT_PHDR: /* Note address of program headers (in text segment) */ phdr = pp->p_vaddr; break; case PT_OPENBSD_RANDOMIZE: if (ph[i].p_memsz > randomizequota) { error = ENOMEM; goto bad; } randomizequota -= ph[i].p_memsz; NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize, ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0); break; default: /* * Not fatal, we don't need to understand everything * :-) */ break; } } phdr += exe_base; /* * Strangely some linux programs may have all load sections marked * writeable, in this case, textsize is not -1, but rather 0; */ if (epp->ep_tsize == ELF_NO_ADDR) epp->ep_tsize = 0; /* * Another possibility is that it has all load sections marked * read-only. Fake a zero-sized data segment right after the * text segment. */ if (epp->ep_dsize == ELF_NO_ADDR) { epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize); epp->ep_dsize = 0; } epp->ep_interp = interp; epp->ep_entry = eh->e_entry + exe_base; /* * Check if we found a dynamically linked binary and arrange to load * its interpreter when the exec file is released. 
*/ if (interp || eh->e_type == ET_DYN) { struct elf_args *ap; ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK); ap->arg_phaddr = phdr; ap->arg_phentsize = eh->e_phentsize; ap->arg_phnum = eh->e_phnum; ap->arg_entry = eh->e_entry + exe_base; ap->arg_interp = exe_base; epp->ep_emul_arg = ap; epp->ep_emul_argsize = sizeof *ap; } free(ph, M_TEMP, phsize); vn_marktext(epp->ep_vp); return (exec_setup_stack(p, epp)); bad: if (interp) pool_put(&namei_pool, interp); free(ph, M_TEMP, phsize); kill_vmcmds(&epp->ep_vmcmds); if (error == 0) return (ENOEXEC); return (error); } /* * Phase II of load. It is now safe to load the interpreter. Info collected * when loading the program is available for setup of the interpreter. */ int exec_elf_fixup(struct proc *p, struct exec_package *epp) { char *interp; int error = 0; struct elf_args *ap; AuxInfo ai[ELF_AUX_ENTRIES], *a; if (epp->ep_emul_arg == NULL) { return (0); } interp = epp->ep_interp; ap = epp->ep_emul_arg; if (interp && (error = elf_load_file(p, interp, epp, ap)) != 0) { free(ap, M_TEMP, epp->ep_emul_argsize); pool_put(&namei_pool, interp); kill_vmcmds(&epp->ep_vmcmds); return (error); } /* * We have to do this ourselves... */ error = exec_process_vmcmds(p, epp); /* * Push extra arguments on the stack needed by dynamically * linked binaries */ if (error == 0) { memset(&ai, 0, sizeof ai); a = ai; a->au_id = AUX_phdr; a->au_v = ap->arg_phaddr; a++; a->au_id = AUX_phent; a->au_v = ap->arg_phentsize; a++; a->au_id = AUX_phnum; a->au_v = ap->arg_phnum; a++; a->au_id = AUX_pagesz; a->au_v = PAGE_SIZE; a++; a->au_id = AUX_base; a->au_v = ap->arg_interp; a++; a->au_id = AUX_flags; a->au_v = 0; a++; a->au_id = AUX_entry; a->au_v = ap->arg_entry; a++; a->au_id = AUX_openbsd_timekeep; a->au_v = p->p_p->ps_timekeep; a++; a->au_id = AUX_null; a->au_v = 0; a++; error = copyout(ai, epp->ep_emul_argp, sizeof ai); } free(ap, M_TEMP, epp->ep_emul_argsize); if (interp) pool_put(&namei_pool, interp); return (error); } int elf_os_pt_note_name(Elf_Note *np) { int i, j; for (i = 0; i < nitems(elf_note_names); i++) { size_t namlen = strlen(elf_note_names[i].name); if (np->namesz < namlen) continue; /* verify name padding (after the NUL) is NUL */ for (j = namlen + 1; j < elfround(np->namesz); j++) if (((char *)(np + 1))[j] != '\0') continue; /* verify desc padding is NUL */ for (j = np->descsz; j < elfround(np->descsz); j++) if (((char *)(np + 1))[j] != '\0') continue; if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0) return elf_note_names[i].id; } return (0); } int elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp) { Elf_Phdr *hph, *ph; Elf_Note *np = NULL; size_t phsize, offset, pfilesz = 0, total; int error, names = 0; hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK); phsize = eh->e_phnum * sizeof(Elf_Phdr); if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, hph, phsize)) != 0) goto out1; for (ph = hph; ph < &hph[eh->e_phnum]; ph++) { if (ph->p_type == PT_OPENBSD_WXNEEDED) { epp->ep_flags |= EXEC_WXNEEDED; continue; } if (ph->p_type != PT_NOTE || ph->p_filesz > 1024) continue; if (np && ph->p_filesz != pfilesz) { free(np, M_TEMP, pfilesz); np = NULL; } if (!np) np = malloc(ph->p_filesz, M_TEMP, M_WAITOK); pfilesz = ph->p_filesz; if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset, np, ph->p_filesz)) != 0) goto out2; for (offset = 0; offset < ph->p_filesz; offset += total) { Elf_Note *np2 = (Elf_Note *)((char *)np + offset); if (offset + sizeof(Elf_Note) > ph->p_filesz) break; total = sizeof(Elf_Note) + 
elfround(np2->namesz) + elfround(np2->descsz); if (offset + total > ph->p_filesz) break; names |= elf_os_pt_note_name(np2); } } out2: free(np, M_TEMP, pfilesz); out1: free(hph, M_TEMP, phsize); *namesp = names; return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC); } /* * Start of routines related to dumping core */ #ifdef SMALL_KERNEL int coredump_elf(struct proc *p, void *cookie) { return EPERM; } #else /* !SMALL_KERNEL */ struct writesegs_state { off_t notestart; off_t secstart; off_t secoff; struct proc *p; void *iocookie; Elf_Phdr *psections; size_t psectionslen; size_t notesize; int npsections; }; uvm_coredump_setup_cb coredump_setup_elf; uvm_coredump_walk_cb coredump_walk_elf; int coredump_notes_elf(struct proc *, void *, size_t *); int coredump_note_elf(struct proc *, void *, size_t *); int coredump_writenote_elf(struct proc *, void *, Elf_Note *, const char *, void *); int coredump_elf(struct proc *p, void *cookie) { #ifdef DIAGNOSTIC off_t offset; #endif struct writesegs_state ws; size_t notesize; int error, i; ws.p = p; ws.iocookie = cookie; ws.psections = NULL; /* * Walk the map to get all the segment offsets and lengths, * write out the ELF header. */ error = uvm_coredump_walkmap(p, coredump_setup_elf, coredump_walk_elf, &ws); if (error) goto out; error = coredump_write(cookie, UIO_SYSSPACE, ws.psections, ws.psectionslen); if (error) goto out; /* Write out the notes. */ error = coredump_notes_elf(p, cookie, &notesize); if (error) goto out; #ifdef DIAGNOSTIC if (notesize != ws.notesize) panic("coredump: notesize changed: %zu != %zu", ws.notesize, notesize); offset = ws.notestart + notesize; if (offset != ws.secstart) panic("coredump: offset %lld != secstart %lld", (long long) offset, (long long) ws.secstart); #endif /* Pass 3: finally, write the sections themselves. */ for (i = 0; i < ws.npsections - 1; i++) { Elf_Phdr *pent = &ws.psections[i]; if (pent->p_filesz == 0) continue; #ifdef DIAGNOSTIC if (offset != pent->p_offset) panic("coredump: offset %lld != p_offset[%d] %lld", (long long) offset, i, (long long) pent->p_filesz); #endif error = coredump_write(cookie, UIO_USERSPACE, (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz); if (error) goto out; coredump_unmap(cookie, (vaddr_t)pent->p_vaddr, (vaddr_t)pent->p_vaddr + pent->p_filesz); #ifdef DIAGNOSTIC offset += ws.psections[i].p_filesz; #endif } out: free(ws.psections, M_TEMP, ws.psectionslen); return (error); } /* * Normally we lay out core files like this: * [ELF Header] [Program headers] [Notes] [data for PT_LOAD segments] * * However, if there's >= 65535 segments then it overflows the field * in the ELF header, so the standard specifies putting a magic * number there and saving the real count in the .sh_info field of * the first *section* header...which requires generating a section * header. To avoid confusing tools, we include an .shstrtab section * as well so all the indexes look valid. So in this case we lay * out the core file like this: * [ELF Header] [Section Headers] [.shstrtab] [Program headers] \ * [Notes] [data for PT_LOAD segments] * * The 'shstrtab' structure below is data for the second of the two * section headers, plus the .shstrtab itself, in one const buffer. 
*/ static const struct { Elf_Shdr shdr; char shstrtab[sizeof(ELF_SHSTRTAB) + 1]; } shstrtab = { .shdr = { .sh_name = 1, /* offset in .shstrtab below */ .sh_type = SHT_STRTAB, .sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr), .sh_size = sizeof(ELF_SHSTRTAB) + 1, .sh_addralign = 1, }, .shstrtab = "\0" ELF_SHSTRTAB, }; int coredump_setup_elf(int segment_count, void *cookie) { Elf_Ehdr ehdr; struct writesegs_state *ws = cookie; Elf_Phdr *note; int error; /* Get the count of segments, plus one for the PT_NOTE */ ws->npsections = segment_count + 1; /* Get the size of the notes. */ error = coredump_notes_elf(ws->p, NULL, &ws->notesize); if (error) return error; /* Setup the ELF header */ memset(&ehdr, 0, sizeof(ehdr)); memcpy(ehdr.e_ident, ELFMAG, SELFMAG); ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS; ehdr.e_ident[EI_DATA] = ELF_TARG_DATA; ehdr.e_ident[EI_VERSION] = EV_CURRENT; /* XXX Should be the OSABI/ABI version of the executable. */ ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV; ehdr.e_ident[EI_ABIVERSION] = 0; ehdr.e_type = ET_CORE; /* XXX This should be the e_machine of the executable. */ ehdr.e_machine = ELF_TARG_MACH; ehdr.e_version = EV_CURRENT; ehdr.e_entry = 0; ehdr.e_flags = 0; ehdr.e_ehsize = sizeof(ehdr); ehdr.e_phentsize = sizeof(Elf_Phdr); if (ws->npsections < PN_XNUM) { ehdr.e_phoff = sizeof(ehdr); ehdr.e_shoff = 0; ehdr.e_phnum = ws->npsections; ehdr.e_shentsize = 0; ehdr.e_shnum = 0; ehdr.e_shstrndx = 0; } else { /* too many segments, use extension setup */ ehdr.e_shoff = sizeof(ehdr); ehdr.e_phnum = PN_XNUM; ehdr.e_shentsize = sizeof(Elf_Shdr); ehdr.e_shnum = 2; ehdr.e_shstrndx = 1; ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size; } /* Write out the ELF header. */ error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr)); if (error) return error; /* * If an section header is needed to store extension info, write * it out after the ELF header and before the program header. 
*/ if (ehdr.e_shnum != 0) { Elf_Shdr shdr = { .sh_info = ws->npsections }; error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr, sizeof shdr); if (error) return error; error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab, sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab)); if (error) return error; } /* * Allocate the segment header array and setup to collect * the section sizes and offsets */ ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr), M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO); if (ws->psections == NULL) return ENOMEM; ws->psectionslen = ws->npsections * sizeof(Elf_Phdr); ws->notestart = ehdr.e_phoff + ws->psectionslen; ws->secstart = ws->notestart + ws->notesize; ws->secoff = ws->secstart; /* Fill in the PT_NOTE segment header in the last slot */ note = &ws->psections[ws->npsections - 1]; note->p_type = PT_NOTE; note->p_offset = ws->notestart; note->p_vaddr = 0; note->p_paddr = 0; note->p_filesz = ws->notesize; note->p_memsz = 0; note->p_flags = PF_R; note->p_align = ELFROUNDSIZE; return (0); } int coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot, int nsegment, void *cookie) { struct writesegs_state *ws = cookie; Elf_Phdr phdr; vsize_t size, realsize; size = end - start; realsize = realend - start; phdr.p_type = PT_LOAD; phdr.p_offset = ws->secoff; phdr.p_vaddr = start; phdr.p_paddr = 0; phdr.p_filesz = realsize; phdr.p_memsz = size; phdr.p_flags = 0; if (prot & PROT_READ) phdr.p_flags |= PF_R; if (prot & PROT_WRITE) phdr.p_flags |= PF_W; if (prot & PROT_EXEC) phdr.p_flags |= PF_X; phdr.p_align = PAGE_SIZE; ws->secoff += phdr.p_filesz; ws->psections[nsegment] = phdr; return (0); } int coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep) { struct ps_strings pss; struct iovec iov; struct uio uio; struct elfcore_procinfo cpi; Elf_Note nhdr; struct process *pr = p->p_p; struct proc *q; size_t size, notesize; int error; size = 0; /* First, write an elfcore_procinf